author    Dimitry Andric <dim@FreeBSD.org>  2017-12-24 01:00:08 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2017-12-24 01:00:08 +0000
commit    c7dac04c3480f3c20487f912f77343139fce2d99 (patch)
tree      21a09bce0171e27bd1e92649db9df797fa097cea
parent    044eb2f6afba375a914ac9d8024f8f5142bb912e (diff)
download  src-vendor/llvm/llvm-trunk-r321414.tar.gz
          src-vendor/llvm/llvm-trunk-r321414.zip

Vendor import of llvm trunk r321414 (tag: vendor/llvm/llvm-trunk-r321414)
-rw-r--r-- docs/Extensions.rst | 28
-rw-r--r-- docs/MIRLangRef.rst | 45
-rw-r--r-- docs/tutorial/LangImpl09.rst | 5
-rw-r--r-- examples/Kaleidoscope/CMakeLists.txt | 1
-rw-r--r-- examples/Kaleidoscope/Chapter9/toy.cpp | 5
-rw-r--r-- include/llvm-c/lto.h | 14
-rw-r--r-- include/llvm/Analysis/AliasAnalysis.h | 77
-rw-r--r-- include/llvm/Analysis/AliasAnalysisEvaluator.h | 8
-rw-r--r-- include/llvm/Analysis/LoopAccessAnalysis.h | 15
-rw-r--r-- include/llvm/Analysis/MemoryDependenceAnalysis.h | 6
-rw-r--r-- include/llvm/Analysis/ProfileSummaryInfo.h | 8
-rw-r--r-- include/llvm/Analysis/ScalarEvolutionExpander.h | 2
-rw-r--r-- include/llvm/Analysis/TargetTransformInfo.h | 7
-rw-r--r-- include/llvm/Analysis/TargetTransformInfoImpl.h | 2
-rw-r--r-- include/llvm/BinaryFormat/Wasm.h | 2
-rw-r--r-- include/llvm/BinaryFormat/WasmRelocs.def (renamed from include/llvm/BinaryFormat/WasmRelocs/WebAssembly.def) | 0
-rw-r--r-- include/llvm/CodeGen/BasicTTIImpl.h | 14
-rw-r--r-- include/llvm/CodeGen/GlobalISel/InstructionSelector.h | 17
-rw-r--r-- include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h | 6
-rw-r--r-- include/llvm/CodeGen/LiveStacks.h (renamed from include/llvm/CodeGen/LiveStackAnalysis.h) | 8
-rw-r--r-- include/llvm/CodeGen/MachineOperand.h | 7
-rw-r--r-- include/llvm/CodeGen/RuntimeLibcalls.def | 3
-rw-r--r-- include/llvm/CodeGen/SDNodeProperties.td | 34
-rw-r--r-- include/llvm/CodeGen/SelectionDAGNodes.h | 12
-rw-r--r-- include/llvm/CodeGen/TargetLowering.h | 11
-rw-r--r-- include/llvm/DebugInfo/DWARF/DWARFUnit.h | 100
-rw-r--r-- include/llvm/FuzzMutate/IRMutator.h | 4
-rw-r--r-- include/llvm/IR/Function.h | 6
-rw-r--r-- include/llvm/IR/Intrinsics.td | 10
-rw-r--r-- include/llvm/LTO/legacy/ThinLTOCodeGenerator.h | 10
-rw-r--r-- include/llvm/MC/MCAsmInfo.h | 3
-rw-r--r-- include/llvm/MC/MCStreamer.h | 11
-rw-r--r-- include/llvm/Object/Wasm.h | 21
-rw-r--r-- include/llvm/Support/CachePruning.h | 5
-rw-r--r-- include/llvm/Support/MemoryBuffer.h | 66
-rw-r--r-- include/llvm/Support/YAMLTraits.h | 8
-rw-r--r-- include/llvm/Target/TargetMachine.h | 18
-rw-r--r-- include/llvm/Target/TargetSelectionDAG.td | 26
-rw-r--r-- include/llvm/Transforms/Instrumentation.h | 2
-rw-r--r-- include/llvm/Transforms/Utils/CallPromotionUtils.h | 18
-rw-r--r-- include/llvm/module.modulemap | 2
-rw-r--r-- lib/Analysis/AliasAnalysis.cpp | 95
-rw-r--r-- lib/Analysis/AliasAnalysisEvaluator.cpp | 61
-rw-r--r-- lib/Analysis/BasicAliasAnalysis.cpp | 13
-rw-r--r-- lib/Analysis/CFGPrinter.cpp | 8
-rw-r--r-- lib/Analysis/GlobalsModRef.cpp | 21
-rw-r--r-- lib/Analysis/InlineCost.cpp | 95
-rw-r--r-- lib/Analysis/LoopAccessAnalysis.cpp | 71
-rw-r--r-- lib/Analysis/MemoryDependenceAnalysis.cpp | 23
-rw-r--r-- lib/Analysis/MemorySSA.cpp | 39
-rw-r--r-- lib/Analysis/ModuleSummaryAnalysis.cpp | 2
-rw-r--r-- lib/Analysis/ProfileSummaryInfo.cpp | 60
-rw-r--r-- lib/Analysis/ScalarEvolution.cpp | 1
-rw-r--r-- lib/Analysis/TargetTransformInfo.cpp | 4
-rw-r--r-- lib/Analysis/TypeBasedAliasAnalysis.cpp | 23
-rw-r--r-- lib/Bitcode/Writer/BitcodeWriter.cpp | 2
-rw-r--r-- lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 1
-rw-r--r-- lib/CodeGen/CMakeLists.txt | 2
-rw-r--r-- lib/CodeGen/CodeGenPrepare.cpp | 17
-rw-r--r-- lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 3
-rw-r--r-- lib/CodeGen/InlineSpiller.cpp | 2
-rw-r--r-- lib/CodeGen/LLVMTargetMachine.cpp | 7
-rw-r--r-- lib/CodeGen/LiveStacks.cpp (renamed from lib/CodeGen/LiveStackAnalysis.cpp) | 4
-rw-r--r-- lib/CodeGen/MIRPrinter.cpp | 197
-rw-r--r-- lib/CodeGen/MachineBlockPlacement.cpp | 6
-rw-r--r-- lib/CodeGen/MachineOperand.cpp | 232
-rw-r--r-- lib/CodeGen/MachineVerifier.cpp | 2
-rw-r--r-- lib/CodeGen/README.txt | 2
-rw-r--r-- lib/CodeGen/RegAllocBasic.cpp | 2
-rw-r--r-- lib/CodeGen/RegAllocGreedy.cpp | 2
-rw-r--r-- lib/CodeGen/RegAllocPBQP.cpp | 2
-rw-r--r-- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 177
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 2
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 8
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 6
-rw-r--r-- lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp | 1
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 3
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp | 17
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 32
-rw-r--r-- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 2
-rw-r--r-- lib/CodeGen/StackSlotColoring.cpp | 2
-rw-r--r-- lib/CodeGen/TargetLoweringBase.cpp | 68
-rw-r--r-- lib/CodeGen/VirtRegMap.cpp | 2
-rw-r--r-- lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp | 1
-rw-r--r-- lib/DebugInfo/DWARF/DWARFContext.cpp | 153
-rw-r--r-- lib/DebugInfo/DWARF/DWARFUnit.cpp | 505
-rw-r--r-- lib/Demangle/ItaniumDemangle.cpp | 3
-rw-r--r-- lib/FuzzMutate/IRMutator.cpp | 19
-rw-r--r-- lib/IR/ConstantFold.cpp | 1
-rw-r--r-- lib/IR/Function.cpp | 4
-rw-r--r-- lib/IR/Value.cpp | 11
-rw-r--r-- lib/MC/MCAsmStreamer.cpp | 47
-rw-r--r-- lib/MC/MCStreamer.cpp | 22
-rw-r--r-- lib/MC/WasmObjectWriter.cpp | 74
-rw-r--r-- lib/Object/ELF.cpp | 1
-rw-r--r-- lib/Object/WasmObjectFile.cpp | 29
-rw-r--r-- lib/Object/WindowsResource.cpp | 6
-rw-r--r-- lib/ObjectYAML/WasmYAML.cpp | 2
-rw-r--r-- lib/Passes/LLVMBuild.txt | 2
-rw-r--r-- lib/Support/APFloat.cpp | 4
-rw-r--r-- lib/Support/CachePruning.cpp | 4
-rw-r--r-- lib/Support/MemoryBuffer.cpp | 162
-rw-r--r-- lib/Support/StringRef.cpp | 2
-rw-r--r-- lib/Support/TargetParser.cpp | 5
-rw-r--r-- lib/Support/YAMLTraits.cpp | 7
-rw-r--r-- lib/Target/AArch64/AArch64AsmPrinter.cpp | 14
-rw-r--r-- lib/Target/AArch64/AArch64FastISel.cpp | 5
-rw-r--r-- lib/Target/AArch64/AArch64FrameLowering.cpp | 86
-rw-r--r-- lib/Target/AArch64/AArch64ISelLowering.cpp | 12
-rw-r--r-- lib/Target/AArch64/AArch64InstrInfo.cpp | 13
-rw-r--r-- lib/Target/AArch64/AArch64SelectionDAGInfo.cpp | 8
-rw-r--r-- lib/Target/AArch64/AArch64Subtarget.cpp | 13
-rw-r--r-- lib/Target/AArch64/AArch64Subtarget.h | 7
-rw-r--r-- lib/Target/AArch64/AArch64SystemOperands.td | 3
-rw-r--r-- lib/Target/AArch64/AArch64TargetMachine.cpp | 7
-rw-r--r-- lib/Target/AArch64/AArch64TargetMachine.h | 3
-rw-r--r-- lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 36
-rw-r--r-- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 6
-rw-r--r-- lib/Target/AMDGPU/AMDGPUISelLowering.h | 10
-rw-r--r-- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 8
-rw-r--r-- lib/Target/AMDGPU/AMDGPUTargetMachine.h | 2
-rw-r--r-- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 34
-rw-r--r-- lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 35
-rw-r--r-- lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h | 3
-rw-r--r-- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp | 4
-rw-r--r-- lib/Target/AMDGPU/SIRegisterInfo.td | 182
-rw-r--r-- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 4
-rw-r--r-- lib/Target/ARC/ARCTargetMachine.cpp | 7
-rw-r--r-- lib/Target/ARC/ARCTargetMachine.h | 2
-rw-r--r-- lib/Target/ARM/ARM.h | 1
-rw-r--r-- lib/Target/ARM/ARM.td | 4
-rw-r--r-- lib/Target/ARM/ARMFastISel.cpp | 2
-rw-r--r-- lib/Target/ARM/ARMISelLowering.cpp | 134
-rw-r--r-- lib/Target/ARM/ARMISelLowering.h | 2
-rw-r--r-- lib/Target/ARM/ARMInstrInfo.td | 9
-rw-r--r-- lib/Target/ARM/ARMInstrThumb2.td | 4
-rw-r--r-- lib/Target/ARM/ARMInstructionSelector.cpp | 58
-rw-r--r-- lib/Target/ARM/ARMLegalizerInfo.cpp | 7
-rw-r--r-- lib/Target/ARM/ARMRegisterBankInfo.cpp | 20
-rw-r--r-- lib/Target/ARM/ARMSubtarget.cpp | 5
-rw-r--r-- lib/Target/ARM/ARMSubtarget.h | 9
-rw-r--r-- lib/Target/ARM/ARMTargetMachine.cpp | 8
-rw-r--r-- lib/Target/ARM/ARMTargetMachine.h | 3
-rw-r--r-- lib/Target/ARM/ARMTargetTransformInfo.cpp | 19
-rw-r--r-- lib/Target/ARM/ARMTargetTransformInfo.h | 2
-rw-r--r-- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 10
-rw-r--r-- lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 4
-rw-r--r-- lib/Target/ARM/Thumb2SizeReduction.cpp | 8
-rw-r--r-- lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp | 10
-rw-r--r-- lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp | 70
-rw-r--r-- lib/Target/Hexagon/HexagonISelLowering.cpp | 345
-rw-r--r-- lib/Target/Hexagon/HexagonISelLowering.h | 12
-rw-r--r-- lib/Target/Hexagon/HexagonISelLoweringHVX.cpp | 134
-rw-r--r-- lib/Target/Hexagon/HexagonPatterns.td | 9
-rw-r--r-- lib/Target/Hexagon/HexagonRegisterInfo.td | 5
-rw-r--r-- lib/Target/Hexagon/HexagonSubtarget.h | 32
-rw-r--r-- lib/Target/Hexagon/HexagonTargetMachine.cpp | 7
-rw-r--r-- lib/Target/Hexagon/HexagonTargetMachine.h | 2
-rw-r--r-- lib/Target/Lanai/LanaiTargetMachine.cpp | 7
-rw-r--r-- lib/Target/Lanai/LanaiTargetMachine.h | 2
-rw-r--r-- lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 2
-rw-r--r-- lib/Target/Mips/MipsRegisterInfo.td | 2
-rw-r--r-- lib/Target/Mips/MipsTargetMachine.cpp | 21
-rw-r--r-- lib/Target/Mips/MipsTargetMachine.h | 2
-rw-r--r-- lib/Target/NVPTX/NVPTXTargetMachine.cpp | 7
-rw-r--r-- lib/Target/NVPTX/NVPTXTargetMachine.h | 2
-rw-r--r-- lib/Target/PowerPC/PPCFrameLowering.cpp | 6
-rw-r--r-- lib/Target/PowerPC/PPCISelLowering.cpp | 11
-rw-r--r-- lib/Target/PowerPC/PPCMIPeephole.cpp | 18
-rw-r--r-- lib/Target/PowerPC/PPCTargetMachine.cpp | 7
-rw-r--r-- lib/Target/PowerPC/PPCTargetMachine.h | 2
-rw-r--r-- lib/Target/SystemZ/SystemZTargetMachine.cpp | 7
-rw-r--r-- lib/Target/SystemZ/SystemZTargetMachine.h | 2
-rw-r--r-- lib/Target/TargetMachine.cpp | 13
-rw-r--r-- lib/Target/WebAssembly/WebAssemblyRegStackify.cpp | 10
-rw-r--r-- lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp | 8
-rw-r--r-- lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp | 7
-rw-r--r-- lib/Target/WebAssembly/WebAssemblyTargetMachine.h | 3
-rw-r--r-- lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 2
-rw-r--r-- lib/Target/X86/X86.td | 17
-rw-r--r-- lib/Target/X86/X86DomainReassignment.cpp | 167
-rw-r--r-- lib/Target/X86/X86ISelDAGToDAG.cpp | 9
-rw-r--r-- lib/Target/X86/X86ISelLowering.cpp | 465
-rw-r--r-- lib/Target/X86/X86Instr3DNow.td | 20
-rw-r--r-- lib/Target/X86/X86InstrFormats.td | 8
-rw-r--r-- lib/Target/X86/X86InstrInfo.td | 3
-rw-r--r-- lib/Target/X86/X86InstrSSE.td | 2
-rw-r--r-- lib/Target/X86/X86SelectionDAGInfo.cpp | 7
-rw-r--r-- lib/Target/X86/X86Subtarget.cpp | 24
-rw-r--r-- lib/Target/X86/X86Subtarget.h | 29
-rw-r--r-- lib/Target/X86/X86TargetMachine.cpp | 7
-rw-r--r-- lib/Target/X86/X86TargetMachine.h | 2
-rw-r--r-- lib/Target/XCore/XCoreTargetMachine.cpp | 7
-rw-r--r-- lib/Target/XCore/XCoreTargetMachine.h | 2
-rw-r--r-- lib/Transforms/IPO/PartialInlining.cpp | 9
-rw-r--r-- lib/Transforms/IPO/SampleProfile.cpp | 5
-rw-r--r-- lib/Transforms/IPO/WholeProgramDevirt.cpp | 1
-rw-r--r-- lib/Transforms/InstCombine/InstCombineCalls.cpp | 1
-rw-r--r-- lib/Transforms/Instrumentation/HWAddressSanitizer.cpp | 25
-rw-r--r-- lib/Transforms/Scalar/CallSiteSplitting.cpp | 46
-rw-r--r-- lib/Transforms/Scalar/JumpThreading.cpp | 19
-rw-r--r-- lib/Transforms/Scalar/LoopSink.cpp | 2
-rw-r--r-- lib/Transforms/Scalar/LoopUnrollPass.cpp | 2
-rw-r--r-- lib/Transforms/Scalar/MemCpyOptimizer.cpp | 56
-rw-r--r-- lib/Transforms/Scalar/SCCP.cpp | 29
-rw-r--r-- lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp | 2
-rw-r--r-- lib/Transforms/Utils/CallPromotionUtils.cpp | 255
-rw-r--r-- lib/Transforms/Utils/LoopUnrollPeel.cpp | 2
-rw-r--r-- lib/Transforms/Utils/SimplifyCFG.cpp | 203
-rw-r--r-- lib/Transforms/Vectorize/LoopVectorize.cpp | 4
-rw-r--r-- lib/Transforms/Vectorize/SLPVectorizer.cpp | 279
-rw-r--r-- test/Analysis/BasicAA/args-rets-allocas-loads.ll | 7
-rw-r--r-- test/Analysis/BasicAA/call-attrs.ll | 6
-rw-r--r-- test/Analysis/BasicAA/cs-cs-arm.ll | 6
-rw-r--r-- test/Analysis/BasicAA/cs-cs.ll | 62
-rw-r--r-- test/Analysis/MemorySSA/volatile-clobber.ll | 13
-rw-r--r-- test/Analysis/ValueTracking/memory-dereferenceable.ll | 22
-rw-r--r-- test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll | 2
-rw-r--r-- test/CodeGen/AArch64/GlobalISel/fp128-legalize-crash-pr35690.mir | 44
-rw-r--r-- test/CodeGen/AArch64/GlobalISel/translate-gep.ll | 109
-rw-r--r-- test/CodeGen/AArch64/arm64-jumptable.ll | 16
-rw-r--r-- test/CodeGen/AArch64/arm64-memset-to-bzero.ll | 49
-rw-r--r-- test/CodeGen/AArch64/arm64-neon-2velem.ll | 685
-rw-r--r-- test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll | 9
-rw-r--r-- test/CodeGen/AArch64/chkstk.ll | 25
-rw-r--r-- test/CodeGen/AArch64/ldst-paired-aliasing.ll | 5
-rw-r--r-- test/CodeGen/AMDGPU/amdgpu.private-memory.ll | 3
-rw-r--r-- test/CodeGen/AMDGPU/memory-legalizer-store-infinite-loop.ll | 32
-rw-r--r-- test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir | 100
-rw-r--r-- test/CodeGen/ARM/GlobalISel/arm-legalizer.mir | 52
-rw-r--r-- test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir | 66
-rw-r--r-- test/CodeGen/ARM/avoid-cpsr-rmw.ll | 4
-rw-r--r-- test/CodeGen/ARM/su-addsub-overflow.ll | 135
-rw-r--r-- test/CodeGen/ARM/usat.ll | 214
-rw-r--r-- test/CodeGen/BPF/objdump_imm_hex.ll | 65
-rw-r--r-- test/CodeGen/Hexagon/autohvx/build-vector-i32-type.ll | 19
-rw-r--r-- test/CodeGen/Hexagon/autohvx/isel-bool-vector.ll | 18
-rw-r--r-- test/CodeGen/Hexagon/autohvx/isel-select-const.ll | 32
-rw-r--r-- test/CodeGen/Hexagon/expand-vstorerw-undef.ll | 2
-rw-r--r-- test/CodeGen/Hexagon/v60-cur.ll | 5
-rw-r--r-- test/CodeGen/Hexagon/vect/vect-extract-i1-debug.ll | 14
-rw-r--r-- test/CodeGen/Hexagon/vect/vect-infloop.ll | 10
-rw-r--r-- test/CodeGen/Mips/llvm-ir/extractelement.ll | 2
-rw-r--r-- test/CodeGen/Mips/long-call-mcount.ll | 19
-rw-r--r-- test/CodeGen/Mips/sll-micromips-r6-encoding.mir | 46
-rw-r--r-- test/CodeGen/PowerPC/cmp_elimination.ll | 32
-rw-r--r-- test/CodeGen/PowerPC/uint-to-ppcfp128-crash.ll | 15
-rw-r--r-- test/CodeGen/PowerPC/variable_elem_vec_extracts.ll | 6
-rw-r--r-- test/CodeGen/Thumb2/t2sizereduction.mir | 83
-rw-r--r-- test/CodeGen/X86/avg-mask.ll | 24
-rw-r--r-- test/CodeGen/X86/avg.ll | 2949
-rw-r--r-- test/CodeGen/X86/avx512-calling-conv.ll | 88
-rw-r--r-- test/CodeGen/X86/avx512-ext.ll | 21
-rw-r--r-- test/CodeGen/X86/avx512-extract-subvector-load-store.ll | 136
-rw-r--r-- test/CodeGen/X86/avx512-insert-extract.ll | 163
-rw-r--r-- test/CodeGen/X86/avx512-insert-extract_i1.ll | 3
-rw-r--r-- test/CodeGen/X86/avx512-mask-op.ll | 22
-rwxr-xr-x test/CodeGen/X86/avx512-schedule.ll | 12
-rw-r--r-- test/CodeGen/X86/avx512-shuffles/partial_permute.ll | 299
-rw-r--r-- test/CodeGen/X86/avx512-skx-insert-subvec.ll | 7
-rw-r--r-- test/CodeGen/X86/avx512-vec-cmp.ll | 5
-rw-r--r-- test/CodeGen/X86/avx512vl-vec-masked-cmp.ll | 186
-rw-r--r-- test/CodeGen/X86/bitcast-and-setcc-128.ll | 24
-rw-r--r-- test/CodeGen/X86/bitcast-setcc-128.ll | 12
-rw-r--r-- test/CodeGen/X86/combine-and.ll | 10
-rw-r--r-- test/CodeGen/X86/combine-or.ll | 20
-rw-r--r-- test/CodeGen/X86/darwin-bzero.ll | 5
-rw-r--r-- test/CodeGen/X86/extractelement-index.ll | 12
-rw-r--r-- test/CodeGen/X86/fma-fneg-combine.ll | 166
-rw-r--r-- test/CodeGen/X86/fmsubadd-combine.ll | 96
-rw-r--r-- test/CodeGen/X86/fold-vector-sext-crash.ll | 6
-rw-r--r-- test/CodeGen/X86/horizontal-reduce-smax.ll | 284
-rw-r--r-- test/CodeGen/X86/horizontal-reduce-smin.ll | 284
-rw-r--r-- test/CodeGen/X86/horizontal-reduce-umax.ll | 284
-rw-r--r-- test/CodeGen/X86/horizontal-reduce-umin.ll | 170
-rw-r--r-- test/CodeGen/X86/known-bits-vector.ll | 14
-rw-r--r-- test/CodeGen/X86/machinesink-merge-debuginfo.ll | 60
-rw-r--r-- test/CodeGen/X86/machinesink-null-debuginfo.ll | 10
-rw-r--r-- test/CodeGen/X86/masked_gather_scatter.ll | 121
-rw-r--r-- test/CodeGen/X86/popcnt.ll | 2
-rw-r--r-- test/CodeGen/X86/prefetch.ll | 102
-rw-r--r-- test/CodeGen/X86/shuffle-strided-with-offset-128.ll | 81
-rw-r--r-- test/CodeGen/X86/shuffle-strided-with-offset-256.ll | 149
-rw-r--r-- test/CodeGen/X86/shuffle-strided-with-offset-512.ll | 48
-rw-r--r-- test/CodeGen/X86/shuffle-vs-trunc-128.ll | 55
-rw-r--r-- test/CodeGen/X86/shuffle-vs-trunc-256.ll | 184
-rw-r--r-- test/CodeGen/X86/shuffle-vs-trunc-512.ll | 5
-rw-r--r-- test/CodeGen/X86/var-permute-256.ll | 270
-rw-r--r-- test/CodeGen/X86/var-permute-512.ll | 208
-rw-r--r-- test/CodeGen/X86/vector-compare-results.ll | 56
-rw-r--r-- test/CodeGen/X86/vector-half-conversions.ll | 511
-rw-r--r-- test/CodeGen/X86/vector-rotate-128.ll | 76
-rw-r--r-- test/CodeGen/X86/vector-shift-ashr-128.ll | 154
-rw-r--r-- test/CodeGen/X86/vector-shift-lshr-128.ll | 154
-rw-r--r-- test/CodeGen/X86/vector-shift-shl-128.ll | 154
-rw-r--r-- test/CodeGen/X86/vector-shuffle-128-v16.ll | 113
-rw-r--r-- test/CodeGen/X86/vector-shuffle-128-v8.ll | 397
-rw-r--r-- test/CodeGen/X86/vector-shuffle-256-v16.ll | 849
-rw-r--r-- test/CodeGen/X86/vector-shuffle-256-v32.ll | 946
-rw-r--r-- test/CodeGen/X86/vector-shuffle-256-v4.ll | 237
-rw-r--r-- test/CodeGen/X86/vector-shuffle-256-v8.ll | 264
-rw-r--r-- test/CodeGen/X86/vector-shuffle-512-v32.ll | 18
-rw-r--r-- test/CodeGen/X86/vector-shuffle-v1.ll | 185
-rw-r--r-- test/CodeGen/X86/vector-shuffle-variable-128.ll | 596
-rw-r--r-- test/CodeGen/X86/vector-trunc.ll | 62
-rw-r--r-- test/CodeGen/X86/vector-zext.ll | 30
-rw-r--r-- test/CodeGen/X86/vselect.ll | 6
-rw-r--r-- test/DebugInfo/X86/dwarfdump-str-offsets-dwp.s | 173
-rw-r--r-- test/DebugInfo/X86/dwarfdump-str-offsets-invalid-3.s | 4
-rw-r--r-- test/DebugInfo/X86/dwarfdump-str-offsets-invalid-4.s | 5
-rw-r--r-- test/DebugInfo/X86/dwarfdump-str-offsets-invalid-6.s | 94
-rw-r--r-- test/DebugInfo/X86/dwarfdump-str-offsets-macho.s | 34
-rw-r--r-- test/DebugInfo/X86/dwarfdump-str-offsets.s | 66
-rw-r--r-- test/Instrumentation/HWAddressSanitizer/basic.ll | 72
-rw-r--r-- test/Instrumentation/HWAddressSanitizer/with-calls.ll | 39
-rw-r--r-- test/MC/AArch64/arm64-system-encoding.s | 3
-rw-r--r-- test/MC/AArch64/basic-a64-diagnostics.s | 4
-rw-r--r-- test/MC/AArch64/dot-req.s | 10
-rw-r--r-- test/MC/AMDGPU/ds.s | 4
-rw-r--r-- test/MC/AMDGPU/expressions.s | 8
-rw-r--r-- test/MC/AMDGPU/invalid-instructions-spellcheck.s | 48
-rw-r--r-- test/MC/AMDGPU/trap.s | 42
-rw-r--r-- test/MC/AMDGPU/vop1-gfx9-err.s | 12
-rw-r--r-- test/MC/AMDGPU/vop3p-err.s | 6
-rw-r--r-- test/MC/ARM/dfb-neg.s | 10
-rw-r--r-- test/MC/ARM/dfb.s | 6
-rw-r--r-- test/MC/COFF/align-nops.s | 2
-rw-r--r-- test/MC/Disassembler/AArch64/basic-a64-instructions.txt | 3
-rw-r--r-- test/MC/Disassembler/AMDGPU/ds_vi.txt | 3
-rw-r--r-- test/MC/Disassembler/AMDGPU/trap_gfx9.txt | 32
-rw-r--r-- test/MC/Disassembler/AMDGPU/trap_vi.txt | 16
-rw-r--r-- test/MC/Disassembler/ARM/dfb-arm.txt | 6
-rw-r--r-- test/MC/Disassembler/ARM/dfb-thumb.txt | 6
-rw-r--r-- test/MC/Disassembler/X86/x86-32.txt | 3
-rw-r--r-- test/MC/ELF/align-nops.s | 2
-rw-r--r-- test/MC/MachO/x86_32-optimal_nop.s | 2
-rw-r--r-- test/MC/Mips/eva/invalid.s | 47
-rw-r--r-- test/MC/WebAssembly/weak-alias.ll | 169
-rw-r--r-- test/MC/X86/3DNow.s | 2
-rw-r--r-- test/MC/X86/AlignedBundling/different-sections.s | 4
-rw-r--r-- test/MC/X86/AlignedBundling/long-nop-pad.s | 4
-rw-r--r-- test/MC/X86/AlignedBundling/misaligned-bundle-group.s | 4
-rw-r--r-- test/MC/X86/AlignedBundling/misaligned-bundle.s | 4
-rw-r--r-- test/MC/X86/AlignedBundling/pad-align-to-bundle-end.s | 4
-rw-r--r-- test/MC/X86/AlignedBundling/pad-bundle-groups.s | 4
-rw-r--r-- test/MC/X86/AlignedBundling/relax-in-bundle-group.s | 4
-rw-r--r-- test/MC/X86/AlignedBundling/single-inst-bundling.s | 4
-rw-r--r-- test/MC/X86/CLFLUSHOPT-32.s | 26
-rw-r--r-- test/MC/X86/CLFLUSHOPT-64.s | 26
-rw-r--r-- test/MC/X86/CLFSH-32.s | 26
-rw-r--r-- test/MC/X86/CLFSH-64.s | 26
-rw-r--r-- test/MC/X86/x86_long_nop.s | 8
-rw-r--r-- test/TableGen/GlobalISelEmitter.td | 53
-rw-r--r-- test/TableGen/intrinsic-long-name.td | 2
-rw-r--r-- test/TableGen/intrinsic-struct.td | 2
-rw-r--r-- test/TableGen/intrinsic-varargs.td | 2
-rw-r--r-- test/ThinLTO/X86/cache.ll | 22
-rw-r--r-- test/Transforms/CallSiteSplitting/callsite-no-or-structure.ll | 139
-rw-r--r-- test/Transforms/CallSiteSplitting/callsite-no-splitting.ll | 18
-rw-r--r-- test/Transforms/CodeGenPrepare/section.ll | 47
-rw-r--r-- test/Transforms/GVN/tbaa.ll | 61
-rw-r--r-- test/Transforms/Inline/AArch64/binop.ll | 291
-rw-r--r-- test/Transforms/Inline/ARM/inline-fp.ll | 113
-rw-r--r-- test/Transforms/Inline/inline-fp.ll | 137
-rw-r--r-- test/Transforms/Inline/redundant-loads.ll | 18
-rw-r--r-- test/Transforms/InstCombine/2011-09-03-Trampoline.ll | 23
-rw-r--r-- test/Transforms/JumpThreading/guards.ll | 103
-rw-r--r-- test/Transforms/LoopVectorize/legal_preheader_check.ll | 27
-rw-r--r-- test/Transforms/MemCpyOpt/memcpy-invoke-memcpy.ll | 48
-rw-r--r-- test/Transforms/MemCpyOpt/merge-into-memset.ll | 45
-rw-r--r-- test/Transforms/MemCpyOpt/mixed-sizes.ll | 36
-rw-r--r-- test/Transforms/MemCpyOpt/nonlocal-memcpy-memcpy.ll | 114
-rw-r--r-- test/Transforms/NewGVN/tbaa.ll | 61
-rw-r--r-- test/Transforms/PGOProfile/icp_covariant_call_return.ll | 3
-rw-r--r-- test/Transforms/PGOProfile/icp_covariant_invoke_return.ll | 9
-rw-r--r-- test/Transforms/PGOProfile/icp_invoke.ll | 17
-rw-r--r-- test/Transforms/PGOProfile/icp_invoke_nouse.ll | 3
-rw-r--r-- test/Transforms/PGOProfile/icp_vararg.ll | 3
-rw-r--r-- test/Transforms/PGOProfile/indirect_call_promotion.ll | 3
-rw-r--r-- test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll | 24
-rw-r--r-- test/Transforms/SLPVectorizer/X86/jumbled-load-shuffle-placement.ll | 125
-rw-r--r-- test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll | 225
-rw-r--r-- test/Transforms/SLPVectorizer/X86/jumbled-load.ll | 37
-rw-r--r-- test/Transforms/SLPVectorizer/X86/store-jumbled.ll | 25
-rw-r--r-- test/Transforms/SampleProfile/entry_counts.ll | 4
-rw-r--r-- test/Transforms/SimplifyCFG/X86/if-conversion.ll | 231
-rw-r--r-- test/tools/llvm-cvtres/machine.test | 56
-rw-r--r-- test/tools/llvm-cvtres/symbols.test | 12
-rw-r--r-- test/tools/llvm-dwarfdump/X86/lookup.s | 7
-rw-r--r-- test/tools/llvm-objcopy/add-section-remove.test | 36
-rw-r--r-- test/tools/llvm-objcopy/add-section.test | 37
-rw-r--r-- test/tools/llvm-readobj/mips-got.test | 136
-rw-r--r-- test/tools/llvm-readobj/mips-plt.test | 32
-rw-r--r-- tools/llvm-dwarfdump/llvm-dwarfdump.cpp | 3
-rw-r--r-- tools/llvm-lto/llvm-lto.cpp | 4
-rw-r--r-- tools/llvm-objcopy/Object.cpp | 12
-rw-r--r-- tools/llvm-objcopy/Object.h | 15
-rw-r--r-- tools/llvm-objcopy/llvm-objcopy.cpp | 22
-rw-r--r-- tools/llvm-readobj/ELFDumper.cpp | 706
-rw-r--r-- tools/llvm-readobj/WasmDumper.cpp | 10
-rw-r--r-- tools/opt-viewer/optrecord.py | 2
-rw-r--r-- unittests/ADT/APFloatTest.cpp | 17
-rw-r--r-- unittests/ADT/StringRefTest.cpp | 7
-rw-r--r-- unittests/CodeGen/MachineOperandTest.cpp | 63
-rw-r--r-- unittests/ExecutionEngine/Orc/CMakeLists.txt | 7
-rw-r--r-- unittests/Support/CachePruningTest.cpp | 4
-rw-r--r-- unittests/Support/MemoryBufferTest.cpp | 40
-rw-r--r-- unittests/Support/TargetParserTest.cpp | 6
-rw-r--r-- unittests/Support/YAMLIOTest.cpp | 107
-rw-r--r-- utils/TableGen/CMakeLists.txt | 1
-rw-r--r-- utils/TableGen/CodeGenDAGPatterns.cpp | 94
-rw-r--r-- utils/TableGen/CodeGenDAGPatterns.h | 13
-rw-r--r-- utils/TableGen/CodeGenIntrinsics.h | 8
-rw-r--r-- utils/TableGen/CodeGenTarget.cpp | 7
-rw-r--r-- utils/TableGen/CodeGenTarget.h | 20
-rw-r--r-- utils/TableGen/GlobalISelEmitter.cpp | 115
-rw-r--r-- utils/TableGen/IntrinsicEmitter.cpp | 1
-rw-r--r-- utils/TableGen/SDNodeProperties.cpp | 49
-rw-r--r-- utils/TableGen/SDNodeProperties.h | 40
-rwxr-xr-x utils/docker/build_docker_image.sh | 6
-rwxr-xr-x utils/docker/scripts/build_install_llvm.sh | 39
-rwxr-xr-x utils/git-svn/git-llvm | 11
-rwxr-xr-x utils/update_mir_test_checks.py | 63
423 files changed, 14374 insertions, 10340 deletions
diff --git a/docs/Extensions.rst b/docs/Extensions.rst
index 14fea30204b4..32eeadd78ba6 100644
--- a/docs/Extensions.rst
+++ b/docs/Extensions.rst
@@ -288,3 +288,31 @@ standard stack probe emission.
The MSVC environment does not emit code for VLAs currently.
+Windows on ARM64
+----------------
+
+Stack Probe Emission
+^^^^^^^^^^^^^^^^^^^^
+
+The reference implementation (Microsoft Visual Studio 2017) emits stack probes
+in the following fashion:
+
+.. code-block:: gas
+
+ mov x15, #constant
+ bl __chkstk
+ sub sp, sp, x15, lsl #4
+
+However, this has the limitation of a 256 MiB (±128 MiB) range. In order to
+accommodate larger binaries, LLVM supports the use of ``-mcode-model=large`` to
+allow an 8 GiB (±4 GiB) range via a slight deviation. It will generate an
+indirect jump as follows:
+
+.. code-block:: gas
+
+ mov x15, #constant
+ adrp x16, __chkstk
+ add x16, x16, :lo12:__chkstk
+ blr x16
+ sub sp, sp, x15, lsl #4
+
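As a worked example of the scaling above (not part of the patch): ``__chkstk`` receives the allocation size in 16-byte units, since the probe amount is ``x15`` shifted left by 4. A function allocating a 4 KiB frame would therefore emit ``mov x15, #256`` before the call, and ``sub sp, sp, x15, lsl #4`` then drops the stack pointer by 256 * 16 = 4096 bytes.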
diff --git a/docs/MIRLangRef.rst b/docs/MIRLangRef.rst
index f170c7210879..1176435c8761 100644
--- a/docs/MIRLangRef.rst
+++ b/docs/MIRLangRef.rst
@@ -692,6 +692,50 @@ The syntax is:
EH_LABEL <mcsymbol Ltmp1>
+CFIIndex Operands
+^^^^^^^^^^^^^^^^^
+
+A CFI Index operand holds an index into a per-function side-table,
+``MachineFunction::getFrameInstructions()``, which references all the frame
+instructions in a ``MachineFunction``. A ``CFI_INSTRUCTION`` may look like it
+contains multiple operands, but the only operand it contains is the CFI Index.
+The other operands are tracked by the ``MCCFIInstruction`` object.
+
+The syntax is:
+
+.. code-block:: text
+
+ CFI_INSTRUCTION offset %w30, -16
+
+which may be emitted later in the MC layer as:
+
+.. code-block:: text
+
+ .cfi_offset w30, -16
+
+IntrinsicID Operands
+^^^^^^^^^^^^^^^^^^^^
+
+An Intrinsic ID operand contains a generic intrinsic ID or a target-specific ID.
+
+The syntax for the ``returnaddress`` intrinsic is:
+
+.. code-block:: text
+
+ %x0 = COPY intrinsic(@llvm.returnaddress)
+
+Predicate Operands
+^^^^^^^^^^^^^^^^^^
+
+A Predicate operand contains an IR predicate from ``CmpInst::Predicate``, like
+``ICMP_EQ``, etc.
+
+For the integer-equality predicate ``ICMP_EQ``, the syntax is:
+
+.. code-block:: text
+
+ %2:gpr(s32) = G_ICMP intpred(eq), %0, %1
+
.. TODO: Describe the parsers default behaviour when optional YAML attributes
are missing.
.. TODO: Describe the syntax for the bundled instructions.
@@ -702,7 +746,6 @@ The syntax is:
.. TODO: Describe the syntax of the stack object machine operands and their
YAML definitions.
.. TODO: Describe the syntax of the block address machine operands.
-.. TODO: Describe the syntax of the CFI index machine operands.
.. TODO: Describe the syntax of the metadata machine operands, and the
instructions debug location attribute.
.. TODO: Describe the syntax of the register live out machine operands.
diff --git a/docs/tutorial/LangImpl09.rst b/docs/tutorial/LangImpl09.rst
index fe5a95a5769e..d81f9fa0001c 100644
--- a/docs/tutorial/LangImpl09.rst
+++ b/docs/tutorial/LangImpl09.rst
@@ -197,7 +197,7 @@ expressions:
if (DblTy)
return DblTy;
- DblTy = DBuilder->createBasicType("double", 64, 64, dwarf::DW_ATE_float);
+ DblTy = DBuilder->createBasicType("double", 64, dwarf::DW_ATE_float);
return DblTy;
}
@@ -208,7 +208,8 @@ And then later on in ``main`` when we're constructing our module:
DBuilder = new DIBuilder(*TheModule);
KSDbgInfo.TheCU = DBuilder->createCompileUnit(
- dwarf::DW_LANG_C, "fib.ks", ".", "Kaleidoscope Compiler", 0, "", 0);
+ dwarf::DW_LANG_C, DBuilder->createFile("fib.ks", "."),
+ "Kaleidoscope Compiler", 0, "", 0);
There are a couple of things to note here. First, while we're producing a
compile unit for a language called Kaleidoscope we used the language
diff --git a/examples/Kaleidoscope/CMakeLists.txt b/examples/Kaleidoscope/CMakeLists.txt
index 543b9f73b4fe..3822cdd9e1c4 100644
--- a/examples/Kaleidoscope/CMakeLists.txt
+++ b/examples/Kaleidoscope/CMakeLists.txt
@@ -14,3 +14,4 @@ add_subdirectory(Chapter5)
add_subdirectory(Chapter6)
add_subdirectory(Chapter7)
add_subdirectory(Chapter8)
+add_subdirectory(Chapter9)
diff --git a/examples/Kaleidoscope/Chapter9/toy.cpp b/examples/Kaleidoscope/Chapter9/toy.cpp
index 1b13e45ec460..821cf4d25a65 100644
--- a/examples/Kaleidoscope/Chapter9/toy.cpp
+++ b/examples/Kaleidoscope/Chapter9/toy.cpp
@@ -823,7 +823,7 @@ DIType *DebugInfo::getDoubleTy() {
if (DblTy)
return DblTy;
- DblTy = DBuilder->createBasicType("double", 64, 64, dwarf::DW_ATE_float);
+ DblTy = DBuilder->createBasicType("double", 64, dwarf::DW_ATE_float);
return DblTy;
}
@@ -1436,7 +1436,8 @@ int main() {
// Currently down as "fib.ks" as a filename since we're redirecting stdin
// but we'd like actual source locations.
KSDbgInfo.TheCU = DBuilder->createCompileUnit(
- dwarf::DW_LANG_C, "fib.ks", ".", "Kaleidoscope Compiler", 0, "", 0);
+ dwarf::DW_LANG_C, DBuilder->createFile("fib.ks", "."),
+ "Kaleidoscope Compiler", 0, "", 0);
// Run the main "interpreter loop" now.
MainLoop();
diff --git a/include/llvm-c/lto.h b/include/llvm-c/lto.h
index 8d45b7832041..55f3e46c45ed 100644
--- a/include/llvm-c/lto.h
+++ b/include/llvm-c/lto.h
@@ -757,17 +757,17 @@ extern void thinlto_codegen_add_cross_referenced_symbol(thinlto_code_gen_t cg,
* @ingroup LLVMCTLTO
*
* These entry points control the ThinLTO cache. The cache is intended to
- * support incremental build, and thus needs to be persistent accross build.
- * The client enabled the cache by supplying a path to an existing directory.
+ * support incremental builds, and thus needs to be persistent across builds.
+ * The client enables the cache by supplying a path to an existing directory.
 * The code generator will use this to store object files that may be reused
* during a subsequent build.
* To avoid filling the disk space, a few knobs are provided:
- * - The pruning interval limit the frequency at which the garbage collector
- * will try to scan the cache directory to prune it from expired entries.
- * Setting to -1 disable the pruning (default).
+ * - The pruning interval limits the frequency at which the garbage collector
+ * will try to scan the cache directory to prune expired entries.
+ * Setting to a negative number disables the pruning.
* - The pruning expiration time indicates to the garbage collector how old an
* entry needs to be to be removed.
- * - Finally, the garbage collector can be instructed to prune the cache till
+ * - Finally, the garbage collector can be instructed to prune the cache until
* the occupied space goes below a threshold.
* @{
*/
@@ -782,7 +782,7 @@ extern void thinlto_codegen_set_cache_dir(thinlto_code_gen_t cg,
const char *cache_dir);
/**
- * Sets the cache pruning interval (in seconds). A negative value disable the
+ * Sets the cache pruning interval (in seconds). A negative value disables the
* pruning. An unspecified default value will be applied, and a value of 0 will
* be ignored.
*
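A minimal usage sketch of the cache knobs documented above; ``thinlto_codegen_set_cache_dir`` is declared in the hunk, while the pruning-interval setter is assumed from the same header:

    #include <llvm-c/lto.h>

    // Sketch: enable and tune the persistent ThinLTO cache.
    void configure_cache(thinlto_code_gen_t cg) {
      // The cache directory must already exist.
      thinlto_codegen_set_cache_dir(cg, "/var/cache/thinlto");
      // Scan for expired entries every 20 minutes; a negative value
      // disables pruning.
      thinlto_codegen_set_cache_pruning_interval(cg, 1200);
    }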
diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h
index 9de075dfd681..362096b08e13 100644
--- a/include/llvm/Analysis/AliasAnalysis.h
+++ b/include/llvm/Analysis/AliasAnalysis.h
@@ -95,46 +95,81 @@ enum AliasResult {
///
/// This is no access at all, a modification, a reference, or both
/// a modification and a reference. These are specifically structured such that
-/// they form a two bit matrix and bit-tests for 'mod' or 'ref'
+/// they form a three bit matrix and bit-tests for 'mod' or 'ref' or 'must'
/// work with any of the possible values.
-
enum class ModRefInfo {
+ /// Must is provided for completeness, but no routines will return only
+ /// Must today. See definition of Must below.
+ Must = 0,
+ /// The access may reference the value stored in memory,
+ /// a mustAlias relation was found, and no mayAlias or partialAlias found.
+ MustRef = 1,
+ /// The access may modify the value stored in memory,
+ /// a mustAlias relation was found, and no mayAlias or partialAlias found.
+ MustMod = 2,
+ /// The access may reference and/or modify the value stored in memory,
+ /// a mustAlias relation was found, and no mayAlias or partialAlias found.
+ MustModRef = MustRef | MustMod,
/// The access neither references nor modifies the value stored in memory.
- NoModRef = 0,
+ NoModRef = 4,
/// The access may reference the value stored in memory.
- Ref = 1,
+ Ref = NoModRef | MustRef,
/// The access may modify the value stored in memory.
- Mod = 2,
+ Mod = NoModRef | MustMod,
/// The access may reference and may modify the value stored in memory.
ModRef = Ref | Mod,
+
+ /// About Must:
+ /// Must is set in a best-effort manner.
+ /// We usually do not try hard to infer Must; instead, it is merely
+ /// another piece of "free" information that is presented when available.
+ /// If Must is set, a MustAlias relation was certainly found. For calls,
+ /// where multiple arguments are checked (argmemonly), this means that
+ /// only MustAlias or NoAlias results were found.
+ /// Must is not set for RAR accesses, even if the two locations must
+ /// alias. The reason is that two read accesses translate to an early return
+ /// of NoModRef. An additional alias check to set Must may be
+ /// expensive. Other cases may also not set Must (e.g. callCapturesBefore).
+ /// We refer to Must being *set* when the most significant bit is *cleared*.
+ /// Conversely, we *clear* Must information by *setting* the Must bit to 1.
};
LLVM_NODISCARD inline bool isNoModRef(const ModRefInfo MRI) {
- return MRI == ModRefInfo::NoModRef;
+ return (static_cast<int>(MRI) & static_cast<int>(ModRefInfo::MustModRef)) ==
+ static_cast<int>(ModRefInfo::Must);
}
LLVM_NODISCARD inline bool isModOrRefSet(const ModRefInfo MRI) {
- return static_cast<int>(MRI) & static_cast<int>(ModRefInfo::ModRef);
+ return static_cast<int>(MRI) & static_cast<int>(ModRefInfo::MustModRef);
}
LLVM_NODISCARD inline bool isModAndRefSet(const ModRefInfo MRI) {
- return (static_cast<int>(MRI) & static_cast<int>(ModRefInfo::ModRef)) ==
- static_cast<int>(ModRefInfo::ModRef);
+ return (static_cast<int>(MRI) & static_cast<int>(ModRefInfo::MustModRef)) ==
+ static_cast<int>(ModRefInfo::MustModRef);
}
LLVM_NODISCARD inline bool isModSet(const ModRefInfo MRI) {
- return static_cast<int>(MRI) & static_cast<int>(ModRefInfo::Mod);
+ return static_cast<int>(MRI) & static_cast<int>(ModRefInfo::MustMod);
}
LLVM_NODISCARD inline bool isRefSet(const ModRefInfo MRI) {
- return static_cast<int>(MRI) & static_cast<int>(ModRefInfo::Ref);
+ return static_cast<int>(MRI) & static_cast<int>(ModRefInfo::MustRef);
+}
+LLVM_NODISCARD inline bool isMustSet(const ModRefInfo MRI) {
+ return !(static_cast<int>(MRI) & static_cast<int>(ModRefInfo::NoModRef));
}
LLVM_NODISCARD inline ModRefInfo setMod(const ModRefInfo MRI) {
- return ModRefInfo(static_cast<int>(MRI) | static_cast<int>(ModRefInfo::Mod));
+ return ModRefInfo(static_cast<int>(MRI) |
+ static_cast<int>(ModRefInfo::MustMod));
}
LLVM_NODISCARD inline ModRefInfo setRef(const ModRefInfo MRI) {
- return ModRefInfo(static_cast<int>(MRI) | static_cast<int>(ModRefInfo::Ref));
+ return ModRefInfo(static_cast<int>(MRI) |
+ static_cast<int>(ModRefInfo::MustRef));
+}
+LLVM_NODISCARD inline ModRefInfo setMust(const ModRefInfo MRI) {
+ return ModRefInfo(static_cast<int>(MRI) &
+ static_cast<int>(ModRefInfo::MustModRef));
}
LLVM_NODISCARD inline ModRefInfo setModAndRef(const ModRefInfo MRI) {
return ModRefInfo(static_cast<int>(MRI) |
- static_cast<int>(ModRefInfo::ModRef));
+ static_cast<int>(ModRefInfo::MustModRef));
}
LLVM_NODISCARD inline ModRefInfo clearMod(const ModRefInfo MRI) {
return ModRefInfo(static_cast<int>(MRI) & static_cast<int>(ModRefInfo::Ref));
@@ -142,6 +177,10 @@ LLVM_NODISCARD inline ModRefInfo clearMod(const ModRefInfo MRI) {
LLVM_NODISCARD inline ModRefInfo clearRef(const ModRefInfo MRI) {
return ModRefInfo(static_cast<int>(MRI) & static_cast<int>(ModRefInfo::Mod));
}
+LLVM_NODISCARD inline ModRefInfo clearMust(const ModRefInfo MRI) {
+ return ModRefInfo(static_cast<int>(MRI) |
+ static_cast<int>(ModRefInfo::NoModRef));
+}
LLVM_NODISCARD inline ModRefInfo unionModRef(const ModRefInfo MRI1,
const ModRefInfo MRI2) {
return ModRefInfo(static_cast<int>(MRI1) | static_cast<int>(MRI2));
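To make the inverted Must bit concrete, here is a standalone sketch mirroring the enum and two of the helpers above (illustrative only, not LLVM source):

    #include <cassert>

    // Low two bits carry MustRef/MustMod; the NoModRef bit (4) *clears*
    // Must information when set, exactly as in the enum above.
    enum class ModRefInfo {
      Must = 0, MustRef = 1, MustMod = 2, MustModRef = 3,
      NoModRef = 4, Ref = 5, Mod = 6, ModRef = 7,
    };
    static bool isMustSet(ModRefInfo MRI) {
      return !(static_cast<int>(MRI) & static_cast<int>(ModRefInfo::NoModRef));
    }
    static bool isModSet(ModRefInfo MRI) {
      return static_cast<int>(MRI) & static_cast<int>(ModRefInfo::MustMod);
    }

    int main() {
      assert(isModSet(ModRefInfo::MustMod) && isMustSet(ModRefInfo::MustMod));
      assert(isModSet(ModRefInfo::Mod) && !isMustSet(ModRefInfo::Mod));
      assert(!isMustSet(ModRefInfo::ModRef)); // NoModRef bit set => Must cleared
    }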
@@ -160,11 +199,11 @@ enum FunctionModRefLocation {
/// Base case is no access to memory.
FMRL_Nowhere = 0,
/// Access to memory via argument pointers.
- FMRL_ArgumentPointees = 4,
+ FMRL_ArgumentPointees = 8,
/// Memory that is inaccessible via LLVM IR.
- FMRL_InaccessibleMem = 8,
+ FMRL_InaccessibleMem = 16,
/// Access to any memory.
- FMRL_Anywhere = 16 | FMRL_InaccessibleMem | FMRL_ArgumentPointees
+ FMRL_Anywhere = 32 | FMRL_InaccessibleMem | FMRL_ArgumentPointees
};
/// Summary of how a function affects memory in the program.
@@ -344,7 +383,7 @@ public:
/// result's bits are set to indicate the allowed aliasing ModRef kinds. Note
/// that these bits do not necessarily account for the overall behavior of
/// the function, but rather only provide additional per-argument
- /// information.
+ /// information. This never sets ModRefInfo::Must.
ModRefInfo getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx);
/// Return the behavior of the given call site.
@@ -624,6 +663,8 @@ public:
/// or reads the specified memory location \p MemLoc before instruction \p I
/// in a BasicBlock. An ordered basic block \p OBB can be used to speed up
/// instruction ordering queries inside the BasicBlock containing \p I.
+ /// Early exits in callCapturesBefore may lead to ModRefInfo::Must not being
+ /// set.
ModRefInfo callCapturesBefore(const Instruction *I,
const MemoryLocation &MemLoc, DominatorTree *DT,
OrderedBasicBlock *OBB = nullptr);
diff --git a/include/llvm/Analysis/AliasAnalysisEvaluator.h b/include/llvm/Analysis/AliasAnalysisEvaluator.h
index 214574852655..cd2f631a01f4 100644
--- a/include/llvm/Analysis/AliasAnalysisEvaluator.h
+++ b/include/llvm/Analysis/AliasAnalysisEvaluator.h
@@ -35,19 +35,23 @@ class AAEvaluator : public PassInfoMixin<AAEvaluator> {
int64_t FunctionCount;
int64_t NoAliasCount, MayAliasCount, PartialAliasCount, MustAliasCount;
int64_t NoModRefCount, ModCount, RefCount, ModRefCount;
+ int64_t MustCount, MustRefCount, MustModCount, MustModRefCount;
public:
AAEvaluator()
: FunctionCount(), NoAliasCount(), MayAliasCount(), PartialAliasCount(),
MustAliasCount(), NoModRefCount(), ModCount(), RefCount(),
- ModRefCount() {}
+ ModRefCount(), MustCount(), MustRefCount(), MustModCount(),
+ MustModRefCount() {}
AAEvaluator(AAEvaluator &&Arg)
: FunctionCount(Arg.FunctionCount), NoAliasCount(Arg.NoAliasCount),
MayAliasCount(Arg.MayAliasCount),
PartialAliasCount(Arg.PartialAliasCount),
MustAliasCount(Arg.MustAliasCount), NoModRefCount(Arg.NoModRefCount),
ModCount(Arg.ModCount), RefCount(Arg.RefCount),
- ModRefCount(Arg.ModRefCount) {
+ ModRefCount(Arg.ModRefCount), MustCount(Arg.MustCount),
+ MustRefCount(Arg.MustRefCount), MustModCount(Arg.MustModCount),
+ MustModRefCount(Arg.MustModRefCount) {
Arg.FunctionCount = 0;
}
~AAEvaluator();
diff --git a/include/llvm/Analysis/LoopAccessAnalysis.h b/include/llvm/Analysis/LoopAccessAnalysis.h
index 54f151ef82e2..28154c873b70 100644
--- a/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -667,21 +667,6 @@ int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
const ValueToValueMap &StridesMap = ValueToValueMap(),
bool Assume = false, bool ShouldCheckWrap = true);
-/// \brief Attempt to sort the 'loads' in \p VL and return the sorted values in
-/// \p Sorted.
-///
-/// Returns 'false' if sorting is not legal or feasible, otherwise returns
-/// 'true'. If \p Mask is not null, it also returns the \p Mask which is the
-/// shuffle mask for actual memory access order.
-///
-/// For example, for a given VL of memory accesses in program order, a[i+2],
-/// a[i+0], a[i+1] and a[i+3], this function will sort the VL and save the
-/// sorted value in 'Sorted' as a[i+0], a[i+1], a[i+2], a[i+3] and saves the
-/// mask for actual memory accesses in program order in 'Mask' as <2,0,1,3>
-bool sortLoadAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
- ScalarEvolution &SE, SmallVectorImpl<Value *> &Sorted,
- SmallVectorImpl<unsigned> *Mask = nullptr);
-
/// \brief Returns true if the memory operations \p A and \p B are consecutive.
/// This is a simple API that does not depend on the analysis pass.
bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h
index c2974525a6ff..391a333594e9 100644
--- a/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -407,6 +407,12 @@ public:
void getNonLocalPointerDependency(Instruction *QueryInst,
SmallVectorImpl<NonLocalDepResult> &Result);
+ /// Perform a dependency query specifically for QueryInst's access to Loc.
+ /// The other comments for getNonLocalPointerDependency apply here as well.
+ void getNonLocalPointerDependencyFrom(Instruction *QueryInst,
+ const MemoryLocation &Loc, bool isLoad,
+ SmallVectorImpl<NonLocalDepResult> &Result);
+
/// Removes an instruction from the dependence analysis, updating the
/// dependence of instructions that previously depended on it.
void removeInstruction(Instruction *InstToRemove);
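A hedged usage sketch of the new entry point; the analysis reference and the load are placeholders, not from the patch:

    #include "llvm/Analysis/MemoryDependenceAnalysis.h"
    #include "llvm/Analysis/MemoryLocation.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Sketch: collect the non-local dependencies of one load's location.
    static void collectLoadDeps(MemoryDependenceResults &MDA, LoadInst *LI) {
      SmallVector<NonLocalDepResult, 8> Deps;
      MDA.getNonLocalPointerDependencyFrom(LI, MemoryLocation::get(LI),
                                           /*isLoad=*/true, Deps);
      // Deps now holds the discovered dependency results.
    }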
diff --git a/include/llvm/Analysis/ProfileSummaryInfo.h b/include/llvm/Analysis/ProfileSummaryInfo.h
index bd7b00374821..293033458429 100644
--- a/include/llvm/Analysis/ProfileSummaryInfo.h
+++ b/include/llvm/Analysis/ProfileSummaryInfo.h
@@ -92,12 +92,12 @@ public:
bool hasHugeWorkingSetSize();
/// \brief Returns true if \p F has hot function entry.
bool isFunctionEntryHot(const Function *F);
- /// Returns true if \p F has hot function entry or hot call edge.
- bool isFunctionHotInCallGraph(const Function *F);
+ /// Returns true if \p F contains hot code.
+ bool isFunctionHotInCallGraph(const Function *F, BlockFrequencyInfo &BFI);
/// \brief Returns true if \p F has cold function entry.
bool isFunctionEntryCold(const Function *F);
- /// Returns true if \p F has cold function entry or cold call edge.
- bool isFunctionColdInCallGraph(const Function *F);
+ /// Returns true if \p F contains only cold code.
+ bool isFunctionColdInCallGraph(const Function *F, BlockFrequencyInfo &BFI);
/// \brief Returns true if \p F is a hot function.
bool isHotCount(uint64_t C);
/// \brief Returns true if count \p C is considered cold.
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
index 4578e0da8ab2..3df04e98bd24 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -47,7 +47,7 @@ namespace llvm {
ScalarEvolution &SE;
const DataLayout &DL;
- // New instructions receive a name to identifies them with the current pass.
+ // New instructions receive a name to identify them with the current pass.
const char* IVName;
// InsertedExpressions caches Values for reuse, so must track RAUW.
diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h
index c20f20cfbe4d..cecd8958e9d9 100644
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -646,6 +646,9 @@ public:
/// \brief Additional properties of an operand's values.
enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };
+ /// \return True if target can execute instructions out of order.
+ bool isOutOfOrder() const;
+
/// \return The number of scalar or vector registers that the target has.
/// If 'Vectors' is true, it returns the number of vector registers. If it is
/// set to false, it returns the number of scalar registers.
@@ -1018,6 +1021,7 @@ public:
Type *Ty) = 0;
virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty) = 0;
+ virtual bool isOutOfOrder() const = 0;
virtual unsigned getNumberOfRegisters(bool Vector) = 0;
virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
virtual unsigned getMinVectorRegisterBitWidth() = 0;
@@ -1295,6 +1299,9 @@ public:
Type *Ty) override {
return Impl.getIntImmCost(IID, Idx, Imm, Ty);
}
+ bool isOutOfOrder() const override {
+ return Impl.isOutOfOrder();
+ }
unsigned getNumberOfRegisters(bool Vector) override {
return Impl.getNumberOfRegisters(Vector);
}
diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h
index 4c37402278ef..3625675d53de 100644
--- a/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -337,6 +337,8 @@ public:
return TTI::TCC_Free;
}
+ bool isOutOfOrder() const { return false; }
+
unsigned getNumberOfRegisters(bool Vector) { return 8; }
unsigned getRegisterBitWidth(bool Vector) const { return 32; }
diff --git a/include/llvm/BinaryFormat/Wasm.h b/include/llvm/BinaryFormat/Wasm.h
index 506cd0393e9a..57a0b441821b 100644
--- a/include/llvm/BinaryFormat/Wasm.h
+++ b/include/llvm/BinaryFormat/Wasm.h
@@ -208,7 +208,7 @@ const unsigned WASM_SYMBOL_VISIBILITY_HIDDEN = 0x4;
#define WASM_RELOC(name, value) name = value,
enum : unsigned {
-#include "WasmRelocs/WebAssembly.def"
+#include "WasmRelocs.def"
};
#undef WASM_RELOC
diff --git a/include/llvm/BinaryFormat/WasmRelocs/WebAssembly.def b/include/llvm/BinaryFormat/WasmRelocs.def
index d6f0e42b33bf..d6f0e42b33bf 100644
--- a/include/llvm/BinaryFormat/WasmRelocs/WebAssembly.def
+++ b/include/llvm/BinaryFormat/WasmRelocs.def
diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h
index bb5e7f9e8e30..f1f9275b0786 100644
--- a/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/include/llvm/CodeGen/BasicTTIImpl.h
@@ -302,9 +302,13 @@ public:
}
unsigned getFPOpCost(Type *Ty) {
- // By default, FP instructions are no more expensive since they are
- // implemented in HW. Target specific TTI can override this.
- return TargetTransformInfo::TCC_Basic;
+ // Check whether FADD is available, as a proxy for floating-point in
+ // general.
+ const TargetLoweringBase *TLI = getTLI();
+ EVT VT = TLI->getValueType(DL, Ty);
+ if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT))
+ return TargetTransformInfo::TCC_Basic;
+ return TargetTransformInfo::TCC_Expensive;
}
unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
@@ -398,6 +402,10 @@ public:
return BaseT::getInstructionLatency(I);
}
+ bool isOutOfOrder() const {
+ return getST()->getSchedModel().isOutOfOrder();
+ }
+
/// @}
/// \name Vector TTI Implementations
diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
index e599a1b179ec..4264a866b6c0 100644
--- a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
+++ b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
@@ -282,10 +282,6 @@ enum {
/// Provides the logic to select generic machine instructions.
class InstructionSelector {
public:
- using I64ImmediatePredicateFn = bool (*)(int64_t);
- using APIntImmediatePredicateFn = bool (*)(const APInt &);
- using APFloatImmediatePredicateFn = bool (*)(const APFloat &);
-
virtual ~InstructionSelector() = default;
/// Select the (possibly generic) instruction \p I to only use target-specific
@@ -319,9 +315,6 @@ public:
struct MatcherInfoTy {
const LLT *TypeObjects;
const PredicateBitset *FeatureBitsets;
- const I64ImmediatePredicateFn *I64ImmPredicateFns;
- const APIntImmediatePredicateFn *APIntImmPredicateFns;
- const APFloatImmediatePredicateFn *APFloatImmPredicateFns;
const ComplexMatcherMemFn *ComplexPredicates;
};
@@ -340,6 +333,16 @@ protected:
const RegisterBankInfo &RBI, const PredicateBitset &AvailableFeatures,
CodeGenCoverage &CoverageInfo) const;
+ virtual bool testImmPredicate_I64(unsigned, int64_t) const {
+ llvm_unreachable("Subclasses must override this to use tablegen");
+ }
+ virtual bool testImmPredicate_APInt(unsigned, const APInt &) const {
+ llvm_unreachable("Subclasses must override this to use tablegen");
+ }
+ virtual bool testImmPredicate_APFloat(unsigned, const APFloat &) const {
+ llvm_unreachable("Subclasses must override this to use tablegen");
+ }
+
/// Constrain a register operand of an instruction \p I to a specified
/// register class. This could involve inserting COPYs before (for uses) or
/// after (for defs) and may replace the operand of \p I.
diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h b/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
index ac2c055ab145..bf834cf8f5e3 100644
--- a/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
+++ b/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
@@ -181,7 +181,7 @@ bool InstructionSelector::executeMatchTable(
else
llvm_unreachable("Expected Imm or CImm operand");
- if (!MatcherInfo.I64ImmPredicateFns[Predicate](Value))
+ if (!testImmPredicate_I64(Predicate, Value))
if (handleReject() == RejectAndGiveUp)
return false;
break;
@@ -202,7 +202,7 @@ bool InstructionSelector::executeMatchTable(
else
llvm_unreachable("Expected Imm or CImm operand");
- if (!MatcherInfo.APIntImmPredicateFns[Predicate](Value))
+ if (!testImmPredicate_APInt(Predicate, Value))
if (handleReject() == RejectAndGiveUp)
return false;
break;
@@ -221,7 +221,7 @@ bool InstructionSelector::executeMatchTable(
assert(Predicate > GIPFP_APFloat_Invalid && "Expected a valid predicate");
APFloat Value = State.MIs[InsnID]->getOperand(1).getFPImm()->getValueAPF();
- if (!MatcherInfo.APFloatImmPredicateFns[Predicate](Value))
+ if (!testImmPredicate_APFloat(Predicate, Value))
if (handleReject() == RejectAndGiveUp)
return false;
break;
diff --git a/include/llvm/CodeGen/LiveStackAnalysis.h b/include/llvm/CodeGen/LiveStacks.h
index c90ae7b184f4..44ed785f7b53 100644
--- a/include/llvm/CodeGen/LiveStackAnalysis.h
+++ b/include/llvm/CodeGen/LiveStacks.h
@@ -1,4 +1,4 @@
-//===- LiveStackAnalysis.h - Live Stack Slot Analysis -----------*- C++ -*-===//
+//===- LiveStacks.h - Live Stack Slot Analysis ------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_LIVESTACKANALYSIS_H
-#define LLVM_CODEGEN_LIVESTACKANALYSIS_H
+#ifndef LLVM_CODEGEN_LIVESTACKS_H
+#define LLVM_CODEGEN_LIVESTACKS_H
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -100,4 +100,4 @@ public:
} // end namespace llvm
-#endif // LLVM_CODEGEN_LIVESTACK_ANALYSIS_H
+#endif
diff --git a/include/llvm/CodeGen/MachineOperand.h b/include/llvm/CodeGen/MachineOperand.h
index ccf0917ed085..4be7942c2c64 100644
--- a/include/llvm/CodeGen/MachineOperand.h
+++ b/include/llvm/CodeGen/MachineOperand.h
@@ -29,6 +29,7 @@ class GlobalValue;
class MachineBasicBlock;
class MachineInstr;
class MachineRegisterInfo;
+class MCCFIInstruction;
class MDNode;
class ModuleSlotTracker;
class TargetMachine;
@@ -250,6 +251,12 @@ public:
static void printStackObjectReference(raw_ostream &OS, unsigned FrameIndex,
bool IsFixed, StringRef Name);
+ /// Print the offset with explicit +/- signs.
+ static void printOperandOffset(raw_ostream &OS, int64_t Offset);
+
+ /// Print an IRSlotNumber.
+ static void printIRSlotNumber(raw_ostream &OS, int Slot);
+
/// Print the MachineOperand to \p os.
/// Providing a valid \p TRI and \p IntrinsicInfo results in a more
/// target-specific printing. If \p TRI and \p IntrinsicInfo are null, the
diff --git a/include/llvm/CodeGen/RuntimeLibcalls.def b/include/llvm/CodeGen/RuntimeLibcalls.def
index e042ae982e86..7695e9d782ef 100644
--- a/include/llvm/CodeGen/RuntimeLibcalls.def
+++ b/include/llvm/CodeGen/RuntimeLibcalls.def
@@ -165,6 +165,8 @@ HANDLE_LIBCALL(SINCOS_F64, nullptr)
HANDLE_LIBCALL(SINCOS_F80, nullptr)
HANDLE_LIBCALL(SINCOS_F128, nullptr)
HANDLE_LIBCALL(SINCOS_PPCF128, nullptr)
+HANDLE_LIBCALL(SINCOS_STRET_F32, nullptr)
+HANDLE_LIBCALL(SINCOS_STRET_F64, nullptr)
HANDLE_LIBCALL(POW_F32, "powf")
HANDLE_LIBCALL(POW_F64, "pow")
HANDLE_LIBCALL(POW_F80, "powl")
@@ -334,6 +336,7 @@ HANDLE_LIBCALL(O_PPCF128, "__gcc_qunord")
HANDLE_LIBCALL(MEMCPY, "memcpy")
HANDLE_LIBCALL(MEMMOVE, "memmove")
HANDLE_LIBCALL(MEMSET, "memset")
+HANDLE_LIBCALL(BZERO, nullptr)
// Element-wise unordered-atomic memory of different sizes
HANDLE_LIBCALL(MEMCPY_ELEMENT_UNORDERED_ATOMIC_1, "__llvm_memcpy_element_unordered_atomic_1")
diff --git a/include/llvm/CodeGen/SDNodeProperties.td b/include/llvm/CodeGen/SDNodeProperties.td
new file mode 100644
index 000000000000..83bbab2fdc8d
--- /dev/null
+++ b/include/llvm/CodeGen/SDNodeProperties.td
@@ -0,0 +1,34 @@
+//===- SDNodeProperties.td - Common code for DAG isels ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+class SDNodeProperty;
+
+// Selection DAG Pattern Operations
+class SDPatternOperator {
+ list<SDNodeProperty> Properties = [];
+}
+
+//===----------------------------------------------------------------------===//
+// Selection DAG Node Properties.
+//
+// Note: These are hard coded into tblgen.
+//
+def SDNPCommutative : SDNodeProperty; // X op Y == Y op X
+def SDNPAssociative : SDNodeProperty; // (X op Y) op Z == X op (Y op Z)
+def SDNPHasChain : SDNodeProperty; // R/W chain operand and result
+def SDNPOutGlue : SDNodeProperty; // Write a flag result
+def SDNPInGlue : SDNodeProperty; // Read a flag operand
+def SDNPOptInGlue : SDNodeProperty; // Optionally read a flag operand
+def SDNPMayStore : SDNodeProperty; // May write to memory, sets 'mayStore'.
+def SDNPMayLoad : SDNodeProperty; // May read memory, sets 'mayLoad'.
+def SDNPSideEffect : SDNodeProperty; // Sets 'HasUnmodelledSideEffects'.
+def SDNPMemOperand : SDNodeProperty; // Touches memory, has assoc MemOperand
+def SDNPVariadic : SDNodeProperty; // Node has variable arguments.
+def SDNPWantRoot : SDNodeProperty; // ComplexPattern gets the root of match
+def SDNPWantParent : SDNodeProperty; // ComplexPattern gets the parent
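These property classes are consumed by ``SDNode`` definitions elsewhere in the tablegen sources; for example, an integer add node can list ``[SDNPCommutative, SDNPAssociative]`` in its properties to mark it commutative and associative for the pattern matcher.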
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index 7de2e766d521..522c2f1b2cb2 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -189,8 +189,8 @@ public:
inline bool isUndef() const;
inline unsigned getMachineOpcode() const;
inline const DebugLoc &getDebugLoc() const;
- inline void dump() const;
- inline void dumpr() const;
+ inline void dump(const SelectionDAG *G = nullptr) const;
+ inline void dumpr(const SelectionDAG *G = nullptr) const;
/// Return true if this operand (which must be a chain) reaches the
/// specified operand without crossing any side-effecting instructions.
@@ -1089,12 +1089,12 @@ inline const DebugLoc &SDValue::getDebugLoc() const {
return Node->getDebugLoc();
}
-inline void SDValue::dump() const {
- return Node->dump();
+inline void SDValue::dump(const SelectionDAG *G) const {
+ return Node->dump(G);
}
-inline void SDValue::dumpr() const {
- return Node->dumpr();
+inline void SDValue::dumpr(const SelectionDAG *G) const {
+ return Node->dumpr(G);
}
// Define inline functions from the SDUse class.
diff --git a/include/llvm/CodeGen/TargetLowering.h b/include/llvm/CodeGen/TargetLowering.h
index 0fa19d09e776..380e3b19dc80 100644
--- a/include/llvm/CodeGen/TargetLowering.h
+++ b/include/llvm/CodeGen/TargetLowering.h
@@ -824,8 +824,8 @@ public:
/// also combined within this function. Currently, the minimum size check is
  /// performed in findJumpTable() in SelectionDAGBuilder and
/// getEstimatedNumberOfCaseClusters() in BasicTTIImpl.
- bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases,
- uint64_t Range) const {
+ virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases,
+ uint64_t Range) const {
const bool OptForSize = SI->getParent()->getParent()->optForSize();
const unsigned MinDensity = getMinimumJumpTableDensity(OptForSize);
const unsigned MaxJumpTableSize =
@@ -1276,7 +1276,7 @@ public:
}
/// Return lower limit for number of blocks in a jump table.
- unsigned getMinimumJumpTableEntries() const;
+ virtual unsigned getMinimumJumpTableEntries() const;
/// Return lower limit of the density in a jump table.
unsigned getMinimumJumpTableDensity(bool OptForSize) const;
@@ -2429,7 +2429,7 @@ private:
PromoteToType;
  /// Stores the name of each libcall.
- const char *LibcallRoutineNames[RTLIB::UNKNOWN_LIBCALL];
+ const char *LibcallRoutineNames[RTLIB::UNKNOWN_LIBCALL + 1];
/// The ISD::CondCode that should be used to test the result of each of the
/// comparison libcall against zero.
@@ -2438,6 +2438,9 @@ private:
/// Stores the CallingConv that should be used for each libcall.
CallingConv::ID LibcallCallingConvs[RTLIB::UNKNOWN_LIBCALL];
+ /// Set default libcall names and calling conventions.
+ void InitLibcalls(const Triple &TT);
+
protected:
/// Return true if the extension represented by \p I is free.
/// \pre \p I is a sign, zero, or fp extension and
diff --git a/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index e9178e03fa8a..3cec58383f87 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -165,6 +165,29 @@ struct BaseAddress {
uint64_t SectionIndex;
};
+/// Represents a unit's contribution to the string offsets table.
+struct StrOffsetsContributionDescriptor {
+ uint64_t Base = 0;
+ uint64_t Size = 0;
+ /// Format and version.
+ DWARFFormParams FormParams = {0, 0, dwarf::DwarfFormat::DWARF32};
+
+ StrOffsetsContributionDescriptor(uint64_t Base, uint64_t Size,
+ uint8_t Version, dwarf::DwarfFormat Format)
+ : Base(Base), Size(Size), FormParams({Version, 0, Format}) {}
+
+ uint8_t getVersion() const { return FormParams.Version; }
+ dwarf::DwarfFormat getFormat() const { return FormParams.Format; }
+ uint8_t getDwarfOffsetByteSize() const {
+ return FormParams.getDwarfOffsetByteSize();
+ }
+ /// Determine whether a contribution to the string offsets table is
+ /// consistent with the relevant section size and that its length is
+ /// a multiple of the size of one of its entries.
+ Optional<StrOffsetsContributionDescriptor>
+ validateContributionSize(DWARFDataExtractor &DA);
+};
+
class DWARFUnit {
DWARFContext &Context;
/// Section containing this DWARFUnit.
@@ -176,7 +199,6 @@ class DWARFUnit {
const DWARFSection &LineSection;
StringRef StringSection;
const DWARFSection &StringOffsetSection;
- uint64_t StringOffsetSectionBase = 0;
const DWARFSection *AddrOffsetSection;
uint32_t AddrOffsetSectionBase = 0;
bool isLittleEndian;
@@ -185,6 +207,9 @@ class DWARFUnit {
// Version, address size, and DWARF format.
DWARFFormParams FormParams;
+ /// Start, length, and DWARF format of the unit's contribution to the string
+ /// offsets table (DWARF v5).
+ Optional<StrOffsetsContributionDescriptor> StringOffsetsTableContribution;
uint32_t Offset;
uint32_t Length;
@@ -195,10 +220,40 @@ class DWARFUnit {
/// The compile unit debug information entry items.
std::vector<DWARFDebugInfoEntry> DieArray;
- /// Map from range's start address to end address and corresponding DIE.
- /// IntervalMap does not support range removal, as a result, we use the
- /// std::map::upper_bound for address range lookup.
- std::map<uint64_t, std::pair<uint64_t, DWARFDie>> AddrDieMap;
+ /// The vector of inlined subroutine DIEs that we can map to directly from
+ /// their subprogram below.
+ std::vector<DWARFDie> InlinedSubroutineDIEs;
+
+ /// A type representing a subprogram DIE and a map (built using a sorted
+ /// vector) into that subprogram's inlined subroutine DIEs.
+ struct SubprogramDIEAddrInfo {
+ DWARFDie SubprogramDIE;
+
+ uint64_t SubprogramBasePC;
+
+ /// A vector sorted to allow mapping from a relative PC to the inlined
+ /// subroutine DIE with the most specific address range covering that PC.
+ ///
+ /// The PCs are relative to the `SubprogramBasePC`.
+ ///
+ /// The vector is sorted in ascending order of the first int which
+ /// represents the relative PC for an interval in the map. The second int
+ /// represents the index into the `InlinedSubroutineDIEs` vector of the DIE
+ /// that interval maps to. An index of '-1' indicates an empty mapping. The
+ /// interval covered is from the `.first` relative PC to the next entry's
+ /// `.first` relative PC.
+ std::vector<std::pair<uint32_t, int32_t>> InlinedSubroutineDIEAddrMap;
+ };
+
+ /// Vector of the subprogram DIEs and their subroutine address maps.
+ std::vector<SubprogramDIEAddrInfo> SubprogramDIEAddrInfos;
+
+ /// A vector sorted to allow mapping from a PC to the subprogram DIE (and
+ /// associated addr map) index. Subprograms with overlapping PC ranges aren't
+ /// supported here. Nothing will crash, but the mapping may be inaccurate.
+ /// This vector may also contain "empty" ranges marked by an address with
+ /// a DIE index of '-1'.
+ std::vector<std::pair<uint64_t, int64_t>> SubprogramDIEAddrMap;
using die_iterator_range =
iterator_range<std::vector<DWARFDebugInfoEntry>::iterator>;
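The sorted vectors above replace the removed AddrDieMap; a PC lookup becomes an upper_bound over (start address, index) pairs. A minimal sketch of that lookup pattern, assuming -1 marks an empty interval (illustrative, not the exact in-tree code):

    #include <algorithm>
    #include <cstdint>
    #include <iterator>
    #include <utility>
    #include <vector>

    // Sketch: return the DIE index whose interval covers PC, or -1.
    static int64_t lookupDIEIndex(
        const std::vector<std::pair<uint64_t, int64_t>> &Map, uint64_t PC) {
      auto It = std::upper_bound(
          Map.begin(), Map.end(), PC,
          [](uint64_t LHS, const std::pair<uint64_t, int64_t> &RHS) {
            return LHS < RHS.first;
          });
      if (It == Map.begin())
        return -1; // PC precedes every recorded interval.
      return std::prev(It)->second;
    }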
@@ -219,6 +274,21 @@ protected:
/// Size in bytes of the unit header.
virtual uint32_t getHeaderSize() const { return getVersion() <= 4 ? 11 : 12; }
+ /// Find the unit's contribution to the string offsets table and determine its
+ /// length and form. The given offset is expected to be derived from the unit
+ /// DIE's DW_AT_str_offsets_base attribute.
+ Optional<StrOffsetsContributionDescriptor>
+ determineStringOffsetsTableContribution(DWARFDataExtractor &DA,
+ uint64_t Offset);
+
+ /// Find the unit's contribution to the string offsets table and determine its
+ /// length and form. The given offset is expected to be 0 in a dwo file or,
+ /// in a dwp file, the start of the unit's contribution to the string offsets
+ /// table section (as determined by the index table).
+ Optional<StrOffsetsContributionDescriptor>
+ determineStringOffsetsTableContributionDWO(DWARFDataExtractor &DA,
+ uint64_t Offset);
+
public:
DWARFUnit(DWARFContext &Context, const DWARFSection &Section,
const DWARFDebugAbbrev *DA, const DWARFSection *RS, StringRef SS,
@@ -242,9 +312,6 @@ public:
AddrOffsetSectionBase = Base;
}
- /// Recursively update address to Die map.
- void updateAddressDieMap(DWARFDie Die);
-
void setRangesSection(const DWARFSection *RS, uint32_t Base) {
RangeSection = RS;
RangeSectionBase = Base;
@@ -272,6 +339,10 @@ public:
uint32_t getNextUnitOffset() const { return Offset + Length + 4; }
uint32_t getLength() const { return Length; }
+ const Optional<StrOffsetsContributionDescriptor> &
+ getStringOffsetsTableContribution() const {
+ return StringOffsetsTableContribution;
+ }
const DWARFFormParams &getFormParams() const { return FormParams; }
uint16_t getVersion() const { return FormParams.Version; }
dwarf::DwarfFormat getFormat() const { return FormParams.Format; }
@@ -281,6 +352,16 @@ public:
return FormParams.getDwarfOffsetByteSize();
}
+ uint8_t getDwarfStringOffsetsByteSize() const {
+ assert(StringOffsetsTableContribution);
+ return StringOffsetsTableContribution->getDwarfOffsetByteSize();
+ }
+
+ uint64_t getStringOffsetsBase() const {
+ assert(StringOffsetsTableContribution);
+ return StringOffsetsTableContribution->Base;
+ }
+
const DWARFAbbreviationDeclarationSet *getAbbreviations() const;
uint8_t getUnitType() const { return UnitType; }
@@ -426,6 +507,9 @@ private:
/// parseDWO - Parses .dwo file for current compile unit. Returns true if
/// it was actually constructed.
bool parseDWO();
+
+ void buildSubprogramDIEAddrMap();
+ void buildInlinedSubroutineDIEAddrMap(SubprogramDIEAddrInfo &SPInfo);
};
} // end namespace llvm
diff --git a/include/llvm/FuzzMutate/IRMutator.h b/include/llvm/FuzzMutate/IRMutator.h
index 65ab871db0ef..9aa9d6d6a4bc 100644
--- a/include/llvm/FuzzMutate/IRMutator.h
+++ b/include/llvm/FuzzMutate/IRMutator.h
@@ -16,6 +16,7 @@
#ifndef LLVM_FUZZMUTATE_IRMUTATOR_H
#define LLVM_FUZZMUTATE_IRMUTATOR_H
+#include "llvm/ADT/Optional.h"
#include "llvm/FuzzMutate/OpDescriptor.h"
#include "llvm/Support/ErrorHandling.h"
@@ -74,7 +75,8 @@ public:
class InjectorIRStrategy : public IRMutationStrategy {
std::vector<fuzzerop::OpDescriptor> Operations;
- fuzzerop::OpDescriptor chooseOperation(Value *Src, RandomIRBuilder &IB);
+ Optional<fuzzerop::OpDescriptor> chooseOperation(Value *Src,
+ RandomIRBuilder &IB);
public:
InjectorIRStrategy(std::vector<fuzzerop::OpDescriptor> &&Operations)
diff --git a/include/llvm/IR/Function.h b/include/llvm/IR/Function.h
index e811ae5e215a..79c56abe1c37 100644
--- a/include/llvm/IR/Function.h
+++ b/include/llvm/IR/Function.h
@@ -248,6 +248,12 @@ public:
/// pgo data.
Optional<uint64_t> getEntryCount() const;
+ /// Return true if the function is annotated with profile data.
+ ///
+ /// Presence of entry counts from a profile run implies the function has
+ /// profile annotations.
+ bool hasProfileData() const { return getEntryCount().hasValue(); }
+
/// Returns the set of GUIDs that needs to be imported to the function for
/// sample PGO, to enable the same inlines as the profiled optimized binary.
DenseSet<GlobalValue::GUID> getImportGUIDs() const;
diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td
index 07de0568cab0..a2a1f26292ce 100644
--- a/include/llvm/IR/Intrinsics.td
+++ b/include/llvm/IR/Intrinsics.td
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
include "llvm/CodeGen/ValueTypes.td"
+include "llvm/CodeGen/SDNodeProperties.td"
//===----------------------------------------------------------------------===//
// Properties we keep track of for intrinsics.
@@ -264,16 +265,17 @@ def llvm_vararg_ty : LLVMType<isVoid>; // this means vararg here
// intrinsic.
// * Properties can be set to describe the behavior of the intrinsic.
//
-class SDPatternOperator;
class Intrinsic<list<LLVMType> ret_types,
list<LLVMType> param_types = [],
- list<IntrinsicProperty> properties = [],
- string name = ""> : SDPatternOperator {
+ list<IntrinsicProperty> intr_properties = [],
+ string name = "",
+ list<SDNodeProperty> sd_properties = []> : SDPatternOperator {
string LLVMName = name;
string TargetPrefix = ""; // Set to a prefix for target-specific intrinsics.
list<LLVMType> RetTypes = ret_types;
list<LLVMType> ParamTypes = param_types;
- list<IntrinsicProperty> IntrProperties = properties;
+ list<IntrinsicProperty> IntrProperties = intr_properties;
+ let Properties = sd_properties;
bit isTarget = 0;
}
diff --git a/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h b/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
index 14f0c48266f0..d794535700e5 100644
--- a/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
+++ b/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
@@ -148,10 +148,14 @@ public:
/// incremental build.
void setCacheDir(std::string Path) { CacheOptions.Path = std::move(Path); }
- /// Cache policy: interval (seconds) between two prune of the cache. Set to a
- /// negative value (default) to disable pruning. A value of 0 will be ignored.
+ /// Cache policy: interval (seconds) between two prunes of the cache. Set to a
+ /// negative value to disable pruning. A value of 0 will be ignored.
void setCachePruningInterval(int Interval) {
- if (Interval)
+ if (Interval == 0)
+ return;
+ if (Interval < 0)
+ CacheOptions.Policy.Interval.reset();
+ else
CacheOptions.Policy.Interval = std::chrono::seconds(Interval);
}
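The new control flow makes the three cases explicit. A usage sketch matching the semantics above:

    ThinLTOCodeGenerator CodeGen;
    CodeGen.setCachePruningInterval(1200); // prune at most every 20 minutes
    CodeGen.setCachePruningInterval(0);    // ignored; previous setting kept
    CodeGen.setCachePruningInterval(-1);   // Interval.reset(): pruning disabled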
diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h
index 234762f36dd4..c538c46fc072 100644
--- a/include/llvm/MC/MCAsmInfo.h
+++ b/include/llvm/MC/MCAsmInfo.h
@@ -165,7 +165,8 @@ protected:
const char *ZeroDirective;
/// This directive allows emission of an ascii string with the standard C
- /// escape characters embedded into it. Defaults to "\t.ascii\t"
+ /// escape characters embedded into it. If a target doesn't support this, it
+ /// can be set to null. Defaults to "\t.ascii\t"
const char *AsciiDirective;
/// If not null, this allows for special handling of zero terminated strings
diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h
index 481d96724d40..a82051700708 100644
--- a/include/llvm/MC/MCStreamer.h
+++ b/include/llvm/MC/MCStreamer.h
@@ -95,6 +95,17 @@ public:
virtual void prettyPrintAsm(MCInstPrinter &InstPrinter, raw_ostream &OS,
const MCInst &Inst, const MCSubtargetInfo &STI);
+ virtual void emitDwarfFileDirective(StringRef Directive);
+
+ /// Update streamer for a new active section.
+ ///
+ /// This is called by PopSection and SwitchSection, if the current
+ /// section changes.
+ virtual void changeSection(const MCSection *CurSection, MCSection *Section,
+ const MCExpr *SubSection, raw_ostream &OS);
+
+ virtual void emitValue(const MCExpr *Value);
+
virtual void finish();
};
diff --git a/include/llvm/Object/Wasm.h b/include/llvm/Object/Wasm.h
index 5bb1a3fca3d1..71951d83f3cc 100644
--- a/include/llvm/Object/Wasm.h
+++ b/include/llvm/Object/Wasm.h
@@ -43,9 +43,9 @@ public:
};
WasmSymbol(StringRef Name, SymbolType Type, uint32_t Section,
- uint32_t ElementIndex, uint32_t ImportIndex = 0)
+ uint32_t ElementIndex, uint32_t FunctionType = 0)
: Name(Name), Type(Type), Section(Section), ElementIndex(ElementIndex),
- ImportIndex(ImportIndex) {}
+ FunctionType(FunctionType) {}
StringRef Name;
SymbolType Type;
@@ -55,8 +55,18 @@ public:
// Index into either the function or global index space.
uint32_t ElementIndex;
- // For imports, the index into the import table
- uint32_t ImportIndex;
+ // For functions, the type index
+ uint32_t FunctionType;
+
+ // Symbols can be both exported and imported (in the case of a weakly
+ // defined symbol). In this case the import index is stored as AltIndex.
+ uint32_t AltIndex = 0;
+ bool HasAltIndex = false;
+
+ void setAltIndex(uint32_t Index) {
+ HasAltIndex = true;
+ AltIndex = Index;
+ }
bool isFunction() const {
return Type == WasmSymbol::SymbolType::FUNCTION_IMPORT ||
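A hedged sketch of the intended use: record the export side in ElementIndex, and stash the import index via setAltIndex when the weakly defined symbol is also imported (ExportIndex, ImportIndex, and AlsoImported are illustrative placeholders):

    WasmSymbol Sym("foo", WasmSymbol::SymbolType::FUNCTION_EXPORT,
                   /*Section=*/0, /*ElementIndex=*/ExportIndex);
    if (AlsoImported)
      Sym.setAltIndex(ImportIndex); // sets HasAltIndex and AltIndex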
@@ -91,8 +101,7 @@ public:
void print(raw_ostream &Out) const {
Out << "Name=" << Name << ", Type=" << static_cast<int>(Type)
- << ", Flags=" << Flags << " ElemIndex=" << ElementIndex
- << ", ImportIndex=" << ImportIndex;
+ << ", Flags=" << Flags << " ElemIndex=" << ElementIndex;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/include/llvm/Support/CachePruning.h b/include/llvm/Support/CachePruning.h
index c577e9b8b631..327c7df4570f 100644
--- a/include/llvm/Support/CachePruning.h
+++ b/include/llvm/Support/CachePruning.h
@@ -27,8 +27,9 @@ template <typename T> class Expected;
struct CachePruningPolicy {
/// The pruning interval. This is intended to be used to avoid scanning the
/// directory too often. It does not impact the decision of which file to
- /// prune. A value of 0 forces the scan to occur.
- std::chrono::seconds Interval = std::chrono::seconds(1200);
+ /// prune. A value of 0 forces the scan to occur. A value of None disables
+ /// pruning.
+ llvm::Optional<std::chrono::seconds> Interval = std::chrono::seconds(1200);
/// The expiration for a file. When a file hasn't been accessed for Expiration
/// seconds, it is removed from the cache. A value of 0 disables the
diff --git a/include/llvm/Support/MemoryBuffer.h b/include/llvm/Support/MemoryBuffer.h
index 59c93f15d7b8..7b849fdb8670 100644
--- a/include/llvm/Support/MemoryBuffer.h
+++ b/include/llvm/Support/MemoryBuffer.h
@@ -15,6 +15,7 @@
#define LLVM_SUPPORT_MEMORYBUFFER_H
#include "llvm-c/Types.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/CBindingWrapping.h"
@@ -47,6 +48,9 @@ protected:
void init(const char *BufStart, const char *BufEnd,
bool RequiresNullTerminator);
+
+ static constexpr bool Writable = false;
+
public:
MemoryBuffer(const MemoryBuffer &) = delete;
MemoryBuffer &operator=(const MemoryBuffer &) = delete;
@@ -119,12 +123,6 @@ public:
static std::unique_ptr<MemoryBuffer>
getNewMemBuffer(size_t Size, StringRef BufferName = "");
- /// Allocate a new MemoryBuffer of the specified size that is not initialized.
- /// Note that the caller should initialize the memory allocated by this
- /// method. The memory is owned by the MemoryBuffer object.
- static std::unique_ptr<MemoryBuffer>
- getNewUninitMemBuffer(size_t Size, const Twine &BufferName = "");
-
/// Read all of stdin into a file buffer, and return it.
static ErrorOr<std::unique_ptr<MemoryBuffer>> getSTDIN();
@@ -156,6 +154,62 @@ public:
MemoryBufferRef getMemBufferRef() const;
};
+/// This class is an extension of MemoryBuffer that allows writing to the
+/// underlying contents. It only supports creation methods that are guaranteed
+/// to produce a writable buffer. For example, mapping a file read-only is not
+/// supported.
+class WritableMemoryBuffer : public MemoryBuffer {
+protected:
+ WritableMemoryBuffer() = default;
+
+ static constexpr bool Writable = true;
+
+public:
+ using MemoryBuffer::getBuffer;
+ using MemoryBuffer::getBufferEnd;
+ using MemoryBuffer::getBufferStart;
+
+ // const_cast is well-defined here, because the underlying buffer is
+ // guaranteed to have been initialized with a mutable buffer.
+ char *getBufferStart() {
+ return const_cast<char *>(MemoryBuffer::getBufferStart());
+ }
+ char *getBufferEnd() {
+ return const_cast<char *>(MemoryBuffer::getBufferEnd());
+ }
+ MutableArrayRef<char> getBuffer() {
+ return {getBufferStart(), getBufferEnd()};
+ }
+
+ static ErrorOr<std::unique_ptr<WritableMemoryBuffer>>
+ getFile(const Twine &Filename, int64_t FileSize = -1,
+ bool IsVolatile = false);
+
+ /// Map a subrange of the specified file as a WritableMemoryBuffer.
+ static ErrorOr<std::unique_ptr<WritableMemoryBuffer>>
+ getFileSlice(const Twine &Filename, uint64_t MapSize, uint64_t Offset,
+ bool IsVolatile = false);
+
+ /// Allocate a new MemoryBuffer of the specified size that is not initialized.
+ /// Note that the caller should initialize the memory allocated by this
+ /// method. The memory is owned by the MemoryBuffer object.
+ static std::unique_ptr<WritableMemoryBuffer>
+ getNewUninitMemBuffer(size_t Size, const Twine &BufferName = "");
+
+private:
+ // Hide these base class factory functions so one can't write
+ // WritableMemoryBuffer::getXXX()
+ // and be surprised to get a read-only buffer.
+ using MemoryBuffer::getFileAsStream;
+ using MemoryBuffer::getFileOrSTDIN;
+ using MemoryBuffer::getMemBuffer;
+ using MemoryBuffer::getMemBufferCopy;
+ using MemoryBuffer::getNewMemBuffer;
+ using MemoryBuffer::getOpenFile;
+ using MemoryBuffer::getOpenFileSlice;
+ using MemoryBuffer::getSTDIN;
+};
+
class MemoryBufferRef {
StringRef Buffer;
StringRef Identifier;
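A short usage sketch for the new class: allocate an uninitialized writable buffer, fill it through the mutable accessors, then hand it off as a read-only MemoryBuffer (error handling elided):

    #include <algorithm>

    std::unique_ptr<WritableMemoryBuffer> Buf =
        WritableMemoryBuffer::getNewUninitMemBuffer(4096, "scratch");
    std::fill(Buf->getBufferStart(), Buf->getBufferEnd(), '\0');
    // Upcast: from here on, only the read-only interface is visible.
    std::unique_ptr<MemoryBuffer> RO = std::move(Buf);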
diff --git a/include/llvm/Support/YAMLTraits.h b/include/llvm/Support/YAMLTraits.h
index 83b097a199d6..674c78a11695 100644
--- a/include/llvm/Support/YAMLTraits.h
+++ b/include/llvm/Support/YAMLTraits.h
@@ -549,9 +549,9 @@ inline QuotingType needsQuotes(StringRef S) {
// range.
if (C <= 0x1F)
return QuotingType::Double;
- // C1 control block (0x80 - 0x9F) is excluded from the allowed character
- // range.
- if (C >= 0x80 && C <= 0x9F)
+
+ // Always double quote UTF-8.
+ if ((C & 0x80) != 0)
return QuotingType::Double;
// The character is not safe, at least simple quoting needed.
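The new check double-quotes any string containing a byte with the high bit set, i.e. any UTF-8 multi-byte sequence, where previously only the C1 control block (0x80-0x9F) triggered it. A hedged example:

    // "naïve" contains the bytes 0xC3 0xAF, so it is now double-quoted.
    yaml::QuotingType QT = yaml::needsQuotes("na\xC3\xAFve");
    assert(QT == yaml::QuotingType::Double);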
@@ -1725,7 +1725,7 @@ template <typename T> struct StdMapStringCustomMappingTraitsImpl {
template <> struct ScalarTraits<Type> { \
static void output(const Type &Value, void *ctx, raw_ostream &Out); \
static StringRef input(StringRef Scalar, void *ctxt, Type &Value); \
- static QuotingType mustQuote(StringRef) { return MustQuote; } \
+ static QuotingType mustQuote(StringRef) { return MustQuote; } \
}; \
} \
}
diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h
index 5421b22462ae..97442f9a7849 100644
--- a/include/llvm/Target/TargetMachine.h
+++ b/include/llvm/Target/TargetMachine.h
@@ -24,6 +24,7 @@
namespace llvm {
+class Function;
class GlobalValue;
class MachineModuleInfo;
class Mangler;
@@ -38,6 +39,7 @@ class PassManagerBuilder;
class Target;
class TargetIntrinsicInfo;
class TargetIRAnalysis;
+class TargetTransformInfo;
class TargetLoweringObjectFile;
class TargetPassConfig;
class TargetSubtargetInfo;
@@ -204,7 +206,13 @@ public:
/// This is used to construct the new pass manager's target IR analysis pass,
/// set up appropriately for this target machine. Even the old pass manager
/// uses this to answer queries about the IR.
- virtual TargetIRAnalysis getTargetIRAnalysis();
+ TargetIRAnalysis getTargetIRAnalysis();
+
+ /// \brief Return a TargetTransformInfo for a given function.
+ ///
+ /// The returned TargetTransformInfo is specialized to the subtarget
+ /// corresponding to \p F.
+ virtual TargetTransformInfo getTargetTransformInfo(const Function &F);
/// Allow the target to modify the pass manager, e.g. by calling
/// PassManagerBuilder::addExtension.
@@ -280,11 +288,11 @@ protected: // Can only create subclasses.
void initAsmInfo();
public:
- /// \brief Get a TargetIRAnalysis implementation for the target.
+ /// \brief Get a TargetTransformInfo implementation for the target.
///
- /// This analysis will produce a TTI result which uses the common code
- /// generator to answer queries about the IR.
- TargetIRAnalysis getTargetIRAnalysis() override;
+ /// The TTI returned uses the common code generator to answer queries about
+ /// the IR.
+ TargetTransformInfo getTargetTransformInfo(const Function &F) override;
/// Create a pass configuration object to be used by addPassToEmitX methods
/// for generating a pipeline of CodeGen passes.
diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td
index 06caa21d288c..f6162377b8b7 100644
--- a/include/llvm/Target/TargetSelectionDAG.td
+++ b/include/llvm/Target/TargetSelectionDAG.td
@@ -286,32 +286,6 @@ class SDCallSeqEnd<list<SDTypeConstraint> constraints> :
SDTypeProfile<0, 2, constraints>;
//===----------------------------------------------------------------------===//
-// Selection DAG Node Properties.
-//
-// Note: These are hard coded into tblgen.
-//
-class SDNodeProperty;
-def SDNPCommutative : SDNodeProperty; // X op Y == Y op X
-def SDNPAssociative : SDNodeProperty; // (X op Y) op Z == X op (Y op Z)
-def SDNPHasChain : SDNodeProperty; // R/W chain operand and result
-def SDNPOutGlue : SDNodeProperty; // Write a flag result
-def SDNPInGlue : SDNodeProperty; // Read a flag operand
-def SDNPOptInGlue : SDNodeProperty; // Optionally read a flag operand
-def SDNPMayStore : SDNodeProperty; // May write to memory, sets 'mayStore'.
-def SDNPMayLoad : SDNodeProperty; // May read memory, sets 'mayLoad'.
-def SDNPSideEffect : SDNodeProperty; // Sets 'HasUnmodelledSideEffects'.
-def SDNPMemOperand : SDNodeProperty; // Touches memory, has assoc MemOperand
-def SDNPVariadic : SDNodeProperty; // Node has variable arguments.
-def SDNPWantRoot : SDNodeProperty; // ComplexPattern gets the root of match
-def SDNPWantParent : SDNodeProperty; // ComplexPattern gets the parent
-
-//===----------------------------------------------------------------------===//
-// Selection DAG Pattern Operations
-class SDPatternOperator {
- list<SDNodeProperty> Properties = [];
-}
-
-//===----------------------------------------------------------------------===//
// Selection DAG Node definitions.
//
class SDNode<string opcode, SDTypeProfile typeprof,
diff --git a/include/llvm/Transforms/Instrumentation.h b/include/llvm/Transforms/Instrumentation.h
index cd6b770f76ac..b1e13f17aef1 100644
--- a/include/llvm/Transforms/Instrumentation.h
+++ b/include/llvm/Transforms/Instrumentation.h
@@ -133,7 +133,7 @@ ModulePass *createAddressSanitizerModulePass(bool CompileKernel = false,
FunctionPass *createMemorySanitizerPass(int TrackOrigins = 0,
bool Recover = false);
-FunctionPass *createHWAddressSanitizerPass();
+FunctionPass *createHWAddressSanitizerPass(bool Recover = false);
// Insert ThreadSanitizer (race detection) instrumentation
FunctionPass *createThreadSanitizerPass();
diff --git a/include/llvm/Transforms/Utils/CallPromotionUtils.h b/include/llvm/Transforms/Utils/CallPromotionUtils.h
index e0bf85781d81..6e8ece723638 100644
--- a/include/llvm/Transforms/Utils/CallPromotionUtils.h
+++ b/include/llvm/Transforms/Utils/CallPromotionUtils.h
@@ -29,13 +29,23 @@ namespace llvm {
bool isLegalToPromote(CallSite CS, Function *Callee,
const char **FailureReason = nullptr);
+/// Promote the given indirect call site to unconditionally call \p Callee.
+///
+/// This function promotes the given call site, returning the direct call or
+/// invoke instruction. If the function type of the call site doesn't match that
+/// of the callee, bitcast instructions are inserted where appropriate. If \p
+/// RetBitCast is non-null, it will be used to store the return value bitcast,
+/// if created.
+Instruction *promoteCall(CallSite CS, Function *Callee,
+ CastInst **RetBitCast = nullptr);
+
/// Promote the given indirect call site to conditionally call \p Callee.
///
/// This function creates an if-then-else structure at the location of the call
-/// site. The original call site is promoted and moved into the "then" block. A
-/// clone of the indirect call site is placed in the "else" block and returned.
-/// If \p BranchWeights is non-null, it will be used to set !prof metadata on
-/// the new conditional branch.
+/// site. The original call site is moved into the "else" block. A clone of the
+/// indirect call site is promoted, placed in the "then" block, and returned. If
+/// \p BranchWeights is non-null, it will be used to set !prof metadata on the
+/// new conditional branch.
Instruction *promoteCallWithIfThenElse(CallSite CS, Function *Callee,
MDNode *BranchWeights = nullptr);
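A hedged call-site sketch combining the two entry points documented above (CS and Callee assumed in scope; no profile weights supplied):

    const char *Reason = nullptr;
    if (isLegalToPromote(CS, Callee, &Reason)) {
      // Guarded promotion: the clone in the "then" block becomes a direct
      // call to Callee; the original indirect call stays in the "else" block.
      Instruction *DirectCall = promoteCallWithIfThenElse(CS, Callee);
      (void)DirectCall;
    } else {
      errs() << "cannot promote: " << Reason << "\n";
    }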
diff --git a/include/llvm/module.modulemap b/include/llvm/module.modulemap
index 382942be64a1..d8b07c4f54da 100644
--- a/include/llvm/module.modulemap
+++ b/include/llvm/module.modulemap
@@ -61,7 +61,7 @@ module LLVM_BinaryFormat {
textual header "BinaryFormat/ELFRelocs/SystemZ.def"
textual header "BinaryFormat/ELFRelocs/x86_64.def"
textual header "BinaryFormat/ELFRelocs/WebAssembly.def"
- textual header "BinaryFormat/WasmRelocs/WebAssembly.def"
+ textual header "BinaryFormat/WasmRelocs.def"
}
module LLVM_Config { requires cplusplus umbrella "Config" module * { export * } }
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index dd2db1e5b27b..55df66714178 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -133,9 +133,9 @@ ModRefInfo AAResults::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {
}
ModRefInfo AAResults::getModRefInfo(Instruction *I, ImmutableCallSite Call) {
- // We may have two calls
+ // We may have two calls.
if (auto CS = ImmutableCallSite(I)) {
- // Check if the two calls modify the same memory
+ // Check if the two calls modify the same memory.
return getModRefInfo(CS, Call);
} else if (I->isFenceLike()) {
// If this is a fence, just return ModRef.
@@ -179,6 +179,7 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS,
if (onlyAccessesArgPointees(MRB) || onlyAccessesInaccessibleOrArgMem(MRB)) {
bool DoesAlias = false;
+ bool IsMustAlias = true;
ModRefInfo AllArgsMask = ModRefInfo::NoModRef;
if (doesAccessArgPointees(MRB)) {
for (auto AI = CS.arg_begin(), AE = CS.arg_end(); AI != AE; ++AI) {
@@ -193,6 +194,8 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS,
DoesAlias = true;
AllArgsMask = unionModRef(AllArgsMask, ArgMask);
}
+ // Conservatively clear IsMustAlias unless only MustAlias is found.
+ IsMustAlias &= (ArgAlias == MustAlias);
}
}
// Return NoModRef if no alias found with any argument.
@@ -200,6 +203,8 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS,
return ModRefInfo::NoModRef;
// Logical & between other AA analyses and argument analysis.
Result = intersectModRef(Result, AllArgsMask);
+ // If only MustAlias found above, set Must bit.
+ Result = IsMustAlias ? setMust(Result) : clearMust(Result);
}
// If Loc is a constant memory location, the call definitely could not
@@ -251,6 +256,7 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1,
if (onlyAccessesArgPointees(CS2B)) {
ModRefInfo R = ModRefInfo::NoModRef;
if (doesAccessArgPointees(CS2B)) {
+ bool IsMustAlias = true;
for (auto I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) {
const Value *Arg = *I;
if (!Arg->getType()->isPointerTy())
@@ -274,10 +280,19 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1,
ModRefInfo ModRefCS1 = getModRefInfo(CS1, CS2ArgLoc);
ArgMask = intersectModRef(ArgMask, ModRefCS1);
+ // Conservatively clear IsMustAlias unless only MustAlias is found.
+ IsMustAlias &= isMustSet(ModRefCS1);
+
R = intersectModRef(unionModRef(R, ArgMask), Result);
- if (R == Result)
+ if (R == Result) {
+ // On early exit, not all args were checked; cannot set Must.
+ if (I + 1 != E)
+ IsMustAlias = false;
break;
+ }
}
+ // If Alias found and only MustAlias found above, set Must bit.
+ R = IsMustAlias ? setMust(R) : clearMust(R);
}
return R;
}
@@ -287,6 +302,7 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1,
if (onlyAccessesArgPointees(CS1B)) {
ModRefInfo R = ModRefInfo::NoModRef;
if (doesAccessArgPointees(CS1B)) {
+ bool IsMustAlias = true;
for (auto I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) {
const Value *Arg = *I;
if (!Arg->getType()->isPointerTy())
@@ -303,9 +319,18 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1,
(isRefSet(ArgModRefCS1) && isModSet(ModRefCS2)))
R = intersectModRef(unionModRef(R, ArgModRefCS1), Result);
- if (R == Result)
+ // Conservatively clear IsMustAlias unless only MustAlias is found.
+ IsMustAlias &= isMustSet(ModRefCS2);
+
+ if (R == Result) {
+ // On early exit, not all args were checked; cannot set Must.
+ if (I + 1 != E)
+ IsMustAlias = false;
break;
+ }
}
+ // If Alias found and only MustAlias found above, set Must bit.
+ R = IsMustAlias ? setMust(R) : clearMust(R);
}
return R;
}
@@ -353,9 +378,13 @@ ModRefInfo AAResults::getModRefInfo(const LoadInst *L,
// If the load address doesn't alias the given address, it doesn't read
// or write the specified memory.
- if (Loc.Ptr && !alias(MemoryLocation::get(L), Loc))
- return ModRefInfo::NoModRef;
-
+ if (Loc.Ptr) {
+ AliasResult AR = alias(MemoryLocation::get(L), Loc);
+ if (AR == NoAlias)
+ return ModRefInfo::NoModRef;
+ if (AR == MustAlias)
+ return ModRefInfo::MustRef;
+ }
// Otherwise, a load just reads.
return ModRefInfo::Ref;
}
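The Must variants let callers distinguish "may touch" from "provably touches the same location". A hedged sketch of consuming the strengthened result (isMustSet and isRefSet are the helpers this patch uses elsewhere):

    ModRefInfo MRI = AA.getModRefInfo(L, Loc); // L is the LoadInst above
    if (isMustSet(MRI)) {
      // MustRef: the load provably reads exactly Loc.
    } else if (isRefSet(MRI)) {
      // Ref: the load may read Loc, but aliasing was not proven exact.
    }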
@@ -367,15 +396,20 @@ ModRefInfo AAResults::getModRefInfo(const StoreInst *S,
return ModRefInfo::ModRef;
if (Loc.Ptr) {
+ AliasResult AR = alias(MemoryLocation::get(S), Loc);
// If the store address cannot alias the pointer in question, then the
// specified memory cannot be modified by the store.
- if (!alias(MemoryLocation::get(S), Loc))
+ if (AR == NoAlias)
return ModRefInfo::NoModRef;
// If the pointer is a pointer to constant memory, then it could not have
// been modified by this store.
if (pointsToConstantMemory(Loc))
return ModRefInfo::NoModRef;
+
+ // If the store address aliases the pointer as must alias, set Must.
+ if (AR == MustAlias)
+ return ModRefInfo::MustMod;
}
// Otherwise, a store just writes.
@@ -393,15 +427,20 @@ ModRefInfo AAResults::getModRefInfo(const FenceInst *S, const MemoryLocation &Lo
ModRefInfo AAResults::getModRefInfo(const VAArgInst *V,
const MemoryLocation &Loc) {
if (Loc.Ptr) {
+ AliasResult AR = alias(MemoryLocation::get(V), Loc);
// If the va_arg address cannot alias the pointer in question, then the
// specified memory cannot be accessed by the va_arg.
- if (!alias(MemoryLocation::get(V), Loc))
+ if (AR == NoAlias)
return ModRefInfo::NoModRef;
// If the pointer is a pointer to constant memory, then it could not have
// been modified by this va_arg.
if (pointsToConstantMemory(Loc))
return ModRefInfo::NoModRef;
+
+ // If the va_arg aliases the pointer as must alias, set Must.
+ if (AR == MustAlias)
+ return ModRefInfo::MustModRef;
}
// Otherwise, a va_arg reads and writes.
@@ -440,9 +479,17 @@ ModRefInfo AAResults::getModRefInfo(const AtomicCmpXchgInst *CX,
if (isStrongerThanMonotonic(CX->getSuccessOrdering()))
return ModRefInfo::ModRef;
- // If the cmpxchg address does not alias the location, it does not access it.
- if (Loc.Ptr && !alias(MemoryLocation::get(CX), Loc))
- return ModRefInfo::NoModRef;
+ if (Loc.Ptr) {
+ AliasResult AR = alias(MemoryLocation::get(CX), Loc);
+ // If the cmpxchg address does not alias the location, it does not access
+ // it.
+ if (AR == NoAlias)
+ return ModRefInfo::NoModRef;
+
+ // If the cmpxchg address aliases the pointer as must alias, set Must.
+ if (AR == MustAlias)
+ return ModRefInfo::MustModRef;
+ }
return ModRefInfo::ModRef;
}
@@ -453,9 +500,17 @@ ModRefInfo AAResults::getModRefInfo(const AtomicRMWInst *RMW,
if (isStrongerThanMonotonic(RMW->getOrdering()))
return ModRefInfo::ModRef;
- // If the atomicrmw address does not alias the location, it does not access it.
- if (Loc.Ptr && !alias(MemoryLocation::get(RMW), Loc))
- return ModRefInfo::NoModRef;
+ if (Loc.Ptr) {
+ AliasResult AR = alias(MemoryLocation::get(RMW), Loc);
+ // If the atomicrmw address does not alias the location, it does not access
+ // it.
+ if (AR == NoAlias)
+ return ModRefInfo::NoModRef;
+
+ // If the atomicrmw address aliases the pointer as must alias, set Must.
+ if (AR == MustAlias)
+ return ModRefInfo::MustModRef;
+ }
return ModRefInfo::ModRef;
}
@@ -493,6 +548,8 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I,
unsigned ArgNo = 0;
ModRefInfo R = ModRefInfo::NoModRef;
+ bool MustAlias = true;
+ // Set the flag only if no May alias is found and all operands are processed.
for (auto CI = CS.data_operands_begin(), CE = CS.data_operands_end();
CI != CE; ++CI, ++ArgNo) {
// Only look at the no-capture or byval pointer arguments. If this
@@ -503,11 +560,14 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I,
ArgNo < CS.getNumArgOperands() && !CS.isByValArgument(ArgNo)))
continue;
+ AliasResult AR = alias(MemoryLocation(*CI), MemoryLocation(Object));
// If this is a no-capture pointer argument, see if we can tell that it
// is impossible to alias the pointer we're checking. If not, we have to
// assume that the call could touch the pointer, even though it doesn't
// escape.
- if (isNoAlias(MemoryLocation(*CI), MemoryLocation(Object)))
+ if (AR != MustAlias)
+ MustAlias = false;
+ if (AR == NoAlias)
continue;
if (CS.doesNotAccessMemory(ArgNo))
continue;
@@ -515,9 +575,10 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I,
R = ModRefInfo::Ref;
continue;
}
+ // Not returning MustModRef since we have not seen all the arguments.
return ModRefInfo::ModRef;
}
- return R;
+ return MustAlias ? setMust(R) : clearMust(R);
}
/// canBasicBlockModify - Return true if it is possible for execution of the
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index 423acf739f58..f737cecc43d1 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -31,9 +31,13 @@ static cl::opt<bool> PrintPartialAlias("print-partial-aliases", cl::ReallyHidden
static cl::opt<bool> PrintMustAlias("print-must-aliases", cl::ReallyHidden);
static cl::opt<bool> PrintNoModRef("print-no-modref", cl::ReallyHidden);
-static cl::opt<bool> PrintMod("print-mod", cl::ReallyHidden);
static cl::opt<bool> PrintRef("print-ref", cl::ReallyHidden);
+static cl::opt<bool> PrintMod("print-mod", cl::ReallyHidden);
static cl::opt<bool> PrintModRef("print-modref", cl::ReallyHidden);
+static cl::opt<bool> PrintMust("print-must", cl::ReallyHidden);
+static cl::opt<bool> PrintMustRef("print-mustref", cl::ReallyHidden);
+static cl::opt<bool> PrintMustMod("print-mustmod", cl::ReallyHidden);
+static cl::opt<bool> PrintMustModRef("print-mustmodref", cl::ReallyHidden);
static cl::opt<bool> EvalAAMD("evaluate-aa-metadata", cl::ReallyHidden);
@@ -262,6 +266,25 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) {
F.getParent());
++ModRefCount;
break;
+ case ModRefInfo::Must:
+ PrintModRefResults("Must", PrintMust, I, Pointer, F.getParent());
+ ++MustCount;
+ break;
+ case ModRefInfo::MustMod:
+ PrintModRefResults("Just Mod (MustAlias)", PrintMustMod, I, Pointer,
+ F.getParent());
+ ++MustModCount;
+ break;
+ case ModRefInfo::MustRef:
+ PrintModRefResults("Just Ref (MustAlias)", PrintMustRef, I, Pointer,
+ F.getParent());
+ ++MustRefCount;
+ break;
+ case ModRefInfo::MustModRef:
+ PrintModRefResults("Both ModRef (MustAlias)", PrintMustModRef, I,
+ Pointer, F.getParent());
+ ++MustModRefCount;
+ break;
}
}
}
@@ -288,6 +311,25 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) {
PrintModRefResults("Both ModRef", PrintModRef, *C, *D, F.getParent());
++ModRefCount;
break;
+ case ModRefInfo::Must:
+ PrintModRefResults("Must", PrintMust, *C, *D, F.getParent());
+ ++MustCount;
+ break;
+ case ModRefInfo::MustMod:
+ PrintModRefResults("Just Mod (MustAlias)", PrintMustMod, *C, *D,
+ F.getParent());
+ ++MustModCount;
+ break;
+ case ModRefInfo::MustRef:
+ PrintModRefResults("Just Ref (MustAlias)", PrintMustRef, *C, *D,
+ F.getParent());
+ ++MustRefCount;
+ break;
+ case ModRefInfo::MustModRef:
+ PrintModRefResults("Both ModRef (MustAlias)", PrintMustModRef, *C, *D,
+ F.getParent());
+ ++MustModRefCount;
+ break;
}
}
}
@@ -325,7 +367,8 @@ AAEvaluator::~AAEvaluator() {
}
// Display the summary for mod/ref analysis
- int64_t ModRefSum = NoModRefCount + ModCount + RefCount + ModRefCount;
+ int64_t ModRefSum = NoModRefCount + RefCount + ModCount + ModRefCount +
+ MustCount + MustRefCount + MustModCount + MustModRefCount;
if (ModRefSum == 0) {
errs() << " Alias Analysis Mod/Ref Evaluator Summary: no "
"mod/ref!\n";
@@ -339,10 +382,22 @@ AAEvaluator::~AAEvaluator() {
PrintPercent(RefCount, ModRefSum);
errs() << " " << ModRefCount << " mod & ref responses ";
PrintPercent(ModRefCount, ModRefSum);
+ errs() << " " << MustCount << " must responses ";
+ PrintPercent(MustCount, ModRefSum);
+ errs() << " " << MustModCount << " must mod responses ";
+ PrintPercent(MustModCount, ModRefSum);
+ errs() << " " << MustRefCount << " must ref responses ";
+ PrintPercent(MustRefCount, ModRefSum);
+ errs() << " " << MustModRefCount << " must mod & ref responses ";
+ PrintPercent(MustModRefCount, ModRefSum);
errs() << " Alias Analysis Evaluator Mod/Ref Summary: "
<< NoModRefCount * 100 / ModRefSum << "%/"
<< ModCount * 100 / ModRefSum << "%/" << RefCount * 100 / ModRefSum
- << "%/" << ModRefCount * 100 / ModRefSum << "%\n";
+ << "%/" << ModRefCount * 100 / ModRefSum << "%/"
+ << MustCount * 100 / ModRefSum << "%/"
+ << MustRefCount * 100 / ModRefSum << "%/"
+ << MustModCount * 100 / ModRefSum << "%/"
+ << MustModRefCount * 100 / ModRefSum << "%\n";
}
}
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 81b9f842249e..537813b6b752 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -781,6 +781,7 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
// Optimistically assume that call doesn't touch Object and check this
// assumption in the following loop.
ModRefInfo Result = ModRefInfo::NoModRef;
+ bool IsMustAlias = true;
unsigned OperandNo = 0;
for (auto CI = CS.data_operands_begin(), CE = CS.data_operands_end();
@@ -802,7 +803,8 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
// is impossible to alias the pointer we're checking.
AliasResult AR =
getBestAAResults().alias(MemoryLocation(*CI), MemoryLocation(Object));
-
+ if (AR != MustAlias)
+ IsMustAlias = false;
// Operand doesn't alias 'Object'; continue looking for other aliases
if (AR == NoAlias)
continue;
@@ -818,13 +820,20 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
continue;
}
// This operand aliases 'Object' and call reads and writes into it.
+ // Setting ModRef will not yield an early return below; MustAlias is not
+ // used further.
Result = ModRefInfo::ModRef;
break;
}
+ // If no operand aliases 'Object', clear the Must flag. It is kept below only
+ // if at least one operand aliases and all aliases found are MustAlias.
+ if (isNoModRef(Result))
+ IsMustAlias = false;
+
// Early return if we improved mod ref information
if (!isModAndRefSet(Result))
- return Result;
+ return IsMustAlias ? setMust(Result) : clearMust(Result);
}
// If the CallSite is to malloc or calloc, we can assume that it doesn't
diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp
index a85af6c9c93f..fb261755e5d1 100644
--- a/lib/Analysis/CFGPrinter.cpp
+++ b/lib/Analysis/CFGPrinter.cpp
@@ -82,7 +82,7 @@ PreservedAnalyses CFGOnlyViewerPass::run(Function &F,
return PreservedAnalyses::all();
}
-static void writeCFGToDotFile(Function &F) {
+static void writeCFGToDotFile(Function &F, bool CFGOnly = false) {
std::string Filename = ("cfg." + F.getName() + ".dot").str();
errs() << "Writing '" << Filename << "'...";
@@ -90,7 +90,7 @@ static void writeCFGToDotFile(Function &F) {
raw_fd_ostream File(Filename, EC, sys::fs::F_Text);
if (!EC)
- WriteGraph(File, (const Function*)&F);
+ WriteGraph(File, (const Function*)&F, CFGOnly);
else
errs() << " error opening file for writing!";
errs() << "\n";
@@ -134,7 +134,7 @@ namespace {
}
bool runOnFunction(Function &F) override {
- writeCFGToDotFile(F);
+ writeCFGToDotFile(F, /*CFGOnly=*/true);
return false;
}
void print(raw_ostream &OS, const Module* = nullptr) const override {}
@@ -152,7 +152,7 @@ INITIALIZE_PASS(CFGOnlyPrinterLegacyPass, "dot-cfg-only",
PreservedAnalyses CFGOnlyPrinterPass::run(Function &F,
FunctionAnalysisManager &AM) {
- writeCFGToDotFile(F);
+ writeCFGToDotFile(F, /*CFGOnly=*/true);
return PreservedAnalyses::all();
}
diff --git a/lib/Analysis/GlobalsModRef.cpp b/lib/Analysis/GlobalsModRef.cpp
index 23109c67e5c3..daee93267f56 100644
--- a/lib/Analysis/GlobalsModRef.cpp
+++ b/lib/Analysis/GlobalsModRef.cpp
@@ -85,12 +85,17 @@ class GlobalsAAResult::FunctionInfo {
/// The bit that flags that this function may read any global. This is
/// chosen to mix together with ModRefInfo bits.
/// FIXME: This assumes ModRefInfo lattice will remain 4 bits!
+ /// It overlaps with ModRefInfo::Must bit!
+ /// FunctionInfo.getModRefInfo() masks out everything except ModRef so
+ /// this remains correct, but the Must info is lost.
enum { MayReadAnyGlobal = 4 };
/// Checks to document the invariants of the bit packing here.
- static_assert((MayReadAnyGlobal & static_cast<int>(ModRefInfo::ModRef)) == 0,
+ static_assert((MayReadAnyGlobal & static_cast<int>(ModRefInfo::MustModRef)) ==
+ 0,
"ModRef and the MayReadAnyGlobal flag bits overlap.");
- static_assert(((MayReadAnyGlobal | static_cast<int>(ModRefInfo::ModRef)) >>
+ static_assert(((MayReadAnyGlobal |
+ static_cast<int>(ModRefInfo::MustModRef)) >>
AlignedMapPointerTraits::NumLowBitsAvailable) == 0,
"Insufficient low bits to store our flag and ModRef info.");
@@ -125,14 +130,22 @@ public:
return *this;
}
+ /// This method clears the MayReadAnyGlobal bit added by GlobalsAAResult to return
+ /// the corresponding ModRefInfo. It must align in functionality with
+ /// clearMust().
+ ModRefInfo globalClearMayReadAnyGlobal(int I) const {
+ return ModRefInfo((I & static_cast<int>(ModRefInfo::ModRef)) |
+ static_cast<int>(ModRefInfo::NoModRef));
+ }
+
/// Returns the \c ModRefInfo info for this function.
ModRefInfo getModRefInfo() const {
- return ModRefInfo(Info.getInt() & static_cast<int>(ModRefInfo::ModRef));
+ return globalClearMayReadAnyGlobal(Info.getInt());
}
/// Adds new \c ModRefInfo for this function to its state.
void addModRefInfo(ModRefInfo NewMRI) {
- Info.setInt(Info.getInt() | static_cast<int>(NewMRI));
+ Info.setInt(Info.getInt() | static_cast<int>(setMust(NewMRI)));
}
/// Returns whether this function may read any global variable, and we don't
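A worked example of the packing above, hedged on the ModRefInfo encoding (MayReadAnyGlobal is 4, and the asserts guarantee it is disjoint from the MustModRef bits):

    // Suppose Info.getInt() == MayReadAnyGlobal | int(ModRefInfo::MustMod).
    // globalClearMayReadAnyGlobal masks with ModRefInfo::ModRef, dropping
    // the flag bit, then ORs in NoModRef; assuming Mod == NoModRef | MustMod
    // in the encoding, the result is plain ModRefInfo::Mod -- the same value
    // clearMust(ModRefInfo::MustMod) produces, so only the Must info is lost.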
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index fba96c8976a6..b0cb29203a5a 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -249,8 +249,6 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool visitCastInst(CastInst &I);
bool visitUnaryInstruction(UnaryInstruction &I);
bool visitCmpInst(CmpInst &I);
- bool visitAnd(BinaryOperator &I);
- bool visitOr(BinaryOperator &I);
bool visitSub(BinaryOperator &I);
bool visitBinaryOperator(BinaryOperator &I);
bool visitLoad(LoadInst &I);
@@ -363,6 +361,7 @@ void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
void CallAnalyzer::disableLoadElimination() {
if (EnableLoadElimination) {
Cost += LoadEliminationCost;
+ LoadEliminationCost = 0;
EnableLoadElimination = false;
}
}
@@ -700,6 +699,22 @@ bool CallAnalyzer::visitCastInst(CastInst &I) {
// Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
disableSROA(I.getOperand(0));
+ // If this is a floating-point cast, and the target says this operation
+ // is expensive, this may eventually become a library call. Treat the cost
+ // as such.
+ switch (I.getOpcode()) {
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
+ Cost += InlineConstants::CallPenalty;
+ default:
+ break;
+ }
+
return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}
@@ -1004,34 +1019,6 @@ bool CallAnalyzer::visitCmpInst(CmpInst &I) {
return false;
}
-bool CallAnalyzer::visitOr(BinaryOperator &I) {
- // This is necessary because the generic simplify instruction only works if
- // both operands are constants.
- for (unsigned i = 0; i < 2; ++i) {
- if (ConstantInt *C = dyn_cast_or_null<ConstantInt>(
- SimplifiedValues.lookup(I.getOperand(i))))
- if (C->isAllOnesValue()) {
- SimplifiedValues[&I] = C;
- return true;
- }
- }
- return Base::visitOr(I);
-}
-
-bool CallAnalyzer::visitAnd(BinaryOperator &I) {
- // This is necessary because the generic simplify instruction only works if
- // both operands are constants.
- for (unsigned i = 0; i < 2; ++i) {
- if (ConstantInt *C = dyn_cast_or_null<ConstantInt>(
- SimplifiedValues.lookup(I.getOperand(i))))
- if (C->isZero()) {
- SimplifiedValues[&I] = C;
- return true;
- }
- }
- return Base::visitAnd(I);
-}
-
bool CallAnalyzer::visitSub(BinaryOperator &I) {
// Try to handle a special case: we can fold computing the difference of two
// constant-related pointers.
@@ -1061,23 +1048,38 @@ bool CallAnalyzer::visitSub(BinaryOperator &I) {
bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
- auto Evaluate = [&](SmallVectorImpl<Constant *> &COps) {
- Value *SimpleV = nullptr;
- if (auto FI = dyn_cast<FPMathOperator>(&I))
- SimpleV = SimplifyFPBinOp(I.getOpcode(), COps[0], COps[1],
- FI->getFastMathFlags(), DL);
- else
- SimpleV = SimplifyBinOp(I.getOpcode(), COps[0], COps[1], DL);
- return dyn_cast_or_null<Constant>(SimpleV);
- };
+ Constant *CLHS = dyn_cast<Constant>(LHS);
+ if (!CLHS)
+ CLHS = SimplifiedValues.lookup(LHS);
+ Constant *CRHS = dyn_cast<Constant>(RHS);
+ if (!CRHS)
+ CRHS = SimplifiedValues.lookup(RHS);
+
+ Value *SimpleV = nullptr;
+ if (auto FI = dyn_cast<FPMathOperator>(&I))
+ SimpleV = SimplifyFPBinOp(I.getOpcode(), CLHS ? CLHS : LHS,
+ CRHS ? CRHS : RHS, FI->getFastMathFlags(), DL);
+ else
+ SimpleV =
+ SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL);
- if (simplifyInstruction(I, Evaluate))
+ if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
+ SimplifiedValues[&I] = C;
+
+ if (SimpleV)
return true;
// Disable any SROA on arguments to arbitrary, unsimplified binary operators.
disableSROA(LHS);
disableSROA(RHS);
+ // If the instruction is floating point, and the target says this operation
+ // is expensive, this may eventually become a library call. Treat the cost
+ // as such.
+ if (I.getType()->isFloatingPointTy() &&
+ TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
+ Cost += InlineConstants::CallPenalty;
+
return false;
}
@@ -1097,7 +1099,7 @@ bool CallAnalyzer::visitLoad(LoadInst &I) {
// by any stores or calls, this load is likely to be redundant and can be
// eliminated.
if (EnableLoadElimination &&
- !LoadAddrSet.insert(I.getPointerOperand()).second) {
+ !LoadAddrSet.insert(I.getPointerOperand()).second && I.isUnordered()) {
LoadEliminationCost += InlineConstants::InstrCost;
return true;
}
@@ -1547,17 +1549,6 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
++NumVectorInstructions;
- // If the instruction is floating point, and the target says this operation
- // is expensive or the function has the "use-soft-float" attribute, this may
- // eventually become a library call. Treat the cost as such.
- if (I->getType()->isFloatingPointTy()) {
- // If the function has the "use-soft-float" attribute, mark it as
- // expensive.
- if (TTI.getFPOpCost(I->getType()) == TargetTransformInfo::TCC_Expensive ||
- (F.getFnAttribute("use-soft-float").getValueAsString() == "true"))
- Cost += InlineConstants::CallPenalty;
- }
-
// If the instruction simplified to a constant, there is no cost to this
// instruction. Visit the instructions using our InstVisitor to account for
// all of the per-instruction logic. The visit tree returns true if we
diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp
index ed8e5e8cc489..e141d6c58b65 100644
--- a/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1107,77 +1107,6 @@ static unsigned getAddressSpaceOperand(Value *I) {
return -1;
}
-// TODO:This API can be improved by using the permutation of given width as the
-// accesses are entered into the map.
-bool llvm::sortLoadAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
- ScalarEvolution &SE,
- SmallVectorImpl<Value *> &Sorted,
- SmallVectorImpl<unsigned> *Mask) {
- SmallVector<std::pair<int64_t, Value *>, 4> OffValPairs;
- OffValPairs.reserve(VL.size());
- Sorted.reserve(VL.size());
-
- // Walk over the pointers, and map each of them to an offset relative to
- // first pointer in the array.
- Value *Ptr0 = getPointerOperand(VL[0]);
- const SCEV *Scev0 = SE.getSCEV(Ptr0);
- Value *Obj0 = GetUnderlyingObject(Ptr0, DL);
- PointerType *PtrTy = dyn_cast<PointerType>(Ptr0->getType());
- uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
-
- for (auto *Val : VL) {
- // The only kind of access we care about here is load.
- if (!isa<LoadInst>(Val))
- return false;
-
- Value *Ptr = getPointerOperand(Val);
- assert(Ptr && "Expected value to have a pointer operand.");
- // If a pointer refers to a different underlying object, bail - the
- // pointers are by definition incomparable.
- Value *CurrObj = GetUnderlyingObject(Ptr, DL);
- if (CurrObj != Obj0)
- return false;
-
- const SCEVConstant *Diff =
- dyn_cast<SCEVConstant>(SE.getMinusSCEV(SE.getSCEV(Ptr), Scev0));
- // The pointers may not have a constant offset from each other, or SCEV
- // may just not be smart enough to figure out they do. Regardless,
- // there's nothing we can do.
- if (!Diff || static_cast<unsigned>(Diff->getAPInt().abs().getSExtValue()) >
- (VL.size() - 1) * Size)
- return false;
-
- OffValPairs.emplace_back(Diff->getAPInt().getSExtValue(), Val);
- }
- SmallVector<unsigned, 4> UseOrder(VL.size());
- for (unsigned i = 0; i < VL.size(); i++) {
- UseOrder[i] = i;
- }
-
- // Sort the memory accesses and keep the order of their uses in UseOrder.
- std::sort(UseOrder.begin(), UseOrder.end(),
- [&OffValPairs](unsigned Left, unsigned Right) {
- return OffValPairs[Left].first < OffValPairs[Right].first;
- });
-
- for (unsigned i = 0; i < VL.size(); i++)
- Sorted.emplace_back(OffValPairs[UseOrder[i]].second);
-
- // Sort UseOrder to compute the Mask.
- if (Mask) {
- Mask->reserve(VL.size());
- for (unsigned i = 0; i < VL.size(); i++)
- Mask->emplace_back(i);
- std::sort(Mask->begin(), Mask->end(),
- [&UseOrder](unsigned Left, unsigned Right) {
- return UseOrder[Left] < UseOrder[Right];
- });
- }
-
- return true;
-}
-
-
/// Returns true if the memory operations \p A and \p B are consecutive.
bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
ScalarEvolution &SE, bool CheckType) {
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index a6c590126c2f..bb7bf967994c 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -647,6 +647,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
// Ok, this store might clobber the query pointer. Check to see if it is
// a must alias: in this case, we want to return this as a def.
+ // FIXME: Use ModRefInfo::Must bit from getModRefInfo call above.
MemoryLocation StoreLoc = MemoryLocation::get(SI);
// If we found a pointer, check if it could be the same as our pointer.
@@ -690,7 +691,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
// If necessary, perform additional analysis.
if (isModAndRefSet(MR))
MR = AA.callCapturesBefore(Inst, MemLoc, &DT, &OBB);
- switch (MR) {
+ switch (clearMust(MR)) {
case ModRefInfo::NoModRef:
// If the call has no effect on the queried pointer, just ignore it.
continue;
@@ -919,6 +920,14 @@ void MemoryDependenceResults::getNonLocalPointerDependency(
Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) {
const MemoryLocation Loc = MemoryLocation::get(QueryInst);
bool isLoad = isa<LoadInst>(QueryInst);
+ return getNonLocalPointerDependencyFrom(QueryInst, Loc, isLoad, Result);
+}
+
+void MemoryDependenceResults::getNonLocalPointerDependencyFrom(
+ Instruction *QueryInst,
+ const MemoryLocation &Loc,
+ bool isLoad,
+ SmallVectorImpl<NonLocalDepResult> &Result) {
BasicBlock *FromBB = QueryInst->getParent();
assert(FromBB);
@@ -1118,21 +1127,15 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
// If we already have a cache entry for this CacheKey, we may need to do some
// work to reconcile the cache entry and the current query.
if (!Pair.second) {
- if (CacheInfo->Size < Loc.Size) {
- // The query's Size is greater than the cached one. Throw out the
- // cached data and proceed with the query at the greater size.
+ if (CacheInfo->Size != Loc.Size) {
+ // The query's Size differs from the cached one. Throw out the
+ // cached data and proceed with the query at the new size.
CacheInfo->Pair = BBSkipFirstBlockPair();
CacheInfo->Size = Loc.Size;
for (auto &Entry : CacheInfo->NonLocalDeps)
if (Instruction *Inst = Entry.getResult().getInst())
RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
CacheInfo->NonLocalDeps.clear();
- } else if (CacheInfo->Size > Loc.Size) {
- // This query's Size is less than the cached one. Conservatively restart
- // the query using the greater size.
- return getNonLocalPointerDepFromBB(
- QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad,
- StartBB, Result, Visited, SkipFirstBlock);
}
// If the query's AATags are inconsistent with the cached one,
diff --git a/lib/Analysis/MemorySSA.cpp b/lib/Analysis/MemorySSA.cpp
index 8fe190e8bcf8..6e9368c49d65 100644
--- a/lib/Analysis/MemorySSA.cpp
+++ b/lib/Analysis/MemorySSA.cpp
@@ -192,8 +192,6 @@ template <> struct DenseMapInfo<MemoryLocOrCall> {
}
};
-enum class Reorderability { Always, IfNoAlias, Never };
-
} // end namespace llvm
/// This does one-way checks to see if Use could theoretically be hoisted above
@@ -202,22 +200,16 @@ enum class Reorderability { Always, IfNoAlias, Never };
/// This assumes that, for the purposes of MemorySSA, Use comes directly after
/// MayClobber, with no potentially clobbering operations in between them.
/// (Where potentially clobbering ops are memory barriers, aliased stores, etc.)
-static Reorderability getLoadReorderability(const LoadInst *Use,
- const LoadInst *MayClobber) {
+static bool areLoadsReorderable(const LoadInst *Use,
+ const LoadInst *MayClobber) {
bool VolatileUse = Use->isVolatile();
bool VolatileClobber = MayClobber->isVolatile();
// Volatile operations may never be reordered with other volatile operations.
if (VolatileUse && VolatileClobber)
- return Reorderability::Never;
-
- // The lang ref allows reordering of volatile and non-volatile operations.
- // Whether an aliasing nonvolatile load and volatile load can be reordered,
- // though, is ambiguous. Because it may not be best to exploit this ambiguity,
- // we only allow volatile/non-volatile reordering if the volatile and
- // non-volatile operations don't alias.
- Reorderability Result = VolatileUse || VolatileClobber
- ? Reorderability::IfNoAlias
- : Reorderability::Always;
+ return false;
+ // Otherwise, volatile doesn't matter here. From the language reference:
+ // 'optimizers may change the order of volatile operations relative to
+ // non-volatile operations.'
// If a load is seq_cst, it cannot be moved above other loads. If its ordering
// is weaker, it can be moved above other loads. We just need to be sure that
@@ -229,9 +221,7 @@ static Reorderability getLoadReorderability(const LoadInst *Use,
bool SeqCstUse = Use->getOrdering() == AtomicOrdering::SequentiallyConsistent;
bool MayClobberIsAcquire = isAtLeastOrStrongerThan(MayClobber->getOrdering(),
AtomicOrdering::Acquire);
- if (SeqCstUse || MayClobberIsAcquire)
- return Reorderability::Never;
- return Result;
+ return !(SeqCstUse || MayClobberIsAcquire);
}
static bool instructionClobbersQuery(MemoryDef *MD,
@@ -265,18 +255,9 @@ static bool instructionClobbersQuery(MemoryDef *MD,
return isModOrRefSet(I);
}
- if (auto *DefLoad = dyn_cast<LoadInst>(DefInst)) {
- if (auto *UseLoad = dyn_cast<LoadInst>(UseInst)) {
- switch (getLoadReorderability(UseLoad, DefLoad)) {
- case Reorderability::Always:
- return false;
- case Reorderability::Never:
- return true;
- case Reorderability::IfNoAlias:
- return !AA.isNoAlias(UseLoc, MemoryLocation::get(DefLoad));
- }
- }
- }
+ if (auto *DefLoad = dyn_cast<LoadInst>(DefInst))
+ if (auto *UseLoad = dyn_cast<LoadInst>(UseInst))
+ return !areLoadsReorderable(UseLoad, DefLoad);
return isModSet(AA.getModRefInfo(DefInst, UseLoc));
}
diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp
index d54fb700200d..10badd89a4a8 100644
--- a/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -454,7 +454,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
std::unique_ptr<BlockFrequencyInfo> BFIPtr;
if (GetBFICallback)
BFI = GetBFICallback(F);
- else if (F.getEntryCount().hasValue()) {
+ else if (F.hasProfileData()) {
LoopInfo LI{DominatorTree(const_cast<Function &>(F))};
BranchProbabilityInfo BPI{F, LI};
BFIPtr = llvm::make_unique<BlockFrequencyInfo>(F, BPI, LI);
diff --git a/lib/Analysis/ProfileSummaryInfo.cpp b/lib/Analysis/ProfileSummaryInfo.cpp
index 671744f93fb8..347d093b0f61 100644
--- a/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/lib/Analysis/ProfileSummaryInfo.cpp
@@ -115,42 +115,62 @@ bool ProfileSummaryInfo::isFunctionEntryHot(const Function *F) {
return FunctionCount && isHotCount(FunctionCount.getValue());
}
-/// Returns true if the function's entry or total call edge count is hot.
+/// Returns true if the function contains hot code. This can include a hot
+/// function entry count, hot basic block, or (in the case of Sample PGO)
+/// hot total call edge count.
/// If it returns false, it either means it is not hot or it is unknown
-/// whether it is hot or not (for example, no profile data is available).
-bool ProfileSummaryInfo::isFunctionHotInCallGraph(const Function *F) {
+/// (for example, no profile data is available).
+bool ProfileSummaryInfo::isFunctionHotInCallGraph(const Function *F,
+ BlockFrequencyInfo &BFI) {
if (!F || !computeSummary())
return false;
if (auto FunctionCount = F->getEntryCount())
if (isHotCount(FunctionCount.getValue()))
return true;
- uint64_t TotalCallCount = 0;
+ if (hasSampleProfile()) {
+ uint64_t TotalCallCount = 0;
+ for (const auto &BB : *F)
+ for (const auto &I : BB)
+ if (isa<CallInst>(I) || isa<InvokeInst>(I))
+ if (auto CallCount = getProfileCount(&I, nullptr))
+ TotalCallCount += CallCount.getValue();
+ if (isHotCount(TotalCallCount))
+ return true;
+ }
for (const auto &BB : *F)
- for (const auto &I : BB)
- if (isa<CallInst>(I) || isa<InvokeInst>(I))
- if (auto CallCount = getProfileCount(&I, nullptr))
- TotalCallCount += CallCount.getValue();
- return isHotCount(TotalCallCount);
+ if (isHotBB(&BB, &BFI))
+ return true;
+ return false;
}
-/// Returns true if the function's entry and total call edge count is cold.
+/// Returns true if the function only contains cold code. This means that
+/// the function entry and blocks are all cold, and (in the case of Sample PGO)
+/// the total call edge count is cold.
/// If it returns false, it either means it is not cold or it is unknown
-/// whether it is cold or not (for example, no profile data is available).
-bool ProfileSummaryInfo::isFunctionColdInCallGraph(const Function *F) {
+/// (for example, no profile data is available).
+bool ProfileSummaryInfo::isFunctionColdInCallGraph(const Function *F,
+ BlockFrequencyInfo &BFI) {
if (!F || !computeSummary())
return false;
if (auto FunctionCount = F->getEntryCount())
if (!isColdCount(FunctionCount.getValue()))
return false;
-
- uint64_t TotalCallCount = 0;
+
+ if (hasSampleProfile()) {
+ uint64_t TotalCallCount = 0;
+ for (const auto &BB : *F)
+ for (const auto &I : BB)
+ if (isa<CallInst>(I) || isa<InvokeInst>(I))
+ if (auto CallCount = getProfileCount(&I, nullptr))
+ TotalCallCount += CallCount.getValue();
+ if (!isColdCount(TotalCallCount))
+ return false;
+ }
for (const auto &BB : *F)
- for (const auto &I : BB)
- if (isa<CallInst>(I) || isa<InvokeInst>(I))
- if (auto CallCount = getProfileCount(&I, nullptr))
- TotalCallCount += CallCount.getValue();
- return isColdCount(TotalCallCount);
+ if (!isColdBB(&BB, &BFI))
+ return false;
+ return true;
}
/// Returns true if the function's entry is cold. If it returns false, it
@@ -231,7 +251,7 @@ bool ProfileSummaryInfo::isColdCallSite(const CallSite &CS,
// If there is no profile for the caller, and we know the profile is
// accurate, we consider the callsite as cold.
return (hasSampleProfile() &&
- (CS.getCaller()->getEntryCount() || ProfileSampleAccurate ||
+ (CS.getCaller()->hasProfileData() || ProfileSampleAccurate ||
CS.getCaller()->hasFnAttribute("profile-sample-accurate")));
}
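
Note: callers of the call-graph hotness queries must now supply a
BlockFrequencyInfo, since a function now counts as hot if any of its blocks
is hot, not just its entry or total call-edge count. Usage sketch (the
CodeGenPrepare hunk later in this patch does exactly this):

  BranchProbabilityInfo BPI(F, LI);
  BlockFrequencyInfo BFI(F, BPI, LI);
  if (PSI->isFunctionHotInCallGraph(&F, BFI))
    F.setSectionPrefix(".hot");
  else if (PSI->isFunctionColdInCallGraph(&F, BFI))
    F.setSectionPrefix(".unlikely");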
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 0b8604187121..2a8088dc4452 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -4368,6 +4368,7 @@ static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) {
default:
break;
}
+ break;
}
default:
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index b744cae51ed7..c9e9c6d1a419 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -314,6 +314,10 @@ int TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
return Cost;
}
+bool TargetTransformInfo::isOutOfOrder() const {
+ return TTIImpl->isOutOfOrder();
+}
+
unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
return TTIImpl->getNumberOfRegisters(Vector);
}
diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp
index c9ed026a1e33..173db399b9d6 100644
--- a/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -544,21 +544,32 @@ static bool matchAccessTags(const MDNode *A, const MDNode *B,
TBAAStructTagNode TagA(A), TagB(B);
const MDNode *CommonType = getLeastCommonType(TagA.getAccessType(),
TagB.getAccessType());
- if (GenericTag)
- *GenericTag = createAccessTag(CommonType);
// TODO: We need to check if AccessType of TagA encloses AccessType of
// TagB to support aggregate AccessType. If yes, return true.
// Climb the type DAG from base type of A to see if we reach base type of B.
uint64_t OffsetA;
- if (findAccessType(TagA, TagB.getBaseType(), OffsetA))
- return OffsetA == TagB.getOffset();
+ if (findAccessType(TagA, TagB.getBaseType(), OffsetA)) {
+ bool SameMemberAccess = OffsetA == TagB.getOffset();
+ if (GenericTag)
+ *GenericTag = SameMemberAccess ? TagB.getNode() :
+ createAccessTag(CommonType);
+ return SameMemberAccess;
+ }
// Climb the type DAG from base type of B to see if we reach base type of A.
uint64_t OffsetB;
- if (findAccessType(TagB, TagA.getBaseType(), OffsetB))
- return OffsetB == TagA.getOffset();
+ if (findAccessType(TagB, TagA.getBaseType(), OffsetB)) {
+ bool SameMemberAccess = OffsetB == TagA.getOffset();
+ if (GenericTag)
+ *GenericTag = SameMemberAccess ? TagA.getNode() :
+ createAccessTag(CommonType);
+ return SameMemberAccess;
+ }
+
+ if (GenericTag)
+ *GenericTag = createAccessTag(CommonType);
// If the final access types have different roots, they're part of different
// potentially unrelated type systems, so we must be conservative.
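
Note: the generic tag returned through GenericTag now depends on whether the
two tags describe the same member: on an exact offset match the existing,
more precise tag is reused; otherwise a fresh tag over the least common
access type is created. Decision logic condensed from the hunk above:

  // SameMemberAccess: the type-DAG walk reached the other tag's base type
  // at exactly the other tag's offset.
  if (GenericTag)
    *GenericTag = SameMemberAccess
                      ? TagB.getNode()              // reuse the exact tag
                      : createAccessTag(CommonType); // generalize
  return SameMemberAccess;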
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index fd13dbc1f1e2..a7201ed97350 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -3371,7 +3371,7 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
for (auto &RI : FS->refs())
NameVals.push_back(VE.getValueID(RI.getValue()));
- bool HasProfileData = F.getEntryCount().hasValue();
+ bool HasProfileData = F.hasProfileData();
for (auto &ECI : FS->calls()) {
NameVals.push_back(getValueId(ECI.first));
if (HasProfileData)
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 31037095aa2b..d7995447592c 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2033,6 +2033,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
}
}
// else fallthrough
+ LLVM_FALLTHROUGH;
// The MC library also has a right-shift operator, but it isn't consistently
// signed or unsigned between different targets.
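
Note: LLVM_FALLTHROUGH documents an intentional switch fallthrough to the
compiler (expanding to [[clang::fallthrough]] or equivalent where supported),
which silences -Wimplicit-fallthrough. Generic pattern, with hypothetical
handlers:

  switch (Kind) {
  case KindA:
    handleA();        // hypothetical
    LLVM_FALLTHROUGH; // deliberately also run the KindB handling
  case KindB:
    handleB();        // hypothetical
    break;
  }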
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 07ba5d36cc96..3aeb4910ab10 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -51,7 +51,7 @@ add_llvm_library(LLVMCodeGen
LiveRangeShrink.cpp
LiveRegMatrix.cpp
LiveRegUnits.cpp
- LiveStackAnalysis.cpp
+ LiveStacks.cpp
LiveVariables.cpp
LLVMTargetMachine.cpp
LocalStackSlotAllocation.cpp
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index c4794380f791..d6f55bba716f 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -352,8 +352,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// Clear per function information.
InsertedInsts.clear();
PromotedInsts.clear();
- BFI.reset();
- BPI.reset();
ModifiedDT = false;
if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
@@ -365,14 +363,16 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ BPI.reset(new BranchProbabilityInfo(F, *LI));
+ BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
OptSize = F.optForSize();
ProfileSummaryInfo *PSI =
getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
if (ProfileGuidedSectionPrefix) {
- if (PSI->isFunctionHotInCallGraph(&F))
+ if (PSI->isFunctionHotInCallGraph(&F, *BFI))
F.setSectionPrefix(".hot");
- else if (PSI->isFunctionColdInCallGraph(&F))
+ else if (PSI->isFunctionColdInCallGraph(&F, *BFI))
F.setSectionPrefix(".unlikely");
}
@@ -652,13 +652,6 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
if (SameIncomingValueBBs.count(Pred))
return true;
- if (!BFI) {
- Function &F = *BB->getParent();
- LoopInfo LI{DominatorTree(F)};
- BPI.reset(new BranchProbabilityInfo(F, LI));
- BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
- }
-
BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
BlockFrequency BBFreq = BFI->getBlockFreq(BB);
@@ -3704,7 +3697,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
} else {
uint64_t TypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
- ConstantOffset += CI->getSExtValue()*TypeSize;
+ ConstantOffset += CI->getSExtValue() * TypeSize;
} else if (TypeSize) { // Scales of zero don't do anything.
// We only allow one variable index at the moment.
if (VariableOperand != -1)
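
Note: CodeGenPrepare now builds BPI/BFI eagerly once per function instead of
lazily inside isMergingEmptyBlockProfitable; the section-prefix decision
needs them anyway, and the old lazy path recomputed a DominatorTree on each
use. Pattern condensed from the hunks above:

  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  BPI.reset(new BranchProbabilityInfo(F, *LI));
  BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
  // ...BFI can then be dereferenced unconditionally:
  BlockFrequency PredFreq = BFI->getBlockFreq(Pred);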
diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 87a658be4c29..a3b43c92a7fc 100644
--- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -835,6 +835,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
case 64:
ZeroTy = Type::getDoubleTy(Ctx);
break;
+ case 128:
+ ZeroTy = Type::getFP128Ty(Ctx);
+ break;
default:
llvm_unreachable("unexpected floating-point type");
}
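
Note: this lowering picks an IR type for a floating-point zero of the value's
width; the hunk adds the 128-bit case. Condensed switch (the 64-bit case
appears in the context above; the 32-bit case is assumed from the pattern):

  switch (Ty.getSizeInBits()) {
  case 32:  ZeroTy = Type::getFloatTy(Ctx);  break; // assumed preexisting
  case 64:  ZeroTy = Type::getDoubleTy(Ctx); break;
  case 128: ZeroTy = Type::getFP128Ty(Ctx);  break; // new in this import
  default:  llvm_unreachable("unexpected floating-point type");
  }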
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index 1aaf7a0ceef8..86ce4b7a9464 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -28,7 +28,7 @@
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
-#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 92edfb059ad6..77a7aaa95732 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -81,10 +81,9 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T,
this->OptLevel = OL;
}
-TargetIRAnalysis LLVMTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis([this](const Function &F) {
- return TargetTransformInfo(BasicTTIImpl(this, F));
- });
+TargetTransformInfo
+LLVMTargetMachine::getTargetTransformInfo(const Function &F) {
+ return TargetTransformInfo(BasicTTIImpl(this, F));
}
/// addPassesToX helper drives creation and initialization of TargetPassConfig.
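
Note: the TargetIRAnalysis factory moves off LLVMTargetMachine; targets now
override a plain virtual that returns the TTI for a function. Presumably the
base TargetMachine wraps that hook once, along these lines (assumed, not
shown in this diff):

  TargetIRAnalysis TargetMachine::getTargetIRAnalysis() {
    return TargetIRAnalysis(
        [this](const Function &F) { return getTargetTransformInfo(F); });
  }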
diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStacks.cpp
index b0e58b0e3e5f..80ecfdb7a507 100644
--- a/lib/CodeGen/LiveStackAnalysis.cpp
+++ b/lib/CodeGen/LiveStacks.cpp
@@ -1,4 +1,4 @@
-//===-- LiveStackAnalysis.cpp - Live Stack Slot Analysis ------------------===//
+//===-- LiveStacks.cpp - Live Stack Slot Analysis -------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,7 +13,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp
index 3568f96d2b9a..f91cca6e4e50 100644
--- a/lib/CodeGen/MIRPrinter.cpp
+++ b/lib/CodeGen/MIRPrinter.cpp
@@ -157,18 +157,14 @@ public:
void print(const MachineBasicBlock &MBB);
void print(const MachineInstr &MI);
- void printIRBlockReference(const BasicBlock &BB);
void printIRValueReference(const Value &V);
void printStackObjectReference(int FrameIndex);
- void printOffset(int64_t Offset);
void print(const MachineInstr &MI, unsigned OpIdx,
const TargetRegisterInfo *TRI, bool ShouldPrintRegisterTies,
LLT TypeToPrint, bool PrintDef = true);
void print(const LLVMContext &Context, const TargetInstrInfo &TII,
const MachineMemOperand &Op);
void printSyncScope(const LLVMContext &Context, SyncScope::ID SSID);
-
- void print(const MCCFIInstruction &CFI, const TargetRegisterInfo *TRI);
};
} // end namespace llvm
@@ -707,32 +703,6 @@ void MIPrinter::print(const MachineInstr &MI) {
}
}
-static void printIRSlotNumber(raw_ostream &OS, int Slot) {
- if (Slot == -1)
- OS << "<badref>";
- else
- OS << Slot;
-}
-
-void MIPrinter::printIRBlockReference(const BasicBlock &BB) {
- OS << "%ir-block.";
- if (BB.hasName()) {
- printLLVMNameWithoutPrefix(OS, BB.getName());
- return;
- }
- const Function *F = BB.getParent();
- int Slot;
- if (F == MST.getCurrentFunction()) {
- Slot = MST.getLocalSlot(&BB);
- } else {
- ModuleSlotTracker CustomMST(F->getParent(),
- /*ShouldInitializeAllMetadata=*/false);
- CustomMST.incorporateFunction(*F);
- Slot = CustomMST.getLocalSlot(&BB);
- }
- printIRSlotNumber(OS, Slot);
-}
-
void MIPrinter::printIRValueReference(const Value &V) {
if (isa<GlobalValue>(V)) {
V.printAsOperand(OS, /*PrintType=*/false, MST);
@@ -750,7 +720,7 @@ void MIPrinter::printIRValueReference(const Value &V) {
printLLVMNameWithoutPrefix(OS, V.getName());
return;
}
- printIRSlotNumber(OS, MST.getLocalSlot(&V));
+ MachineOperand::printIRSlotNumber(OS, MST.getLocalSlot(&V));
}
void MIPrinter::printStackObjectReference(int FrameIndex) {
@@ -762,16 +732,6 @@ void MIPrinter::printStackObjectReference(int FrameIndex) {
Operand.Name);
}
-void MIPrinter::printOffset(int64_t Offset) {
- if (Offset == 0)
- return;
- if (Offset < 0) {
- OS << " - " << -Offset;
- return;
- }
- OS << " + " << Offset;
-}
-
void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx,
const TargetRegisterInfo *TRI,
bool ShouldPrintRegisterTies, LLT TypeToPrint,
@@ -787,6 +747,7 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx,
LLVM_FALLTHROUGH;
case MachineOperand::MO_Register:
case MachineOperand::MO_CImmediate:
+ case MachineOperand::MO_FPImmediate:
case MachineOperand::MO_MachineBasicBlock:
case MachineOperand::MO_ConstantPoolIndex:
case MachineOperand::MO_TargetIndex:
@@ -795,7 +756,11 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx,
case MachineOperand::MO_GlobalAddress:
case MachineOperand::MO_RegisterLiveOut:
case MachineOperand::MO_Metadata:
- case MachineOperand::MO_MCSymbol: {
+ case MachineOperand::MO_MCSymbol:
+ case MachineOperand::MO_CFIIndex:
+ case MachineOperand::MO_IntrinsicID:
+ case MachineOperand::MO_Predicate:
+ case MachineOperand::MO_BlockAddress: {
unsigned TiedOperandIdx = 0;
if (ShouldPrintRegisterTies && Op.isReg() && Op.isTied() && !Op.isDef())
TiedOperandIdx = Op.getParent()->findTiedOperandIdx(OpIdx);
@@ -804,21 +769,9 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx,
TiedOperandIdx, TRI, TII);
break;
}
- case MachineOperand::MO_FPImmediate:
- Op.getFPImm()->printAsOperand(OS, /*PrintType=*/true, MST);
- break;
case MachineOperand::MO_FrameIndex:
printStackObjectReference(Op.getIndex());
break;
- case MachineOperand::MO_BlockAddress:
- OS << "blockaddress(";
- Op.getBlockAddress()->getFunction()->printAsOperand(OS, /*PrintType=*/false,
- MST);
- OS << ", ";
- printIRBlockReference(*Op.getBlockAddress()->getBasicBlock());
- OS << ')';
- printOffset(Op.getOffset());
- break;
case MachineOperand::MO_RegisterMask: {
auto RegMaskInfo = RegisterMaskIds.find(Op.getRegMask());
if (RegMaskInfo != RegisterMaskIds.end())
@@ -827,28 +780,6 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx,
printCustomRegMask(Op.getRegMask(), OS, TRI);
break;
}
- case MachineOperand::MO_CFIIndex: {
- const MachineFunction &MF = *Op.getParent()->getMF();
- print(MF.getFrameInstructions()[Op.getCFIIndex()], TRI);
- break;
- }
- case MachineOperand::MO_IntrinsicID: {
- Intrinsic::ID ID = Op.getIntrinsicID();
- if (ID < Intrinsic::num_intrinsics)
- OS << "intrinsic(@" << Intrinsic::getName(ID, None) << ')';
- else {
- const MachineFunction &MF = *Op.getParent()->getMF();
- const TargetIntrinsicInfo *TII = MF.getTarget().getIntrinsicInfo();
- OS << "intrinsic(@" << TII->getName(ID) << ')';
- }
- break;
- }
- case MachineOperand::MO_Predicate: {
- auto Pred = static_cast<CmpInst::Predicate>(Op.getPredicate());
- OS << (CmpInst::isIntPredicate(Pred) ? "int" : "float") << "pred("
- << CmpInst::getPredicateName(Pred) << ')';
- break;
- }
}
}
@@ -938,7 +869,7 @@ void MIPrinter::print(const LLVMContext &Context, const TargetInstrInfo &TII,
break;
}
}
- printOffset(Op.getOffset());
+ MachineOperand::printOperandOffset(OS, Op.getOffset());
if (Op.getBaseAlignment() != Op.getSize())
OS << ", align " << Op.getBaseAlignment();
auto AAInfo = Op.getAAInfo();
@@ -978,118 +909,6 @@ void MIPrinter::printSyncScope(const LLVMContext &Context, SyncScope::ID SSID) {
}
}
-static void printCFIRegister(unsigned DwarfReg, raw_ostream &OS,
- const TargetRegisterInfo *TRI) {
- int Reg = TRI->getLLVMRegNum(DwarfReg, true);
- if (Reg == -1) {
- OS << "<badreg>";
- return;
- }
- OS << printReg(Reg, TRI);
-}
-
-void MIPrinter::print(const MCCFIInstruction &CFI,
- const TargetRegisterInfo *TRI) {
- switch (CFI.getOperation()) {
- case MCCFIInstruction::OpSameValue:
- OS << "same_value ";
- if (MCSymbol *Label = CFI.getLabel())
- MachineOperand::printSymbol(OS, *Label);
- printCFIRegister(CFI.getRegister(), OS, TRI);
- break;
- case MCCFIInstruction::OpRememberState:
- OS << "remember_state ";
- if (MCSymbol *Label = CFI.getLabel())
- MachineOperand::printSymbol(OS, *Label);
- break;
- case MCCFIInstruction::OpRestoreState:
- OS << "restore_state ";
- if (MCSymbol *Label = CFI.getLabel())
- MachineOperand::printSymbol(OS, *Label);
- break;
- case MCCFIInstruction::OpOffset:
- OS << "offset ";
- if (MCSymbol *Label = CFI.getLabel())
- MachineOperand::printSymbol(OS, *Label);
- printCFIRegister(CFI.getRegister(), OS, TRI);
- OS << ", " << CFI.getOffset();
- break;
- case MCCFIInstruction::OpDefCfaRegister:
- OS << "def_cfa_register ";
- if (MCSymbol *Label = CFI.getLabel())
- MachineOperand::printSymbol(OS, *Label);
- printCFIRegister(CFI.getRegister(), OS, TRI);
- break;
- case MCCFIInstruction::OpDefCfaOffset:
- OS << "def_cfa_offset ";
- if (MCSymbol *Label = CFI.getLabel())
- MachineOperand::printSymbol(OS, *Label);
- OS << CFI.getOffset();
- break;
- case MCCFIInstruction::OpDefCfa:
- OS << "def_cfa ";
- if (MCSymbol *Label = CFI.getLabel())
- MachineOperand::printSymbol(OS, *Label);
- printCFIRegister(CFI.getRegister(), OS, TRI);
- OS << ", " << CFI.getOffset();
- break;
- case MCCFIInstruction::OpRelOffset:
- OS << "rel_offset ";
- if (MCSymbol *Label = CFI.getLabel())
- MachineOperand::printSymbol(OS, *Label);
- printCFIRegister(CFI.getRegister(), OS, TRI);
- OS << ", " << CFI.getOffset();
- break;
- case MCCFIInstruction::OpAdjustCfaOffset:
- OS << "adjust_cfa_offset ";
- if (MCSymbol *Label = CFI.getLabel())
- MachineOperand::printSymbol(OS, *Label);
- OS << CFI.getOffset();
- break;
- case MCCFIInstruction::OpRestore:
- OS << "restore ";
- if (MCSymbol *Label = CFI.getLabel())
- MachineOperand::printSymbol(OS, *Label);
- printCFIRegister(CFI.getRegister(), OS, TRI);
- break;
- case MCCFIInstruction::OpEscape: {
- OS << "escape ";
- if (MCSymbol *Label = CFI.getLabel())
- MachineOperand::printSymbol(OS, *Label);
- if (!CFI.getValues().empty()) {
- size_t e = CFI.getValues().size() - 1;
- for (size_t i = 0; i < e; ++i)
- OS << format("0x%02x", uint8_t(CFI.getValues()[i])) << ", ";
- OS << format("0x%02x", uint8_t(CFI.getValues()[e])) << ", ";
- }
- break;
- }
- case MCCFIInstruction::OpUndefined:
- OS << "undefined ";
- if (MCSymbol *Label = CFI.getLabel())
- MachineOperand::printSymbol(OS, *Label);
- printCFIRegister(CFI.getRegister(), OS, TRI);
- break;
- case MCCFIInstruction::OpRegister:
- OS << "register ";
- if (MCSymbol *Label = CFI.getLabel())
- MachineOperand::printSymbol(OS, *Label);
- printCFIRegister(CFI.getRegister(), OS, TRI);
- OS << ", ";
- printCFIRegister(CFI.getRegister2(), OS, TRI);
- break;
- case MCCFIInstruction::OpWindowSave:
- OS << "window_save ";
- if (MCSymbol *Label = CFI.getLabel())
- MachineOperand::printSymbol(OS, *Label);
- break;
- default:
- // TODO: Print the other CFI Operations.
- OS << "<unserializable cfi operation>";
- break;
- }
-}
-
void llvm::printMIR(raw_ostream &OS, const Module &M) {
yaml::Output Out(OS);
Out << const_cast<Module &>(M);
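
Note: the MIR printer sheds its private helpers for IR block references,
offsets, FP immediates, block addresses, CFI indices, intrinsic IDs and
predicates; those operand kinds now funnel through the shared MachineOperand
printing path (see the MachineOperand.cpp hunks below), keeping MIR output
and debug dumps in sync. The printer retains only thin delegations such as:

  MachineOperand::printIRSlotNumber(OS, MST.getLocalSlot(&V));
  MachineOperand::printOperandOffset(OS, Op.getOffset());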
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 4ce689607730..84c808ee7938 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -1235,7 +1235,7 @@ void MachineBlockPlacement::precomputeTriangleChains() {
// When profile is available, we need to handle the triangle-shape CFG.
static BranchProbability getLayoutSuccessorProbThreshold(
const MachineBasicBlock *BB) {
- if (!BB->getParent()->getFunction().getEntryCount())
+ if (!BB->getParent()->getFunction().hasProfileData())
return BranchProbability(StaticLikelyProb, 100);
if (BB->succ_size() == 2) {
const MachineBasicBlock *Succ1 = *BB->succ_begin();
@@ -2178,7 +2178,7 @@ MachineBlockPlacement::collectLoopBlockSet(const MachineLoop &L) {
// will be merged into the first outer loop chain for which this block is not
// cold anymore. This needs precise profile data and we only do this when
// profile data is available.
- if (F->getFunction().getEntryCount() || ForceLoopColdBlock) {
+ if (F->getFunction().hasProfileData() || ForceLoopColdBlock) {
BlockFrequency LoopFreq(0);
for (auto LoopPred : L.getHeader()->predecessors())
if (!L.contains(LoopPred))
@@ -2220,7 +2220,7 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
// for better layout.
bool RotateLoopWithProfile =
ForcePreciseRotationCost ||
- (PreciseRotationCost && F->getFunction().getEntryCount());
+ (PreciseRotationCost && F->getFunction().hasProfileData());
// First check to see if there is an obviously preferable top block for the
// loop. This will default to the header, but may end up as one of the
diff --git a/lib/CodeGen/MachineOperand.cpp b/lib/CodeGen/MachineOperand.cpp
index d17c481862a1..ec81c6391171 100644
--- a/lib/CodeGen/MachineOperand.cpp
+++ b/lib/CodeGen/MachineOperand.cpp
@@ -380,16 +380,6 @@ static void tryToGetTargetInfo(const MachineOperand &MO,
}
}
-static void printOffset(raw_ostream &OS, int64_t Offset) {
- if (Offset == 0)
- return;
- if (Offset < 0) {
- OS << " - " << -Offset;
- return;
- }
- OS << " + " << Offset;
-}
-
static const char *getTargetIndexName(const MachineFunction &MF, int Index) {
const auto *TII = MF.getSubtarget().getInstrInfo();
assert(TII && "expected instruction info");
@@ -412,6 +402,44 @@ static const char *getTargetFlagName(const TargetInstrInfo *TII, unsigned TF) {
return nullptr;
}
+static void printCFIRegister(unsigned DwarfReg, raw_ostream &OS,
+ const TargetRegisterInfo *TRI) {
+ if (!TRI) {
+ OS << "%dwarfreg." << DwarfReg;
+ return;
+ }
+
+ int Reg = TRI->getLLVMRegNum(DwarfReg, true);
+ if (Reg == -1) {
+ OS << "<badreg>";
+ return;
+ }
+ OS << printReg(Reg, TRI);
+}
+
+static void printIRBlockReference(raw_ostream &OS, const BasicBlock &BB,
+ ModuleSlotTracker &MST) {
+ OS << "%ir-block.";
+ if (BB.hasName()) {
+ printLLVMNameWithoutPrefix(OS, BB.getName());
+ return;
+ }
+ Optional<int> Slot;
+ if (const Function *F = BB.getParent()) {
+ if (F == MST.getCurrentFunction()) {
+ Slot = MST.getLocalSlot(&BB);
+ } else if (const Module *M = F->getParent()) {
+ ModuleSlotTracker CustomMST(M, /*ShouldInitializeAllMetadata=*/false);
+ CustomMST.incorporateFunction(*F);
+ Slot = CustomMST.getLocalSlot(&BB);
+ }
+ }
+ if (Slot)
+ MachineOperand::printIRSlotNumber(OS, *Slot);
+ else
+ OS << "<unknown>";
+}
+
void MachineOperand::printSubregIdx(raw_ostream &OS, uint64_t Index,
const TargetRegisterInfo *TRI) {
OS << "%subreg.";
@@ -490,6 +518,125 @@ void MachineOperand::printStackObjectReference(raw_ostream &OS,
OS << '.' << Name;
}
+void MachineOperand::printOperandOffset(raw_ostream &OS, int64_t Offset) {
+ if (Offset == 0)
+ return;
+ if (Offset < 0) {
+ OS << " - " << -Offset;
+ return;
+ }
+ OS << " + " << Offset;
+}
+
+void MachineOperand::printIRSlotNumber(raw_ostream &OS, int Slot) {
+ if (Slot == -1)
+ OS << "<badref>";
+ else
+ OS << Slot;
+}
+
+static void printCFI(raw_ostream &OS, const MCCFIInstruction &CFI,
+ const TargetRegisterInfo *TRI) {
+ switch (CFI.getOperation()) {
+ case MCCFIInstruction::OpSameValue:
+ OS << "same_value ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ printCFIRegister(CFI.getRegister(), OS, TRI);
+ break;
+ case MCCFIInstruction::OpRememberState:
+ OS << "remember_state ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ break;
+ case MCCFIInstruction::OpRestoreState:
+ OS << "restore_state ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ break;
+ case MCCFIInstruction::OpOffset:
+ OS << "offset ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ printCFIRegister(CFI.getRegister(), OS, TRI);
+ OS << ", " << CFI.getOffset();
+ break;
+ case MCCFIInstruction::OpDefCfaRegister:
+ OS << "def_cfa_register ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ printCFIRegister(CFI.getRegister(), OS, TRI);
+ break;
+ case MCCFIInstruction::OpDefCfaOffset:
+ OS << "def_cfa_offset ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ OS << CFI.getOffset();
+ break;
+ case MCCFIInstruction::OpDefCfa:
+ OS << "def_cfa ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ printCFIRegister(CFI.getRegister(), OS, TRI);
+ OS << ", " << CFI.getOffset();
+ break;
+ case MCCFIInstruction::OpRelOffset:
+ OS << "rel_offset ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ printCFIRegister(CFI.getRegister(), OS, TRI);
+ OS << ", " << CFI.getOffset();
+ break;
+ case MCCFIInstruction::OpAdjustCfaOffset:
+ OS << "adjust_cfa_offset ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ OS << CFI.getOffset();
+ break;
+ case MCCFIInstruction::OpRestore:
+ OS << "restore ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ printCFIRegister(CFI.getRegister(), OS, TRI);
+ break;
+ case MCCFIInstruction::OpEscape: {
+ OS << "escape ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ if (!CFI.getValues().empty()) {
+ size_t e = CFI.getValues().size() - 1;
+ for (size_t i = 0; i < e; ++i)
+ OS << format("0x%02x", uint8_t(CFI.getValues()[i])) << ", ";
+ OS << format("0x%02x", uint8_t(CFI.getValues()[e])) << ", ";
+ }
+ break;
+ }
+ case MCCFIInstruction::OpUndefined:
+ OS << "undefined ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ printCFIRegister(CFI.getRegister(), OS, TRI);
+ break;
+ case MCCFIInstruction::OpRegister:
+ OS << "register ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ printCFIRegister(CFI.getRegister(), OS, TRI);
+ OS << ", ";
+ printCFIRegister(CFI.getRegister2(), OS, TRI);
+ break;
+ case MCCFIInstruction::OpWindowSave:
+ OS << "window_save ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ break;
+ default:
+ // TODO: Print the other CFI Operations.
+ OS << "<unserializable cfi directive>";
+ break;
+ }
+}
+
void MachineOperand::print(raw_ostream &OS, const TargetRegisterInfo *TRI,
const TargetIntrinsicInfo *IntrinsicInfo) const {
tryToGetTargetInfo(*this, TRI, IntrinsicInfo);
@@ -561,29 +708,7 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
getCImm()->printAsOperand(OS, /*PrintType=*/true, MST);
break;
case MachineOperand::MO_FPImmediate:
- if (getFPImm()->getType()->isFloatTy()) {
- OS << getFPImm()->getValueAPF().convertToFloat();
- } else if (getFPImm()->getType()->isHalfTy()) {
- APFloat APF = getFPImm()->getValueAPF();
- bool Unused;
- APF.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &Unused);
- OS << "half " << APF.convertToFloat();
- } else if (getFPImm()->getType()->isFP128Ty()) {
- APFloat APF = getFPImm()->getValueAPF();
- SmallString<16> Str;
- getFPImm()->getValueAPF().toString(Str);
- OS << "quad " << Str;
- } else if (getFPImm()->getType()->isX86_FP80Ty()) {
- APFloat APF = getFPImm()->getValueAPF();
- OS << "x86_fp80 0xK";
- APInt API = APF.bitcastToAPInt();
- OS << format_hex_no_prefix(API.getHiBits(16).getZExtValue(), 4,
- /*Upper=*/true);
- OS << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16,
- /*Upper=*/true);
- } else {
- OS << getFPImm()->getValueAPF().convertToDouble();
- }
+ getFPImm()->printAsOperand(OS, /*PrintType=*/true, MST);
break;
case MachineOperand::MO_MachineBasicBlock:
OS << printMBBReference(*getMBB());
@@ -606,7 +731,7 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
}
case MachineOperand::MO_ConstantPoolIndex:
OS << "%const." << getIndex();
- printOffset(OS, getOffset());
+ printOperandOffset(OS, getOffset());
break;
case MachineOperand::MO_TargetIndex: {
OS << "target-index(";
@@ -615,7 +740,7 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (const auto *TargetIndexName = getTargetIndexName(*MF, getIndex()))
Name = TargetIndexName;
OS << Name << ')';
- printOffset(OS, getOffset());
+ printOperandOffset(OS, getOffset());
break;
}
case MachineOperand::MO_JumpTableIndex:
@@ -623,7 +748,7 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
break;
case MachineOperand::MO_GlobalAddress:
getGlobal()->printAsOperand(OS, /*PrintType=*/false, MST);
- printOffset(OS, getOffset());
+ printOperandOffset(OS, getOffset());
break;
case MachineOperand::MO_ExternalSymbol: {
StringRef Name = getSymbolName();
@@ -633,16 +758,19 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
} else {
printLLVMNameWithoutPrefix(OS, Name);
}
- printOffset(OS, getOffset());
+ printOperandOffset(OS, getOffset());
break;
}
- case MachineOperand::MO_BlockAddress:
- OS << '<';
- getBlockAddress()->printAsOperand(OS, /*PrintType=*/false, MST);
- if (getOffset())
- OS << "+" << getOffset();
- OS << '>';
+ case MachineOperand::MO_BlockAddress: {
+ OS << "blockaddress(";
+ getBlockAddress()->getFunction()->printAsOperand(OS, /*PrintType=*/false,
+ MST);
+ OS << ", ";
+ printIRBlockReference(OS, *getBlockAddress()->getBasicBlock(), MST);
+ OS << ')';
+ MachineOperand::printOperandOffset(OS, getOffset());
break;
+ }
case MachineOperand::MO_RegisterMask: {
OS << "<regmask";
if (TRI) {
@@ -693,23 +821,27 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
case MachineOperand::MO_MCSymbol:
printSymbol(OS, *getMCSymbol());
break;
- case MachineOperand::MO_CFIIndex:
- OS << "<call frame instruction>";
+ case MachineOperand::MO_CFIIndex: {
+ if (const MachineFunction *MF = getMFIfAvailable(*this))
+ printCFI(OS, MF->getFrameInstructions()[getCFIIndex()], TRI);
+ else
+ OS << "<cfi directive>";
break;
+ }
case MachineOperand::MO_IntrinsicID: {
Intrinsic::ID ID = getIntrinsicID();
if (ID < Intrinsic::num_intrinsics)
- OS << "<intrinsic:@" << Intrinsic::getName(ID, None) << '>';
+ OS << "intrinsic(@" << Intrinsic::getName(ID, None) << ')';
else if (IntrinsicInfo)
- OS << "<intrinsic:@" << IntrinsicInfo->getName(ID) << '>';
+ OS << "intrinsic(@" << IntrinsicInfo->getName(ID) << ')';
else
- OS << "<intrinsic:" << ID << '>';
+ OS << "intrinsic(" << ID << ')';
break;
}
case MachineOperand::MO_Predicate: {
auto Pred = static_cast<CmpInst::Predicate>(getPredicate());
- OS << '<' << (CmpInst::isIntPredicate(Pred) ? "intpred" : "floatpred")
- << CmpInst::getPredicateName(Pred) << '>';
+ OS << (CmpInst::isIntPredicate(Pred) ? "int" : "float") << "pred("
+ << CmpInst::getPredicateName(Pred) << ')';
break;
}
}
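
Note: printing an MO_CFIIndex operand needs the owning MachineFunction to
look up the frame instruction, so the printer guards on it and degrades to
"<cfi directive>" for detached operands. Assumed shape of the
getMFIfAvailable helper used above (its definition is not in this excerpt):

  static const MachineFunction *getMFIfAvailable(const MachineOperand &MO) {
    // Walk operand -> instruction -> block -> function; any missing link
    // (e.g. an operand not yet inserted into an instruction) yields null.
    if (const MachineInstr *MI = MO.getParent())
      if (const MachineBasicBlock *MBB = MI->getParent())
        if (const MachineFunction *MF = MBB->getParent())
          return MF;
    return nullptr;
  }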
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index c9fe7681e280..e0cc2ca9a2a2 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -37,7 +37,7 @@
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
-#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
diff --git a/lib/CodeGen/README.txt b/lib/CodeGen/README.txt
index 2fcbd1280da4..3318e109155b 100644
--- a/lib/CodeGen/README.txt
+++ b/lib/CodeGen/README.txt
@@ -164,7 +164,7 @@ synthesize the various copy insertion/inspection methods in TargetInstrInfo.
Stack coloring improvements:
-1. Do proper LiveStackAnalysis on all stack objects including those which are
+1. Do proper LiveStacks analysis on all stack objects including those which are
not spill slots.
2. Reorder objects to fill in gaps between objects.
e.g. 4, 1, <gap>, 4, 1, 1, 1, <gap>, 4 => 4, 1, 1, 1, 1, 4, 4
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp
index 6e273277804b..1125d2c62bef 100644
--- a/lib/CodeGen/RegAllocBasic.cpp
+++ b/lib/CodeGen/RegAllocBasic.cpp
@@ -21,7 +21,7 @@
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
-#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 186ef577e31d..e492c481a540 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -39,7 +39,7 @@
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
-#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 351e91c932eb..69a879701fae 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -45,7 +45,7 @@
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
-#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index f97732c1c49d..17f907eb07e8 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3988,10 +3988,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
return RAND;
// fold (and (or x, C), D) -> D if (C & D) == D
- if (N1C && N0.getOpcode() == ISD::OR)
- if (ConstantSDNode *ORI = isConstOrConstSplat(N0.getOperand(1)))
- if (N1C->getAPIntValue().isSubsetOf(ORI->getAPIntValue()))
- return N1;
+ auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
+ return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
+ };
+ if (N0.getOpcode() == ISD::OR &&
+ matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
+ return N1;
// fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
SDValue N0Op0 = N0.getOperand(0);
@@ -4675,16 +4677,16 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
// Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
// iff (c1 & c2) != 0.
- if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse()) {
- if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
- if (C1->getAPIntValue().intersects(N1C->getAPIntValue())) {
- if (SDValue COR =
- DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT, N1C, C1))
- return DAG.getNode(
- ISD::AND, SDLoc(N), VT,
- DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR);
- return SDValue();
- }
+ auto MatchIntersect = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
+ return LHS->getAPIntValue().intersects(RHS->getAPIntValue());
+ };
+ if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
+ matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect)) {
+ if (SDValue COR = DAG.FoldConstantArithmetic(
+ ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
+ SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
+ AddToWorklist(IOR.getNode());
+ return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
}
}
@@ -5380,21 +5382,6 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
AddToWorklist(NotX.getNode());
return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
}
- // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
- if (N1C && N0.getOpcode() == ISD::XOR) {
- if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) {
- SDLoc DL(N);
- return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
- DAG.getConstant(N1C->getAPIntValue() ^
- N00C->getAPIntValue(), DL, VT));
- }
- if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) {
- SDLoc DL(N);
- return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
- DAG.getConstant(N1C->getAPIntValue() ^
- N01C->getAPIntValue(), DL, VT));
- }
- }
// fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
unsigned OpSizeInBits = VT.getScalarSizeInBits();
@@ -10201,7 +10188,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
case ISD::SETLT:
case ISD::SETLE:
std::swap(TrueOpnd, FalseOpnd);
- // Fall through
+ LLVM_FALLTHROUGH;
case ISD::SETOGT:
case ISD::SETUGT:
case ISD::SETOGE:
@@ -10555,7 +10542,7 @@ static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
// value in one SSE register, but instruction selection cannot handle
// FCOPYSIGN on SSE registers yet.
EVT N1VT = N1->getValueType(0);
- EVT N1Op0VT = N1->getOperand(0)->getValueType(0);
+ EVT N1Op0VT = N1->getOperand(0).getValueType();
return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
}
return false;
@@ -13784,30 +13771,30 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
}
}
- if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
- if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
- !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
- ST->getMemoryVT() == ST1->getMemoryVT()) {
- // If this is a store followed by a store with the same value to the same
- // location, then the store is dead/noop.
- if (ST1->getValue() == Value) {
- // The store is dead, remove it.
- return Chain;
- }
-
- // If this is a store who's preceeding store to the same location
- // and no one other node is chained to that store we can effectively
- // drop the store. Do not remove stores to undef as they may be used as
- // data sinks.
- if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
- !ST1->getBasePtr().isUndef()) {
- // ST1 is fully overwritten and can be elided. Combine with it's chain
- // value.
+ // Deal with elidable overlapping chained stores.
+ if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain))
+ if (OptLevel != CodeGenOpt::None && ST->isUnindexed() &&
+ ST1->isUnindexed() && !ST1->isVolatile() && ST1->hasOneUse() &&
+ !ST1->getBasePtr().isUndef() && !ST->isVolatile()) {
+ BaseIndexOffset STBasePtr = BaseIndexOffset::match(ST->getBasePtr(), DAG);
+ BaseIndexOffset ST1BasePtr =
+ BaseIndexOffset::match(ST1->getBasePtr(), DAG);
+ unsigned STBytes = ST->getMemoryVT().getStoreSize();
+ unsigned ST1Bytes = ST1->getMemoryVT().getStoreSize();
+ int64_t PtrDiff;
+      // If the preceding store (ST1) writes to a subset of the same memory
+      // as this store and no other node is chained to it, ST1 is fully
+      // overwritten and can effectively be dropped. Do not remove stores to
+      // undef as they may be used as data sinks.
+
+ if (((ST->getBasePtr() == ST1->getBasePtr()) &&
+ (ST->getValue() == ST1->getValue())) ||
+ (STBasePtr.equalBaseIndex(ST1BasePtr, DAG, PtrDiff) &&
+ (0 <= PtrDiff) && (PtrDiff + ST1Bytes <= STBytes))) {
CombineTo(ST1, ST1->getChain());
- return SDValue();
+ return SDValue(N, 0);
}
}
- }
// If this is an FP_ROUND or TRUNC followed by a store, fold this into a
// truncating store. We can do this even if this is already a truncstore.
@@ -15110,7 +15097,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
// Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr).
if (In->getOpcode() == ISD::BITCAST &&
- !In->getOperand(0)->getValueType(0).isVector()) {
+ !In->getOperand(0).getValueType().isVector()) {
SDValue Scalar = In->getOperand(0);
// If the bitcast type isn't legal, it might be a trunc of a legal type;
@@ -15157,7 +15144,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
bool FoundMinVT = false;
for (const SDValue &Op : N->ops())
if (ISD::BUILD_VECTOR == Op.getOpcode()) {
- EVT OpSVT = Op.getOperand(0)->getValueType(0);
+ EVT OpSVT = Op.getOperand(0).getValueType();
MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
FoundMinVT = true;
}
@@ -17418,43 +17405,6 @@ SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
return buildSqrtEstimateImpl(Op, Flags, false);
}
-/// Return true if base is a frame index, which is known not to alias with
-/// anything but itself. Provides base object and offset as results.
-static bool findBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
- const GlobalValue *&GV, const void *&CV) {
- // Assume it is a primitive operation.
- Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr;
-
- // If it's an adding a simple constant then integrate the offset.
- if (Base.getOpcode() == ISD::ADD) {
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
- Base = Base.getOperand(0);
- Offset += C->getSExtValue();
- }
- }
-
- // Return the underlying GlobalValue, and update the Offset. Return false
- // for GlobalAddressSDNode since the same GlobalAddress may be represented
- // by multiple nodes with different offsets.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
- GV = G->getGlobal();
- Offset += G->getOffset();
- return false;
- }
-
- // Return the underlying Constant value, and update the Offset. Return false
- // for ConstantSDNodes since the same constant pool entry may be represented
- // by multiple nodes with different offsets.
- if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
- CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal()
- : (const void *)C->getConstVal();
- Offset += C->getOffset();
- return false;
- }
- // If it's any of the following then it can't alias with anything but itself.
- return isa<FrameIndexSDNode>(Base);
-}
-
/// Return true if there is any possibility that the two addresses overlap.
bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
// If they are the same then they must be aliases.
@@ -17496,39 +17446,18 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
return false;
}
- // FIXME: findBaseOffset and ConstantValue/GlobalValue/FrameIndex analysis
- // modified to use BaseIndexOffset.
-
- // Gather base node and offset information.
- SDValue Base0, Base1;
- int64_t Offset0, Offset1;
- const GlobalValue *GV0, *GV1;
- const void *CV0, *CV1;
- bool IsFrameIndex0 = findBaseOffset(Op0->getBasePtr(),
- Base0, Offset0, GV0, CV0);
- bool IsFrameIndex1 = findBaseOffset(Op1->getBasePtr(),
- Base1, Offset1, GV1, CV1);
-
- // If they have the same base address, then check to see if they overlap.
- if (Base0 == Base1 || (GV0 && (GV0 == GV1)) || (CV0 && (CV0 == CV1)))
- return !((Offset0 + NumBytes0) <= Offset1 ||
- (Offset1 + NumBytes1) <= Offset0);
-
- // It is possible for different frame indices to alias each other, mostly
- // when tail call optimization reuses return address slots for arguments.
- // To catch this case, look up the actual index of frame indices to compute
- // the real alias relationship.
- if (IsFrameIndex0 && IsFrameIndex1) {
- MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
- Offset0 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base0)->getIndex());
- Offset1 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
- return !((Offset0 + NumBytes0) <= Offset1 ||
- (Offset1 + NumBytes1) <= Offset0);
- }
-
- // Otherwise, if we know what the bases are, and they aren't identical, then
- // we know they cannot alias.
- if ((IsFrameIndex0 || CV0 || GV0) && (IsFrameIndex1 || CV1 || GV1))
+ bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
+ bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
+ bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
+ bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
+ bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
+ bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
+
+  // If the bases are of different kinds, or are the same kind with
+  // directly comparable indices, we can check that they do not alias.
+ if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
+ (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
+ (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1))
return false;
// If we know required SrcValue1 and SrcValue2 have relatively large alignment
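
Note: two of the DAGCombiner folds above are rewritten on top of
matchBinaryPredicate, which, as used here, is assumed to evaluate the given
predicate on a pair of scalar constants or element-wise across constant
build vectors, letting the folds fire for vector splats as well. Usage as in
the visitAND hunk:

  // fold (and (or x, C), D) -> D if (C & D) == D, now vector-aware
  auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
    return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
  };
  if (N0.getOpcode() == ISD::OR &&
      matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
    return N1;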
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index eaf177d0661b..e28a3aa47ca3 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -1887,7 +1887,7 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_STORE(SDNode *N, unsigned OpNo) {
SDLoc DL(N);
SDValue Promoted = GetPromotedFloat(Val);
- EVT VT = ST->getOperand(1)->getValueType(0);
+ EVT VT = ST->getOperand(1).getValueType();
EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
SDValue NewVal;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index b60d7bca498a..4438ee7878b8 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -224,7 +224,7 @@ bool DAGTypeLegalizer::run() {
assert(N->getNodeId() == ReadyToProcess &&
"Node should be ready if on worklist!");
- DEBUG(dbgs() << "Legalizing node: "; N->dump());
+ DEBUG(dbgs() << "Legalizing node: "; N->dump(&DAG));
if (IgnoreNodeResults(N)) {
DEBUG(dbgs() << "Ignoring node results\n");
goto ScanOperands;
@@ -296,7 +296,7 @@ ScanOperands:
continue;
const auto Op = N->getOperand(i);
- DEBUG(dbgs() << "Analyzing operand: "; Op.dump());
+ DEBUG(dbgs() << "Analyzing operand: "; Op.dump(&DAG));
EVT OpVT = Op.getValueType();
switch (getTypeAction(OpVT)) {
case TargetLowering::TypeLegal:
@@ -445,7 +445,7 @@ NodeDone:
if (!isTypeLegal(Node.getValueType(i)) &&
!TLI.isTypeLegal(Node.getValueType(i))) {
dbgs() << "Result type " << i << " illegal: ";
- Node.dump();
+ Node.dump(&DAG);
Failed = true;
}
@@ -455,7 +455,7 @@ NodeDone:
!isTypeLegal(Node.getOperand(i).getValueType()) &&
!TLI.isTypeLegal(Node.getOperand(i).getValueType())) {
dbgs() << "Operand type " << i << " illegal: ";
- Node.getOperand(i).dump();
+ Node.getOperand(i).dump(&DAG);
Failed = true;
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 8f2320f52a0f..ce1c01b621f0 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -331,7 +331,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
// At least try the common case where the boolean is generated by a
// comparison.
if (Cond->getOpcode() == ISD::SETCC) {
- EVT OpVT = Cond->getOperand(0)->getValueType(0);
+ EVT OpVT = Cond->getOperand(0).getValueType();
ScalarBool = TLI.getBooleanContents(OpVT.getScalarType());
VecBool = TLI.getBooleanContents(OpVT);
} else
@@ -1548,14 +1548,14 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
break;
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
- if (N->getValueType(0).bitsLT(N->getOperand(0)->getValueType(0)))
+ if (N->getValueType(0).bitsLT(N->getOperand(0).getValueType()))
Res = SplitVecOp_TruncateHelper(N);
else
Res = SplitVecOp_UnaryOp(N);
break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
- if (N->getValueType(0).bitsLT(N->getOperand(0)->getValueType(0)))
+ if (N->getValueType(0).bitsLT(N->getOperand(0).getValueType()))
Res = SplitVecOp_TruncateHelper(N);
else
Res = SplitVecOp_UnaryOp(N);
diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index 379f0dcef513..7f369c746d24 100644
--- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -252,6 +252,7 @@ bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) {
if (!ResourcesModel->canReserveResources(&TII->get(
SU->getNode()->getMachineOpcode())))
return false;
+ break;
case TargetOpcode::EXTRACT_SUBREG:
case TargetOpcode::INSERT_SUBREG:
case TargetOpcode::SUBREG_TO_REG:
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 12a21e74079e..a04c770c51c4 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3750,6 +3750,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::TRUNCATE:
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND:
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP:
case ISD::ABS:
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index 544da362be69..d5980919d03c 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -37,6 +37,23 @@ bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other,
return true;
}
+ // Match Constants
+ if (auto *A = dyn_cast<ConstantPoolSDNode>(Base))
+ if (auto *B = dyn_cast<ConstantPoolSDNode>(Other.Base)) {
+ bool IsMatch =
+ A->isMachineConstantPoolEntry() == B->isMachineConstantPoolEntry();
+ if (IsMatch) {
+ if (A->isMachineConstantPoolEntry())
+ IsMatch = A->getMachineCPVal() == B->getMachineCPVal();
+ else
+ IsMatch = A->getConstVal() == B->getConstVal();
+ }
+ if (IsMatch) {
+ Off += B->getOffset() - A->getOffset();
+ return true;
+ }
+ }
+
const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
// Match non-equal FrameIndexes - If both frame indices are fixed
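
Note: equalBaseIndex learns to match two ConstantPoolSDNodes that name the
same pool entry, folding the difference of their offsets into Off. The
visitSTORE hunk earlier relies on this to prove one store covers another:

  BaseIndexOffset B0 = BaseIndexOffset::match(ST->getBasePtr(), DAG);
  BaseIndexOffset B1 = BaseIndexOffset::match(ST1->getBasePtr(), DAG);
  int64_t PtrDiff;
  if (B0.equalBaseIndex(B1, DAG, PtrDiff) && 0 <= PtrDiff &&
      PtrDiff + ST1Bytes <= STBytes) {
    // ST1 writes into a subrange of ST's bytes: the earlier store is dead.
  }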
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 18f6997ef83c..d13ccc263718 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -3117,7 +3117,16 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
continue;
}
case OPC_RecordMemRef:
- MatchedMemRefs.push_back(cast<MemSDNode>(N)->getMemOperand());
+ if (auto *MN = dyn_cast<MemSDNode>(N))
+ MatchedMemRefs.push_back(MN->getMemOperand());
+ else {
+ DEBUG(
+ dbgs() << "Expected MemSDNode ";
+ N->dump(CurDAG);
+ dbgs() << '\n'
+ );
+ }
+
continue;
case OPC_CaptureGlueInput:
@@ -3563,7 +3572,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
Ops.push_back(InputGlue);
// Create the node.
- SDNode *Res = nullptr;
+ MachineSDNode *Res = nullptr;
bool IsMorphNodeTo = Opcode == OPC_MorphNodeTo ||
(Opcode >= OPC_MorphNodeTo0 && Opcode <= OPC_MorphNodeTo2);
if (!IsMorphNodeTo) {
@@ -3589,7 +3598,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
"Chain node replaced during MorphNode");
Chain.erase(std::remove(Chain.begin(), Chain.end(), N), Chain.end());
});
- Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops, EmitNodeInfo);
+ Res = cast<MachineSDNode>(MorphNode(NodeToMatch, TargetOpc, VTList,
+ Ops, EmitNodeInfo));
}
// If the node had chain/glue results, update our notion of the current
@@ -3645,13 +3655,19 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
}
}
- cast<MachineSDNode>(Res)
- ->setMemRefs(MemRefs, MemRefs + NumMemRefs);
+ Res->setMemRefs(MemRefs, MemRefs + NumMemRefs);
}
- DEBUG(dbgs() << " "
- << (IsMorphNodeTo ? "Morphed" : "Created")
- << " node: "; Res->dump(CurDAG); dbgs() << "\n");
+ DEBUG(
+ if (!MatchedMemRefs.empty() && Res->memoperands_empty())
+ dbgs() << " Dropping mem operands\n";
+ dbgs() << " "
+ << (IsMorphNodeTo ? "Morphed" : "Created")
+ << " node: ";
+ Res->dump(CurDAG);
+
+ dbgs() << '\n';
+ );
// If this was a MorphNodeTo then we're completely done!
if (IsMorphNodeTo) {
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 58276052c10b..d76e52d78870 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3812,7 +3812,7 @@ SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
DAG.getConstant(EltSize, dl, IdxVT));
- return DAG.getNode(ISD::ADD, dl, IdxVT, Index, VecPtr);
+ return DAG.getNode(ISD::ADD, dl, IdxVT, VecPtr, Index);
}
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index 62f662d1ade4..8fc7a4a32842 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -16,7 +16,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
-#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 543c12eebb45..224ae1a3236a 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -89,6 +89,21 @@ static cl::opt<unsigned> OptsizeJumpTableDensity(
cl::desc("Minimum density for building a jump table in "
"an optsize function"));
+static bool darwinHasSinCos(const Triple &TT) {
+ assert(TT.isOSDarwin() && "should be called with darwin triple");
+ // Don't bother with 32 bit x86.
+ if (TT.getArch() == Triple::x86)
+ return false;
+  // macOS < 10.9 (or 32-bit macOS) has no sincos_stret.
+ if (TT.isMacOSX())
+ return !TT.isMacOSXVersionLT(10, 9) && TT.isArch64Bit();
+ // iOS < 7.0 has no sincos_stret.
+ if (TT.isiOS())
+ return !TT.isOSVersionLT(7, 0);
+ // Any other darwin such as WatchOS/TvOS is new enough.
+ return true;
+}
+
// Although this default value is arbitrary, it is not random. It is assumed
// that a condition that evaluates the same way by a higher percentage than this
// is best represented as control flow. Therefore, the default value N should be
@@ -100,44 +115,56 @@ static cl::opt<int> MinPercentageForPredictableBranch(
"or false to assume that the condition is predictable"),
cl::Hidden);
-/// InitLibcallNames - Set default libcall names.
-static void InitLibcallNames(const char **Names, const Triple &TT) {
+void TargetLoweringBase::InitLibcalls(const Triple &TT) {
#define HANDLE_LIBCALL(code, name) \
- Names[RTLIB::code] = name;
+ setLibcallName(RTLIB::code, name);
#include "llvm/CodeGen/RuntimeLibcalls.def"
#undef HANDLE_LIBCALL
+ // Initialize calling conventions to their default.
+ for (int LC = 0; LC < RTLIB::UNKNOWN_LIBCALL; ++LC)
+ setLibcallCallingConv((RTLIB::Libcall)LC, CallingConv::C);
// A few names are different on particular architectures or environments.
if (TT.isOSDarwin()) {
// For f16/f32 conversions, Darwin uses the standard naming scheme, instead
// of the gnueabi-style __gnu_*_ieee.
// FIXME: What about other targets?
- Names[RTLIB::FPEXT_F16_F32] = "__extendhfsf2";
- Names[RTLIB::FPROUND_F32_F16] = "__truncsfhf2";
+ setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
+ setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
+
+ // Darwin 10 and higher has an optimized __bzero.
+ if (!TT.isMacOSX() || !TT.isMacOSXVersionLT(10, 6) || TT.isArch64Bit()) {
+ setLibcallName(RTLIB::BZERO, TT.isAArch64() ? "bzero" : "__bzero");
+ }
+
+ if (darwinHasSinCos(TT)) {
+ setLibcallName(RTLIB::SINCOS_STRET_F32, "__sincosf_stret");
+ setLibcallName(RTLIB::SINCOS_STRET_F64, "__sincos_stret");
+ if (TT.isWatchABI()) {
+ setLibcallCallingConv(RTLIB::SINCOS_STRET_F32,
+ CallingConv::ARM_AAPCS_VFP);
+ setLibcallCallingConv(RTLIB::SINCOS_STRET_F64,
+ CallingConv::ARM_AAPCS_VFP);
+ }
+ }
} else {
- Names[RTLIB::FPEXT_F16_F32] = "__gnu_h2f_ieee";
- Names[RTLIB::FPROUND_F32_F16] = "__gnu_f2h_ieee";
+ setLibcallName(RTLIB::FPEXT_F16_F32, "__gnu_h2f_ieee");
+ setLibcallName(RTLIB::FPROUND_F32_F16, "__gnu_f2h_ieee");
}
if (TT.isGNUEnvironment() || TT.isOSFuchsia()) {
- Names[RTLIB::SINCOS_F32] = "sincosf";
- Names[RTLIB::SINCOS_F64] = "sincos";
- Names[RTLIB::SINCOS_F80] = "sincosl";
- Names[RTLIB::SINCOS_F128] = "sincosl";
- Names[RTLIB::SINCOS_PPCF128] = "sincosl";
+ setLibcallName(RTLIB::SINCOS_F32, "sincosf");
+ setLibcallName(RTLIB::SINCOS_F64, "sincos");
+ setLibcallName(RTLIB::SINCOS_F80, "sincosl");
+ setLibcallName(RTLIB::SINCOS_F128, "sincosl");
+ setLibcallName(RTLIB::SINCOS_PPCF128, "sincosl");
}
if (TT.isOSOpenBSD()) {
- Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = nullptr;
+ setLibcallName(RTLIB::STACKPROTECTOR_CHECK_FAIL, nullptr);
}
}
-/// Set default libcall CallingConvs.
-static void InitLibcallCallingConvs(CallingConv::ID *CCs) {
- for (int LC = 0; LC < RTLIB::UNKNOWN_LIBCALL; ++LC)
- CCs[LC] = CallingConv::C;
-}
-
/// getFPEXT - Return the FPEXT_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
@@ -524,9 +551,8 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
std::fill(std::begin(LibcallRoutineNames), std::end(LibcallRoutineNames), nullptr);
- InitLibcallNames(LibcallRoutineNames, TM.getTargetTriple());
+ InitLibcalls(TM.getTargetTriple());
InitCmpLibcallCCs(CmpLibcallCCs);
- InitLibcallCallingConvs(LibcallCallingConvs);
}
void TargetLoweringBase::initActions() {
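
The TargetLoweringBase hunk above folds the old InitLibcallNames/InitLibcallCallingConvs pair into a single InitLibcalls() driven by the HANDLE_LIBCALL X-macro from RuntimeLibcalls.def, with per-OS overrides (Darwin's __bzero and __sincos_stret handling) layered on top. A minimal standalone sketch of that table-plus-overrides pattern, using hypothetical entries rather than the real .def contents:

// Sketch only: LIBCALLS stands in for llvm/CodeGen/RuntimeLibcalls.def.
#include <cstdio>

#define LIBCALLS(X)        \
  X(SINCOS_F32, "sincosf") \
  X(SINCOS_F64, "sincos")  \
  X(BZERO, nullptr)

enum Libcall {
#define AS_ENUM(code, name) code,
  LIBCALLS(AS_ENUM)
#undef AS_ENUM
  UNKNOWN_LIBCALL
};

static const char *Names[UNKNOWN_LIBCALL];

static void initLibcalls(bool IsDarwin) {
  // One pass sets every default from the table...
#define AS_NAME(code, name) Names[code] = name;
  LIBCALLS(AS_NAME)
#undef AS_NAME
  // ...then a handful of per-environment overrides follow, as in the patch.
  if (IsDarwin)
    Names[BZERO] = "__bzero";
}

int main() {
  initLibcalls(/*IsDarwin=*/true);
  std::printf("BZERO -> %s\n", Names[BZERO]); // prints "BZERO -> __bzero"
}

Routing everything through setLibcallName()/setLibcallCallingConv() in the patch has the same effect: target constructors can keep overriding individual entries without writing into raw arrays.
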
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index 64bb37a280a6..13f7e83f3dd0 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -22,7 +22,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
-#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
diff --git a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
index 17f29737bf93..6a6b7fc6fc20 100644
--- a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
+++ b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
@@ -83,6 +83,7 @@ bool DWARFAcceleratorTable::validateForms() {
!FormValue.isFormClass(DWARFFormValue::FC_Flag)) ||
FormValue.getForm() == dwarf::DW_FORM_sdata)
return false;
+ break;
default:
break;
}
diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp
index a5defa90eb35..eb23ca8229a3 100644
--- a/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -88,70 +88,101 @@ static void dumpUUID(raw_ostream &OS, const ObjectFile &Obj) {
}
}
-static void
-dumpDWARFv5StringOffsetsSection(raw_ostream &OS, StringRef SectionName,
- const DWARFObject &Obj,
- const DWARFSection &StringOffsetsSection,
- StringRef StringSection, bool LittleEndian) {
+using ContributionCollection =
+ std::vector<Optional<StrOffsetsContributionDescriptor>>;
+
+// Collect all the contributions to the string offsets table from all units,
+// sort them by their starting offsets and remove duplicates.
+static ContributionCollection
+collectContributionData(DWARFContext::cu_iterator_range CUs,
+ DWARFContext::tu_section_iterator_range TUSs) {
+ ContributionCollection Contributions;
+ for (const auto &CU : CUs)
+ Contributions.push_back(CU->getStringOffsetsTableContribution());
+ for (const auto &TUS : TUSs)
+ for (const auto &TU : TUS)
+ Contributions.push_back(TU->getStringOffsetsTableContribution());
+
+ // Sort the contributions so that any invalid ones are placed at
+ // the start of the contributions vector. This way they are reported
+ // first.
+ std::sort(Contributions.begin(), Contributions.end(),
+ [](const Optional<StrOffsetsContributionDescriptor> &L,
+ const Optional<StrOffsetsContributionDescriptor> &R) {
+ if (L && R) return L->Base < R->Base;
+ return R.hasValue();
+ });
+
+ // Uniquify contributions, as it is possible that units (specifically
+ // type units in dwo or dwp files) share contributions. We don't want
+ // to report them more than once.
+ Contributions.erase(
+ std::unique(Contributions.begin(), Contributions.end(),
+ [](const Optional<StrOffsetsContributionDescriptor> &L,
+ const Optional<StrOffsetsContributionDescriptor> &R) {
+ if (L && R)
+ return L->Base == R->Base && L->Size == R->Size;
+ return false;
+ }),
+ Contributions.end());
+ return Contributions;
+}
+
+static void dumpDWARFv5StringOffsetsSection(
+ raw_ostream &OS, StringRef SectionName, const DWARFObject &Obj,
+ const DWARFSection &StringOffsetsSection, StringRef StringSection,
+ DWARFContext::cu_iterator_range CUs,
+ DWARFContext::tu_section_iterator_range TUSs, bool LittleEndian) {
+ auto Contributions = collectContributionData(CUs, TUSs);
DWARFDataExtractor StrOffsetExt(Obj, StringOffsetsSection, LittleEndian, 0);
- uint32_t Offset = 0;
+ DataExtractor StrData(StringSection, LittleEndian, 0);
uint64_t SectionSize = StringOffsetsSection.Data.size();
-
- while (Offset < SectionSize) {
- unsigned Version = 0;
- DwarfFormat Format = DWARF32;
- unsigned EntrySize = 4;
- // Perform validation and extract the segment size from the header.
- if (!StrOffsetExt.isValidOffsetForDataOfSize(Offset, 4)) {
+ uint32_t Offset = 0;
+ for (auto &Contribution : Contributions) {
+ // Report an ill-formed contribution.
+ if (!Contribution) {
OS << "error: invalid contribution to string offsets table in section ."
<< SectionName << ".\n";
return;
}
- uint32_t ContributionStart = Offset;
- uint64_t ContributionSize = StrOffsetExt.getU32(&Offset);
- // A contribution size of 0xffffffff indicates DWARF64, with the actual size
- // in the following 8 bytes. Otherwise, the DWARF standard mandates that
- // the contribution size must be at most 0xfffffff0.
- if (ContributionSize == 0xffffffff) {
- if (!StrOffsetExt.isValidOffsetForDataOfSize(Offset, 8)) {
- OS << "error: invalid contribution to string offsets table in section ."
- << SectionName << ".\n";
- return;
- }
- Format = DWARF64;
- EntrySize = 8;
- ContributionSize = StrOffsetExt.getU64(&Offset);
- } else if (ContributionSize > 0xfffffff0) {
- OS << "error: invalid contribution to string offsets table in section ."
+
+ dwarf::DwarfFormat Format = Contribution->getFormat();
+ uint16_t Version = Contribution->getVersion();
+ uint64_t ContributionHeader = Contribution->Base;
+ // In DWARF v5 there is a contribution header that immediately precedes
+ // the string offsets base (the location we have previously retrieved from
+ // the CU DIE's DW_AT_str_offsets attribute). The header is located either
+ // 8 or 16 bytes before the base, depending on the contribution's format.
+ if (Version >= 5)
+ ContributionHeader -= Format == DWARF32 ? 8 : 16;
+
+ // Detect overlapping contributions.
+ if (Offset > ContributionHeader) {
+ OS << "error: overlapping contributions to string offsets table in "
+ "section ."
<< SectionName << ".\n";
return;
}
-
- // We must ensure that we don't read a partial record at the end, so we
- // validate for a multiple of EntrySize. Also, we're expecting a version
- // number and padding, which adds an additional 4 bytes.
- uint64_t ValidationSize =
- 4 + ((ContributionSize + EntrySize - 1) & (-(uint64_t)EntrySize));
- if (!StrOffsetExt.isValidOffsetForDataOfSize(Offset, ValidationSize)) {
- OS << "error: contribution to string offsets table in section ."
- << SectionName << " has invalid length.\n";
- return;
+ // Report a gap in the table.
+ if (Offset < ContributionHeader) {
+ OS << format("0x%8.8x: Gap, length = ", Offset);
+ OS << (ContributionHeader - Offset) << "\n";
}
-
- Version = StrOffsetExt.getU16(&Offset);
- Offset += 2;
- OS << format("0x%8.8x: ", ContributionStart);
- OS << "Contribution size = " << ContributionSize
+ OS << format("0x%8.8x: ", (uint32_t)ContributionHeader);
+ OS << "Contribution size = " << Contribution->Size
+ << ", Format = " << (Format == DWARF32 ? "DWARF32" : "DWARF64")
<< ", Version = " << Version << "\n";
- uint32_t ContributionBase = Offset;
- DataExtractor StrData(StringSection, LittleEndian, 0);
- while (Offset - ContributionBase < ContributionSize) {
+ Offset = Contribution->Base;
+ unsigned EntrySize = Contribution->getDwarfOffsetByteSize();
+ while (Offset - Contribution->Base < Contribution->Size) {
OS << format("0x%8.8x: ", Offset);
- // FIXME: We can only extract strings in DWARF32 format at the moment.
+ // FIXME: We can only extract strings if the offset fits in 32 bits.
uint64_t StringOffset =
StrOffsetExt.getRelocatedValue(EntrySize, &Offset);
- if (Format == DWARF32) {
+ // Extract the string if we can and display it. Otherwise just report
+ // the offset.
+ if (StringOffset <= std::numeric_limits<uint32_t>::max()) {
uint32_t StringOffset32 = (uint32_t)StringOffset;
OS << format("%8.8x ", StringOffset32);
const char *S = StrData.getCStr(&StringOffset32);
@@ -162,6 +193,11 @@ dumpDWARFv5StringOffsetsSection(raw_ostream &OS, StringRef SectionName,
OS << "\n";
}
}
+ // Report a gap at the end of the table.
+ if (Offset < SectionSize) {
+ OS << format("0x%8.8x: Gap, length = ", Offset);
+ OS << (SectionSize - Offset) << "\n";
+ }
}
// Dump a DWARF string offsets section. This may be a DWARF v5 formatted
@@ -170,17 +206,18 @@ dumpDWARFv5StringOffsetsSection(raw_ostream &OS, StringRef SectionName,
// a header containing size and version number. Alternatively, it may be a
// monolithic series of string offsets, as generated by the pre-DWARF v5
// implementation of split DWARF.
-static void dumpStringOffsetsSection(raw_ostream &OS, StringRef SectionName,
- const DWARFObject &Obj,
- const DWARFSection &StringOffsetsSection,
- StringRef StringSection, bool LittleEndian,
- unsigned MaxVersion) {
+static void dumpStringOffsetsSection(
+ raw_ostream &OS, StringRef SectionName, const DWARFObject &Obj,
+ const DWARFSection &StringOffsetsSection, StringRef StringSection,
+ DWARFContext::cu_iterator_range CUs,
+ DWARFContext::tu_section_iterator_range TUSs, bool LittleEndian,
+ unsigned MaxVersion) {
// If we have at least one (compile or type) unit with DWARF v5 or greater,
// we assume that the section is formatted like a DWARF v5 string offsets
// section.
if (MaxVersion >= 5)
dumpDWARFv5StringOffsetsSection(OS, SectionName, Obj, StringOffsetsSection,
- StringSection, LittleEndian);
+ StringSection, CUs, TUSs, LittleEndian);
else {
DataExtractor strOffsetExt(StringOffsetsSection.Data, LittleEndian, 0);
uint32_t offset = 0;
@@ -468,12 +505,14 @@ void DWARFContext::dump(
DObj->getStringOffsetSection().Data))
dumpStringOffsetsSection(
OS, "debug_str_offsets", *DObj, DObj->getStringOffsetSection(),
- DObj->getStringSection(), isLittleEndian(), getMaxVersion());
+ DObj->getStringSection(), compile_units(), type_unit_sections(),
+ isLittleEndian(), getMaxVersion());
if (shouldDump(ExplicitDWO, ".debug_str_offsets.dwo", DIDT_ID_DebugStrOffsets,
DObj->getStringOffsetDWOSection().Data))
dumpStringOffsetsSection(
OS, "debug_str_offsets.dwo", *DObj, DObj->getStringOffsetDWOSection(),
- DObj->getStringDWOSection(), isLittleEndian(), getMaxVersion());
+ DObj->getStringDWOSection(), dwo_compile_units(),
+ dwo_type_unit_sections(), isLittleEndian(), getMaxVersion());
if (shouldDump(Explicit, ".gnu_index", DIDT_ID_GdbIndex,
DObj->getGdbIndexSection())) {
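
The new collectContributionData() rests on a sort-then-unique pass over optional descriptors: empty (invalid) entries are ordered first so they get reported first, and contributions shared between units collapse to a single entry. A compact sketch of that idiom using std::optional (hypothetical Contribution struct; the real type is StrOffsetsContributionDescriptor):

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <optional>
#include <vector>

struct Contribution { uint64_t Base, Size; };

int main() {
  std::vector<std::optional<Contribution>> C = {
      Contribution{32, 8}, std::nullopt, Contribution{0, 16},
      Contribution{0, 16}}; // duplicate, e.g. type units sharing a table

  // Invalid entries sort to the front; valid ones sort by starting offset.
  std::sort(C.begin(), C.end(),
            [](const auto &L, const auto &R) {
              if (L && R)
                return L->Base < R->Base;
              return R.has_value();
            });
  // Duplicate {Base, Size} pairs collapse to a single entry.
  C.erase(std::unique(C.begin(), C.end(),
                      [](const auto &L, const auto &R) {
                        if (L && R)
                          return L->Base == R->Base && L->Size == R->Size;
                        return false;
                      }),
          C.end());

  for (const auto &E : C)
    if (E)
      std::printf("base=%llu size=%llu\n", (unsigned long long)E->Base,
                  (unsigned long long)E->Size);
    else
      std::puts("invalid contribution");
}

The `return R.hasValue();` in the patch's comparator is the same trick as `R.has_value()` here: an empty descriptor compares as less than any valid one.
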
diff --git a/lib/DebugInfo/DWARF/DWARFUnit.cpp b/lib/DebugInfo/DWARF/DWARFUnit.cpp
index c3d8ff2cbc29..df55d7debf92 100644
--- a/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h"
@@ -79,8 +80,10 @@ bool DWARFUnit::getAddrOffsetSectionItem(uint32_t Index,
bool DWARFUnit::getStringOffsetSectionItem(uint32_t Index,
uint64_t &Result) const {
- unsigned ItemSize = getDwarfOffsetByteSize();
- uint32_t Offset = StringOffsetSectionBase + Index * ItemSize;
+ if (!StringOffsetsTableContribution)
+ return false;
+ unsigned ItemSize = getDwarfStringOffsetsByteSize();
+ uint32_t Offset = getStringOffsetsBase() + Index * ItemSize;
if (StringOffsetSection.Data.size() < Offset + ItemSize)
return false;
DWARFDataExtractor DA(Context.getDWARFObj(), StringOffsetSection,
@@ -251,15 +254,28 @@ size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) {
RangeSectionBase = toSectionOffset(UnitDie.find(DW_AT_rnglists_base), 0);
}
- // In general, we derive the offset of the unit's contibution to the
- // debug_str_offsets{.dwo} section from the unit DIE's
- // DW_AT_str_offsets_base attribute. In dwp files we add to it the offset
- // we get from the index table.
- StringOffsetSectionBase =
- toSectionOffset(UnitDie.find(DW_AT_str_offsets_base), 0);
+ // In general, in DWARF v5 and beyond we derive the start of the unit's
+ // contribution to the string offsets table from the unit DIE's
+ // DW_AT_str_offsets_base attribute. Split DWARF units do not use this
+ // attribute, so we assume that there is a contribution to the string
+ // offsets table starting at offset 0 of the debug_str_offsets.dwo section.
+ // In both cases we need to determine the format of the contribution,
+ // which may differ from the unit's format.
+ uint64_t StringOffsetsContributionBase =
+ isDWO ? 0 : toSectionOffset(UnitDie.find(DW_AT_str_offsets_base), 0);
if (IndexEntry)
if (const auto *C = IndexEntry->getOffset(DW_SECT_STR_OFFSETS))
- StringOffsetSectionBase += C->Offset;
+ StringOffsetsContributionBase += C->Offset;
+
+ DWARFDataExtractor DA(Context.getDWARFObj(), StringOffsetSection,
+ isLittleEndian, 0);
+ if (isDWO)
+ StringOffsetsTableContribution =
+ determineStringOffsetsTableContributionDWO(
+ DA, StringOffsetsContributionBase);
+ else if (getVersion() >= 5)
+ StringOffsetsTableContribution = determineStringOffsetsTableContribution(
+ DA, StringOffsetsContributionBase);
// Don't fall back to DW_AT_GNU_ranges_base: it should be ignored for
// skeleton CU DIE, so that DWARF users not aware of it are not broken.
@@ -344,45 +360,378 @@ void DWARFUnit::collectAddressRanges(DWARFAddressRangesVector &CURanges) {
clearDIEs(true);
}
-void DWARFUnit::updateAddressDieMap(DWARFDie Die) {
- if (Die.isSubroutineDIE()) {
+// Populates a map from PC addresses to subprogram DIEs.
+//
+// This routine tries to look at the smallest amount of the debug info it can
+// to locate the DIEs. This is because many subprograms will never end up being
+// read or needed at all. We want to be as lazy as possible.
+void DWARFUnit::buildSubprogramDIEAddrMap() {
+ assert(SubprogramDIEAddrMap.empty() && "Must only build this map once!");
+ SmallVector<DWARFDie, 16> Worklist;
+ Worklist.push_back(getUnitDIE());
+ do {
+ DWARFDie Die = Worklist.pop_back_val();
+
+ // Queue up child DIEs to recurse through.
+ // FIXME: This causes us to read a lot more debug info than we really need.
+ // We should look at pruning out DIEs which cannot transitively hold
+ // separate subprograms.
+ for (DWARFDie Child : Die.children())
+ Worklist.push_back(Child);
+
+ // If handling a non-subprogram DIE, nothing else to do.
+ if (!Die.isSubprogramDIE())
+ continue;
+
+ // For subprogram DIEs, store them, and insert relevant markers into the
+ // address map. We don't care about overlap at all here as DWARF doesn't
+ // meaningfully support that, so we simply will insert a range with no DIE
+ // starting from the high PC. In the event there are overlaps, sorting
+ // these may truncate things in surprising ways but still will allow
+ // lookups to proceed.
+ int DIEIndex = SubprogramDIEAddrInfos.size();
+ SubprogramDIEAddrInfos.push_back({Die, (uint64_t)-1, {}});
for (const auto &R : Die.getAddressRanges()) {
// Ignore 0-sized ranges.
if (R.LowPC == R.HighPC)
continue;
- auto B = AddrDieMap.upper_bound(R.LowPC);
- if (B != AddrDieMap.begin() && R.LowPC < (--B)->second.first) {
- // The range is a sub-range of existing ranges, we need to split the
- // existing range.
- if (R.HighPC < B->second.first)
- AddrDieMap[R.HighPC] = B->second;
- if (R.LowPC > B->first)
- AddrDieMap[B->first].first = R.LowPC;
+
+ SubprogramDIEAddrMap.push_back({R.LowPC, DIEIndex});
+ SubprogramDIEAddrMap.push_back({R.HighPC, -1});
+
+ if (R.LowPC < SubprogramDIEAddrInfos.back().SubprogramBasePC)
+ SubprogramDIEAddrInfos.back().SubprogramBasePC = R.LowPC;
+ }
+ } while (!Worklist.empty());
+
+ if (SubprogramDIEAddrMap.empty()) {
+ // If we found no ranges, create a no-op map so that lookups remain simple
+ // but never find anything.
+ SubprogramDIEAddrMap.push_back({0, -1});
+ return;
+ }
+
+ // Next, sort the ranges and remove both exact duplicates and runs with the
+ // same DIE index. We order the ranges so that non-empty ranges are
+ // preferred. Because there may be ties, we also need to use stable sort.
+ std::stable_sort(SubprogramDIEAddrMap.begin(), SubprogramDIEAddrMap.end(),
+ [](const std::pair<uint64_t, int64_t> &LHS,
+ const std::pair<uint64_t, int64_t> &RHS) {
+ if (LHS.first < RHS.first)
+ return true;
+ if (LHS.first > RHS.first)
+ return false;
+
+ // For ranges that start at the same address, keep the one
+ // with a DIE.
+ if (LHS.second != -1 && RHS.second == -1)
+ return true;
+
+ return false;
+ });
+ SubprogramDIEAddrMap.erase(
+ std::unique(SubprogramDIEAddrMap.begin(), SubprogramDIEAddrMap.end(),
+ [](const std::pair<uint64_t, int64_t> &LHS,
+ const std::pair<uint64_t, int64_t> &RHS) {
+ // If the start addresses are exactly the same, we can
+ // remove all but the first one as it is the only one that
+ // will be found and used.
+ //
+ // If the DIE indices are the same, we can "merge" the
+ // ranges by eliminating the second.
+ return LHS.first == RHS.first || LHS.second == RHS.second;
+ }),
+ SubprogramDIEAddrMap.end());
+
+ assert(SubprogramDIEAddrMap.back().second == -1 &&
+ "The last interval must not have a DIE as each DIE's address range is "
+ "bounded.");
+}
+
+// Build the second level of mapping from PC to DIE, specifically one that maps
+// a PC *within* a particular DWARF subprogram into a precise, maximally nested
+// inlined subroutine DIE (if any exists). We build a separate map for each
+// subprogram because many subprograms will never get queried for an address
+// and this allows us to be significantly lazier in reading the DWARF itself.
+void DWARFUnit::buildInlinedSubroutineDIEAddrMap(
+ SubprogramDIEAddrInfo &SPInfo) {
+ auto &AddrMap = SPInfo.InlinedSubroutineDIEAddrMap;
+ uint64_t BasePC = SPInfo.SubprogramBasePC;
+
+ auto SubroutineAddrMapSorter = [](const std::pair<int, int> &LHS,
+ const std::pair<int, int> &RHS) {
+ if (LHS.first < RHS.first)
+ return true;
+ if (LHS.first > RHS.first)
+ return false;
+
+ // For ranges that start at the same address, keep the
+ // non-empty one.
+ if (LHS.second != -1 && RHS.second == -1)
+ return true;
+
+ return false;
+ };
+ auto SubroutineAddrMapUniquer = [](const std::pair<int, int> &LHS,
+ const std::pair<int, int> &RHS) {
+ // If the start addresses are exactly the same, we can
+ // remove all but the first one as it is the only one that
+ // will be found and used.
+ //
+ // If the DIE indices are the same, we can "merge" the
+ // ranges by eliminating the second.
+ return LHS.first == RHS.first || LHS.second == RHS.second;
+ };
+
+ struct DieAndParentIntervalRange {
+ DWARFDie Die;
+ int ParentIntervalsBeginIdx, ParentIntervalsEndIdx;
+ };
+
+ SmallVector<DieAndParentIntervalRange, 16> Worklist;
+ auto EnqueueChildDIEs = [&](const DWARFDie &Die, int ParentIntervalsBeginIdx,
+ int ParentIntervalsEndIdx) {
+ for (DWARFDie Child : Die.children())
+ Worklist.push_back(
+ {Child, ParentIntervalsBeginIdx, ParentIntervalsEndIdx});
+ };
+ EnqueueChildDIEs(SPInfo.SubprogramDIE, 0, 0);
+ while (!Worklist.empty()) {
+ DWARFDie Die = Worklist.back().Die;
+ int ParentIntervalsBeginIdx = Worklist.back().ParentIntervalsBeginIdx;
+ int ParentIntervalsEndIdx = Worklist.back().ParentIntervalsEndIdx;
+ Worklist.pop_back();
+
+ // If we encounter a nested subprogram, simply ignore it. We map to
+ // (disjoint) subprograms before arriving here and we don't want to examine
+ // any inlined subroutines of an unrelated subprogram.
+ if (Die.getTag() == DW_TAG_subprogram)
+ continue;
+
+ // For non-subroutines, just recurse to keep searching for inlined
+ // subroutines.
+ if (Die.getTag() != DW_TAG_inlined_subroutine) {
+ EnqueueChildDIEs(Die, ParentIntervalsBeginIdx, ParentIntervalsEndIdx);
+ continue;
+ }
+
+ // Capture the inlined subroutine DIE that we will reference from the map.
+ int DIEIndex = InlinedSubroutineDIEs.size();
+ InlinedSubroutineDIEs.push_back(Die);
+
+ int DieIntervalsBeginIdx = AddrMap.size();
+ // First collect the PC ranges for this DIE into our subroutine interval
+ // map.
+ for (auto R : Die.getAddressRanges()) {
+ // Clamp the PCs to be above the base.
+ R.LowPC = std::max(R.LowPC, BasePC);
+ R.HighPC = std::max(R.HighPC, BasePC);
+ // Compute relative PCs from the subprogram base and drop down to an
+ // unsigned 32-bit int to represent them within the data structure. This
+ // lets us cover a single 4 GB subprogram. Because subprograms may be
+ // partitioned into distant parts of a binary (think hot/cold
+ // partitioning) we want to preserve as much as we can here without
+ // burning extra memory. Past that, we will simply truncate and lose the
+ // ability to map those PCs to a DIE more precise than the subprogram.
+ const uint32_t MaxRelativePC = std::numeric_limits<uint32_t>::max();
+ uint32_t RelativeLowPC = (R.LowPC - BasePC) > (uint64_t)MaxRelativePC
+ ? MaxRelativePC
+ : (uint32_t)(R.LowPC - BasePC);
+ uint32_t RelativeHighPC = (R.HighPC - BasePC) > (uint64_t)MaxRelativePC
+ ? MaxRelativePC
+ : (uint32_t)(R.HighPC - BasePC);
+ // Ignore empty or bogus ranges.
+ if (RelativeLowPC >= RelativeHighPC)
+ continue;
+ AddrMap.push_back({RelativeLowPC, DIEIndex});
+ AddrMap.push_back({RelativeHighPC, -1});
+ }
+
+ // If there are no address ranges, there is nothing to do to map into them
+ // and there cannot be any child subroutine DIEs with address ranges of
+ // interest as those would all be required to nest within this DIE's
+ // non-existent ranges, so we can immediately continue to the next DIE in
+ // the worklist.
+ if (DieIntervalsBeginIdx == (int)AddrMap.size())
+ continue;
+
+ // The PCs from this DIE should never overlap, so we can easily sort them
+ // here.
+ std::sort(AddrMap.begin() + DieIntervalsBeginIdx, AddrMap.end(),
+ SubroutineAddrMapSorter);
+ // Remove any dead ranges. These should only come from "empty" ranges that
+ // were clobbered by some other range.
+ AddrMap.erase(std::unique(AddrMap.begin() + DieIntervalsBeginIdx,
+ AddrMap.end(), SubroutineAddrMapUniquer),
+ AddrMap.end());
+
+ // Compute the end index of this DIE's addr map intervals.
+ int DieIntervalsEndIdx = AddrMap.size();
+
+ assert(DieIntervalsBeginIdx != DieIntervalsEndIdx &&
+ "Must not have an empty map for this layer!");
+ assert(AddrMap.back().second == -1 && "Must end with an empty range!");
+ assert(std::is_sorted(AddrMap.begin() + DieIntervalsBeginIdx, AddrMap.end(),
+ less_first()) &&
+ "Failed to sort this DIE's interals!");
+
+ // If we have any parent intervals, walk the newly added ranges and find
+ // the parent ranges they were inserted into. Both of these are sorted and
+ // neither has any overlaps. We need to append new ranges to split up any
+ // parent ranges these new ranges would overlap when we merge them.
+ if (ParentIntervalsBeginIdx != ParentIntervalsEndIdx) {
+ int ParentIntervalIdx = ParentIntervalsBeginIdx;
+ for (int i = DieIntervalsBeginIdx, e = DieIntervalsEndIdx - 1; i < e;
+ ++i) {
+ const uint32_t IntervalStart = AddrMap[i].first;
+ const uint32_t IntervalEnd = AddrMap[i + 1].first;
+ const int IntervalDieIdx = AddrMap[i].second;
+ if (IntervalDieIdx == -1) {
+ // For empty intervals, nothing is required. This is a bit surprising
+ // however. If the prior interval overlaps a parent interval and this
+ // would be necessary to mark the end, we will synthesize a new end
+ // that switches back to the parent DIE below. And this interval will
+ // get dropped in favor of one with a DIE attached. However, we'll
+ // still include this and so worst-case, it will still end the prior
+ // interval.
+ continue;
+ }
+
+ // We are walking the new ranges in order, so search forward from the
+ // last point for a parent range that might overlap.
+ auto ParentIntervalsRange =
+ make_range(AddrMap.begin() + ParentIntervalIdx,
+ AddrMap.begin() + ParentIntervalsEndIdx);
+ assert(std::is_sorted(ParentIntervalsRange.begin(),
+ ParentIntervalsRange.end(), less_first()) &&
+ "Unsorted parent intervals can't be searched!");
+ auto PI = std::upper_bound(
+ ParentIntervalsRange.begin(), ParentIntervalsRange.end(),
+ IntervalStart,
+ [](uint32_t LHS, const std::pair<uint32_t, int32_t> &RHS) {
+ return LHS < RHS.first;
+ });
+ if (PI == ParentIntervalsRange.begin() ||
+ PI == ParentIntervalsRange.end())
+ continue;
+
+ ParentIntervalIdx = PI - AddrMap.begin();
+ int32_t &ParentIntervalDieIdx = std::prev(PI)->second;
+ uint32_t &ParentIntervalStart = std::prev(PI)->first;
+ const uint32_t ParentIntervalEnd = PI->first;
+
+ // If the new range starts exactly at the position of the parent range,
+ // we need to adjust the parent range. Note that these collisions can
+ // only happen with the original parent range because we will merge any
+ // adjacent ranges in the child.
+ if (IntervalStart == ParentIntervalStart) {
+ // If there will be a tail, just shift the start of the parent
+ // forward. Note that this cannot change the parent ordering.
+ if (IntervalEnd < ParentIntervalEnd) {
+ ParentIntervalStart = IntervalEnd;
+ continue;
+ }
+ // Otherwise, mark this as becoming empty so we'll remove it and
+ // prefer the child range.
+ ParentIntervalDieIdx = -1;
+ continue;
+ }
+
+ // Finally, if the parent interval will need to remain as a prefix to
+ // this one, insert a new interval to cover any tail.
+ if (IntervalEnd < ParentIntervalEnd)
+ AddrMap.push_back({IntervalEnd, ParentIntervalDieIdx});
}
- AddrDieMap[R.LowPC] = std::make_pair(R.HighPC, Die);
}
+
+ // Note that we don't need to re-sort even this DIE's address map intervals
+ // after this. All of the newly added intervals actually fill in *gaps* in
+ // this DIE's address map, and we know that children won't need to lookup
+ // into those gaps.
+
+ // Recurse through its children, giving them the interval map range of this
+ // DIE to use as their parent intervals.
+ EnqueueChildDIEs(Die, DieIntervalsBeginIdx, DieIntervalsEndIdx);
+ }
+
+ if (AddrMap.empty()) {
+ AddrMap.push_back({0, -1});
+ return;
}
- // Parent DIEs are added to the AddrDieMap prior to the Children DIEs to
- // simplify the logic to update AddrDieMap. The child's range will always
- // be equal or smaller than the parent's range. With this assumption, when
- // adding one range into the map, it will at most split a range into 3
- // sub-ranges.
- for (DWARFDie Child = Die.getFirstChild(); Child; Child = Child.getSibling())
- updateAddressDieMap(Child);
+
+ // Now that we've added all of the intervals needed, we need to re-sort and
+ // unique them. Most notably, this will remove all the empty ranges that had
+ // a parent range covering them. We only expect a single non-empty interval
+ // at any given start point, so we just use std::sort. This could potentially
+ // produce non-deterministic maps for invalid DWARF.
+ std::sort(AddrMap.begin(), AddrMap.end(), SubroutineAddrMapSorter);
+ AddrMap.erase(
+ std::unique(AddrMap.begin(), AddrMap.end(), SubroutineAddrMapUniquer),
+ AddrMap.end());
}
DWARFDie DWARFUnit::getSubroutineForAddress(uint64_t Address) {
extractDIEsIfNeeded(false);
- if (AddrDieMap.empty())
- updateAddressDieMap(getUnitDIE());
- auto R = AddrDieMap.upper_bound(Address);
- if (R == AddrDieMap.begin())
+
+ // We use a two-level mapping structure to locate subroutines for a given PC
+ // address.
+ //
+ // First, we map the address to a subprogram. This can be done more cheaply
+ // because subprograms cannot nest within each other. It also allows us to
+ // avoid detailed examination of many subprograms, instead only focusing on
+ // the ones which we end up actively querying.
+ if (SubprogramDIEAddrMap.empty())
+ buildSubprogramDIEAddrMap();
+
+ assert(!SubprogramDIEAddrMap.empty() &&
+ "We must always end up with a non-empty map!");
+
+ auto I = std::upper_bound(
+ SubprogramDIEAddrMap.begin(), SubprogramDIEAddrMap.end(), Address,
+ [](uint64_t LHS, const std::pair<uint64_t, int64_t> &RHS) {
+ return LHS < RHS.first;
+ });
+ // If we find the beginning, then the address is before the first subprogram.
+ if (I == SubprogramDIEAddrMap.begin())
return DWARFDie();
- // upper_bound's previous item contains Address.
- --R;
- if (Address >= R->second.first)
+ // Back up to the interval containing the address and see if it
+ // has a DIE associated with it.
+ --I;
+ if (I->second == -1)
return DWARFDie();
- return R->second.second;
+
+ auto &SPInfo = SubprogramDIEAddrInfos[I->second];
+
+ // Now that we have the subprogram for this address, we do the second level
+ // mapping by building a map within a subprogram's PC range to any specific
+ // inlined subroutine.
+ if (SPInfo.InlinedSubroutineDIEAddrMap.empty())
+ buildInlinedSubroutineDIEAddrMap(SPInfo);
+
+ // We look up the inlined subroutine using a subprogram-relative
+ // address.
+ assert(Address >= SPInfo.SubprogramBasePC &&
+ "Address isn't above the start of the subprogram!");
+ uint32_t RelativeAddr = ((Address - SPInfo.SubprogramBasePC) >
+ (uint64_t)std::numeric_limits<uint32_t>::max())
+ ? std::numeric_limits<uint32_t>::max()
+ : (uint32_t)(Address - SPInfo.SubprogramBasePC);
+
+ auto J =
+ std::upper_bound(SPInfo.InlinedSubroutineDIEAddrMap.begin(),
+ SPInfo.InlinedSubroutineDIEAddrMap.end(), RelativeAddr,
+ [](uint32_t LHS, const std::pair<uint32_t, int32_t> &RHS) {
+ return LHS < RHS.first;
+ });
+ // If we find the beginning, the address is before any inlined subroutine so
+ // return the subprogram DIE.
+ if (J == SPInfo.InlinedSubroutineDIEAddrMap.begin())
+ return SPInfo.SubprogramDIE;
+ // Back up `J` and return the inlined subroutine if we have one or the
+ // subprogram if we don't.
+ --J;
+ return J->second == -1 ? SPInfo.SubprogramDIE
+ : InlinedSubroutineDIEs[J->second];
}
void
@@ -466,3 +815,89 @@ const DWARFAbbreviationDeclarationSet *DWARFUnit::getAbbreviations() const {
Abbrevs = Abbrev->getAbbreviationDeclarationSet(AbbrOffset);
return Abbrevs;
}
+
+Optional<StrOffsetsContributionDescriptor>
+StrOffsetsContributionDescriptor::validateContributionSize(
+ DWARFDataExtractor &DA) {
+ uint8_t EntrySize = getDwarfOffsetByteSize();
+ // In order to ensure that we don't read a partial record at the end of
+ // the section, we validate for a multiple of the entry size.
+ uint64_t ValidationSize = alignTo(Size, EntrySize);
+ // Guard against overflow.
+ if (ValidationSize >= Size)
+ if (DA.isValidOffsetForDataOfSize((uint32_t)Base, ValidationSize))
+ return *this;
+ return Optional<StrOffsetsContributionDescriptor>();
+}
+
+// Look for a DWARF64-formatted contribution to the string offsets table
+// starting at a given offset and record it in a descriptor.
+static Optional<StrOffsetsContributionDescriptor>
+parseDWARF64StringOffsetsTableHeader(DWARFDataExtractor &DA, uint32_t Offset) {
+ if (!DA.isValidOffsetForDataOfSize(Offset, 16))
+ return Optional<StrOffsetsContributionDescriptor>();
+
+ if (DA.getU32(&Offset) != 0xffffffff)
+ return Optional<StrOffsetsContributionDescriptor>();
+
+ uint64_t Size = DA.getU64(&Offset);
+ uint8_t Version = DA.getU16(&Offset);
+ (void)DA.getU16(&Offset); // padding
+ return StrOffsetsContributionDescriptor(Offset, Size, Version, DWARF64);
+}
+
+// Look for a DWARF32-formatted contribution to the string offsets table
+// starting at a given offset and record it in a descriptor.
+static Optional<StrOffsetsContributionDescriptor>
+parseDWARF32StringOffsetsTableHeader(DWARFDataExtractor &DA, uint32_t Offset) {
+ if (!DA.isValidOffsetForDataOfSize(Offset, 8))
+ return Optional<StrOffsetsContributionDescriptor>();
+ uint32_t ContributionSize = DA.getU32(&Offset);
+ if (ContributionSize >= 0xfffffff0)
+ return Optional<StrOffsetsContributionDescriptor>();
+ uint8_t Version = DA.getU16(&Offset);
+ (void)DA.getU16(&Offset); // padding
+ return StrOffsetsContributionDescriptor(Offset, ContributionSize, Version, DWARF32);
+}
+
+Optional<StrOffsetsContributionDescriptor>
+DWARFUnit::determineStringOffsetsTableContribution(DWARFDataExtractor &DA,
+ uint64_t Offset) {
+ Optional<StrOffsetsContributionDescriptor> Descriptor;
+ // Attempt to find a DWARF64 contribution 16 bytes before the base.
+ if (Offset >= 16)
+ Descriptor =
+ parseDWARF64StringOffsetsTableHeader(DA, (uint32_t)Offset - 16);
+ // Try to find a DWARF32 contribution 8 bytes before the base.
+ if (!Descriptor && Offset >= 8)
+ Descriptor = parseDWARF32StringOffsetsTableHeader(DA, (uint32_t)Offset - 8);
+ return Descriptor ? Descriptor->validateContributionSize(DA) : Descriptor;
+}
+
+Optional<StrOffsetsContributionDescriptor>
+DWARFUnit::determineStringOffsetsTableContributionDWO(DWARFDataExtractor &DA,
+ uint64_t Offset) {
+ if (getVersion() >= 5) {
+ // Look for a valid contribution at the given offset.
+ auto Descriptor =
+ parseDWARF64StringOffsetsTableHeader(DA, (uint32_t)Offset);
+ if (!Descriptor)
+ Descriptor = parseDWARF32StringOffsetsTableHeader(DA, (uint32_t)Offset);
+ return Descriptor ? Descriptor->validateContributionSize(DA) : Descriptor;
+ }
+ // Prior to DWARF v5, we derive the contribution size from the
+ // index table (in a package file). In a .dwo file it is simply
+ // the length of the string offsets section.
+ uint64_t Size = 0;
+ if (!IndexEntry)
+ Size = StringOffsetSection.Data.size();
+ else if (const auto *C = IndexEntry->getOffset(DW_SECT_STR_OFFSETS))
+ Size = C->Length;
+ // Return a descriptor with the given offset as base, version 4 and
+ // DWARF32 format.
+ return StrOffsetsContributionDescriptor(Offset, Size, 4, DWARF32);
+}
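
Both levels of the new lookup structure boil down to the same flattened interval map: a sorted vector of {start, index} pairs in which index -1 marks the gap after a range, queried with std::upper_bound plus a one-step backtrack. A self-contained sketch of that lookup with made-up ranges:

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <iterator>
#include <vector>

int main() {
  // Built from DIE address ranges: [0x10,0x20) -> DIE 0, [0x40,0x48) -> DIE 1.
  std::vector<std::pair<uint64_t, int>> Map = {
      {0x10, 0}, {0x20, -1}, {0x40, 1}, {0x48, -1}};

  auto Lookup = [&](uint64_t Addr) {
    auto I = std::upper_bound(
        Map.begin(), Map.end(), Addr,
        [](uint64_t A, const std::pair<uint64_t, int> &E) {
          return A < E.first;
        });
    if (I == Map.begin())
      return -1;                   // before the first range
    return std::prev(I)->second;   // containing interval, or -1 in a gap
  };

  std::printf("%d %d %d\n", Lookup(0x12), Lookup(0x30), Lookup(0x44));
  // prints: 0 -1 1
}

In the patch this shape appears twice: SubprogramDIEAddrMap keys on 64-bit absolute PCs, while each subprogram's InlinedSubroutineDIEAddrMap stores 32-bit PCs relative to SubprogramBasePC to keep the per-subprogram maps small.
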
diff --git a/lib/Demangle/ItaniumDemangle.cpp b/lib/Demangle/ItaniumDemangle.cpp
index 34f4017d9828..9c2258f5b933 100644
--- a/lib/Demangle/ItaniumDemangle.cpp
+++ b/lib/Demangle/ItaniumDemangle.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Demangle/Demangle.h"
+#include "llvm/Support/Compiler.h"
// This file exports a single function: llvm::itanium_demangle.
// It also has no dependencies on the rest of llvm. It is implemented this way
@@ -1947,7 +1948,7 @@ static const char *parse_type(const char *first, const char *last, C &db) {
break;
}
}
- // falls through
+ LLVM_FALLTHROUGH;
default:
// must check for builtin-types before class-enum-types to avoid
// ambiguities with operator-names
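
This one-line change, like the bare break;s added to switches elsewhere in this import (DWARFAcceleratorTable.cpp, ConstantFold.cpp, ELF.cpp), makes fall-through explicit so a diagnostic such as -Wimplicit-fallthrough can flag the unintentional cases. LLVM_FALLTHROUGH expands to the compiler's annotation where available; in plain C++17 the equivalent is the [[fallthrough]] attribute:

#include <cstdio>

void classify(int C) {
  switch (C) {
  case '+':
  case '-':
    std::printf("sign ");
    [[fallthrough]]; // deliberate: a sign is also a token
  default:
    std::printf("token\n");
  }
}

int main() { classify('+'); } // prints "sign token"
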
diff --git a/lib/FuzzMutate/IRMutator.cpp b/lib/FuzzMutate/IRMutator.cpp
index 15e7f86d1cdf..00b558ac4dcb 100644
--- a/lib/FuzzMutate/IRMutator.cpp
+++ b/lib/FuzzMutate/IRMutator.cpp
@@ -8,15 +8,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/FuzzMutate/IRMutator.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/FuzzMutate/Operations.h"
#include "llvm/FuzzMutate/Random.h"
#include "llvm/FuzzMutate/RandomIRBuilder.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar/DCE.h"
using namespace llvm;
@@ -90,14 +92,14 @@ std::vector<fuzzerop::OpDescriptor> InjectorIRStrategy::getDefaultOps() {
return Ops;
}
-fuzzerop::OpDescriptor
+Optional<fuzzerop::OpDescriptor>
InjectorIRStrategy::chooseOperation(Value *Src, RandomIRBuilder &IB) {
auto OpMatchesPred = [&Src](fuzzerop::OpDescriptor &Op) {
return Op.SourcePreds[0].matches({}, Src);
};
auto RS = makeSampler(IB.Rand, make_filter_range(Operations, OpMatchesPred));
if (RS.isEmpty())
- report_fatal_error("No available operations for src type");
+ return None;
return *RS;
}
@@ -120,10 +122,15 @@ void InjectorIRStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
// Choose an operation that's constrained to be valid for the type of the
// source, collect any other sources it needs, and then build it.
- fuzzerop::OpDescriptor OpDesc = chooseOperation(Srcs[0], IB);
- for (const auto &Pred : makeArrayRef(OpDesc.SourcePreds).slice(1))
+ auto OpDesc = chooseOperation(Srcs[0], IB);
+ // Bail if no operation was found
+ if (!OpDesc)
+ return;
+
+ for (const auto &Pred : makeArrayRef(OpDesc->SourcePreds).slice(1))
Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore, Srcs, Pred));
- if (Value *Op = OpDesc.BuilderFunc(Srcs, Insts[IP])) {
+
+ if (Value *Op = OpDesc->BuilderFunc(Srcs, Insts[IP])) {
// Find a sink and wire up the results of the operation.
IB.connectToSink(BB, InstsAfter, Op);
}
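
The IRMutator hunk changes chooseOperation() from aborting to returning an empty Optional, so a mutation with no valid operation is simply skipped. A sketch of that API shape with a hypothetical descriptor type (std::optional standing in for llvm::Optional):

#include <cstdio>
#include <optional>
#include <vector>

struct OpDescriptor { const char *Name; bool (*Matches)(int); };

static std::optional<OpDescriptor>
chooseOperation(int Src, const std::vector<OpDescriptor> &Ops) {
  for (const auto &Op : Ops)
    if (Op.Matches(Src))
      return Op;
  return std::nullopt; // was: report_fatal_error("No available operations...")
}

int main() {
  std::vector<OpDescriptor> Ops = {
      {"neg", [](int V) { return V != 0; }}};
  if (auto Op = chooseOperation(0, Ops))
    std::printf("chose %s\n", Op->Name);
  else
    std::puts("no operation for source; bail out of this mutation");
}

The caller side is visible in the mutate() hunk: a failed choice now returns early instead of killing the whole fuzzer run.
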
diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp
index 90b10309b58b..59818a1425f1 100644
--- a/lib/IR/ConstantFold.cpp
+++ b/lib/IR/ConstantFold.cpp
@@ -1674,6 +1674,7 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2,
}
}
}
+ break;
}
default:
break;
diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp
index 1fff912ecf2f..7063f6f40a30 100644
--- a/lib/IR/Function.cpp
+++ b/lib/IR/Function.cpp
@@ -1333,7 +1333,9 @@ Optional<uint64_t> Function::getEntryCount() const {
if (MDS->getString().equals("function_entry_count")) {
ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(1));
uint64_t Count = CI->getValue().getZExtValue();
- if (Count == 0)
+ // SamplePGO uses a value of -1 when there were no samples.
+ // Treat this the same as unknown.
+ if (Count == (uint64_t)-1)
return None;
return Count;
}
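
Function::getEntryCount() now treats the all-ones counter as "no information" rather than a real count. A tiny sketch of the sentinel decode, with a hypothetical helper name:

#include <cassert>
#include <cstdint>
#include <optional>

// Hypothetical helper: map SamplePGO's "no samples" sentinel to nullopt.
std::optional<uint64_t> decodeEntryCount(uint64_t Raw) {
  if (Raw == static_cast<uint64_t>(-1))
    return std::nullopt;
  return Raw;
}

int main() {
  assert(!decodeEntryCount(UINT64_MAX));
  assert(*decodeEntryCount(42) == 42);
}
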
diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp
index eae697b2e4b9..163c785f5d76 100644
--- a/lib/IR/Value.cpp
+++ b/lib/IR/Value.cpp
@@ -627,9 +627,10 @@ uint64_t Value::getPointerDereferenceableBytes(const DataLayout &DL,
CanBeNull = false;
if (const Argument *A = dyn_cast<Argument>(this)) {
DerefBytes = A->getDereferenceableBytes();
- if (DerefBytes == 0 && A->hasByValAttr()) {
+ if (DerefBytes == 0 && (A->hasByValAttr() || A->hasStructRetAttr())) {
Type *PT = cast<PointerType>(A->getType())->getElementType();
- DerefBytes = DL.getTypeStoreSize(PT);
+ if (PT->isSized())
+ DerefBytes = DL.getTypeStoreSize(PT);
}
if (DerefBytes == 0) {
DerefBytes = A->getDereferenceableOrNullBytes();
@@ -655,10 +656,8 @@ uint64_t Value::getPointerDereferenceableBytes(const DataLayout &DL,
CanBeNull = true;
}
} else if (auto *AI = dyn_cast<AllocaInst>(this)) {
- const ConstantInt *ArraySize = dyn_cast<ConstantInt>(AI->getArraySize());
- if (ArraySize && AI->getAllocatedType()->isSized()) {
- DerefBytes = DL.getTypeStoreSize(AI->getAllocatedType()) *
- ArraySize->getZExtValue();
+ if (!AI->isArrayAllocation()) {
+ DerefBytes = DL.getTypeStoreSize(AI->getAllocatedType());
CanBeNull = false;
}
} else if (auto *GV = dyn_cast<GlobalVariable>(this)) {
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index 3357553cf19f..e521b6e7c704 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -405,9 +405,13 @@ void MCAsmStreamer::emitExplicitComments() {
void MCAsmStreamer::ChangeSection(MCSection *Section,
const MCExpr *Subsection) {
assert(Section && "Cannot switch to a null section!");
- Section->PrintSwitchToSection(
- *MAI, getContext().getObjectFileInfo()->getTargetTriple(), OS,
- Subsection);
+ if (MCTargetStreamer *TS = getTargetStreamer()) {
+ TS->changeSection(getCurrentSectionOnly(), Section, Subsection, OS);
+ } else {
+ Section->PrintSwitchToSection(
+ *MAI, getContext().getObjectFileInfo()->getTargetTriple(), OS,
+ Subsection);
+ }
}
void MCAsmStreamer::EmitLabel(MCSymbol *Symbol, SMLoc Loc) {
@@ -796,10 +800,15 @@ void MCAsmStreamer::EmitBytes(StringRef Data) {
"Cannot emit contents before setting section!");
if (Data.empty()) return;
- if (Data.size() == 1) {
- OS << MAI->getData8bitsDirective();
- OS << (unsigned)(unsigned char)Data[0];
- EmitEOL();
+ // If only a single byte is provided, or neither an ascii nor an asciz
+ // directive is supported, emit the data as individual 8-bit values.
+ if (Data.size() == 1 ||
+ !(MAI->getAscizDirective() || MAI->getAsciiDirective())) {
+ const char *Directive = MAI->getData8bitsDirective();
+ for (const unsigned char C : Data.bytes()) {
+ OS << Directive << (unsigned)C;
+ EmitEOL();
+ }
return;
}
@@ -884,8 +893,12 @@ void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
assert(Directive && "Invalid size for machine code value!");
OS << Directive;
- Value->print(OS, MAI);
- EmitEOL();
+ if (MCTargetStreamer *TS = getTargetStreamer()) {
+ TS->emitValue(Value);
+ } else {
+ Value->print(OS, MAI);
+ EmitEOL();
+ }
}
void MCAsmStreamer::EmitULEB128Value(const MCExpr *Value) {
@@ -1097,13 +1110,19 @@ unsigned MCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo,
}
}
- OS << "\t.file\t" << FileNo << ' ';
+ SmallString<128> Str;
+ raw_svector_ostream OS1(Str);
+ OS1 << "\t.file\t" << FileNo << ' ';
if (!Directory.empty()) {
- PrintQuotedString(Directory, OS);
- OS << ' ';
+ PrintQuotedString(Directory, OS1);
+ OS1 << ' ';
+ }
+ PrintQuotedString(Filename, OS1);
+ if (MCTargetStreamer *TS = getTargetStreamer()) {
+ TS->emitDwarfFileDirective(OS1.str());
+ } else {
+ EmitRawText(OS1.str());
}
- PrintQuotedString(Filename, OS);
- EmitEOL();
return FileNo;
}
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index 6f3647d61932..6e801ed8777c 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -49,6 +49,28 @@ void MCTargetStreamer::emitLabel(MCSymbol *Symbol) {}
void MCTargetStreamer::finish() {}
+void MCTargetStreamer::changeSection(const MCSection *CurSection,
+ MCSection *Section,
+ const MCExpr *Subsection,
+ raw_ostream &OS) {
+ Section->PrintSwitchToSection(
+ *Streamer.getContext().getAsmInfo(),
+ Streamer.getContext().getObjectFileInfo()->getTargetTriple(), OS,
+ Subsection);
+}
+
+void MCTargetStreamer::emitDwarfFileDirective(StringRef Directive) {
+ Streamer.EmitRawText(Directive);
+}
+
+void MCTargetStreamer::emitValue(const MCExpr *Value) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+
+ Value->print(OS, Streamer.getContext().getAsmInfo());
+ Streamer.EmitRawText(OS.str());
+}
+
void MCTargetStreamer::emitAssignment(MCSymbol *Symbol, const MCExpr *Value) {}
MCStreamer::MCStreamer(MCContext &Ctx)
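
The two MC hunks above share one delegation pattern: the generic asm streamer asks an optional target streamer first, and the target streamer's base-class default reproduces the old inline printing. A minimal sketch of that hook shape (hypothetical class and method names):

#include <cstdio>

struct TargetStreamer {
  // Default keeps the old behaviour; targets override to customize output.
  virtual void emitDwarfFileDirective(const char *Directive) {
    std::printf("%s\n", Directive);
  }
  virtual ~TargetStreamer() = default;
};

struct AsmStreamer {
  TargetStreamer *TS = nullptr; // may be null when no target streamer exists
  void emitDwarfFileDirective(const char *Directive) {
    if (TS)
      TS->emitDwarfFileDirective(Directive); // target gets first refusal
    else
      std::printf("%s\n", Directive);        // old inline printing
  }
};

int main() {
  TargetStreamer Default;
  AsmStreamer S{&Default};
  S.emitDwarfFileDirective("\t.file\t1 \"a.c\"");
}

ChangeSection, EmitValueImpl, and EmitDwarfFileDirective all follow this shape in the MCAsmStreamer hunk: consult getTargetStreamer() first, fall back to the old printing otherwise.
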
diff --git a/lib/MC/WasmObjectWriter.cpp b/lib/MC/WasmObjectWriter.cpp
index 6e76c5fac35f..0f0b645492ee 100644
--- a/lib/MC/WasmObjectWriter.cpp
+++ b/lib/MC/WasmObjectWriter.cpp
@@ -553,7 +553,7 @@ uint32_t WasmObjectWriter::getRelocationIndexValue(
case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB:
case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32:
if (!IndirectSymbolIndices.count(RelEntry.Symbol))
- report_fatal_error("symbol not found table index space: " +
+ report_fatal_error("symbol not found in table index space: " +
RelEntry.Symbol->getName());
return IndirectSymbolIndices[RelEntry.Symbol];
case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
@@ -562,7 +562,7 @@ uint32_t WasmObjectWriter::getRelocationIndexValue(
case wasm::R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
case wasm::R_WEBASSEMBLY_MEMORY_ADDR_I32:
if (!SymbolIndices.count(RelEntry.Symbol))
- report_fatal_error("symbol not found function/global index space: " +
+ report_fatal_error("symbol not found in function/global index space: " +
RelEntry.Symbol->getName());
return SymbolIndices[RelEntry.Symbol];
case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB:
@@ -994,33 +994,10 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
SmallVector<WasmExport, 4> Exports;
SmallVector<std::pair<StringRef, uint32_t>, 4> SymbolFlags;
SmallVector<std::pair<uint16_t, uint32_t>, 2> InitFuncs;
- SmallPtrSet<const MCSymbolWasm *, 4> IsAddressTaken;
unsigned NumFuncImports = 0;
SmallVector<WasmDataSegment, 4> DataSegments;
uint32_t DataSize = 0;
- // Populate the IsAddressTaken set.
- for (const WasmRelocationEntry &RelEntry : CodeRelocations) {
- switch (RelEntry.Type) {
- case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB:
- case wasm::R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
- IsAddressTaken.insert(RelEntry.Symbol);
- break;
- default:
- break;
- }
- }
- for (const WasmRelocationEntry &RelEntry : DataRelocations) {
- switch (RelEntry.Type) {
- case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32:
- case wasm::R_WEBASSEMBLY_MEMORY_ADDR_I32:
- IsAddressTaken.insert(RelEntry.Symbol);
- break;
- default:
- break;
- }
- }
-
// In the special .global_variables section, we've encoded global
// variables used by the function. Translate them into the Globals
// list.
@@ -1116,7 +1093,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
continue;
// If the symbol is not defined in this translation unit, import it.
- if (!WS.isDefined(/*SetUsed=*/false)) {
+ if (!WS.isDefined(/*SetUsed=*/false) || WS.isVariable()) {
WasmImport Import;
Import.ModuleName = WS.getModuleName();
Import.FieldName = WS.getName();
@@ -1132,8 +1109,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
Import.IsMutable = false;
SymbolIndices[&WS] = NumGlobalImports;
- // If this global is the stack pointer, make it mutable and remember it
- // so that we can emit metadata for it.
+ // If this global is the stack pointer, make it mutable.
if (WS.getName() == "__stack_pointer")
Import.IsMutable = true;
@@ -1218,14 +1194,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
}
DEBUG(dbgs() << " -> function index: " << Index << "\n");
-
- // If needed, prepare the function to be called indirectly.
- if (IsAddressTaken.count(&WS) != 0) {
- IndirectSymbolIndices[&WS] = TableElems.size();
- DEBUG(dbgs() << " -> adding to table: " << TableElems.size() << "\n");
- TableElems.push_back(Index);
- }
- } else {
+ } else {
if (WS.isTemporary() && !WS.getSize())
continue;
@@ -1289,7 +1258,6 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
uint32_t Index = SymbolIndices.find(ResolvedSym)->second;
DEBUG(dbgs() << " -> index:" << Index << "\n");
- SymbolIndices[&WS] = Index;
WasmExport Export;
Export.FieldName = WS.getName();
Export.Index = Index;
@@ -1304,12 +1272,34 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
SymbolFlags.emplace_back(WS.getName(), wasm::WASM_SYMBOL_BINDING_LOCAL);
}
- // Add types for indirect function calls.
- for (const WasmRelocationEntry &Fixup : CodeRelocations) {
- if (Fixup.Type != wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB)
- continue;
+ {
+ auto HandleReloc = [&](const WasmRelocationEntry &Rel) {
+ // Functions referenced by a relocation need to be prepared to be called
+ // indirectly.
+ const MCSymbolWasm& WS = *Rel.Symbol;
+ if (WS.isFunction() && IndirectSymbolIndices.count(&WS) == 0) {
+ switch (Rel.Type) {
+ case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32:
+ case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB:
+ case wasm::R_WEBASSEMBLY_MEMORY_ADDR_I32:
+ case wasm::R_WEBASSEMBLY_MEMORY_ADDR_SLEB: {
+ uint32_t Index = SymbolIndices.find(&WS)->second;
+ IndirectSymbolIndices[&WS] = TableElems.size();
+ DEBUG(dbgs() << " -> adding to table: " << TableElems.size() << "\n");
+ TableElems.push_back(Index);
+ registerFunctionType(WS);
+ break;
+ }
+ default:
+ break;
+ }
+ }
+ };
- registerFunctionType(*Fixup.Symbol);
+ for (const WasmRelocationEntry &RelEntry : CodeRelocations)
+ HandleReloc(RelEntry);
+ for (const WasmRelocationEntry &RelEntry : DataRelocations)
+ HandleReloc(RelEntry);
}
// Translate .init_array section contents into start functions.
diff --git a/lib/Object/ELF.cpp b/lib/Object/ELF.cpp
index c72a1258c1ee..5906dc5f5307 100644
--- a/lib/Object/ELF.cpp
+++ b/lib/Object/ELF.cpp
@@ -138,6 +138,7 @@ StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine,
default:
break;
}
+ break;
case ELF::EM_BPF:
switch (Type) {
#include "llvm/BinaryFormat/ELFRelocs/BPF.def"
diff --git a/lib/Object/WasmObjectFile.cpp b/lib/Object/WasmObjectFile.cpp
index 7a0c05ed8a15..48f98df6f34d 100644
--- a/lib/Object/WasmObjectFile.cpp
+++ b/lib/Object/WasmObjectFile.cpp
@@ -303,7 +303,6 @@ Error WasmObjectFile::parseNameSection(const uint8_t *Ptr, const uint8_t *End) {
void WasmObjectFile::populateSymbolTable() {
// Add imports to symbol table
- size_t ImportIndex = 0;
size_t GlobalIndex = 0;
size_t FunctionIndex = 0;
for (const wasm::WasmImport& Import : Imports) {
@@ -312,7 +311,7 @@ void WasmObjectFile::populateSymbolTable() {
assert(Import.Global.Type == wasm::WASM_TYPE_I32);
SymbolMap.try_emplace(Import.Field, Symbols.size());
Symbols.emplace_back(Import.Field, WasmSymbol::SymbolType::GLOBAL_IMPORT,
- ImportSection, GlobalIndex++, ImportIndex);
+ ImportSection, GlobalIndex++);
DEBUG(dbgs() << "Adding import: " << Symbols.back()
<< " sym index:" << Symbols.size() << "\n");
break;
@@ -320,14 +319,13 @@ void WasmObjectFile::populateSymbolTable() {
SymbolMap.try_emplace(Import.Field, Symbols.size());
Symbols.emplace_back(Import.Field,
WasmSymbol::SymbolType::FUNCTION_IMPORT,
- ImportSection, FunctionIndex++, ImportIndex);
+ ImportSection, FunctionIndex++, Import.SigIndex);
DEBUG(dbgs() << "Adding import: " << Symbols.back()
<< " sym index:" << Symbols.size() << "\n");
break;
default:
break;
}
- ImportIndex++;
}
// Add exports to symbol table
@@ -338,11 +336,22 @@ void WasmObjectFile::populateSymbolTable() {
Export.Kind == wasm::WASM_EXTERNAL_FUNCTION
? WasmSymbol::SymbolType::FUNCTION_EXPORT
: WasmSymbol::SymbolType::GLOBAL_EXPORT;
- SymbolMap.try_emplace(Export.Name, Symbols.size());
- Symbols.emplace_back(Export.Name, ExportType,
- ExportSection, Export.Index);
- DEBUG(dbgs() << "Adding export: " << Symbols.back()
- << " sym index:" << Symbols.size() << "\n");
+ auto Pair = SymbolMap.try_emplace(Export.Name, Symbols.size());
+ if (Pair.second) {
+ Symbols.emplace_back(Export.Name, ExportType,
+ ExportSection, Export.Index);
+ DEBUG(dbgs() << "Adding export: " << Symbols.back()
+ << " sym index:" << Symbols.size() << "\n");
+ } else {
+ uint32_t SymIndex = Pair.first->second;
+ const WasmSymbol &OldSym = Symbols[SymIndex];
+ WasmSymbol NewSym(Export.Name, ExportType, ExportSection, Export.Index);
+ NewSym.setAltIndex(OldSym.ElementIndex);
+ Symbols[SymIndex] = NewSym;
+
+ DEBUG(dbgs() << "Replacing existing symbol: " << NewSym
+ << " sym index:" << SymIndex << "\n");
+ }
}
}
}
@@ -1017,7 +1026,7 @@ void WasmObjectFile::getRelocationTypeName(
break;
switch (Rel.Type) {
-#include "llvm/BinaryFormat/WasmRelocs/WebAssembly.def"
+#include "llvm/BinaryFormat/WasmRelocs.def"
}
#undef WASM_RELOC
diff --git a/lib/Object/WindowsResource.cpp b/lib/Object/WindowsResource.cpp
index 9ca584a4a1ae..271224ec6312 100644
--- a/lib/Object/WindowsResource.cpp
+++ b/lib/Object/WindowsResource.cpp
@@ -14,6 +14,7 @@
#include "llvm/Object/WindowsResource.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/FileOutputBuffer.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MathExtras.h"
#include <ctime>
#include <queue>
@@ -560,10 +561,9 @@ void WindowsResourceCOFFWriter::writeSymbolTable() {
// Now write a symbol for each relocation.
for (unsigned i = 0; i < Data.size(); i++) {
- char RelocationName[9];
- sprintf(RelocationName, "$R%06X", DataOffsets[i]);
+ auto RelocationName = formatv("$R{0:X-6}", i & 0xffffff).sstr<COFF::NameSize>();
Symbol = reinterpret_cast<coff_symbol16 *>(BufferStart + CurrentOffset);
- strncpy(Symbol->Name.ShortName, RelocationName, (size_t)COFF::NameSize);
+ memcpy(Symbol->Name.ShortName, RelocationName.data(), (size_t) COFF::NameSize);
Symbol->Value = DataOffsets[i];
Symbol->SectionNumber = 2;
Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL;
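
The sprintf being removed here was an overflow waiting to happen: "$R%06X" into char[9] has room for exactly six hex digits plus the terminator, and nothing bounded the value being formatted. The formatv replacement masks the value to 24 bits and writes into a fixed-size buffer. The same fix expressed in portable terms:

#include <cstdint>
#include <cstdio>

int main() {
  char Name[9];                // "$R" + 6 hex digits + NUL: no slack at all
  uint32_t Value = 0x12345678; // eight digits would overflow plain sprintf
  std::snprintf(Name, sizeof(Name), "$R%06X", Value & 0xffffff);
  std::puts(Name); // $R345678
}
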
diff --git a/lib/ObjectYAML/WasmYAML.cpp b/lib/ObjectYAML/WasmYAML.cpp
index 8687f22949a2..b2411395dc0f 100644
--- a/lib/ObjectYAML/WasmYAML.cpp
+++ b/lib/ObjectYAML/WasmYAML.cpp
@@ -439,7 +439,7 @@ void ScalarEnumerationTraits<WasmYAML::TableType>::enumeration(
void ScalarEnumerationTraits<WasmYAML::RelocType>::enumeration(
IO &IO, WasmYAML::RelocType &Type) {
#define WASM_RELOC(name, value) IO.enumCase(Type, #name, wasm::name);
-#include "llvm/BinaryFormat/WasmRelocs/WebAssembly.def"
+#include "llvm/BinaryFormat/WasmRelocs.def"
#undef WASM_RELOC
}
diff --git a/lib/Passes/LLVMBuild.txt b/lib/Passes/LLVMBuild.txt
index 4d8c7f85d3aa..e2378a84328e 100644
--- a/lib/Passes/LLVMBuild.txt
+++ b/lib/Passes/LLVMBuild.txt
@@ -19,4 +19,4 @@
type = Library
name = Passes
parent = Libraries
-required_libraries = Analysis CodeGen Core IPO InstCombine Scalar Support TransformUtils Vectorize Instrumentation
+required_libraries = Analysis CodeGen Core IPO InstCombine Scalar Support Target TransformUtils Vectorize Instrumentation
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index f7fb0cef16bf..3489feb93a02 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -2546,12 +2546,12 @@ IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
}
bool IEEEFloat::convertFromStringSpecials(StringRef str) {
- if (str.equals("inf") || str.equals("INFINITY")) {
+ if (str.equals("inf") || str.equals("INFINITY") || str.equals("+Inf")) {
makeInf(false);
return true;
}
- if (str.equals("-inf") || str.equals("-INFINITY")) {
+ if (str.equals("-inf") || str.equals("-INFINITY") || str.equals("-Inf")) {
makeInf(true);
return true;
}
diff --git a/lib/Support/CachePruning.cpp b/lib/Support/CachePruning.cpp
index 3e97c991f504..141573c2a1c7 100644
--- a/lib/Support/CachePruning.cpp
+++ b/lib/Support/CachePruning.cpp
@@ -165,12 +165,14 @@ bool llvm::pruneCache(StringRef Path, CachePruningPolicy Policy) {
return false;
}
} else {
+ if (!Policy.Interval)
+ return false;
if (Policy.Interval != seconds(0)) {
// Check whether the time stamp is older than our pruning interval.
// If not, do nothing.
const auto TimeStampModTime = FileStatus.getLastModificationTime();
auto TimeStampAge = CurrentTime - TimeStampModTime;
- if (TimeStampAge <= Policy.Interval) {
+ if (TimeStampAge <= *Policy.Interval) {
DEBUG(dbgs() << "Timestamp file too recent ("
<< duration_cast<seconds>(TimeStampAge).count()
<< "s old), do not prune.\n");
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index 85e782b2c048..c709fc416df6 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -80,10 +80,12 @@ void *operator new(size_t N, const NamedBufferAlloc &Alloc) {
namespace {
/// MemoryBufferMem - Named MemoryBuffer pointing to a block of memory.
-class MemoryBufferMem : public MemoryBuffer {
+template<typename MB>
+class MemoryBufferMem : public MB {
public:
MemoryBufferMem(StringRef InputData, bool RequiresNullTerminator) {
- init(InputData.begin(), InputData.end(), RequiresNullTerminator);
+ MemoryBuffer::init(InputData.begin(), InputData.end(),
+ RequiresNullTerminator);
}
/// Disable sized deallocation for MemoryBufferMem, because it has
@@ -95,21 +97,22 @@ public:
return StringRef(reinterpret_cast<const char *>(this + 1));
}
- BufferKind getBufferKind() const override {
- return MemoryBuffer_Malloc;
+ MemoryBuffer::BufferKind getBufferKind() const override {
+ return MemoryBuffer::MemoryBuffer_Malloc;
}
};
}
-static ErrorOr<std::unique_ptr<MemoryBuffer>>
-getFileAux(const Twine &Filename, int64_t FileSize, uint64_t MapSize,
+template <typename MB>
+static ErrorOr<std::unique_ptr<MB>>
+getFileAux(const Twine &Filename, int64_t FileSize, uint64_t MapSize,
uint64_t Offset, bool RequiresNullTerminator, bool IsVolatile);
std::unique_ptr<MemoryBuffer>
MemoryBuffer::getMemBuffer(StringRef InputData, StringRef BufferName,
bool RequiresNullTerminator) {
auto *Ret = new (NamedBufferAlloc(BufferName))
- MemoryBufferMem(InputData, RequiresNullTerminator);
+ MemoryBufferMem<MemoryBuffer>(InputData, RequiresNullTerminator);
return std::unique_ptr<MemoryBuffer>(Ret);
}
@@ -119,50 +122,30 @@ MemoryBuffer::getMemBuffer(MemoryBufferRef Ref, bool RequiresNullTerminator) {
Ref.getBuffer(), Ref.getBufferIdentifier(), RequiresNullTerminator));
}
-std::unique_ptr<MemoryBuffer>
-MemoryBuffer::getMemBufferCopy(StringRef InputData, const Twine &BufferName) {
- std::unique_ptr<MemoryBuffer> Buf =
- getNewUninitMemBuffer(InputData.size(), BufferName);
+static ErrorOr<std::unique_ptr<WritableMemoryBuffer>>
+getMemBufferCopyImpl(StringRef InputData, const Twine &BufferName) {
+ auto Buf = WritableMemoryBuffer::getNewUninitMemBuffer(InputData.size(), BufferName);
if (!Buf)
- return nullptr;
- memcpy(const_cast<char*>(Buf->getBufferStart()), InputData.data(),
- InputData.size());
- return Buf;
+ return make_error_code(errc::not_enough_memory);
+ memcpy(Buf->getBufferStart(), InputData.data(), InputData.size());
+ return std::move(Buf);
}
std::unique_ptr<MemoryBuffer>
-MemoryBuffer::getNewUninitMemBuffer(size_t Size, const Twine &BufferName) {
- // Allocate space for the MemoryBuffer, the data and the name. It is important
- // that MemoryBuffer and data are aligned so PointerIntPair works with them.
- // TODO: Is 16-byte alignment enough? We copy small object files with large
- // alignment expectations into this buffer.
- SmallString<256> NameBuf;
- StringRef NameRef = BufferName.toStringRef(NameBuf);
- size_t AlignedStringLen =
- alignTo(sizeof(MemoryBufferMem) + NameRef.size() + 1, 16);
- size_t RealLen = AlignedStringLen + Size + 1;
- char *Mem = static_cast<char*>(operator new(RealLen, std::nothrow));
- if (!Mem)
- return nullptr;
-
- // The name is stored after the class itself.
- CopyStringRef(Mem + sizeof(MemoryBufferMem), NameRef);
-
- // The buffer begins after the name and must be aligned.
- char *Buf = Mem + AlignedStringLen;
- Buf[Size] = 0; // Null terminate buffer.
-
- auto *Ret = new (Mem) MemoryBufferMem(StringRef(Buf, Size), true);
- return std::unique_ptr<MemoryBuffer>(Ret);
+MemoryBuffer::getMemBufferCopy(StringRef InputData, const Twine &BufferName) {
+ auto Buf = getMemBufferCopyImpl(InputData, BufferName);
+ if (Buf)
+ return std::move(*Buf);
+ return nullptr;
}
std::unique_ptr<MemoryBuffer>
MemoryBuffer::getNewMemBuffer(size_t Size, StringRef BufferName) {
- std::unique_ptr<MemoryBuffer> SB = getNewUninitMemBuffer(Size, BufferName);
+ auto SB = WritableMemoryBuffer::getNewUninitMemBuffer(Size, BufferName);
if (!SB)
return nullptr;
- memset(const_cast<char*>(SB->getBufferStart()), 0, Size);
- return SB;
+ memset(SB->getBufferStart(), 0, Size);
+ return std::move(SB);
}
ErrorOr<std::unique_ptr<MemoryBuffer>>
@@ -179,10 +162,10 @@ MemoryBuffer::getFileOrSTDIN(const Twine &Filename, int64_t FileSize,
ErrorOr<std::unique_ptr<MemoryBuffer>>
MemoryBuffer::getFileSlice(const Twine &FilePath, uint64_t MapSize,
uint64_t Offset, bool IsVolatile) {
- return getFileAux(FilePath, -1, MapSize, Offset, false, IsVolatile);
+ return getFileAux<MemoryBuffer>(FilePath, -1, MapSize, Offset, false,
+ IsVolatile);
}
-
//===----------------------------------------------------------------------===//
// MemoryBuffer::getFile implementation.
//===----------------------------------------------------------------------===//
@@ -191,7 +174,8 @@ namespace {
/// \brief Memory maps a file descriptor using sys::fs::mapped_file_region.
///
/// This handles converting the offset into a legal offset on the platform.
-class MemoryBufferMMapFile : public MemoryBuffer {
+template<typename MB>
+class MemoryBufferMMapFile : public MB {
sys::fs::mapped_file_region MFR;
static uint64_t getLegalMapOffset(uint64_t Offset) {
@@ -209,11 +193,13 @@ class MemoryBufferMMapFile : public MemoryBuffer {
public:
MemoryBufferMMapFile(bool RequiresNullTerminator, int FD, uint64_t Len,
uint64_t Offset, std::error_code &EC)
- : MFR(FD, sys::fs::mapped_file_region::readonly,
+ : MFR(FD,
+ MB::Writable ? sys::fs::mapped_file_region::priv
+ : sys::fs::mapped_file_region::readonly,
getLegalMapSize(Len, Offset), getLegalMapOffset(Offset), EC) {
if (!EC) {
const char *Start = getStart(Len, Offset);
- init(Start, Start + Len, RequiresNullTerminator);
+ MemoryBuffer::init(Start, Start + Len, RequiresNullTerminator);
}
}
@@ -226,13 +212,13 @@ public:
return StringRef(reinterpret_cast<const char *>(this + 1));
}
- BufferKind getBufferKind() const override {
- return MemoryBuffer_MMap;
+ MemoryBuffer::BufferKind getBufferKind() const override {
+ return MemoryBuffer::MemoryBuffer_MMap;
}
};
}
-static ErrorOr<std::unique_ptr<MemoryBuffer>>
+static ErrorOr<std::unique_ptr<WritableMemoryBuffer>>
getMemoryBufferForStream(int FD, const Twine &BufferName) {
const ssize_t ChunkSize = 4096*4;
SmallString<ChunkSize> Buffer;
@@ -246,37 +232,80 @@ getMemoryBufferForStream(int FD, const Twine &BufferName) {
Buffer.set_size(Buffer.size() + ReadBytes);
} while (ReadBytes != 0);
- return MemoryBuffer::getMemBufferCopy(Buffer, BufferName);
+ return getMemBufferCopyImpl(Buffer, BufferName);
}
ErrorOr<std::unique_ptr<MemoryBuffer>>
MemoryBuffer::getFile(const Twine &Filename, int64_t FileSize,
bool RequiresNullTerminator, bool IsVolatile) {
- return getFileAux(Filename, FileSize, FileSize, 0,
- RequiresNullTerminator, IsVolatile);
+ return getFileAux<MemoryBuffer>(Filename, FileSize, FileSize, 0,
+ RequiresNullTerminator, IsVolatile);
}
-static ErrorOr<std::unique_ptr<MemoryBuffer>>
+template <typename MB>
+static ErrorOr<std::unique_ptr<MB>>
getOpenFileImpl(int FD, const Twine &Filename, uint64_t FileSize,
uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator,
bool IsVolatile);
-static ErrorOr<std::unique_ptr<MemoryBuffer>>
+template <typename MB>
+static ErrorOr<std::unique_ptr<MB>>
getFileAux(const Twine &Filename, int64_t FileSize, uint64_t MapSize,
uint64_t Offset, bool RequiresNullTerminator, bool IsVolatile) {
int FD;
std::error_code EC = sys::fs::openFileForRead(Filename, FD);
+
if (EC)
return EC;
- ErrorOr<std::unique_ptr<MemoryBuffer>> Ret =
- getOpenFileImpl(FD, Filename, FileSize, MapSize, Offset,
- RequiresNullTerminator, IsVolatile);
+ auto Ret = getOpenFileImpl<MB>(FD, Filename, FileSize, MapSize, Offset,
+ RequiresNullTerminator, IsVolatile);
close(FD);
return Ret;
}
+ErrorOr<std::unique_ptr<WritableMemoryBuffer>>
+WritableMemoryBuffer::getFile(const Twine &Filename, int64_t FileSize,
+ bool IsVolatile) {
+ return getFileAux<WritableMemoryBuffer>(Filename, FileSize, FileSize, 0,
+ /*RequiresNullTerminator*/ false,
+ IsVolatile);
+}
+
+ErrorOr<std::unique_ptr<WritableMemoryBuffer>>
+WritableMemoryBuffer::getFileSlice(const Twine &Filename, uint64_t MapSize,
+ uint64_t Offset, bool IsVolatile) {
+ return getFileAux<WritableMemoryBuffer>(Filename, -1, MapSize, Offset, false,
+ IsVolatile);
+}
+
+std::unique_ptr<WritableMemoryBuffer>
+WritableMemoryBuffer::getNewUninitMemBuffer(size_t Size, const Twine &BufferName) {
+ using MemBuffer = MemoryBufferMem<WritableMemoryBuffer>;
+ // Allocate space for the MemoryBuffer, the data and the name. It is important
+ // that MemoryBuffer and data are aligned so PointerIntPair works with them.
+ // TODO: Is 16-byte alignment enough? We copy small object files with large
+ // alignment expectations into this buffer.
+ SmallString<256> NameBuf;
+ StringRef NameRef = BufferName.toStringRef(NameBuf);
+ size_t AlignedStringLen = alignTo(sizeof(MemBuffer) + NameRef.size() + 1, 16);
+ size_t RealLen = AlignedStringLen + Size + 1;
+ char *Mem = static_cast<char*>(operator new(RealLen, std::nothrow));
+ if (!Mem)
+ return nullptr;
+
+ // The name is stored after the class itself.
+ CopyStringRef(Mem + sizeof(MemBuffer), NameRef);
+
+ // The buffer begins after the name and must be aligned.
+ char *Buf = Mem + AlignedStringLen;
+ Buf[Size] = 0; // Null terminate buffer.
+
+ auto *Ret = new (Mem) MemBuffer(StringRef(Buf, Size), true);
+ return std::unique_ptr<WritableMemoryBuffer>(Ret);
+}
+
static bool shouldUseMmap(int FD,
size_t FileSize,
size_t MapSize,
@@ -332,7 +361,8 @@ static bool shouldUseMmap(int FD,
return true;
}
-static ErrorOr<std::unique_ptr<MemoryBuffer>>
+template <typename MB>
+static ErrorOr<std::unique_ptr<MB>>
getOpenFileImpl(int FD, const Twine &Filename, uint64_t FileSize,
uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator,
bool IsVolatile) {
@@ -364,22 +394,21 @@ getOpenFileImpl(int FD, const Twine &Filename, uint64_t FileSize,
if (shouldUseMmap(FD, FileSize, MapSize, Offset, RequiresNullTerminator,
PageSize, IsVolatile)) {
std::error_code EC;
- std::unique_ptr<MemoryBuffer> Result(
- new (NamedBufferAlloc(Filename))
- MemoryBufferMMapFile(RequiresNullTerminator, FD, MapSize, Offset, EC));
+ std::unique_ptr<MB> Result(
+ new (NamedBufferAlloc(Filename)) MemoryBufferMMapFile<MB>(
+ RequiresNullTerminator, FD, MapSize, Offset, EC));
if (!EC)
return std::move(Result);
}
- std::unique_ptr<MemoryBuffer> Buf =
- MemoryBuffer::getNewUninitMemBuffer(MapSize, Filename);
+ auto Buf = WritableMemoryBuffer::getNewUninitMemBuffer(MapSize, Filename);
if (!Buf) {
// Failed to create a buffer. The only way it can fail is if
// new(std::nothrow) returns 0.
return make_error_code(errc::not_enough_memory);
}
- char *BufPtr = const_cast<char *>(Buf->getBufferStart());
+ char *BufPtr = Buf.get()->getBufferStart();
size_t BytesLeft = MapSize;
#ifndef HAVE_PREAD
@@ -412,7 +441,7 @@ getOpenFileImpl(int FD, const Twine &Filename, uint64_t FileSize,
ErrorOr<std::unique_ptr<MemoryBuffer>>
MemoryBuffer::getOpenFile(int FD, const Twine &Filename, uint64_t FileSize,
bool RequiresNullTerminator, bool IsVolatile) {
- return getOpenFileImpl(FD, Filename, FileSize, FileSize, 0,
+ return getOpenFileImpl<MemoryBuffer>(FD, Filename, FileSize, FileSize, 0,
RequiresNullTerminator, IsVolatile);
}
@@ -420,7 +449,8 @@ ErrorOr<std::unique_ptr<MemoryBuffer>>
MemoryBuffer::getOpenFileSlice(int FD, const Twine &Filename, uint64_t MapSize,
int64_t Offset, bool IsVolatile) {
assert(MapSize != uint64_t(-1));
- return getOpenFileImpl(FD, Filename, -1, MapSize, Offset, false, IsVolatile);
+ return getOpenFileImpl<MemoryBuffer>(FD, Filename, -1, MapSize, Offset, false,
+ IsVolatile);
}
ErrorOr<std::unique_ptr<MemoryBuffer>> MemoryBuffer::getSTDIN() {
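
[Editor's sketch] The recurring pattern in this file is templating the concrete buffer classes over their base, so one implementation backs both the read-only MemoryBuffer and the new WritableMemoryBuffer, with a static Writable flag on the base selecting a private (copy-on-write) mapping. A standalone sketch of the idea — the base-class layout below is invented for illustration, not LLVM's:

    #include <string>

    struct ReadOnlyBuffer {
      static constexpr bool Writable = false;
      std::string Data; // stand-in for the real buffer storage
    };
    struct WritableBuffer {
      static constexpr bool Writable = true;
      std::string Data;
    };

    enum class MapMode { ReadOnly, Private };

    // One implementation serves both variants; the base class picks the
    // mapping mode, mirroring MB::Writable in MemoryBufferMMapFile above.
    template <typename Base> struct MappedFile : Base {
      MapMode Mode = Base::Writable ? MapMode::Private : MapMode::ReadOnly;
    };

    // MappedFile<ReadOnlyBuffer> maps read-only; MappedFile<WritableBuffer>
    // maps copy-on-write, so writes never reach the underlying file.
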
diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp
index 90992fce0bcc..9ba7a09f9962 100644
--- a/lib/Support/StringRef.cpp
+++ b/lib/Support/StringRef.cpp
@@ -586,7 +586,7 @@ bool StringRef::getAsDouble(double &Result, bool AllowInexact) const {
APFloat::opStatus Status =
F.convertFromString(*this, APFloat::rmNearestTiesToEven);
if (Status != APFloat::opOK) {
- if (!AllowInexact || Status != APFloat::opInexact)
+ if (!AllowInexact || !(Status & APFloat::opInexact))
return true;
}
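
[Editor's sketch] APFloat::opStatus is a bitmask, so several conditions can be reported at once. The old equality test rejected a result that was, say, both inexact and underflowed; the new bit test accepts any status that includes opInexact. A standalone illustration (the enumerator values mirror APFloat's but are used here only for the example):

    #include <cassert>

    enum OpStatus : unsigned {
      opOK        = 0x00,
      opUnderflow = 0x08,
      opInexact   = 0x10,
    };

    int main() {
      unsigned Status = opUnderflow | opInexact;
      assert(Status != opInexact);       // equality test misses this case
      assert((Status & opInexact) != 0); // bit test still detects inexact
    }
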
diff --git a/lib/Support/TargetParser.cpp b/lib/Support/TargetParser.cpp
index c59068cb3550..b96ca084e9bf 100644
--- a/lib/Support/TargetParser.cpp
+++ b/lib/Support/TargetParser.cpp
@@ -537,7 +537,7 @@ StringRef llvm::AArch64::getDefaultCPU(StringRef Arch) {
}
unsigned llvm::AArch64::checkArchVersion(StringRef Arch) {
- if (Arch[0] == 'v' && std::isdigit(Arch[1]))
+ if (Arch.size() >= 2 && Arch[0] == 'v' && std::isdigit(Arch[1]))
return (Arch[1] - 48);
return 0;
}
@@ -633,7 +633,7 @@ StringRef llvm::ARM::getCanonicalArchName(StringRef Arch) {
// Only match non-marketing names
if (offset != StringRef::npos) {
// Must start with 'vN'.
- if (A[0] != 'v' || !std::isdigit(A[1]))
+ if (A.size() >= 2 && (A[0] != 'v' || !std::isdigit(A[1])))
return Error;
// Can't have an extra 'eb'.
if (A.find("eb") != StringRef::npos)
@@ -739,7 +739,6 @@ ARM::ProfileKind ARM::parseArchProfile(StringRef Arch) {
case ARM::ArchKind::ARMV8_2A:
case ARM::ArchKind::ARMV8_3A:
return ARM::ProfileKind::A;
- LLVM_FALLTHROUGH;
case ARM::ArchKind::ARMV2:
case ARM::ArchKind::ARMV2A:
case ARM::ArchKind::ARMV3:
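
[Editor's sketch] Both TargetParser hunks guard the Arch[1] access so a one-character name like "v" no longer reads past the end of the string. A standalone version of the same bounds-checked parse:

    #include <cctype>
    #include <string_view>

    // Returns the N in "vN...", or 0 if the name is too short or malformed.
    unsigned archVersion(std::string_view Arch) {
      if (Arch.size() >= 2 && Arch[0] == 'v' &&
          std::isdigit(static_cast<unsigned char>(Arch[1])))
        return Arch[1] - '0';
      return 0;
    }
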
diff --git a/lib/Support/YAMLTraits.cpp b/lib/Support/YAMLTraits.cpp
index 05ca40f03018..f8a80ba87873 100644
--- a/lib/Support/YAMLTraits.cpp
+++ b/lib/Support/YAMLTraits.cpp
@@ -657,7 +657,12 @@ void Output::scalarString(StringRef &S, QuotingType MustQuote) {
}
i = j + 1;
} else if (MustQuote == QuotingType::Double &&
- !sys::unicode::isPrintable(S[j])) {
+ !sys::unicode::isPrintable(S[j]) && (S[j] & 0x80) == 0) {
+ // If we're double quoting non-printable characters, we prefer printing
+ // them as "\x" + their hex representation. Note that special casing is
+ // needed for UTF-8, where a byte may be part of a UTF-8 sequence and
+ // appear as non-printable, in which case we want to print the correct
+      // Unicode character and not its hex representation.
output(StringRef(&Base[i], j - i)); // "flush"
output(StringLiteral("\\x"));
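
[Editor's sketch] The extra (S[j] & 0x80) == 0 test matters because a byte with the high bit set may be part of a multi-byte UTF-8 sequence, and a per-byte printability check says nothing useful about it; only 7-bit bytes get hex-escaped. A standalone sketch with a deliberately crude printability test:

    #include <cstdio>
    #include <string_view>

    // Escape non-printable 7-bit bytes as \xNN; pass 8-bit bytes through
    // untouched so UTF-8 sequences are emitted as the characters they encode.
    void emitEscaped(std::string_view S) {
      for (unsigned char C : S) {
        if ((C & 0x80) == 0 && (C < 0x20 || C == 0x7F))
          std::printf("\\x%02X", static_cast<unsigned>(C));
        else
          std::fputc(C, stdout);
      }
    }
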
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 67138f41dda8..2ff2ee347f56 100644
--- a/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -583,6 +583,20 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
switch (MI->getOpcode()) {
default:
break;
+ case AArch64::MOVIv2d_ns:
+ // If the target has <rdar://problem/16473581>, lower this
+ // instruction to movi.16b instead.
+ if (STI->hasZeroCycleZeroingFPWorkaround() &&
+ MI->getOperand(1).getImm() == 0) {
+ MCInst TmpInst;
+ TmpInst.setOpcode(AArch64::MOVIv16b_ns);
+ TmpInst.addOperand(MCOperand::createReg(MI->getOperand(0).getReg()));
+ TmpInst.addOperand(MCOperand::createImm(MI->getOperand(1).getImm()));
+ EmitToStreamer(*OutStreamer, TmpInst);
+ return;
+ }
+ break;
+
case AArch64::DBG_VALUE: {
if (isVerbose() && OutStreamer->hasRawTextSupport()) {
SmallString<128> TmpStr;
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index fd1699fd363d..022200986d2b 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -5135,11 +5135,12 @@ bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
}
- // fall-back to target-independent instruction selection.
- return selectOperator(I, I->getOpcode());
// Silence warnings.
(void)&CC_AArch64_DarwinPCS_VarArg;
(void)&CC_AArch64_Win64_VarArg;
+
+ // fall-back to target-independent instruction selection.
+ return selectOperator(I, I->getOpcode());
}
namespace llvm {
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index 73944359223a..d66f7b59a4b5 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -97,6 +97,7 @@
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
@@ -335,6 +336,22 @@ bool AArch64FrameLowering::canUseAsPrologue(
return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister;
}
+static bool windowsRequiresStackProbe(MachineFunction &MF,
+ unsigned StackSizeInBytes) {
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ if (!Subtarget.isTargetWindows())
+ return false;
+ const Function &F = MF.getFunction();
+ // TODO: When implementing stack protectors, take that into account
+ // for the probe threshold.
+ unsigned StackProbeSize = 4096;
+ if (F.hasFnAttribute("stack-probe-size"))
+ F.getFnAttribute("stack-probe-size")
+ .getValueAsString()
+ .getAsInteger(0, StackProbeSize);
+ return StackSizeInBytes >= StackProbeSize;
+}
+
bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
MachineFunction &MF, unsigned StackBumpBytes) const {
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
@@ -347,7 +364,7 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
// 512 is the maximum immediate for stp/ldp that will be used for
// callee-save save/restores
- if (StackBumpBytes >= 512)
+ if (StackBumpBytes >= 512 || windowsRequiresStackProbe(MF, StackBumpBytes))
return false;
if (MFI.hasVarSizedObjects())
@@ -478,7 +495,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
return;
int NumBytes = (int)MFI.getStackSize();
- if (!AFI->hasStackFrame()) {
+ if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
assert(!HasFP && "unexpected function without stack frame but with FP");
// All of the stack allocation is for locals.
@@ -550,6 +567,44 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
MachineInstr::FrameSetup);
}
+ if (windowsRequiresStackProbe(MF, NumBytes)) {
+ uint32_t NumWords = NumBytes >> 4;
+
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
+ .addImm(NumWords)
+ .setMIFlags(MachineInstr::FrameSetup);
+
+ switch (MF.getTarget().getCodeModel()) {
+ case CodeModel::Small:
+ case CodeModel::Medium:
+ case CodeModel::Kernel:
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
+ .addExternalSymbol("__chkstk")
+ .addReg(AArch64::X15, RegState::Implicit)
+ .setMIFlags(MachineInstr::FrameSetup);
+ break;
+ case CodeModel::Large:
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
+ .addReg(AArch64::X16, RegState::Define)
+ .addExternalSymbol("__chkstk")
+ .addExternalSymbol("__chkstk")
+ .setMIFlags(MachineInstr::FrameSetup);
+
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::BLR))
+ .addReg(AArch64::X16, RegState::Kill)
+ .addReg(AArch64::X15, RegState::Implicit | RegState::Define)
+ .setMIFlags(MachineInstr::FrameSetup);
+ break;
+ }
+
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
+ .addReg(AArch64::SP, RegState::Kill)
+ .addReg(AArch64::X15, RegState::Kill)
+ .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
+ .setMIFlags(MachineInstr::FrameSetup);
+ NumBytes = 0;
+ }
+
// Allocate space for the rest of the frame.
if (NumBytes) {
const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
@@ -1164,18 +1219,32 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
unsigned UnspilledCSGPR = AArch64::NoRegister;
unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
+
+ unsigned BasePointerReg = RegInfo->hasBasePointer(MF)
+ ? RegInfo->getBaseRegister()
+ : (unsigned)AArch64::NoRegister;
+
+ unsigned SpillEstimate = SavedRegs.count();
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ unsigned Reg = CSRegs[i];
+ unsigned PairedReg = CSRegs[i ^ 1];
+ if (Reg == BasePointerReg)
+ SpillEstimate++;
+ if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg))
+ SpillEstimate++;
+ }
+ SpillEstimate += 2; // Conservatively include FP+LR in the estimate
+ unsigned StackEstimate = MFI.estimateStackSize(MF) + 8 * SpillEstimate;
+
// The frame record needs to be created by saving the appropriate registers
- if (hasFP(MF)) {
+ if (hasFP(MF) || windowsRequiresStackProbe(MF, StackEstimate)) {
SavedRegs.set(AArch64::FP);
SavedRegs.set(AArch64::LR);
}
- unsigned BasePointerReg = AArch64::NoRegister;
- if (RegInfo->hasBasePointer(MF))
- BasePointerReg = RegInfo->getBaseRegister();
-
unsigned ExtraCSSpill = 0;
- const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
// Figure out which callee-saved registers to save/restore.
for (unsigned i = 0; CSRegs[i]; ++i) {
const unsigned Reg = CSRegs[i];
@@ -1217,7 +1286,6 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// The CSR spill slots have not been allocated yet, so estimateStackSize
// won't include them.
- MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned CFSize = MFI.estimateStackSize(MF) + 8 * NumRegsSpilled;
DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
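
[Editor's sketch] The new prologue path probes the stack on Windows targets whenever the frame grows past the guard page: the threshold defaults to 4096 bytes, can be overridden per function with the "stack-probe-size" attribute, and any frame at or above it is routed through __chkstk (which, per the code above, takes the size in X15 in 16-byte units). A standalone sketch of the threshold logic, with the attribute lookup modeled as a plain map and getAsInteger's error handling elided:

    #include <cstdlib>
    #include <map>
    #include <string>

    bool requiresStackProbe(const std::map<std::string, std::string> &FnAttrs,
                            unsigned StackSizeInBytes, bool IsWindows) {
      if (!IsWindows)
        return false;
      unsigned StackProbeSize = 4096; // one guard page by default
      auto It = FnAttrs.find("stack-probe-size");
      if (It != FnAttrs.end())
        StackProbeSize =
            static_cast<unsigned>(std::strtoul(It->second.c_str(), nullptr, 0));
      return StackSizeInBytes >= StackProbeSize;
    }
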
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 1242cf5be188..6f7b2b6fd5b5 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -470,10 +470,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
if (Subtarget->hasPerfMon())
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
- if (Subtarget->isTargetMachO()) {
- // For iOS, we don't want to the normal expansion of a libcall to
- // sincos. We want to issue a libcall to __sincos_stret to avoid memory
- // traffic.
+ if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
+ getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
+ // Issue __sincos_stret if available.
setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
} else {
@@ -2328,8 +2327,9 @@ SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
Entry.IsZExt = false;
Args.push_back(Entry);
- const char *LibcallName =
- (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret";
+ RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64
+ : RTLIB::SINCOS_STRET_F32;
+ const char *LibcallName = getLibcallName(LC);
SDValue Callee =
DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
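
[Editor's sketch] Instead of keying on isTargetMachO, the lowering now asks the runtime-library table whether __sincos_stret exists for both f32 and f64; the OS-specific knowledge lives wherever those table entries are set. A standalone sketch of gating a feature on nullable libcall names (all names here invented for the example):

    #include <array>

    enum Libcall { SINCOS_STRET_F32, SINCOS_STRET_F64, NUM_LIBCALLS };

    struct LibcallTable {
      std::array<const char *, NUM_LIBCALLS> Names{}; // null = unavailable
      const char *name(Libcall LC) const { return Names[LC]; }
    };

    bool useSincosStret(const LibcallTable &T) {
      // Enable the custom FSINCOS lowering only when the target's table
      // provides the combined entry point for both float and double.
      return T.name(SINCOS_STRET_F32) != nullptr &&
             T.name(SINCOS_STRET_F64) != nullptr;
    }
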
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index c7c560a81328..abbba7d1d5a9 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -4963,16 +4963,9 @@ void AArch64InstrInfo::insertOutlinerEpilogue(
MachineBasicBlock &MBB, MachineFunction &MF,
const MachineOutlinerInfo &MInfo) const {
- bool ContainsCalls = false;
-
- for (MachineInstr &MI : MBB) {
- if (MI.isCall()) {
- ContainsCalls = true;
- break;
- }
- }
-
- if (ContainsCalls) {
+ // Is there a call in the outlined range?
+ if (std::any_of(MBB.instr_begin(), MBB.instr_end(),
+ [](MachineInstr &MI) { return MI.isCall(); })) {
// Fix up the instructions in the range, since we're going to modify the
// stack.
fixupPostOutline(MBB);
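
[Editor's sketch] The hand-rolled flag-and-break loop collapses into std::any_of with a predicate (llvm::any_of from STLExtras would express the same over a range). A standalone equivalent:

    #include <algorithm>
    #include <vector>

    struct MachineInstr {
      bool Call = false;
      bool isCall() const { return Call; }
    };

    bool containsCall(const std::vector<MachineInstr> &MBB) {
      return std::any_of(MBB.begin(), MBB.end(),
                         [](const MachineInstr &MI) { return MI.isCall(); });
    }
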
diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index 7f5507371fa0..a719d47618e5 100644
--- a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -25,11 +25,11 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
ConstantSDNode *SizeValue = dyn_cast<ConstantSDNode>(Size);
const AArch64Subtarget &STI =
DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
- const char *bzeroEntry =
- (V && V->isNullValue()) ? STI.getBZeroEntry() : nullptr;
+ const char *bzeroName = (V && V->isNullValue())
+ ? DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO) : nullptr;
// For small size (< 256), it is not beneficial to use bzero
// instead of memset.
- if (bzeroEntry && (!SizeValue || SizeValue->getZExtValue() > 256)) {
+ if (bzeroName && (!SizeValue || SizeValue->getZExtValue() > 256)) {
const AArch64TargetLowering &TLI = *STI.getTargetLowering();
EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout());
@@ -45,7 +45,7 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
CLI.setDebugLoc(dl)
.setChain(Chain)
.setLibCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol(bzeroEntry, IntPtr),
+ DAG.getExternalSymbol(bzeroName, IntPtr),
std::move(Args))
.setDiscardResult();
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp
index e397d585ae77..688bb936d0ca 100644
--- a/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -217,19 +217,6 @@ unsigned char AArch64Subtarget::classifyGlobalFunctionReference(
return AArch64II::MO_NO_FLAG;
}
-/// This function returns the name of a function which has an interface
-/// like the non-standard bzero function, if such a function exists on
-/// the current subtarget and it is considered prefereable over
-/// memset with zero passed as the second argument. Otherwise it
-/// returns null.
-const char *AArch64Subtarget::getBZeroEntry() const {
- // Prefer bzero on Darwin only.
- if(isTargetDarwin())
- return "bzero";
-
- return nullptr;
-}
-
void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
unsigned NumRegionInstrs) const {
// LNT run (at least on Cyclone) showed reasonably significant gains for
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
index 5d9759d363dd..9245b2f396b7 100644
--- a/lib/Target/AArch64/AArch64Subtarget.h
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -309,13 +309,6 @@ public:
unsigned char classifyGlobalFunctionReference(const GlobalValue *GV,
const TargetMachine &TM) const;
- /// This function returns the name of a function which has an interface
- /// like the non-standard bzero function, if such a function exists on
- /// the current subtarget and it is considered prefereable over
- /// memset with zero passed as the second argument. Otherwise it
- /// returns null.
- const char *getBZeroEntry() const;
-
void overrideSchedPolicy(MachineSchedPolicy &Policy,
unsigned NumRegionInstrs) const override;
diff --git a/lib/Target/AArch64/AArch64SystemOperands.td b/lib/Target/AArch64/AArch64SystemOperands.td
index df939add70fa..66b7e02ceb99 100644
--- a/lib/Target/AArch64/AArch64SystemOperands.td
+++ b/lib/Target/AArch64/AArch64SystemOperands.td
@@ -322,6 +322,9 @@ def : ROSysReg<"PMCEID0_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b110>;
def : ROSysReg<"PMCEID1_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b111>;
def : ROSysReg<"MIDR_EL1", 0b11, 0b000, 0b0000, 0b0000, 0b000>;
def : ROSysReg<"CCSIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b000>;
+def : ROSysReg<"CCSIDR2_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b010> {
+ let Requires = [{ {AArch64::HasV8_3aOps} }];
+}
def : ROSysReg<"CLIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b001>;
def : ROSysReg<"CTR_EL0", 0b11, 0b011, 0b0000, 0b0000, 0b001>;
def : ROSysReg<"MPIDR_EL1", 0b11, 0b000, 0b0000, 0b0000, 0b101>;
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index 64583ead73f2..0e6ad944c141 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -346,10 +346,9 @@ public:
} // end anonymous namespace
-TargetIRAnalysis AArch64TargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis([this](const Function &F) {
- return TargetTransformInfo(AArch64TTIImpl(this, F));
- });
+TargetTransformInfo
+AArch64TargetMachine::getTargetTransformInfo(const Function &F) {
+ return TargetTransformInfo(AArch64TTIImpl(this, F));
}
TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) {
diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h
index 2bbfb2da3db6..8d28a5e30ebf 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/lib/Target/AArch64/AArch64TargetMachine.h
@@ -44,8 +44,7 @@ public:
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- /// \brief Get the TargetIRAnalysis for this target.
- TargetIRAnalysis getTargetIRAnalysis() override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) override;
TargetLoweringObjectFile* getObjFileLowering() const override {
return TLOF.get();
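
[Editor's sketch] This TargetIRAnalysis-to-getTargetTransformInfo change repeats across the backends in this import (AArch64, AMDGPU, ARC, ...): the per-target hook now returns the TTI for a function directly, and the lambda wrapper each backend used to duplicate presumably moves into the shared TargetMachine base — the diff only shows the target side, so that location is an assumption. A standalone sketch of the refactor:

    #include <functional>

    struct Function;
    struct TargetTransformInfo { int Dummy; };

    struct TargetMachine {
      virtual ~TargetMachine() = default;

      // Per-target override: just build the TTI for this function.
      virtual TargetTransformInfo getTargetTransformInfo(const Function &F) = 0;

      // Shared wrapper, written once instead of per backend (assumed home;
      // the diff above shows only the per-target side of the change).
      std::function<TargetTransformInfo(const Function &)> getTargetIRAnalysis() {
        return [this](const Function &F) { return getTargetTransformInfo(F); };
      }
    };
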
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index aeffbd70fc81..6e63783e5646 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -1975,10 +1975,6 @@ static bool isValidSVEKind(StringRef Name) {
.Default(false);
}
-static bool isSVERegister(StringRef Name) {
- return Name[0] == 'z' || Name[0] == 'p';
-}
-
static void parseValidVectorKind(StringRef Name, unsigned &NumElements,
char &ElementKind) {
assert(isValidVectorKind(Name));
@@ -2008,21 +2004,19 @@ bool AArch64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
// Matches a register name or register alias previously defined by '.req'
unsigned AArch64AsmParser::matchRegisterNameAlias(StringRef Name,
RegKind Kind) {
- unsigned RegNum;
- switch (Kind) {
- case RegKind::Scalar:
- RegNum = MatchRegisterName(Name);
- break;
- case RegKind::NeonVector:
- RegNum = MatchNeonVectorRegName(Name);
- break;
- case RegKind::SVEDataVector:
- RegNum = matchSVEDataVectorRegName(Name);
- break;
- case RegKind::SVEPredicateVector:
- RegNum = matchSVEPredicateVectorRegName(Name);
- break;
- }
+ unsigned RegNum = 0;
+ if ((RegNum = matchSVEDataVectorRegName(Name)))
+ return Kind == RegKind::SVEDataVector ? RegNum : 0;
+
+ if ((RegNum = matchSVEPredicateVectorRegName(Name)))
+ return Kind == RegKind::SVEPredicateVector ? RegNum : 0;
+
+ if ((RegNum = MatchNeonVectorRegName(Name)))
+ return Kind == RegKind::NeonVector ? RegNum : 0;
+
+ // The parsed register must be of RegKind Scalar
+ if ((RegNum = MatchRegisterName(Name)))
+ return Kind == RegKind::Scalar ? RegNum : 0;
if (!RegNum) {
// Check for aliases registered via .req. Canonicalize to lower case.
@@ -2049,10 +2043,8 @@ int AArch64AsmParser::tryParseRegister() {
return -1;
std::string lowerCase = Tok.getString().lower();
- if (isSVERegister(lowerCase))
- return -1;
-
unsigned RegNum = matchRegisterNameAlias(lowerCase, RegKind::Scalar);
+
// Also handle a few aliases of registers.
if (RegNum == 0)
RegNum = StringSwitch<unsigned>(lowerCase)
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index bb628b8c558f..fda6252f46e3 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -695,18 +695,24 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
IsSGPR = false;
Width = 3;
} else if (AMDGPU::SReg_128RegClass.contains(Reg)) {
+ assert(!AMDGPU::TTMP_128RegClass.contains(Reg) &&
+ "trap handler registers should not be used");
IsSGPR = true;
Width = 4;
} else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
IsSGPR = false;
Width = 4;
} else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
+ assert(!AMDGPU::TTMP_256RegClass.contains(Reg) &&
+ "trap handler registers should not be used");
IsSGPR = true;
Width = 8;
} else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
IsSGPR = false;
Width = 8;
} else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
+ assert(!AMDGPU::TTMP_512RegClass.contains(Reg) &&
+ "trap handler registers should not be used");
IsSGPR = true;
Width = 16;
} else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 3f8a9b1964ca..5c31bddd9b1a 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -202,6 +202,16 @@ public:
const char* getTargetNodeName(unsigned Opcode) const override;
+ // FIXME: Turn off MergeConsecutiveStores() before Instruction Selection
+ // for AMDGPU.
+  // A commit (git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@319036
+  // 91177308-0d34-0410-b5e6-96231b3b80d8) turned on
+  // MergeConsecutiveStores() before Instruction Selection for all targets.
+  // Enough AMDGPU compiles go into an infinite loop (MergeConsecutiveStores()
+  // merges two stores; LegalizeStoreOps() un-merges; MergeConsecutiveStores()
+  // re-merges, etc.) to warrant turning it off for now.
+ bool mergeStoresAfterLegalization() const override { return false; }
+
bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override {
return true;
}
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 6984f4e71613..2042dbf6d5e2 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -571,10 +571,9 @@ public:
} // end anonymous namespace
-TargetIRAnalysis AMDGPUTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis([this](const Function &F) {
- return TargetTransformInfo(AMDGPUTTIImpl(this, F));
- });
+TargetTransformInfo
+AMDGPUTargetMachine::getTargetTransformInfo(const Function &F) {
+ return TargetTransformInfo(AMDGPUTTIImpl(this, F));
}
void AMDGPUPassConfig::addEarlyCSEOrGVNPass() {
@@ -898,4 +897,3 @@ void GCNPassConfig::addPreEmitPass() {
TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
return new GCNPassConfig(*this, PM);
}
-
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 5043e31f6f5b..5f9b2a7fca20 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -55,7 +55,7 @@ public:
const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override {
return &IntrinsicInfo;
}
- TargetIRAnalysis getTargetIRAnalysis() override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) override;
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 2acd7f78faea..ebf656c549ec 100644
--- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -536,6 +536,10 @@ public:
return EndLoc;
}
+ SMRange getLocRange() const {
+ return SMRange(StartLoc, EndLoc);
+ }
+
Modifiers getModifiers() const {
assert(isRegKind() || isImmTy(ImmTyNone));
return isRegKind() ? Reg.Mods : Imm.Mods;
@@ -1491,6 +1495,8 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) {
case 1: return AMDGPU::TTMP_32RegClassID;
case 2: return AMDGPU::TTMP_64RegClassID;
case 4: return AMDGPU::TTMP_128RegClassID;
+ case 8: return AMDGPU::TTMP_256RegClassID;
+ case 16: return AMDGPU::TTMP_512RegClassID;
}
} else if (Is == IS_SGPR) {
switch (RegWidth) {
@@ -1498,8 +1504,8 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) {
case 1: return AMDGPU::SGPR_32RegClassID;
case 2: return AMDGPU::SGPR_64RegClassID;
case 4: return AMDGPU::SGPR_128RegClassID;
- case 8: return AMDGPU::SReg_256RegClassID;
- case 16: return AMDGPU::SReg_512RegClassID;
+ case 8: return AMDGPU::SGPR_256RegClassID;
+ case 16: return AMDGPU::SGPR_512RegClassID;
}
}
return -1;
@@ -1754,6 +1760,11 @@ AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) {
// TODO: add syntactic sugar for 1/(2*PI)
bool Minus = false;
if (getLexer().getKind() == AsmToken::Minus) {
+ const AsmToken NextToken = getLexer().peekTok();
+ if (!NextToken.is(AsmToken::Integer) &&
+ !NextToken.is(AsmToken::Real)) {
+ return MatchOperand_NoMatch;
+ }
Minus = true;
Parser.Lex();
}
@@ -1783,7 +1794,7 @@ AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) {
return MatchOperand_Success;
}
default:
- return Minus ? MatchOperand_ParseFail : MatchOperand_NoMatch;
+ return MatchOperand_NoMatch;
}
}
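
[Editor's sketch] The parser now peeks at the token after a leading minus before consuming it: if a numeric literal does not follow, it reports NoMatch without eating the '-', leaving the input intact for other operand parsers (and the error case correspondingly relaxes from ParseFail to NoMatch). A standalone version of the peek-before-commit pattern:

    #include <cctype>
    #include <string_view>

    // Consume a leading '-' only when a digit follows; otherwise leave the
    // input untouched so another grammar rule can try it.
    bool parseOptionalMinus(std::string_view &In) {
      if (In.size() >= 2 && In[0] == '-' &&
          std::isdigit(static_cast<unsigned char>(In[1]))) {
        In.remove_prefix(1); // commit: it really is a negative number
        return true;
      }
      return false;
    }
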
@@ -2244,6 +2255,9 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
return true;
}
+static std::string AMDGPUMnemonicSpellCheck(StringRef S, uint64_t FBS,
+ unsigned VariantID = 0);
+
bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
@@ -2286,8 +2300,13 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_MissingFeature:
return Error(IDLoc, "instruction not supported on this GPU");
- case Match_MnemonicFail:
- return Error(IDLoc, "unrecognized instruction mnemonic");
+ case Match_MnemonicFail: {
+ uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
+ std::string Suggestion = AMDGPUMnemonicSpellCheck(
+ ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
+ return Error(IDLoc, "invalid instruction" + Suggestion,
+ ((AMDGPUOperand &)*Operands[0]).getLocRange());
+ }
case Match_InvalidOperand: {
SMLoc ErrorLoc = IDLoc;
@@ -3838,7 +3857,9 @@ AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
return Ok? MatchOperand_Success : MatchOperand_ParseFail;
} else {
- return MatchOperand_NoMatch;
+ // Swizzle "offset" operand is optional.
+ // If it is omitted, try parsing other optional operands.
+ return parseOptionalOperand(Operands);
}
}
@@ -4786,6 +4807,7 @@ extern "C" void LLVMInitializeAMDGPUAsmParser() {
#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
+#define GET_MNEMONIC_SPELL_CHECKER
#include "AMDGPUGenAsmMatcher.inc"
// This function should be defined after auto-generated include so that we have
diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 4a3f2c975179..47a2d3f2fdc5 100644
--- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -348,10 +348,12 @@ MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
case AMDGPU::TTMP_128RegClassID:
// ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
// this bundle?
- case AMDGPU::SReg_256RegClassID:
- // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
+ case AMDGPU::SGPR_256RegClassID:
+ case AMDGPU::TTMP_256RegClassID:
+ // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
// this bundle?
- case AMDGPU::SReg_512RegClassID:
+ case AMDGPU::SGPR_512RegClassID:
+ case AMDGPU::TTMP_512RegClassID:
shift = 2;
break;
// ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
@@ -441,11 +443,11 @@ MCOperand AMDGPUDisassembler::decodeOperand_SReg_128(unsigned Val) const {
}
MCOperand AMDGPUDisassembler::decodeOperand_SReg_256(unsigned Val) const {
- return createSRegOperand(AMDGPU::SReg_256RegClassID, Val);
+ return decodeDstOp(OPW256, Val);
}
MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const {
- return createSRegOperand(AMDGPU::SReg_512RegClassID, Val);
+ return decodeDstOp(OPW512, Val);
}
MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
@@ -593,6 +595,8 @@ unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
return SGPR_32RegClassID;
case OPW64: return SGPR_64RegClassID;
case OPW128: return SGPR_128RegClassID;
+ case OPW256: return SGPR_256RegClassID;
+ case OPW512: return SGPR_512RegClassID;
}
}
@@ -608,6 +612,8 @@ unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
return TTMP_32RegClassID;
case OPW64: return TTMP_64RegClassID;
case OPW128: return TTMP_128RegClassID;
+ case OPW256: return TTMP_256RegClassID;
+ case OPW512: return TTMP_512RegClassID;
}
}
@@ -659,6 +665,25 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) c
}
}
+MCOperand AMDGPUDisassembler::decodeDstOp(const OpWidthTy Width, unsigned Val) const {
+ using namespace AMDGPU::EncValues;
+
+ assert(Val < 128);
+ assert(Width == OPW256 || Width == OPW512);
+
+ if (Val <= SGPR_MAX) {
+ assert(SGPR_MIN == 0); // "SGPR_MIN <= Val" is always true and causes compilation warning.
+ return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
+ }
+
+ int TTmpIdx = getTTmpIdx(Val);
+ if (TTmpIdx >= 0) {
+ return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
+ }
+
+ llvm_unreachable("unknown dst register");
+}
+
MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
using namespace AMDGPU;
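
[Editor's sketch] With 256- and 512-bit TTMP tuples now defined, a wide destination operand can name either an SGPR tuple or a trap-handler tuple, so decodeDstOp splits the 7-bit encoding into the two ranges. A standalone sketch with illustrative bounds (the real ones live in AMDGPU::EncValues):

    #include <cassert>

    enum class RegBank { SGPR, TTMP };
    struct Decoded { RegBank Bank; unsigned Index; };

    // Illustrative: values 0..101 select SGPR tuples, 108..123 TTMP tuples.
    Decoded decodeDst(unsigned Val) {
      assert(Val < 128);
      if (Val <= 101)
        return {RegBank::SGPR, Val};
      assert(Val >= 108 && Val <= 123 && "unknown dst register");
      return {RegBank::TTMP, Val - 108};
    }
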
diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index ce396eb68c4c..75cfc5e11282 100644
--- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -95,6 +95,8 @@ public:
OPW32,
OPW64,
OPW128,
+ OPW256,
+ OPW512,
OPW16,
OPWV216,
OPW_LAST_,
@@ -110,6 +112,7 @@ public:
MCOperand decodeLiteralConstant() const;
MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val) const;
+ MCOperand decodeDstOp(const OpWidthTy Width, unsigned Val) const;
MCOperand decodeSpecialReg32(unsigned Val) const;
MCOperand decodeSpecialReg64(unsigned Val) const;
diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
index 67663d39967c..bf57f88bef91 100644
--- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -335,13 +335,13 @@ void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O,
} else if (MRI.getRegClass(AMDGPU::VReg_256RegClassID).contains(RegNo)) {
O << 'v';
NumRegs = 8;
- } else if (MRI.getRegClass(AMDGPU::SReg_256RegClassID).contains(RegNo)) {
+ } else if (MRI.getRegClass(AMDGPU::SGPR_256RegClassID).contains(RegNo)) {
O << 's';
NumRegs = 8;
} else if (MRI.getRegClass(AMDGPU::VReg_512RegClassID).contains(RegNo)) {
O << 'v';
NumRegs = 16;
- } else if (MRI.getRegClass(AMDGPU::SReg_512RegClassID).contains(RegNo)) {
+ } else if (MRI.getRegClass(AMDGPU::SGPR_512RegClassID).contains(RegNo)) {
O << 's';
NumRegs = 16;
} else {
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.td b/lib/Target/AMDGPU/SIRegisterInfo.td
index 6b7c3ffb7bb8..dd0efef7f91b 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -8,6 +8,26 @@
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
+// Helpers
+//===----------------------------------------------------------------------===//
+
+class getSubRegs<int size> {
+ list<SubRegIndex> ret2 = [sub0, sub1];
+ list<SubRegIndex> ret3 = [sub0, sub1, sub2];
+ list<SubRegIndex> ret4 = [sub0, sub1, sub2, sub3];
+ list<SubRegIndex> ret8 = [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7];
+ list<SubRegIndex> ret16 = [sub0, sub1, sub2, sub3,
+ sub4, sub5, sub6, sub7,
+ sub8, sub9, sub10, sub11,
+ sub12, sub13, sub14, sub15];
+
+ list<SubRegIndex> ret = !if(!eq(size, 2), ret2,
+ !if(!eq(size, 3), ret3,
+ !if(!eq(size, 4), ret4,
+ !if(!eq(size, 8), ret8, ret16))));
+}
+
+//===----------------------------------------------------------------------===//
// Declarations that describe the SI registers
//===----------------------------------------------------------------------===//
class SIReg <string n, bits<16> regIdx = 0> : Register<n>,
@@ -141,19 +161,19 @@ def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
}
// SGPR 64-bit registers
-def SGPR_64Regs : RegisterTuples<[sub0, sub1],
+def SGPR_64Regs : RegisterTuples<getSubRegs<2>.ret,
[(add (decimate SGPR_32, 2)),
(add (decimate (shl SGPR_32, 1), 2))]>;
// SGPR 128-bit registers
-def SGPR_128Regs : RegisterTuples<[sub0, sub1, sub2, sub3],
+def SGPR_128Regs : RegisterTuples<getSubRegs<4>.ret,
[(add (decimate SGPR_32, 4)),
(add (decimate (shl SGPR_32, 1), 4)),
(add (decimate (shl SGPR_32, 2), 4)),
(add (decimate (shl SGPR_32, 3), 4))]>;
// SGPR 256-bit registers
-def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
+def SGPR_256Regs : RegisterTuples<getSubRegs<8>.ret,
[(add (decimate SGPR_32, 4)),
(add (decimate (shl SGPR_32, 1), 4)),
(add (decimate (shl SGPR_32, 2), 4)),
@@ -164,8 +184,7 @@ def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
(add (decimate (shl SGPR_32, 7), 4))]>;
// SGPR 512-bit registers
-def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
- sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
+def SGPR_512Regs : RegisterTuples<getSubRegs<16>.ret,
[(add (decimate SGPR_32, 4)),
(add (decimate (shl SGPR_32, 1), 4)),
(add (decimate (shl SGPR_32, 2), 4)),
@@ -190,47 +209,125 @@ def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32,
}
// Trap handler TMP 64-bit registers
-def TTMP_64Regs : RegisterTuples<[sub0, sub1],
+def TTMP_64Regs : RegisterTuples<getSubRegs<2>.ret,
[(add (decimate TTMP_32, 2)),
(add (decimate (shl TTMP_32, 1), 2))]>;
// Trap handler TMP 128-bit registers
-def TTMP_128Regs : RegisterTuples<[sub0, sub1, sub2, sub3],
+def TTMP_128Regs : RegisterTuples<getSubRegs<4>.ret,
[(add (decimate TTMP_32, 4)),
(add (decimate (shl TTMP_32, 1), 4)),
(add (decimate (shl TTMP_32, 2), 4)),
(add (decimate (shl TTMP_32, 3), 4))]>;
-class TmpRegTuples <string tgt,
- bit Is64Bit,
- int Index0,
- int Index1 = !add(Index0, 1),
- int Index2 = !add(Index0, !if(Is64Bit, 1, 2)),
- int Index3 = !add(Index0, !if(Is64Bit, 1, 3)),
- string name = "ttmp["#Index0#":"#Index3#"]",
- Register r0 = !cast<Register>("TTMP"#Index0#tgt),
- Register r1 = !cast<Register>("TTMP"#Index1#tgt),
- Register r2 = !cast<Register>("TTMP"#Index2#tgt),
- Register r3 = !cast<Register>("TTMP"#Index3#tgt)> :
- RegisterWithSubRegs<name, !if(Is64Bit, [r0, r1], [r0, r1, r2, r3])> {
- let SubRegIndices = !if(Is64Bit, [sub0, sub1], [sub0, sub1, sub2, sub3]);
- let HWEncoding = r0.HWEncoding;
-}
+def TTMP_256Regs : RegisterTuples<getSubRegs<8>.ret,
+ [(add (decimate TTMP_32, 4)),
+ (add (decimate (shl TTMP_32, 1), 4)),
+ (add (decimate (shl TTMP_32, 2), 4)),
+ (add (decimate (shl TTMP_32, 3), 4)),
+ (add (decimate (shl TTMP_32, 4), 4)),
+ (add (decimate (shl TTMP_32, 5), 4)),
+ (add (decimate (shl TTMP_32, 6), 4)),
+ (add (decimate (shl TTMP_32, 7), 4))]>;
+
+def TTMP_512Regs : RegisterTuples<getSubRegs<16>.ret,
+ [(add (decimate TTMP_32, 4)),
+ (add (decimate (shl TTMP_32, 1), 4)),
+ (add (decimate (shl TTMP_32, 2), 4)),
+ (add (decimate (shl TTMP_32, 3), 4)),
+ (add (decimate (shl TTMP_32, 4), 4)),
+ (add (decimate (shl TTMP_32, 5), 4)),
+ (add (decimate (shl TTMP_32, 6), 4)),
+ (add (decimate (shl TTMP_32, 7), 4)),
+ (add (decimate (shl TTMP_32, 8), 4)),
+ (add (decimate (shl TTMP_32, 9), 4)),
+ (add (decimate (shl TTMP_32, 10), 4)),
+ (add (decimate (shl TTMP_32, 11), 4)),
+ (add (decimate (shl TTMP_32, 12), 4)),
+ (add (decimate (shl TTMP_32, 13), 4)),
+ (add (decimate (shl TTMP_32, 14), 4)),
+ (add (decimate (shl TTMP_32, 15), 4))]>;
+
+class TmpRegTuplesBase<int index, int size,
+ list<Register> subRegs,
+ list<SubRegIndex> indices = getSubRegs<size>.ret,
+ int index1 = !add(index, !add(size, -1)),
+ string name = "ttmp["#index#":"#index1#"]"> :
+ RegisterWithSubRegs<name, subRegs> {
+ let HWEncoding = subRegs[0].HWEncoding;
+ let SubRegIndices = indices;
+}
+
+class TmpRegTuples<string tgt,
+ int size,
+ int index0,
+ int index1 = !add(index0, 1),
+ int index2 = !add(index0, !if(!eq(size, 2), 1, 2)),
+ int index3 = !add(index0, !if(!eq(size, 2), 1, 3)),
+ int index4 = !add(index0, !if(!eq(size, 8), 4, 1)),
+ int index5 = !add(index0, !if(!eq(size, 8), 5, 1)),
+ int index6 = !add(index0, !if(!eq(size, 8), 6, 1)),
+ int index7 = !add(index0, !if(!eq(size, 8), 7, 1)),
+ Register r0 = !cast<Register>("TTMP"#index0#tgt),
+ Register r1 = !cast<Register>("TTMP"#index1#tgt),
+ Register r2 = !cast<Register>("TTMP"#index2#tgt),
+ Register r3 = !cast<Register>("TTMP"#index3#tgt),
+ Register r4 = !cast<Register>("TTMP"#index4#tgt),
+ Register r5 = !cast<Register>("TTMP"#index5#tgt),
+ Register r6 = !cast<Register>("TTMP"#index6#tgt),
+ Register r7 = !cast<Register>("TTMP"#index7#tgt)> :
+ TmpRegTuplesBase<index0, size,
+ !if(!eq(size, 2), [r0, r1],
+ !if(!eq(size, 4), [r0, r1, r2, r3],
+ [r0, r1, r2, r3, r4, r5, r6, r7])),
+ getSubRegs<size>.ret>;
foreach Index = {0, 2, 4, 6, 8, 10, 12, 14} in {
- def TTMP#Index#_TTMP#!add(Index,1)#_vi : TmpRegTuples<"_vi", 1, Index>;
- def TTMP#Index#_TTMP#!add(Index,1)#_gfx9 : TmpRegTuples<"_gfx9", 1, Index>;
+ def TTMP#Index#_TTMP#!add(Index,1)#_vi : TmpRegTuples<"_vi", 2, Index>;
+ def TTMP#Index#_TTMP#!add(Index,1)#_gfx9 : TmpRegTuples<"_gfx9", 2, Index>;
}
foreach Index = {0, 4, 8, 12} in {
def TTMP#Index#_TTMP#!add(Index,1)#
_TTMP#!add(Index,2)#
- _TTMP#!add(Index,3)#_vi : TmpRegTuples<"_vi", 0, Index>;
+ _TTMP#!add(Index,3)#_vi : TmpRegTuples<"_vi", 4, Index>;
def TTMP#Index#_TTMP#!add(Index,1)#
_TTMP#!add(Index,2)#
- _TTMP#!add(Index,3)#_gfx9 : TmpRegTuples<"_gfx9", 0, Index>;
+ _TTMP#!add(Index,3)#_gfx9 : TmpRegTuples<"_gfx9", 4, Index>;
}
+foreach Index = {0, 4, 8} in {
+ def TTMP#Index#_TTMP#!add(Index,1)#
+ _TTMP#!add(Index,2)#
+ _TTMP#!add(Index,3)#
+ _TTMP#!add(Index,4)#
+ _TTMP#!add(Index,5)#
+ _TTMP#!add(Index,6)#
+ _TTMP#!add(Index,7)#_vi : TmpRegTuples<"_vi", 8, Index>;
+ def TTMP#Index#_TTMP#!add(Index,1)#
+ _TTMP#!add(Index,2)#
+ _TTMP#!add(Index,3)#
+ _TTMP#!add(Index,4)#
+ _TTMP#!add(Index,5)#
+ _TTMP#!add(Index,6)#
+ _TTMP#!add(Index,7)#_gfx9 : TmpRegTuples<"_gfx9", 8, Index>;
+}
+
+def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_vi :
+ TmpRegTuplesBase<0, 16,
+ [TTMP0_vi, TTMP1_vi, TTMP2_vi, TTMP3_vi,
+ TTMP4_vi, TTMP5_vi, TTMP6_vi, TTMP7_vi,
+ TTMP8_vi, TTMP9_vi, TTMP10_vi, TTMP11_vi,
+ TTMP12_vi, TTMP13_vi, TTMP14_vi, TTMP15_vi]>;
+
+def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_gfx9 :
+ TmpRegTuplesBase<0, 16,
+ [TTMP0_gfx9, TTMP1_gfx9, TTMP2_gfx9, TTMP3_gfx9,
+ TTMP4_gfx9, TTMP5_gfx9, TTMP6_gfx9, TTMP7_gfx9,
+ TTMP8_gfx9, TTMP9_gfx9, TTMP10_gfx9, TTMP11_gfx9,
+ TTMP12_gfx9, TTMP13_gfx9, TTMP14_gfx9, TTMP15_gfx9]>;
+
+
// VGPR 32-bit registers
// i16/f16 only on VI+
def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
@@ -240,25 +337,25 @@ def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
}
// VGPR 64-bit registers
-def VGPR_64 : RegisterTuples<[sub0, sub1],
+def VGPR_64 : RegisterTuples<getSubRegs<2>.ret,
[(add (trunc VGPR_32, 255)),
(add (shl VGPR_32, 1))]>;
// VGPR 96-bit registers
-def VGPR_96 : RegisterTuples<[sub0, sub1, sub2],
+def VGPR_96 : RegisterTuples<getSubRegs<3>.ret,
[(add (trunc VGPR_32, 254)),
(add (shl VGPR_32, 1)),
(add (shl VGPR_32, 2))]>;
// VGPR 128-bit registers
-def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
+def VGPR_128 : RegisterTuples<getSubRegs<4>.ret,
[(add (trunc VGPR_32, 253)),
(add (shl VGPR_32, 1)),
(add (shl VGPR_32, 2)),
(add (shl VGPR_32, 3))]>;
// VGPR 256-bit registers
-def VGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
+def VGPR_256 : RegisterTuples<getSubRegs<8>.ret,
[(add (trunc VGPR_32, 249)),
(add (shl VGPR_32, 1)),
(add (shl VGPR_32, 2)),
@@ -269,8 +366,7 @@ def VGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
(add (shl VGPR_32, 7))]>;
// VGPR 512-bit registers
-def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
- sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
+def VGPR_512 : RegisterTuples<getSubRegs<16>.ret,
[(add (trunc VGPR_32, 241)),
(add (shl VGPR_32, 1)),
(add (shl VGPR_32, 2)),
@@ -368,13 +464,31 @@ def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32,
} // End CopyCost = 2
-def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256)> {
+def SGPR_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256Regs)> {
+ let AllocationPriority = 11;
+}
+
+def TTMP_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add TTMP_256Regs)> {
+ let isAllocatable = 0;
+}
+
+def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32,
+ (add SGPR_256, TTMP_256)> {
// Requires 4 s_mov_b64 to copy
let CopyCost = 4;
let AllocationPriority = 11;
}
-def SReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add SGPR_512)> {
+def SGPR_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add SGPR_512Regs)> {
+ let AllocationPriority = 12;
+}
+
+def TTMP_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add TTMP_512Regs)> {
+ let isAllocatable = 0;
+}
+
+def SReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
+ (add SGPR_512, TTMP_512)> {
// Requires 8 s_mov_b64 to copy
let CopyCost = 8;
let AllocationPriority = 12;
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 819a7add0be4..125a3b22d0cf 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -667,6 +667,10 @@ bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7) \
CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11) \
CASE_VI_GFX9(TTMP12_TTMP13_TTMP14_TTMP15) \
+ CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
+ CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
+ CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
+ CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
}
#define CASE_CI_VI(node) \
diff --git a/lib/Target/ARC/ARCTargetMachine.cpp b/lib/Target/ARC/ARCTargetMachine.cpp
index d2512c281a61..1acae3a88870 100644
--- a/lib/Target/ARC/ARCTargetMachine.cpp
+++ b/lib/Target/ARC/ARCTargetMachine.cpp
@@ -88,8 +88,7 @@ extern "C" void LLVMInitializeARCTarget() {
RegisterTargetMachine<ARCTargetMachine> X(getTheARCTarget());
}
-TargetIRAnalysis ARCTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis([this](const Function &F) {
- return TargetTransformInfo(ARCTTIImpl(this, F));
- });
+TargetTransformInfo
+ARCTargetMachine::getTargetTransformInfo(const Function &F) {
+ return TargetTransformInfo(ARCTTIImpl(this, F));
}
diff --git a/lib/Target/ARC/ARCTargetMachine.h b/lib/Target/ARC/ARCTargetMachine.h
index 98021b3dc1d5..18117e3409af 100644
--- a/lib/Target/ARC/ARCTargetMachine.h
+++ b/lib/Target/ARC/ARCTargetMachine.h
@@ -40,7 +40,7 @@ public:
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- TargetIRAnalysis getTargetIRAnalysis() override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) override;
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 3aac689c6310..9ffb4c2055f9 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -61,6 +61,7 @@ void initializeARMLoadStoreOptPass(PassRegistry &);
void initializeARMPreAllocLoadStoreOptPass(PassRegistry &);
void initializeARMConstantIslandsPass(PassRegistry &);
void initializeARMExpandPseudoPass(PassRegistry &);
+void initializeThumb2SizeReducePass(PassRegistry &);
} // end namespace llvm
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index c1a3f639461d..c9766aa2161a 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -83,6 +83,9 @@ def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true",
def FeatureV7Clrex : SubtargetFeature<"v7clrex", "HasV7Clrex", "true",
"Has v7 clrex instruction">;
+def FeatureDFB : SubtargetFeature<"dfb", "HasFullDataBarrier", "true",
+ "Has full data barrier (dfb) instruction">;
+
def FeatureAcquireRelease : SubtargetFeature<"acquire-release",
"HasAcquireRelease", "true",
"Has v8 acquire/release (lda/ldaex "
@@ -617,6 +620,7 @@ def ARMv83a : Architecture<"armv8.3-a", "ARMv83a", [HasV8_3aOps,
def ARMv8r : Architecture<"armv8-r", "ARMv8r", [HasV8Ops,
FeatureRClass,
FeatureDB,
+ FeatureDFB,
FeatureDSP,
FeatureCRC,
FeatureMP,
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 0ea435062ec0..60048d4453d8 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -1416,7 +1416,7 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
case MVT::i8:
case MVT::i16:
needsExt = true;
- // Intentional fall-through.
+ LLVM_FALLTHROUGH;
case MVT::i32:
if (isThumb2) {
if (!UseImm)
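
[Editor's sketch] The intent comment becomes the LLVM_FALLTHROUGH annotation, which expands to [[fallthrough]] where available so -Wimplicit-fallthrough can verify the fall-through is deliberate. A standalone C++17 shape of the same switch:

    int widenCompare(int Bits, bool &NeedsExt) {
      switch (Bits) {
      case 8:
      case 16:
        NeedsExt = true;
        [[fallthrough]]; // deliberate: 8/16-bit compares use the i32 path
      case 32:
        return 32;
      default:
        return 0;
      }
    }
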
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 1b4d7ff50848..aeda7c06a27a 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -1041,7 +1041,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
if (!Subtarget->isThumb1Only())
setOperationAction(ISD::SETCCE, MVT::i32, Custom);
- setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+ setOperationAction(ISD::BRCOND, MVT::Other, Custom);
setOperationAction(ISD::BR_CC, MVT::i32, Custom);
setOperationAction(ISD::BR_CC, MVT::f32, Custom);
setOperationAction(ISD::BR_CC, MVT::f64, Custom);
@@ -1084,20 +1084,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
}
}
- // Combine sin / cos into one node or libcall if possible.
- if (Subtarget->hasSinCos()) {
- setLibcallName(RTLIB::SINCOS_F32, "sincosf");
- setLibcallName(RTLIB::SINCOS_F64, "sincos");
- if (Subtarget->isTargetWatchABI()) {
- setLibcallCallingConv(RTLIB::SINCOS_F32, CallingConv::ARM_AAPCS_VFP);
- setLibcallCallingConv(RTLIB::SINCOS_F64, CallingConv::ARM_AAPCS_VFP);
- }
- if (Subtarget->isTargetIOS() || Subtarget->isTargetWatchOS()) {
- // For iOS, we don't want to the normal expansion of a libcall to
- // sincos. We want to issue a libcall to __sincos_stret.
- setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
- setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
- }
+ // Use __sincos_stret if available.
+ if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
+ getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
+ setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
}
// FP-ARMv8 implements a lot of rounding-like FP operations.
@@ -1255,6 +1246,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::CMOV: return "ARMISD::CMOV";
case ARMISD::SSAT: return "ARMISD::SSAT";
+ case ARMISD::USAT: return "ARMISD::USAT";
case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
@@ -3902,6 +3894,10 @@ ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
}
+// This function returns three things: the arithmetic computation itself
+// (Value), a comparison (OverflowCmp), and a condition code (ARMcc). The
+// comparison and the condition code define the case in which the arithmetic
+// computation *does not* overflow.
std::pair<SDValue, SDValue>
ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
SDValue &ARMcc) const {
@@ -3927,7 +3923,11 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
break;
case ISD::UADDO:
ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
- Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
+ // We use ADDC here to correspond to its use in LowerUnsignedALUO.
+ // We do not use it in the USUBO case as Value may not be used.
+ Value = DAG.getNode(ARMISD::ADDC, dl,
+ DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS)
+ .getValue(0);
OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
break;
case ISD::SSUBO:
@@ -4205,7 +4205,7 @@ static bool isUpperSaturate(const SDValue LHS, const SDValue RHS,
((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal)));
}
-// Check if two chained conditionals could be converted into SSAT.
+// Check if two chained conditionals could be converted into SSAT or USAT.
//
// SSAT can replace a set of two conditional selectors that bound a number to an
// interval of type [k, ~k] when k + 1 is a power of 2. Here are some examples:
@@ -4216,10 +4216,14 @@ static bool isUpperSaturate(const SDValue LHS, const SDValue RHS,
// x < k ? (x < -k ? -k : x) : k
// etc.
//
+// USAT works similarly to SSAT but bounds to the interval [0, k] where k + 1
+// is a power of 2.
+//
// It returns true if the conversion can be done, false otherwise.
-// Additionally, the variable is returned in parameter V and the constant in K.
+// Additionally, the variable is returned in parameter V, the constant in K,
+// and usat is set to true if the conditional represents an unsigned
+// saturation.
static bool isSaturatingConditional(const SDValue &Op, SDValue &V,
- uint64_t &K) {
+ uint64_t &K, bool &usat) {
SDValue LHS1 = Op.getOperand(0);
SDValue RHS1 = Op.getOperand(1);
SDValue TrueVal1 = Op.getOperand(2);
@@ -4286,13 +4290,23 @@ static bool isSaturatingConditional(const SDValue &Op, SDValue &V,
int64_t Val1 = cast<ConstantSDNode>(*K1)->getSExtValue();
int64_t Val2 = cast<ConstantSDNode>(*K2)->getSExtValue();
int64_t PosVal = std::max(Val1, Val2);
+ int64_t NegVal = std::min(Val1, Val2);
if (((Val1 > Val2 && UpperCheckOp == &Op) ||
(Val1 < Val2 && UpperCheckOp == &Op2)) &&
- Val1 == ~Val2 && isPowerOf2_64(PosVal + 1)) {
+ isPowerOf2_64(PosVal + 1)) {
+
+ // Handle the difference between USAT (unsigned) and SSAT (signed) saturation
+ if (Val1 == ~Val2)
+ usat = false;
+ else if (NegVal == 0)
+ usat = true;
+ else
+ return false;
V = V2;
K = (uint64_t)PosVal; // At this point, PosVal is guaranteed to be positive
+
return true;
}
@@ -4306,10 +4320,16 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
// Try to convert two saturating conditional selects into a single SSAT
SDValue SatValue;
uint64_t SatConstant;
+ bool SatUSat;
if (((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2()) &&
- isSaturatingConditional(Op, SatValue, SatConstant))
- return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue,
- DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));
+ isSaturatingConditional(Op, SatValue, SatConstant, SatUSat)) {
+ if (SatUSat)
+ return DAG.getNode(ARMISD::USAT, dl, VT, SatValue,
+ DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));
+ else
+ return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue,
+ DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));
+ }
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
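The two clamp shapes isSaturatingConditional now accepts, sketched as standalone C++ (function names are illustrative, not from the patch; whether a single saturating instruction is actually emitted depends on the ARMv6+/Thumb2 check above):

    #include <algorithm>
    #include <cstdint>

    // Signed clamp to [k, ~k] with k + 1 a power of 2 (the Val1 == ~Val2
    // case): the existing SSAT pattern. Here k = -128, ~k = 127.
    int32_t clamp_s8(int32_t x) {
      return std::min(std::max(x, int32_t{-128}), int32_t{127});
    }

    // Unsigned clamp to [0, k] with k + 1 a power of 2 (the NegVal == 0
    // case): the newly recognized USAT pattern. Here k = 255.
    int32_t clamp_u8(int32_t x) {
      return std::min(std::max(x, int32_t{0}), int32_t{255});
    }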
@@ -4506,6 +4526,39 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
+SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Cond = Op.getOperand(1);
+ SDValue Dest = Op.getOperand(2);
+ SDLoc dl(Op);
+
+ // Optimize {s|u}{add|sub}.with.overflow feeding into a branch instruction.
+ unsigned Opc = Cond.getOpcode();
+ if (Cond.getResNo() == 1 && (Opc == ISD::SADDO || Opc == ISD::UADDO ||
+ Opc == ISD::SSUBO || Opc == ISD::USUBO)) {
+ // Only lower legal XALUO ops.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
+ return SDValue();
+
+ // The actual operation with overflow check.
+ SDValue Value, OverflowCmp;
+ SDValue ARMcc;
+ std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
+
+ // Reverse the condition code.
+ ARMCC::CondCodes CondCode =
+ (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
+ CondCode = ARMCC::getOppositeCondition(CondCode);
+ ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+
+ return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
+ OverflowCmp);
+ }
+
+ return SDValue();
+}
+
SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
@@ -4526,6 +4579,33 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
}
}
+ // Optimize {s|u}{add|sub}.with.overflow feeding into a branch instruction.
+ unsigned Opc = LHS.getOpcode();
+ if (LHS.getResNo() == 1 && (isOneConstant(RHS) || isNullConstant(RHS)) &&
+ (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
+ Opc == ISD::USUBO) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ // Only lower legal XALUO ops.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
+ return SDValue();
+
+ // The actual operation with overflow check.
+ SDValue Value, OverflowCmp;
+ SDValue ARMcc;
+ std::tie(Value, OverflowCmp) = getARMXALUOOp(LHS.getValue(0), DAG, ARMcc);
+
+ if ((CC == ISD::SETNE) != isOneConstant(RHS)) {
+ // Reverse the condition code.
+ ARMCC::CondCodes CondCode =
+ (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
+ CondCode = ARMCC::getOppositeCondition(CondCode);
+ ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
+ }
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+
+ return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
+ OverflowCmp);
+ }
+
if (LHS.getValueType() == MVT::i32) {
SDValue ARMcc;
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
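Both new branch hooks target the same source shape: an {s,u}{add,sub}.with.overflow whose flag feeds a branch. A hedged standalone illustration, assuming the usual int32_t == int mapping on ARM (checked_add is an illustrative name; __builtin_sadd_overflow is the Clang/GCC spelling of ISD::SADDO):

    #include <cstdint>

    int32_t checked_add(int32_t a, int32_t b) {
      int32_t r;
      if (__builtin_sadd_overflow(a, b, &r)) // overflow flag now branches
        return 0;                            // via ARMISD::BRCOND + CPSR,
      return r;                              // no materialized boolean
    }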
@@ -7523,10 +7603,9 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
Entry.IsZExt = false;
Args.push_back(Entry);
- const char *LibcallName =
- (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret";
RTLIB::Libcall LC =
- (ArgVT == MVT::f64) ? RTLIB::SINCOS_F64 : RTLIB::SINCOS_F32;
+ (ArgVT == MVT::f64) ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32;
+ const char *LibcallName = getLibcallName(LC);
CallingConv::ID CC = getLibcallCallingConv(LC);
SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
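With the subtarget hasSinCos predicate gone, this lowering keys purely on whether the SINCOS_STRET libcalls were registered (done for the Darwin targets elsewhere). The source shape the FSINCOS node merges, as portable C++ (the __sincos_stret call itself is emitted by the compiler, never written by hand):

    #include <cmath>

    void sin_cos(float x, float &s, float &c) {
      s = std::sin(x); // on targets providing __sincos_stret, these two
      c = std::cos(x); // calls can be merged into one combined libcall
    }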
@@ -7782,6 +7861,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::BR_CC: return LowerBR_CC(Op, DAG);
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
@@ -13751,7 +13831,7 @@ Instruction *ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
case AtomicOrdering::SequentiallyConsistent:
if (!Inst->hasAtomicStore())
return nullptr; // Nothing to do
- /*FALLTHROUGH*/
+ LLVM_FALLTHROUGH;
case AtomicOrdering::Release:
case AtomicOrdering::AcquireRelease:
if (Subtarget->preferISHSTBarriers())
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 0a1af8d89f9b..bf63dfae4407 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -87,6 +87,7 @@ class VectorType;
CMOV, // ARM conditional move instructions.
SSAT, // Signed saturation
+ USAT, // Unsigned saturation
BCC_i64,
@@ -643,6 +644,7 @@ class VectorType;
SDValue LowerUnsignedALUO(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 4e13af596300..eb8526bfeadf 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -139,6 +139,8 @@ def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
def ARMssatnoshift : SDNode<"ARMISD::SSAT", SDTIntSatNoShOp, []>;
+def ARMusatnoshift : SDNode<"ARMISD::USAT", SDTIntSatNoShOp, []>;
+
def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
@@ -278,6 +280,9 @@ def HasDSP : Predicate<"Subtarget->hasDSP()">,
def HasDB : Predicate<"Subtarget->hasDataBarrier()">,
AssemblerPredicate<"FeatureDB",
"data-barriers">;
+def HasDFB : Predicate<"Subtarget->hasFullDataBarrier()">,
+ AssemblerPredicate<"FeatureDFB",
+ "full-data-barrier">;
def HasV7Clrex : Predicate<"Subtarget->hasV7Clrex()">,
AssemblerPredicate<"FeatureV7Clrex",
"v7 clrex">;
@@ -3832,6 +3837,8 @@ def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm0_31:$pos),
(USAT imm0_31:$pos, GPRnopc:$a, 0)>;
def : ARMPat<(ARMssatnoshift GPRnopc:$Rn, imm0_31:$imm),
(SSAT imm0_31:$imm, GPRnopc:$Rn, 0)>;
+def : ARMPat<(ARMusatnoshift GPRnopc:$Rn, imm0_31:$imm),
+ (USAT imm0_31:$imm, GPRnopc:$Rn, 0)>;
def : ARMV6Pat<(int_arm_ssat16 GPRnopc:$a, imm1_16:$pos),
(SSAT16 imm1_16:$pos, GPRnopc:$a)>;
def : ARMV6Pat<(int_arm_usat16 GPRnopc:$a, imm0_15:$pos),
@@ -5846,6 +5853,8 @@ include "ARMInstrNEON.td"
def : InstAlias<"dmb", (DMB 0xf), 0>, Requires<[IsARM, HasDB]>;
def : InstAlias<"dsb", (DSB 0xf), 0>, Requires<[IsARM, HasDB]>;
def : InstAlias<"isb", (ISB 0xf), 0>, Requires<[IsARM, HasDB]>;
+// Armv8-R 'Data Full Barrier'
+def : InstAlias<"dfb", (DSB 0xc), 1>, Requires<[IsARM, HasDFB]>;
// System instructions
def : MnemonicAlias<"swi", "svc">;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 670ed127da7e..4592249f5795 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -2336,6 +2336,8 @@ def t2USAT16: T2SatI<(ins imm0_15:$sat_imm, rGPR:$Rn),
def : T2Pat<(ARMssatnoshift GPRnopc:$Rn, imm0_31:$imm),
(t2SSAT imm0_31:$imm, GPRnopc:$Rn, 0)>;
+def : T2Pat<(ARMusatnoshift GPRnopc:$Rn, imm0_31:$imm),
+ (t2USAT imm0_31:$imm, GPRnopc:$Rn, 0)>;
def : T2Pat<(int_arm_ssat GPR:$a, imm1_32:$pos),
(t2SSAT imm1_32:$pos, GPR:$a, 0)>;
def : T2Pat<(int_arm_usat GPR:$a, imm0_31:$pos),
@@ -4506,6 +4508,8 @@ def : t2InstAlias<"tst${p} $Rn, $Rm",
def : InstAlias<"dmb${p}", (t2DMB 0xf, pred:$p), 0>, Requires<[HasDB]>;
def : InstAlias<"dsb${p}", (t2DSB 0xf, pred:$p), 0>, Requires<[HasDB]>;
def : InstAlias<"isb${p}", (t2ISB 0xf, pred:$p), 0>, Requires<[HasDB]>;
+// Armv8-R 'Data Full Barrier'
+def : InstAlias<"dfb${p}", (t2DSB 0xc, pred:$p), 1>, Requires<[HasDFB]>;
// Alias for LDR, LDRB, LDRH, LDRSB, and LDRSH without the ".w" optional
// width specifier.
diff --git a/lib/Target/ARM/ARMInstructionSelector.cpp b/lib/Target/ARM/ARMInstructionSelector.cpp
index 6bbeae2e1151..b0fd0b476920 100644
--- a/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -669,13 +669,22 @@ bool ARMInstructionSelector::select(MachineInstr &I,
return true;
}
+ using namespace TargetOpcode;
+ if (I.getOpcode() == G_CONSTANT) {
+ // Pointer constants should be treated the same as 32-bit integer constants.
+ // Change the type and let TableGen handle it.
+ unsigned ResultReg = I.getOperand(0).getReg();
+ LLT Ty = MRI.getType(ResultReg);
+ if (Ty.isPointer())
+ MRI.setType(ResultReg, LLT::scalar(32));
+ }
+
if (selectImpl(I, CoverageInfo))
return true;
MachineInstrBuilder MIB{MF, I};
bool isSExt = false;
- using namespace TargetOpcode;
switch (I.getOpcode()) {
case G_SEXT:
isSExt = true;
@@ -741,6 +750,31 @@ bool ARMInstructionSelector::select(MachineInstr &I,
const auto &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
const auto &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
+ if (SrcRegBank.getID() == ARM::FPRRegBankID) {
+ // This should only happen in the obscure case where we have put a 64-bit
+ // integer into a D register. Get it out of there and keep only the
+ // interesting part.
+ assert(I.getOpcode() == G_TRUNC && "Unsupported operand for G_ANYEXT");
+ assert(DstRegBank.getID() == ARM::GPRRegBankID &&
+ "Unsupported combination of register banks");
+ assert(MRI.getType(SrcReg).getSizeInBits() == 64 && "Unsupported size");
+ assert(MRI.getType(DstReg).getSizeInBits() <= 32 && "Unsupported size");
+
+ unsigned IgnoredBits = MRI.createVirtualRegister(&ARM::GPRRegClass);
+ auto InsertBefore = std::next(I.getIterator());
+ auto MovI =
+ BuildMI(MBB, InsertBefore, I.getDebugLoc(), TII.get(ARM::VMOVRRD))
+ .addDef(DstReg)
+ .addDef(IgnoredBits)
+ .addUse(SrcReg)
+ .add(predOps(ARMCC::AL));
+ if (!constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI))
+ return false;
+
+ MIB->eraseFromParent();
+ return true;
+ }
+
if (SrcRegBank.getID() != DstRegBank.getID()) {
DEBUG(dbgs() << "G_TRUNC/G_ANYEXT operands on different register banks\n");
return false;
@@ -754,6 +788,28 @@ bool ARMInstructionSelector::select(MachineInstr &I,
I.setDesc(TII.get(COPY));
return selectCopy(I, TII, MRI, TRI, RBI);
}
+ case G_INTTOPTR:
+ case G_PTRTOINT: {
+ auto SrcReg = I.getOperand(1).getReg();
+ auto DstReg = I.getOperand(0).getReg();
+
+ const auto &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
+ const auto &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
+
+ if (SrcRegBank.getID() != DstRegBank.getID()) {
+ DEBUG(dbgs()
+ << "G_INTTOPTR/G_PTRTOINT operands on different register banks\n");
+ return false;
+ }
+
+ if (SrcRegBank.getID() != ARM::GPRRegBankID) {
+ DEBUG(dbgs() << "G_INTTOPTR/G_PTRTOINT on non-GPR not supported yet\n");
+ return false;
+ }
+
+ I.setDesc(TII.get(COPY));
+ return selectCopy(I, TII, MRI, TRI, RBI);
+ }
case G_SELECT:
return selectSelect(MIB, MRI);
case G_ICMP: {
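A miniature of the G_CONSTANT retyping at the top of this hunk, as standalone C++ (pointer_bits is an illustrative name): on 32-bit ARM a pointer constant is bit-identical to an s32 constant, so after MRI.setType the existing TableGen integer patterns select it unchanged.

    #include <cstdint>

    uint32_t pointer_bits(void *p) {
      // Lossless only where pointers are 32 bits wide (ILP32), which is
      // the assumption this snippet illustrates for 32-bit ARM.
      return static_cast<uint32_t>(reinterpret_cast<uintptr_t>(p));
    }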
diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp
index 2dd1dff64e87..8cff1f0869d0 100644
--- a/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -126,6 +126,12 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
setAction({Op, s32}, Legal);
}
+ setAction({G_INTTOPTR, p0}, Legal);
+ setAction({G_INTTOPTR, 1, s32}, Legal);
+
+ setAction({G_PTRTOINT, s32}, Legal);
+ setAction({G_PTRTOINT, 1, p0}, Legal);
+
for (unsigned Op : {G_ASHR, G_LSHR, G_SHL})
setAction({Op, s32}, Legal);
@@ -139,6 +145,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
setAction({G_BRCOND, s1}, Legal);
setAction({G_CONSTANT, s32}, Legal);
+ setAction({G_CONSTANT, p0}, Legal);
setLegalizeScalarToDifferentSizeStrategy(G_CONSTANT, 0, widen_1_8_16);
setAction({G_ICMP, s1}, Legal);
diff --git a/lib/Target/ARM/ARMRegisterBankInfo.cpp b/lib/Target/ARM/ARMRegisterBankInfo.cpp
index b32bfd449544..fad0e98285e6 100644
--- a/lib/Target/ARM/ARMRegisterBankInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterBankInfo.cpp
@@ -226,12 +226,30 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case G_SEXT:
case G_ZEXT:
case G_ANYEXT:
- case G_TRUNC:
case G_GEP:
+ case G_INTTOPTR:
+ case G_PTRTOINT:
// FIXME: We're abusing the fact that everything lives in a GPR for now; in
// the real world we would use different mappings.
OperandsMapping = &ARM::ValueMappings[ARM::GPR3OpsIdx];
break;
+ case G_TRUNC: {
+ // In some cases we may end up with a G_TRUNC from a 64-bit value to a
+ // 32-bit value. This isn't a real floating point trunc (that would be a
+ // G_FPTRUNC). Instead it is an integer trunc in disguise, which can appear
+ // because the legalizer doesn't distinguish between integer and floating
+ // point values so it may leave some 64-bit integers un-narrowed. Until we
+ // have a more principled solution that doesn't let such things sneak all
+ // the way to this point, just map the source to a DPR and the destination
+ // to a GPR.
+ LLT LargeTy = MRI.getType(MI.getOperand(1).getReg());
+ OperandsMapping =
+ LargeTy.getSizeInBits() <= 32
+ ? &ARM::ValueMappings[ARM::GPR3OpsIdx]
+ : getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx],
+ &ARM::ValueMappings[ARM::DPR3OpsIdx]});
+ break;
+ }
case G_LOAD:
case G_STORE: {
LLT Ty = MRI.getType(MI.getOperand(0).getReg());
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 4d4a88126ce6..23027e92481f 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -348,11 +348,6 @@ unsigned ARMSubtarget::getMispredictionPenalty() const {
return SchedModel.MispredictPenalty;
}
-bool ARMSubtarget::hasSinCos() const {
- return isTargetWatchOS() ||
- (isTargetIOS() && !getTargetTriple().isOSVersionLT(7, 0));
-}
-
bool ARMSubtarget::enableMachineScheduler() const {
// Enable the MachineScheduler before register allocation for subtargets
// with the use-misched feature.
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 9301197e1387..eedb675a3304 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -236,6 +236,10 @@ protected:
/// instructions.
bool HasDataBarrier = false;
+ /// HasFullDataBarrier - True if the subtarget supports DFB data barrier
+ /// instruction.
+ bool HasFullDataBarrier = false;
+
/// HasV7Clrex - True if the subtarget supports CLREX instructions
bool HasV7Clrex = false;
@@ -544,6 +548,7 @@ public:
bool hasDivideInThumbMode() const { return HasHardwareDivideInThumb; }
bool hasDivideInARMMode() const { return HasHardwareDivideInARM; }
bool hasDataBarrier() const { return HasDataBarrier; }
+ bool hasFullDataBarrier() const { return HasFullDataBarrier; }
bool hasV7Clrex() const { return HasV7Clrex; }
bool hasAcquireRelease() const { return HasAcquireRelease; }
@@ -712,10 +717,6 @@ public:
unsigned getMispredictionPenalty() const;
- /// This function returns true if the target has sincos() routine in its
- /// compiler runtime or math libraries.
- bool hasSinCos() const;
-
/// Returns true if machine scheduler should be enabled.
bool enableMachineScheduler() const override;
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 51982b2dab14..0f6d1eddc985 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -92,6 +92,7 @@ extern "C" void LLVMInitializeARMTarget() {
initializeARMConstantIslandsPass(Registry);
initializeARMExecutionDepsFixPass(Registry);
initializeARMExpandPseudoPass(Registry);
+ initializeThumb2SizeReducePass(Registry);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -282,10 +283,9 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
return I.get();
}
-TargetIRAnalysis ARMBaseTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis([this](const Function &F) {
- return TargetTransformInfo(ARMTTIImpl(this, F));
- });
+TargetTransformInfo
+ARMBaseTargetMachine::getTargetTransformInfo(const Function &F) {
+ return TargetTransformInfo(ARMTTIImpl(this, F));
}
ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT,
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index 655ec3202bfb..2072bb731f0a 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -53,8 +53,7 @@ public:
const ARMSubtarget *getSubtargetImpl() const = delete;
bool isLittleEndian() const { return isLittle; }
- /// \brief Get the TargetIRAnalysis for this target.
- TargetIRAnalysis getTargetIRAnalysis() override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) override;
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index cae01e415eff..43d7888075b5 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -394,25 +394,6 @@ int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
return 1;
}
-int ARMTTIImpl::getFPOpCost(Type *Ty) {
- // Use similar logic that's in ARMISelLowering:
- // Any ARM CPU with VFP2 has floating point, but Thumb1 didn't have access
- // to VFP.
-
- if (ST->hasVFP2() && !ST->isThumb1Only()) {
- if (Ty->isFloatTy()) {
- return TargetTransformInfo::TCC_Basic;
- }
-
- if (Ty->isDoubleTy()) {
- return ST->isFPOnlySP() ? TargetTransformInfo::TCC_Expensive :
- TargetTransformInfo::TCC_Basic;
- }
- }
-
- return TargetTransformInfo::TCC_Expensive;
-}
-
int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
// We only handle costs of reverse and alternate shuffles for now.
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h
index 99353a3219a0..cd9fa0709020 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -156,8 +156,6 @@ public:
int getAddressComputationCost(Type *Val, ScalarEvolution *SE,
const SCEV *Ptr);
- int getFPOpCost(Type *Ty);
-
int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
TTI::OperandValueKind Op1Info = TTI::OK_AnyValue,
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 26fda5f22b4f..97b642c99f80 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -5581,11 +5581,11 @@ void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
CanAcceptPredicationCode =
Mnemonic != "cdp2" && Mnemonic != "clrex" && Mnemonic != "mcr2" &&
Mnemonic != "mcrr2" && Mnemonic != "mrc2" && Mnemonic != "mrrc2" &&
- Mnemonic != "dmb" && Mnemonic != "dsb" && Mnemonic != "isb" &&
- Mnemonic != "pld" && Mnemonic != "pli" && Mnemonic != "pldw" &&
- Mnemonic != "ldc2" && Mnemonic != "ldc2l" && Mnemonic != "stc2" &&
- Mnemonic != "stc2l" && !Mnemonic.startswith("rfe") &&
- !Mnemonic.startswith("srs");
+ Mnemonic != "dmb" && Mnemonic != "dfb" && Mnemonic != "dsb" &&
+ Mnemonic != "isb" && Mnemonic != "pld" && Mnemonic != "pli" &&
+ Mnemonic != "pldw" && Mnemonic != "ldc2" && Mnemonic != "ldc2l" &&
+ Mnemonic != "stc2" && Mnemonic != "stc2l" &&
+ !Mnemonic.startswith("rfe") && !Mnemonic.startswith("srs");
} else if (isThumbOne()) {
if (hasV6MOps())
CanAcceptPredicationCode = Mnemonic != "movs";
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index a29a2eeccfe8..53c635877675 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -2386,6 +2386,7 @@ static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Insn,
case ARM::VLD4q32_UPD:
if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2)%32, Address, Decoder)))
return MCDisassembler::Fail;
+ break;
default:
break;
}
@@ -3326,6 +3327,7 @@ static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val,
case ARM::t2STRs:
if (Rn == 15)
return MCDisassembler::Fail;
+ break;
default:
break;
}
@@ -3391,6 +3393,7 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn,
break;
case ARM::t2LDRSBs:
Inst.setOpcode(ARM::t2PLIs);
+ break;
default:
break;
}
@@ -3854,6 +3857,7 @@ static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val,
case ARM::t2STRHi12:
if (Rn == 15)
return MCDisassembler::Fail;
+ break;
default:
break;
}
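The added break statements all fix the same bug class: a case label silently running into default:. Each fallthrough here was benign only because the following default: immediately breaks, but it trips -Wimplicit-fallthrough and invites regressions if default: ever grows a body. Minimal standalone shape (illustrative):

    int decode(int op, int &status) {
      switch (op) {
      case 1:
        status = -1;
        break;   // the kind of break these hunks add
      default:
        break;
      }
      return status;
    }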
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 3920c73fba6a..5357e26856ea 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -45,6 +45,7 @@
using namespace llvm;
#define DEBUG_TYPE "t2-reduce-size"
+#define THUMB2_SIZE_REDUCE_NAME "Thumb2 instruction size reduce pass"
STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones");
STATISTIC(Num2Addrs, "Number of 32-bit instrs reduced to 2addr 16-bit ones");
@@ -162,7 +163,7 @@ namespace {
const Thumb2InstrInfo *TII;
const ARMSubtarget *STI;
- Thumb2SizeReduce(std::function<bool(const Function &)> Ftor);
+ Thumb2SizeReduce(std::function<bool(const Function &)> Ftor = nullptr);
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -172,7 +173,7 @@ namespace {
}
StringRef getPassName() const override {
- return "Thumb2 instruction size reduction pass";
+ return THUMB2_SIZE_REDUCE_NAME;
}
private:
@@ -237,6 +238,9 @@ namespace {
} // end anonymous namespace
+INITIALIZE_PASS(Thumb2SizeReduce, DEBUG_TYPE, THUMB2_SIZE_REDUCE_NAME, false,
+ false)
+
Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor)
: MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) {
OptimizeSize = MinimizeSize = false;
diff --git a/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp b/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp
index 6f81e020b996..1f4ef098403d 100644
--- a/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp
+++ b/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp
@@ -56,7 +56,7 @@ void BPFInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
if (Op.isReg()) {
O << getRegisterName(Op.getReg());
} else if (Op.isImm()) {
- O << (int32_t)Op.getImm();
+ O << formatImm((int32_t)Op.getImm());
} else {
assert(Op.isExpr() && "Expected an expression");
printExpr(Op.getExpr(), O);
@@ -76,9 +76,9 @@ void BPFInstPrinter::printMemOperand(const MCInst *MI, int OpNo, raw_ostream &O,
if (OffsetOp.isImm()) {
auto Imm = OffsetOp.getImm();
if (Imm >= 0)
- O << " + " << formatDec(Imm);
+ O << " + " << formatImm(Imm);
else
- O << " - " << formatDec(-Imm);
+ O << " - " << formatImm(-Imm);
} else {
assert(0 && "Expected an immediate");
}
@@ -88,7 +88,7 @@ void BPFInstPrinter::printImm64Operand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isImm())
- O << (uint64_t)Op.getImm();
+ O << formatImm(Op.getImm());
else if (Op.isExpr())
printExpr(Op.getExpr(), O);
else
@@ -100,7 +100,7 @@ void BPFInstPrinter::printBrTargetOperand(const MCInst *MI, unsigned OpNo,
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isImm()) {
int16_t Imm = Op.getImm();
- O << ((Imm >= 0) ? "+" : "") << Imm;
+ O << ((Imm >= 0) ? "+" : "") << formatImm(Imm);
} else if (Op.isExpr()) {
printExpr(Op.getExpr(), O);
} else {
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
index 537f97c9a987..8b6c571dee02 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -756,11 +756,11 @@ struct ShuffleMask {
ShuffleMask lo() const {
size_t H = Mask.size()/2;
- return ShuffleMask({Mask.data(), H});
+ return ShuffleMask(Mask.take_front(H));
}
ShuffleMask hi() const {
size_t H = Mask.size()/2;
- return ShuffleMask({Mask.data()+H, H});
+ return ShuffleMask(Mask.take_back(H));
}
};
@@ -836,15 +836,6 @@ namespace llvm {
};
}
-// Return a submask of A that is shorter than A by |C| elements:
-// - if C > 0, return a submask of A that starts at position C,
-// - if C <= 0, return a submask of A that starts at 0 (reduce A by |C|).
-static ArrayRef<int> subm(ArrayRef<int> A, int C) {
- if (C > 0)
- return { A.data()+C, A.size()-C };
- return { A.data(), A.size()+C };
-}
-
static void splitMask(ArrayRef<int> Mask, MutableArrayRef<int> MaskL,
MutableArrayRef<int> MaskR) {
unsigned VecLen = Mask.size();
@@ -910,21 +901,38 @@ bool HvxSelector::selectVectorConstants(SDNode *N) {
// Since they are generated during the selection process, the main
// selection algorithm is not aware of them. Select them directly
// here.
- if (!N->isMachineOpcode() && N->getOpcode() == ISD::LOAD) {
- SDValue Addr = cast<LoadSDNode>(N)->getBasePtr();
- unsigned AddrOpc = Addr.getOpcode();
- if (AddrOpc == HexagonISD::AT_PCREL || AddrOpc == HexagonISD::CP) {
- if (Addr.getOperand(0).getOpcode() == ISD::TargetConstantPool) {
- ISel.Select(N);
- return true;
- }
+ SmallVector<SDNode*,4> Loads;
+ SmallVector<SDNode*,16> WorkQ;
+
+ // The DAG can change (due to CSE) during selection, so cache all the
+ // unselected nodes first to avoid traversing a mutating DAG.
+
+ auto IsLoadToSelect = [] (SDNode *N) {
+ if (!N->isMachineOpcode() && N->getOpcode() == ISD::LOAD) {
+ SDValue Addr = cast<LoadSDNode>(N)->getBasePtr();
+ unsigned AddrOpc = Addr.getOpcode();
+ if (AddrOpc == HexagonISD::AT_PCREL || AddrOpc == HexagonISD::CP)
+ if (Addr.getOperand(0).getOpcode() == ISD::TargetConstantPool)
+ return true;
}
+ return false;
+ };
+
+ WorkQ.push_back(N);
+ for (unsigned i = 0; i != WorkQ.size(); ++i) {
+ SDNode *W = WorkQ[i];
+ if (IsLoadToSelect(W)) {
+ Loads.push_back(W);
+ continue;
+ }
+ for (unsigned j = 0, f = W->getNumOperands(); j != f; ++j)
+ WorkQ.push_back(W->getOperand(j).getNode());
}
- bool Selected = false;
- for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I)
- Selected = selectVectorConstants(N->getOperand(I).getNode()) || Selected;
- return Selected;
+ for (SDNode *L : Loads)
+ ISel.Select(L);
+
+ return !Loads.empty();
}
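The restructuring above separates discovery from mutation. A standalone sketch of the same shape (Node is a stand-in for SDNode, not the LLVM type): an index-based worklist tolerates growth during iteration, and selection only runs after the walk finishes, so CSE inside Select cannot invalidate the traversal.

    #include <cstddef>
    #include <vector>

    struct Node {
      bool isTargetLoad = false;   // stands in for the PCREL/CP test above
      std::vector<Node *> ops;
    };

    std::vector<Node *> collectLoads(Node *root) {
      std::vector<Node *> workQ{root}, loads;
      for (std::size_t i = 0; i != workQ.size(); ++i) { // index stays valid
        Node *n = workQ[i];                             // while workQ grows
        if (n->isTargetLoad) {
          loads.push_back(n);
          continue;
        }
        for (Node *op : n->ops)
          workQ.push_back(op);
      }
      return loads; // the caller selects these afterwards
    }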
void HvxSelector::materialize(const ResultStack &Results) {
@@ -1159,8 +1167,8 @@ OpRef HvxSelector::vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
ResultStack &Results) {
DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
size_t S = Bytes.size() / 2;
- OpRef L = vmuxs({Bytes.data(), S}, OpRef::lo(Va), OpRef::lo(Vb), Results);
- OpRef H = vmuxs({Bytes.data()+S, S}, OpRef::hi(Va), OpRef::hi(Vb), Results);
+ OpRef L = vmuxs(Bytes.take_front(S), OpRef::lo(Va), OpRef::lo(Vb), Results);
+ OpRef H = vmuxs(Bytes.drop_front(S), OpRef::hi(Va), OpRef::hi(Vb), Results);
return concat(L, H, Results);
}
@@ -1435,7 +1443,7 @@ OpRef HvxSelector::contracting(ShuffleMask SM, OpRef Va, OpRef Vb,
return OpRef::fail();
// Examine the rest of the mask.
for (int I = L; I < N; I += L) {
- auto S = findStrip(subm(SM.Mask,I), 1, N-I);
+ auto S = findStrip(SM.Mask.drop_front(I), 1, N-I);
// Check whether the mask element at the beginning of each strip
// increases by 2L each time.
if (S.first - Strip.first != 2*I)
@@ -1465,7 +1473,7 @@ OpRef HvxSelector::contracting(ShuffleMask SM, OpRef Va, OpRef Vb,
std::pair<int,unsigned> PrevS = Strip;
bool Flip = false;
for (int I = L; I < N; I += L) {
- auto S = findStrip(subm(SM.Mask,I), 1, N-I);
+ auto S = findStrip(SM.Mask.drop_front(I), 1, N-I);
if (S.second != PrevS.second)
return OpRef::fail();
int Diff = Flip ? PrevS.first - S.first + 2*L
@@ -1524,7 +1532,7 @@ OpRef HvxSelector::expanding(ShuffleMask SM, OpRef Va, ResultStack &Results) {
// First, check the non-ignored strips.
for (int I = 2*L; I < 2*N; I += 2*L) {
- auto S = findStrip(subm(SM.Mask,I), 1, N-I);
+ auto S = findStrip(SM.Mask.drop_front(I), 1, N-I);
if (S.second != unsigned(L))
return OpRef::fail();
if (2*S.first != I)
@@ -1532,7 +1540,7 @@ OpRef HvxSelector::expanding(ShuffleMask SM, OpRef Va, ResultStack &Results) {
}
// Check the -1s.
for (int I = L; I < 2*N; I += 2*L) {
- auto S = findStrip(subm(SM.Mask,I), 0, N-I);
+ auto S = findStrip(SM.Mask.drop_front(I), 0, N-I);
if (S.first != -1 || S.second != unsigned(L))
return OpRef::fail();
}
@@ -1666,8 +1674,8 @@ OpRef HvxSelector::perfect(ShuffleMask SM, OpRef Va, ResultStack &Results) {
if (!isPowerOf2_32(X))
return OpRef::fail();
// Check the other segments of Mask.
- for (int J = 0; J < VecLen; J += I) {
- if (XorPow2(subm(SM.Mask, -J), I) != X)
+ for (int J = I; J < VecLen; J += I) {
+ if (XorPow2(SM.Mask.slice(J, I), I) != X)
return OpRef::fail();
}
Perm[Log2_32(X)] = Log2_32(I)-1;
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index 586363335df1..0e0da2ddc400 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -761,11 +761,13 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Promote the value if needed.
switch (VA.getLocInfo()) {
default:
- // Loc info must be one of Full, SExt, ZExt, or AExt.
+ // Loc info must be one of Full, BCvt, SExt, ZExt, or AExt.
llvm_unreachable("Unknown loc info!");
- case CCValAssign::BCvt:
case CCValAssign::Full:
break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getBitcast(VA.getLocVT(), Arg);
+ break;
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
break;
@@ -1135,6 +1137,8 @@ SDValue HexagonTargetLowering::LowerFormalArguments(
unsigned VReg =
RegInfo.createVirtualRegister(&Hexagon::IntRegsRegClass);
RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ if (VA.getLocInfo() == CCValAssign::BCvt)
+ RegVT = VA.getValVT();
SDValue Copy = DAG.getCopyFromReg(Chain, dl, VReg, RegVT);
// Treat values of type MVT::i1 specially: they are passed in
// registers of type i32, but they need to remain as values of
@@ -1155,6 +1159,8 @@ SDValue HexagonTargetLowering::LowerFormalArguments(
unsigned VReg =
RegInfo.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ if (VA.getLocInfo() == CCValAssign::BCvt)
+ RegVT = VA.getValVT();
InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
// Single Vector
@@ -1715,8 +1721,8 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::v4i1, &Hexagon::PredRegsRegClass); // ddccbbaa
addRegisterClass(MVT::v8i1, &Hexagon::PredRegsRegClass); // hgfedcba
addRegisterClass(MVT::i32, &Hexagon::IntRegsRegClass);
- addRegisterClass(MVT::v4i8, &Hexagon::IntRegsRegClass);
addRegisterClass(MVT::v2i16, &Hexagon::IntRegsRegClass);
+ addRegisterClass(MVT::v4i8, &Hexagon::IntRegsRegClass);
addRegisterClass(MVT::i64, &Hexagon::DoubleRegsRegClass);
addRegisterClass(MVT::v8i8, &Hexagon::DoubleRegsRegClass);
addRegisterClass(MVT::v4i16, &Hexagon::DoubleRegsRegClass);
@@ -1735,6 +1741,14 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::v128i8, &Hexagon::HvxWRRegClass);
addRegisterClass(MVT::v64i16, &Hexagon::HvxWRRegClass);
addRegisterClass(MVT::v32i32, &Hexagon::HvxWRRegClass);
+ // These "short" boolean vector types should be legal because
+ // they will appear as results of vector compares. If they were
+ // not legal, type legalization would try to make them legal
+ // and that would require using operations that do not use or
+ // produce such types. That, in turn, would imply using custom
+ // nodes, which would be unoptimizable by the DAG combiner.
+ // The idea is to rely on target-independent operations as much
+ // as possible.
addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass);
addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
@@ -1964,9 +1978,8 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
// Types natively supported:
- for (MVT NativeVT : {MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v32i1, MVT::v64i1,
- MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16, MVT::v1i32,
- MVT::v2i32, MVT::v1i64}) {
+ for (MVT NativeVT : {MVT::v32i1, MVT::v64i1, MVT::v4i8, MVT::v8i8, MVT::v2i16,
+ MVT::v4i16, MVT::v1i32, MVT::v2i32, MVT::v1i64}) {
setOperationAction(ISD::BUILD_VECTOR, NativeVT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, NativeVT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, NativeVT, Custom);
@@ -1992,63 +2005,6 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
AddPromotedToType(Opc, FromTy, ToTy);
};
- if (Subtarget.useHVXOps()) {
- bool Use64b = Subtarget.useHVX64BOps();
- ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128;
- ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128;
- MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8;
- MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8;
-
- setOperationAction(ISD::VECTOR_SHUFFLE, ByteV, Legal);
- setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal);
- setOperationAction(ISD::CONCAT_VECTORS, ByteW, Legal);
- setOperationAction(ISD::AND, ByteV, Legal);
- setOperationAction(ISD::OR, ByteV, Legal);
- setOperationAction(ISD::XOR, ByteV, Legal);
-
- for (MVT T : LegalV) {
- setIndexedLoadAction(ISD::POST_INC, T, Legal);
- setIndexedStoreAction(ISD::POST_INC, T, Legal);
-
- setOperationAction(ISD::ADD, T, Legal);
- setOperationAction(ISD::SUB, T, Legal);
- setOperationAction(ISD::VSELECT, T, Legal);
- if (T != ByteV) {
- setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal);
- setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal);
- }
-
- setOperationAction(ISD::MUL, T, Custom);
- setOperationAction(ISD::SETCC, T, Custom);
- setOperationAction(ISD::BUILD_VECTOR, T, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, T, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, T, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom);
- if (T != ByteV)
- setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, T, Custom);
- }
-
- for (MVT T : LegalV) {
- if (T == ByteV)
- continue;
- // Promote all shuffles and concats to operate on vectors of bytes.
- setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV);
- setPromoteTo(ISD::CONCAT_VECTORS, T, ByteV);
- setPromoteTo(ISD::AND, T, ByteV);
- setPromoteTo(ISD::OR, T, ByteV);
- setPromoteTo(ISD::XOR, T, ByteV);
- }
-
- for (MVT T : LegalW) {
- if (T == ByteW)
- continue;
- // Promote all shuffles and concats to operate on vectors of bytes.
- setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);
- setPromoteTo(ISD::CONCAT_VECTORS, T, ByteW);
- }
- }
-
// Subtarget-specific operation actions.
//
if (Subtarget.hasV5TOps()) {
@@ -2110,6 +2066,67 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setIndexedStoreAction(ISD::POST_INC, VT, Legal);
}
+ if (Subtarget.useHVXOps()) {
+ bool Use64b = Subtarget.useHVX64BOps();
+ ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128;
+ ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128;
+ MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8;
+ MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8;
+
+ setOperationAction(ISD::VECTOR_SHUFFLE, ByteV, Legal);
+ setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, ByteW, Legal);
+ setOperationAction(ISD::AND, ByteV, Legal);
+ setOperationAction(ISD::OR, ByteV, Legal);
+ setOperationAction(ISD::XOR, ByteV, Legal);
+
+ for (MVT T : LegalV) {
+ setIndexedLoadAction(ISD::POST_INC, T, Legal);
+ setIndexedStoreAction(ISD::POST_INC, T, Legal);
+
+ setOperationAction(ISD::ADD, T, Legal);
+ setOperationAction(ISD::SUB, T, Legal);
+ if (T != ByteV) {
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal);
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal);
+ }
+
+ setOperationAction(ISD::MUL, T, Custom);
+ setOperationAction(ISD::SETCC, T, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, T, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, T, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, T, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom);
+ if (T != ByteV)
+ setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, T, Custom);
+ }
+
+ for (MVT T : LegalV) {
+ if (T == ByteV)
+ continue;
+ // Promote all shuffles and concats to operate on vectors of bytes.
+ setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV);
+ setPromoteTo(ISD::CONCAT_VECTORS, T, ByteV);
+ setPromoteTo(ISD::AND, T, ByteV);
+ setPromoteTo(ISD::OR, T, ByteV);
+ setPromoteTo(ISD::XOR, T, ByteV);
+ }
+
+ for (MVT T : LegalW) {
+ // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
+ // independent) handling of it would convert it to a load, which is
+ // not always the optimal choice.
+ setOperationAction(ISD::BUILD_VECTOR, T, Custom);
+
+ if (T == ByteW)
+ continue;
+ // Promote all shuffles and concats to operate on vectors of bytes.
+ setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);
+ setPromoteTo(ISD::CONCAT_VECTORS, T, ByteW);
+ }
+ }
+
computeRegisterProperties(&HRI);
//
@@ -2256,6 +2273,7 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
case HexagonISD::VINSERTW0: return "HexagonISD::VINSERTW0";
case HexagonISD::VROR: return "HexagonISD::VROR";
case HexagonISD::READCYCLE: return "HexagonISD::READCYCLE";
+ case HexagonISD::VZERO: return "HexagonISD::VZERO";
case HexagonISD::OP_END: break;
}
return nullptr;
@@ -2331,14 +2349,27 @@ bool HexagonTargetLowering::isShuffleMaskLegal(ArrayRef<int> Mask,
TargetLoweringBase::LegalizeTypeAction
HexagonTargetLowering::getPreferredVectorAction(EVT VT) const {
+ if (VT.getVectorNumElements() == 1)
+ return TargetLoweringBase::TypeScalarizeVector;
+
+ // Always widen vectors of i1.
+ MVT ElemTy = VT.getSimpleVT().getVectorElementType();
+ if (ElemTy == MVT::i1)
+ return TargetLoweringBase::TypeWidenVector;
+
if (Subtarget.useHVXOps()) {
// If the size of VT is at least half of the vector length,
// widen the vector. Note: the threshold was not selected in
// any scientific way.
- if (VT.getSizeInBits() >= Subtarget.getVectorLength()*8/2)
- return TargetLoweringBase::TypeWidenVector;
+ ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes();
+ if (llvm::find(Tys, ElemTy) != Tys.end()) {
+ unsigned HwWidth = 8*Subtarget.getVectorLength();
+ unsigned VecWidth = VT.getSizeInBits();
+ if (VecWidth >= HwWidth/2 && VecWidth < HwWidth)
+ return TargetLoweringBase::TypeWidenVector;
+ }
}
- return TargetLowering::getPreferredVectorAction(VT);
+ return TargetLoweringBase::TypeSplitVector;
}
// Lower a vector shuffle (V1, V2, V3). V1 and V2 are the two vectors
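The new widening condition in getPreferredVectorAction, reduced to plain arithmetic (illustrative helper, not an LLVM API): for 128-byte HVX, HwWidth is 1024 bits, so a 512-bit vector of a native element type is widened, anything smaller is split, and a full 1024-bit vector is already legal so neither branch fires.

    bool shouldWidenToHvx(unsigned vecBits, unsigned hwLenBytes) {
      unsigned hwBits = 8 * hwLenBytes;          // HwWidth in the patch
      return vecBits >= hwBits / 2 && vecBits < hwBits;
    }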
@@ -2463,21 +2494,43 @@ HexagonTargetLowering::LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::BITCAST, dl, VT, Result);
}
+bool
+HexagonTargetLowering::getBuildVectorConstInts(ArrayRef<SDValue> Values,
+ MVT VecTy, SelectionDAG &DAG,
+ MutableArrayRef<ConstantInt*> Consts) const {
+ MVT ElemTy = VecTy.getVectorElementType();
+ unsigned ElemWidth = ElemTy.getSizeInBits();
+ IntegerType *IntTy = IntegerType::get(*DAG.getContext(), ElemWidth);
+ bool AllConst = true;
+
+ for (unsigned i = 0, e = Values.size(); i != e; ++i) {
+ SDValue V = Values[i];
+ if (V.isUndef()) {
+ Consts[i] = ConstantInt::get(IntTy, 0);
+ continue;
+ }
+ if (auto *CN = dyn_cast<ConstantSDNode>(V.getNode())) {
+ const ConstantInt *CI = CN->getConstantIntValue();
+ Consts[i] = const_cast<ConstantInt*>(CI);
+ } else if (auto *CN = dyn_cast<ConstantFPSDNode>(V.getNode())) {
+ const ConstantFP *CF = CN->getConstantFPValue();
+ APInt A = CF->getValueAPF().bitcastToAPInt();
+ Consts[i] = ConstantInt::get(IntTy, A.getZExtValue());
+ } else {
+ AllConst = false;
+ }
+ }
+ return AllConst;
+}
+
SDValue
HexagonTargetLowering::buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl,
MVT VecTy, SelectionDAG &DAG) const {
MVT ElemTy = VecTy.getVectorElementType();
assert(VecTy.getVectorNumElements() == Elem.size());
- SmallVector<ConstantSDNode*,4> Consts;
- bool AllConst = true;
- for (SDValue V : Elem) {
- if (isUndef(V))
- V = DAG.getConstant(0, dl, ElemTy);
- auto *C = dyn_cast<ConstantSDNode>(V.getNode());
- Consts.push_back(C);
- AllConst = AllConst && C != nullptr;
- }
+ SmallVector<ConstantInt*,4> Consts(Elem.size());
+ bool AllConst = getBuildVectorConstInts(Elem, VecTy, DAG, Consts);
unsigned First, Num = Elem.size();
for (First = 0; First != Num; ++First)
@@ -2486,6 +2539,10 @@ HexagonTargetLowering::buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl,
if (First == Num)
return DAG.getUNDEF(VecTy);
+ if (AllConst &&
+ llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); }))
+ return getZero(dl, VecTy, DAG);
+
if (ElemTy == MVT::i16) {
assert(Elem.size() == 2);
if (AllConst) {
@@ -2498,45 +2555,55 @@ HexagonTargetLowering::buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl,
return DAG.getBitcast(MVT::v2i16, N);
}
- // First try generating a constant.
- assert(ElemTy == MVT::i8 && Num == 4);
- if (AllConst) {
- int32_t V = (Consts[0]->getZExtValue() & 0xFF) |
- (Consts[1]->getZExtValue() & 0xFF) << 8 |
- (Consts[1]->getZExtValue() & 0xFF) << 16 |
- Consts[2]->getZExtValue() << 24;
- return DAG.getBitcast(MVT::v4i8, DAG.getConstant(V, dl, MVT::i32));
- }
+ if (ElemTy == MVT::i8) {
+ // First try generating a constant.
+ if (AllConst) {
+ int32_t V = (Consts[0]->getZExtValue() & 0xFF) |
+ (Consts[1]->getZExtValue() & 0xFF) << 8 |
+ (Consts[1]->getZExtValue() & 0xFF) << 16 |
+ Consts[2]->getZExtValue() << 24;
+ return DAG.getBitcast(MVT::v4i8, DAG.getConstant(V, dl, MVT::i32));
+ }
- // Then try splat.
- bool IsSplat = true;
- for (unsigned i = 0; i != Num; ++i) {
- if (i == First)
- continue;
- if (Elem[i] == Elem[First] || isUndef(Elem[i]))
- continue;
- IsSplat = false;
- break;
- }
- if (IsSplat)
- return DAG.getNode(HexagonISD::VSPLAT, dl, VecTy, Elem[First]);
+ // Then try splat.
+ bool IsSplat = true;
+ for (unsigned i = 0; i != Num; ++i) {
+ if (i == First)
+ continue;
+ if (Elem[i] == Elem[First] || isUndef(Elem[i]))
+ continue;
+ IsSplat = false;
+ break;
+ }
+ if (IsSplat) {
+ // Legalize the operand to VSPLAT.
+ SDValue Ext = DAG.getZExtOrTrunc(Elem[First], dl, MVT::i32);
+ return DAG.getNode(HexagonISD::VSPLAT, dl, VecTy, Ext);
+ }
- // Generate
- // (zxtb(Elem[0]) | (zxtb(Elem[1]) << 8)) |
- // (zxtb(Elem[2]) | (zxtb(Elem[3]) << 8)) << 16
- SDValue S8 = DAG.getConstant(8, dl, MVT::i32);
- SDValue V0 = DAG.getZeroExtendInReg(Elem[0], dl, MVT::i8);
- SDValue V1 = DAG.getZeroExtendInReg(Elem[1], dl, MVT::i8);
- SDValue V2 = DAG.getZeroExtendInReg(Elem[2], dl, MVT::i8);
- SDValue V3 = DAG.getZeroExtendInReg(Elem[3], dl, MVT::i8);
+ // Generate
+ // (zxtb(Elem[0]) | (zxtb(Elem[1]) << 8)) |
+ // (zxtb(Elem[2]) | (zxtb(Elem[3]) << 8)) << 16
+ assert(Elem.size() == 4);
+ SDValue Vs[4];
+ for (unsigned i = 0; i != 4; ++i) {
+ Vs[i] = DAG.getZExtOrTrunc(Elem[i], dl, MVT::i32);
+ Vs[i] = DAG.getZeroExtendInReg(Vs[i], dl, MVT::i8);
+ }
+ SDValue S8 = DAG.getConstant(8, dl, MVT::i32);
+ SDValue T0 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Vs[1], S8});
+ SDValue T1 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Vs[3], S8});
+ SDValue B0 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[0], T0});
+ SDValue B1 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[2], T1});
- SDValue V4 = DAG.getNode(ISD::SHL, dl, MVT::i32, {V1, S8});
- SDValue V5 = DAG.getNode(ISD::SHL, dl, MVT::i32, {V3, S8});
- SDValue V6 = DAG.getNode(ISD::OR, dl, MVT::i32, {V0, V4});
- SDValue V7 = DAG.getNode(ISD::OR, dl, MVT::i32, {V2, V5});
+ SDValue R = getNode(Hexagon::A2_combine_ll, dl, MVT::i32, {B1, B0}, DAG);
+ return DAG.getBitcast(MVT::v4i8, R);
+ }
- SDValue T0 = getNode(Hexagon::A2_combine_ll, dl, MVT::i32, {V7, V6}, DAG);
- return DAG.getBitcast(MVT::v4i8, T0);
+#ifndef NDEBUG
+ dbgs() << "VecTy: " << EVT(VecTy).getEVTString() << '\n';
+#endif
+ llvm_unreachable("Unexpected vector element type");
}
SDValue
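The non-constant v4i8 path above, restated as scalar arithmetic (pack_v4i8 is an illustrative name): two byte-or-shift halfwords whose final combine maps onto A2_combine_ll.

    #include <cstdint>

    uint32_t pack_v4i8(uint8_t e0, uint8_t e1, uint8_t e2, uint8_t e3) {
      uint32_t b0 = e0 | (uint32_t)e1 << 8;  // low halfword  (B0 above)
      uint32_t b1 = e2 | (uint32_t)e3 << 8;  // high halfword (B1 above)
      return b0 | b1 << 16;                  // A2_combine_ll(B1, B0)
    }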
@@ -2545,15 +2612,8 @@ HexagonTargetLowering::buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl,
MVT ElemTy = VecTy.getVectorElementType();
assert(VecTy.getVectorNumElements() == Elem.size());
- SmallVector<ConstantSDNode*,8> Consts;
- bool AllConst = true;
- for (SDValue V : Elem) {
- if (isUndef(V))
- V = DAG.getConstant(0, dl, ElemTy);
- auto *C = dyn_cast<ConstantSDNode>(V.getNode());
- Consts.push_back(C);
- AllConst = AllConst && C != nullptr;
- }
+ SmallVector<ConstantInt*,8> Consts(Elem.size());
+ bool AllConst = getBuildVectorConstInts(Elem, VecTy, DAG, Consts);
unsigned First, Num = Elem.size();
for (First = 0; First != Num; ++First)
@@ -2562,6 +2622,10 @@ HexagonTargetLowering::buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl,
if (First == Num)
return DAG.getUNDEF(VecTy);
+ if (AllConst &&
+ llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); }))
+ return getZero(dl, VecTy, DAG);
+
// First try splat if possible.
if (ElemTy == MVT::i16) {
bool IsSplat = true;
@@ -2573,8 +2637,11 @@ HexagonTargetLowering::buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl,
IsSplat = false;
break;
}
- if (IsSplat)
- return DAG.getNode(HexagonISD::VSPLAT, dl, VecTy, Elem[First]);
+ if (IsSplat) {
+ // Legalize the operand to VSPLAT.
+ SDValue Ext = DAG.getZExtOrTrunc(Elem[First], dl, MVT::i32);
+ return DAG.getNode(HexagonISD::VSPLAT, dl, VecTy, Ext);
+ }
}
// Then try constant.
@@ -2593,10 +2660,10 @@ HexagonTargetLowering::buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl,
MVT HalfTy = MVT::getVectorVT(ElemTy, Num/2);
SDValue L = (ElemTy == MVT::i32)
? Elem[0]
- : buildVector32({Elem.data(), Num/2}, dl, HalfTy, DAG);
+ : buildVector32(Elem.take_front(Num/2), dl, HalfTy, DAG);
SDValue H = (ElemTy == MVT::i32)
? Elem[1]
- : buildVector32({Elem.data()+Num/2, Num/2}, dl, HalfTy, DAG);
+ : buildVector32(Elem.drop_front(Num/2), dl, HalfTy, DAG);
return DAG.getNode(HexagonISD::COMBINE, dl, VecTy, {H, L});
}
@@ -2696,21 +2763,41 @@ HexagonTargetLowering::insertVector(SDValue VecV, SDValue ValV, SDValue IdxV,
}
SDValue
+HexagonTargetLowering::getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG)
+ const {
+ if (Ty.isVector()) {
+ assert(Ty.isInteger() && "Only integer vectors are supported here");
+ unsigned W = Ty.getSizeInBits();
+ if (W <= 64)
+ return DAG.getBitcast(Ty, DAG.getConstant(0, dl, MVT::getIntegerVT(W)));
+ return DAG.getNode(HexagonISD::VZERO, dl, Ty);
+ }
+
+ if (Ty.isInteger())
+ return DAG.getConstant(0, dl, Ty);
+ if (Ty.isFloatingPoint())
+ return DAG.getConstantFP(0.0, dl, Ty);
+ llvm_unreachable("Invalid type for zero");
+}
+
+SDValue
HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
MVT VecTy = ty(Op);
unsigned BW = VecTy.getSizeInBits();
+
+ if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(VecTy, true))
+ return LowerHvxBuildVector(Op, DAG);
+
if (BW == 32 || BW == 64) {
+ const SDLoc &dl(Op);
SmallVector<SDValue,8> Ops;
for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i)
Ops.push_back(Op.getOperand(i));
if (BW == 32)
- return buildVector32(Ops, SDLoc(Op), VecTy, DAG);
- return buildVector64(Ops, SDLoc(Op), VecTy, DAG);
+ return buildVector32(Ops, dl, VecTy, DAG);
+ return buildVector64(Ops, dl, VecTy, DAG);
}
- if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(VecTy))
- return LowerHvxBuildVector(Op, DAG);
-
return SDValue();
}
@@ -2822,7 +2909,7 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
#ifndef NDEBUG
Op.getNode()->dumpr(&DAG);
if (Opc > HexagonISD::OP_BEGIN && Opc < HexagonISD::OP_END)
- errs() << "Check for a non-legal type in this operation\n";
+ errs() << "Error: check for a non-legal type in this operation\n";
#endif
llvm_unreachable("Should not custom lower this!");
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index 0619e2e4e7f9..732834b464b4 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -70,6 +70,7 @@ namespace HexagonISD {
EH_RETURN,
DCFETCH,
READCYCLE,
+ VZERO,
OP_END
};
@@ -283,6 +284,9 @@ namespace HexagonISD {
}
private:
+ bool getBuildVectorConstInts(ArrayRef<SDValue> Values, MVT VecTy,
+ SelectionDAG &DAG,
+ MutableArrayRef<ConstantInt*> Consts) const;
SDValue buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl, MVT VecTy,
SelectionDAG &DAG) const;
SDValue buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl, MVT VecTy,
@@ -301,6 +305,7 @@ namespace HexagonISD {
SDNode *N = DAG.getMachineNode(MachineOpc, dl, Ty, Ops);
return SDValue(N, 0);
}
+ SDValue getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG) const;
using VectorPair = std::pair<SDValue, SDValue>;
using TypePair = std::pair<MVT, MVT>;
@@ -344,6 +349,13 @@ namespace HexagonISD {
SDValue getByteShuffle(const SDLoc &dl, SDValue Op0, SDValue Op1,
ArrayRef<int> Mask, SelectionDAG &DAG) const;
+ MVT getVecBoolVT() const;
+
+ SDValue buildHvxVectorSingle(ArrayRef<SDValue> Values, const SDLoc &dl,
+ MVT VecTy, SelectionDAG &DAG) const;
+ SDValue buildHvxVectorPred(ArrayRef<SDValue> Values, const SDLoc &dl,
+ MVT VecTy, SelectionDAG &DAG) const;
+
SDValue LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index c1d44cb0e7de..51480d09d734 100644
--- a/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -141,49 +141,50 @@ HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0,
opCastElem(Op1, MVT::i8, DAG), ByteMask);
}
+MVT
+HexagonTargetLowering::getVecBoolVT() const {
+ return MVT::getVectorVT(MVT::i1, 8*Subtarget.getVectorLength());
+}
+
SDValue
-HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
- const {
- const SDLoc &dl(Op);
- BuildVectorSDNode *BN = cast<BuildVectorSDNode>(Op.getNode());
- bool IsConst = BN->isConstant();
+HexagonTargetLowering::buildHvxVectorSingle(ArrayRef<SDValue> Values,
+ const SDLoc &dl, MVT VecTy,
+ SelectionDAG &DAG) const {
+ unsigned VecLen = Values.size();
MachineFunction &MF = DAG.getMachineFunction();
- MVT VecTy = ty(Op);
+ MVT ElemTy = VecTy.getVectorElementType();
+ unsigned ElemWidth = ElemTy.getSizeInBits();
+ unsigned HwLen = Subtarget.getVectorLength();
- if (IsConst) {
- SmallVector<Constant*, 128> Elems;
- for (SDValue V : BN->op_values()) {
- if (auto *C = dyn_cast<ConstantSDNode>(V.getNode()))
- Elems.push_back(const_cast<ConstantInt*>(C->getConstantIntValue()));
- }
- Constant *CV = ConstantVector::get(Elems);
- unsigned Align = VecTy.getSizeInBits() / 8;
+ SmallVector<ConstantInt*, 128> Consts(VecLen);
+ bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts);
+ if (AllConst) {
+ if (llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); }))
+ return getZero(dl, VecTy, DAG);
+
+ ArrayRef<Constant*> Tmp((Constant**)Consts.begin(),
+ (Constant**)Consts.end());
+ Constant *CV = ConstantVector::get(Tmp);
+ unsigned Align = HwLen;
SDValue CP = LowerConstantPool(DAG.getConstantPool(CV, VecTy, Align), DAG);
return DAG.getLoad(VecTy, dl, DAG.getEntryNode(), CP,
MachinePointerInfo::getConstantPool(MF), Align);
}
- unsigned NumOps = Op.getNumOperands();
- unsigned HwLen = Subtarget.getVectorLength();
- unsigned ElemSize = VecTy.getVectorElementType().getSizeInBits() / 8;
- assert(ElemSize*NumOps == HwLen);
-
+ unsigned ElemSize = ElemWidth / 8;
+ assert(ElemSize*VecLen == HwLen);
SmallVector<SDValue,32> Words;
- SmallVector<SDValue,32> Ops;
- for (unsigned i = 0; i != NumOps; ++i)
- Ops.push_back(Op.getOperand(i));
if (VecTy.getVectorElementType() != MVT::i32) {
- assert(ElemSize < 4 && "vNi64 should have been promoted to vNi32");
assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size");
unsigned OpsPerWord = (ElemSize == 1) ? 4 : 2;
MVT PartVT = MVT::getVectorVT(VecTy.getVectorElementType(), OpsPerWord);
- for (unsigned i = 0; i != NumOps; i += OpsPerWord) {
- SDValue W = buildVector32({&Ops[i], OpsPerWord}, dl, PartVT, DAG);
+ for (unsigned i = 0; i != VecLen; i += OpsPerWord) {
+ SDValue W = buildVector32(Values.slice(i, OpsPerWord), dl, PartVT, DAG);
Words.push_back(DAG.getBitcast(MVT::i32, W));
}
} else {
- Words.assign(Ops.begin(), Ops.end());
+ Words.assign(Values.begin(), Values.end());
}
// Construct two halves in parallel, then or them together.
@@ -208,6 +209,83 @@ HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
}
SDValue
+HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values,
+ const SDLoc &dl, MVT VecTy,
+ SelectionDAG &DAG) const {
+ // Construct a vector V of bytes, such that a comparison V >u 0 would
+ // produce the required vector predicate.
+ unsigned VecLen = Values.size();
+ unsigned HwLen = Subtarget.getVectorLength();
+ assert(VecLen <= HwLen || VecLen == 8*HwLen);
+ SmallVector<SDValue,128> Bytes;
+
+ if (VecLen <= HwLen) {
+ // In the hardware, each bit of a vector predicate corresponds to a byte
+    // of a vector register. Calculate how many bytes a bit of VecTy
+    // corresponds to.
+ assert(HwLen % VecLen == 0);
+ unsigned BitBytes = HwLen / VecLen;
+ for (SDValue V : Values) {
+ SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(V, dl, MVT::i8)
+ : DAG.getConstant(0, dl, MVT::i8);
+ for (unsigned B = 0; B != BitBytes; ++B)
+ Bytes.push_back(Ext);
+ }
+ } else {
+    // There are as many i1 values as there are bits in a vector register.
+ // Divide the values into groups of 8 and check that each group consists
+ // of the same value (ignoring undefs).
+ for (unsigned I = 0; I != VecLen; I += 8) {
+ unsigned B = 0;
+ // Find the first non-undef value in this group.
+ for (; B != 8; ++B) {
+ if (!Values[I+B].isUndef())
+ break;
+ }
+ SDValue F = Values[I+B];
+ SDValue Ext = (B < 8) ? DAG.getZExtOrTrunc(F, dl, MVT::i8)
+ : DAG.getConstant(0, dl, MVT::i8);
+ Bytes.push_back(Ext);
+ // Verify that the rest of values in the group are the same as the
+ // first.
+ for (; B != 8; ++B)
+ assert(Values[I+B].isUndef() || Values[I+B] == F);
+ }
+ }
+
+ MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
+ SDValue ByteVec = buildHvxVectorSingle(Bytes, dl, ByteTy, DAG);
+ SDValue Cmp = DAG.getSetCC(dl, VecTy, ByteVec, getZero(dl, ByteTy, DAG),
+ ISD::SETUGT);
+ return Cmp;
+}
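// Illustrative sketch (assumes HwLen = 64, i.e. 64-byte HVX mode, and a
// v16i1 build vector): each i1 lane covers BitBytes = HwLen/VecLen = 4
// bytes, so the byte vector replicates every lane four times:
//   Values = {1, 0, 1, ...}                       // 16 i1 operands
//   Bytes  = {1,1,1,1, 0,0,0,0, 1,1,1,1, ...}     // 64 bytes
// The predicate is then recovered by the SETUGT comparison above, since
// (Bytes >u 0) reproduces exactly the original bit pattern.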
+
+SDValue
+HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
+ const {
+ const SDLoc &dl(Op);
+ MVT VecTy = ty(Op);
+
+ unsigned Size = Op.getNumOperands();
+ SmallVector<SDValue,128> Ops;
+ for (unsigned i = 0; i != Size; ++i)
+ Ops.push_back(Op.getOperand(i));
+
+ if (VecTy.getVectorElementType() == MVT::i1)
+ return buildHvxVectorPred(Ops, dl, VecTy, DAG);
+
+ if (VecTy.getSizeInBits() == 16*Subtarget.getVectorLength()) {
+ ArrayRef<SDValue> A(Ops);
+ MVT SingleTy = typeSplit(VecTy).first;
+ SDValue V0 = buildHvxVectorSingle(A.take_front(Size/2), dl, SingleTy, DAG);
+ SDValue V1 = buildHvxVectorSingle(A.drop_front(Size/2), dl, SingleTy, DAG);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1);
+ }
+
+ return buildHvxVectorSingle(Ops, dl, VecTy, DAG);
+}
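// Illustrative sketch (assumes HwLen = 128, so a single HVX register is
// v32i32 and a register pair is v64i32): a 64-operand i32 build vector
// takes the splitting path above as
//   SDValue V0 = buildHvxVectorSingle(A.take_front(32), dl, v32i32, DAG);
//   SDValue V1 = buildHvxVectorSingle(A.drop_front(32), dl, v32i32, DAG);
//   DAG.getNode(ISD::CONCAT_VECTORS, dl, v64i32, V0, V1);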
+
+SDValue
HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG)
const {
// Change the type of the extracted element to i32.
@@ -399,6 +477,10 @@ HexagonTargetLowering::LowerHvxSetCC(SDValue Op, SelectionDAG &DAG) const {
// (negate (swap-op NewCmp)),
// the condition code for the NewCmp should be calculated from the original
// CC by applying these operations in the reverse order.
+ //
+ // This could also be done through setCondCodeAction, but for negation it
+ // uses an xor with a vector of -1s, which it obtains from BUILD_VECTOR.
+ // That is far too expensive for what can be done with a single instruction.
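// Illustrative example of the reversed derivation (a sketch; it assumes
// the HVX compares provide only eq/gt/gtu, so other predicates must be
// synthesized):
//   A >=u B  ==  !(A <u B)  ==  !(B >u A)
// so SETUGE lowers as (negate (swap-op SETUGT)), and the switch below
// recovers SETUGT for NewCmp by undoing the negate and the swap.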
switch (CC) {
case ISD::SETNE: // !eq
diff --git a/lib/Target/Hexagon/HexagonPatterns.td b/lib/Target/Hexagon/HexagonPatterns.td
index e2120d3de2ef..cdc2085986a5 100644
--- a/lib/Target/Hexagon/HexagonPatterns.td
+++ b/lib/Target/Hexagon/HexagonPatterns.td
@@ -2899,6 +2899,8 @@ def HexagonREADCYCLE: SDNode<"HexagonISD::READCYCLE", SDTInt64Leaf,
def: Pat<(HexagonREADCYCLE), (A4_tfrcpp UPCYCLE)>;
+def SDTVecLeaf: SDTypeProfile<1, 0, [SDTCisVec<0>]>;
+
def SDTHexagonVEXTRACTW: SDTypeProfile<1, 2,
[SDTCisVT<0, i32>, SDTCisVec<1>, SDTCisVT<2, i32>]>;
def HexagonVEXTRACTW : SDNode<"HexagonISD::VEXTRACTW", SDTHexagonVEXTRACTW>;
@@ -2920,7 +2922,14 @@ let Predicates = [UseHVX] in {
def: OpR_RR_pat<V6_vpackoh, pf2<HexagonVPACKO>, VecI16, HVI16>;
}
+def HexagonVZERO: SDNode<"HexagonISD::VZERO", SDTVecLeaf>;
+def vzero: PatFrag<(ops), (HexagonVZERO)>;
+
let Predicates = [UseHVX] in {
+ def: Pat<(VecI8 vzero), (V6_vd0)>;
+ def: Pat<(VecI16 vzero), (V6_vd0)>;
+ def: Pat<(VecI32 vzero), (V6_vd0)>;
+
def: Pat<(VecPI8 (concat_vectors HVI8:$Vs, HVI8:$Vt)),
(Combinev HvxVR:$Vt, HvxVR:$Vs)>;
def: Pat<(VecPI16 (concat_vectors HVI16:$Vs, HVI16:$Vt)),
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.td b/lib/Target/Hexagon/HexagonRegisterInfo.td
index 2ceed70c2497..1d1e85e7ac7e 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.td
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.td
@@ -242,7 +242,7 @@ def VecQ32
// FIXME: the register order should be defined in terms of the preferred
// allocation order...
//
-def IntRegs : RegisterClass<"Hexagon", [i32, f32, v4i8, v2i16], 32,
+def IntRegs : RegisterClass<"Hexagon", [i32, f32, v32i1, v4i8, v2i16], 32,
(add (sequence "R%u", 0, 9), (sequence "R%u", 12, 28),
R10, R11, R29, R30, R31)>;
@@ -254,7 +254,8 @@ def GeneralSubRegs : RegisterClass<"Hexagon", [i32], 32,
def IntRegsLow8 : RegisterClass<"Hexagon", [i32], 32,
(add R7, R6, R5, R4, R3, R2, R1, R0)> ;
-def DoubleRegs : RegisterClass<"Hexagon", [i64, f64, v8i8, v4i16, v2i32], 64,
+def DoubleRegs : RegisterClass<"Hexagon",
+ [i64, f64, v64i1, v8i8, v4i16, v2i32], 64,
(add (sequence "D%u", 0, 4), (sequence "D%u", 6, 13), D5, D14, D15)>;
def GeneralDoubleLow8Regs : RegisterClass<"Hexagon", [i64], 64,
diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h
index 678ef210d0ae..af93f20d97fc 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/lib/Target/Hexagon/HexagonSubtarget.h
@@ -204,14 +204,38 @@ public:
llvm_unreachable("Invalid HVX vector length settings");
}
- bool isHVXVectorType(MVT VecTy) const {
+ ArrayRef<MVT> getHVXElementTypes() const {
+ static MVT Types[] = { MVT::i8, MVT::i16, MVT::i32 };
+ return makeArrayRef(Types);
+ }
+
+ bool isHVXVectorType(MVT VecTy, bool IncludeBool = false) const {
if (!VecTy.isVector() || !useHVXOps())
return false;
- unsigned ElemWidth = VecTy.getVectorElementType().getSizeInBits();
- if (ElemWidth < 8 || ElemWidth > 64)
+ MVT ElemTy = VecTy.getVectorElementType();
+ if (!IncludeBool && ElemTy == MVT::i1)
+ return false;
+
+ unsigned HwLen = getVectorLength();
+ unsigned NumElems = VecTy.getVectorNumElements();
+ ArrayRef<MVT> ElemTypes = getHVXElementTypes();
+
+ if (IncludeBool && ElemTy == MVT::i1) {
+ // Special case for v512i1, etc.
+ if (8*HwLen == NumElems)
+ return true;
+ // Boolean HVX vector types are formed from regular HVX vector types
+ // by replacing the element type with i1.
+ for (MVT T : ElemTypes)
+ if (NumElems * T.getSizeInBits() == 8*HwLen)
+ return true;
return false;
+ }
+
unsigned VecWidth = VecTy.getSizeInBits();
- return VecWidth == 8*getVectorLength() || VecWidth == 16*getVectorLength();
+ if (VecWidth != 8*HwLen && VecWidth != 16*HwLen)
+ return false;
+ return llvm::any_of(ElemTypes, [ElemTy] (MVT T) { return ElemTy == T; });
}
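// Illustrative examples (assume HwLen = 64, i.e. 512-bit vectors):
//   isHVXVectorType(v64i8)        -> true  (8*HwLen bits, single register)
//   isHVXVectorType(v32i16)       -> true
//   isHVXVectorType(v128i8)       -> true  (16*HwLen bits, register pair)
//   isHVXVectorType(v512i1)       -> false (IncludeBool defaults to false)
//   isHVXVectorType(v512i1, true) -> true  (NumElems == 8*HwLen special case)
//   isHVXVectorType(v64i1, true)  -> true  (i1 form of v64i8)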
unsigned getL1CacheLineSize() const;
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 0c40a7b8f382..363b703fef28 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -258,10 +258,9 @@ void HexagonTargetMachine::adjustPassManager(PassManagerBuilder &PMB) {
});
}
-TargetIRAnalysis HexagonTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis([this](const Function &F) {
- return TargetTransformInfo(HexagonTTIImpl(this, F));
- });
+TargetTransformInfo
+HexagonTargetMachine::getTargetTransformInfo(const Function &F) {
+ return TargetTransformInfo(HexagonTTIImpl(this, F));
}
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h
index acd41f920b53..a7c6a3437fbc 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -39,7 +39,7 @@ public:
void adjustPassManager(PassManagerBuilder &PMB) override;
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- TargetIRAnalysis getTargetIRAnalysis() override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) override;
HexagonTargetObjectFile *getObjFileLowering() const override {
return static_cast<HexagonTargetObjectFile*>(TLOF.get());
diff --git a/lib/Target/Lanai/LanaiTargetMachine.cpp b/lib/Target/Lanai/LanaiTargetMachine.cpp
index 9a73c95d6516..2c21a53b13bb 100644
--- a/lib/Target/Lanai/LanaiTargetMachine.cpp
+++ b/lib/Target/Lanai/LanaiTargetMachine.cpp
@@ -74,10 +74,9 @@ LanaiTargetMachine::LanaiTargetMachine(const Target &T, const Triple &TT,
initAsmInfo();
}
-TargetIRAnalysis LanaiTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis([this](const Function &F) {
- return TargetTransformInfo(LanaiTTIImpl(this, F));
- });
+TargetTransformInfo
+LanaiTargetMachine::getTargetTransformInfo(const Function &F) {
+ return TargetTransformInfo(LanaiTTIImpl(this, F));
}
namespace {
diff --git a/lib/Target/Lanai/LanaiTargetMachine.h b/lib/Target/Lanai/LanaiTargetMachine.h
index 2fb1a0536104..0db286ec13e7 100644
--- a/lib/Target/Lanai/LanaiTargetMachine.h
+++ b/lib/Target/Lanai/LanaiTargetMachine.h
@@ -42,7 +42,7 @@ public:
return &Subtarget;
}
- TargetIRAnalysis getTargetIRAnalysis() override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) override;
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &pass_manager) override;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index ac81e6207456..2f6dd0035de3 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -188,7 +188,7 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
// so we have to special-case them.
unsigned Opcode = TmpInst.getOpcode();
if ((Opcode != Mips::NOP) && (Opcode != Mips::SLL) &&
- (Opcode != Mips::SLL_MM) && !Binary)
+ (Opcode != Mips::SLL_MM) && (Opcode != Mips::SLL_MMR6) && !Binary)
llvm_unreachable("unimplemented opcode in encodeInstruction()");
int NewOpcode = -1;
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index 50537bed8ff0..c85ee20273c0 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -38,7 +38,7 @@ class MipsRegWithSubRegs<bits<16> Enc, string n, list<Register> subregs>
let Namespace = "Mips";
}
-// Mips CPU Registers
+// Mips CPU Registers.
class MipsGPRReg<bits<16> Enc, string n> : MipsReg<Enc, n>;
// Mips 64-bit CPU Registers
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 85193bffef56..fb79a4bf40c5 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -259,17 +259,16 @@ void MipsPassConfig::addPreRegAlloc() {
addPass(createMipsOptimizePICCallPass());
}
-TargetIRAnalysis MipsTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis([this](const Function &F) {
- if (Subtarget->allowMixed16_32()) {
- DEBUG(errs() << "No Target Transform Info Pass Added\n");
- // FIXME: This is no longer necessary as the TTI returned is per-function.
- return TargetTransformInfo(F.getParent()->getDataLayout());
- }
-
- DEBUG(errs() << "Target Transform Info Pass Added\n");
- return TargetTransformInfo(BasicTTIImpl(this, F));
- });
+TargetTransformInfo
+MipsTargetMachine::getTargetTransformInfo(const Function &F) {
+ if (Subtarget->allowMixed16_32()) {
+ DEBUG(errs() << "No Target Transform Info Pass Added\n");
+ // FIXME: This is no longer necessary as the TTI returned is per-function.
+ return TargetTransformInfo(F.getParent()->getDataLayout());
+ }
+
+ DEBUG(errs() << "Target Transform Info Pass Added\n");
+ return TargetTransformInfo(BasicTTIImpl(this, F));
}
// Implemented by targets that want to run passes immediately before
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index ccfc9a938d9c..56e6e5d8daa2 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -44,7 +44,7 @@ public:
CodeGenOpt::Level OL, bool JIT, bool isLittle);
~MipsTargetMachine() override;
- TargetIRAnalysis getTargetIRAnalysis() override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) override;
const MipsSubtarget *getSubtargetImpl() const {
if (Subtarget)
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 85f757878f94..d31e1cb5047b 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -180,10 +180,9 @@ void NVPTXTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
});
}
-TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis([this](const Function &F) {
- return TargetTransformInfo(NVPTXTTIImpl(this, F));
- });
+TargetTransformInfo
+NVPTXTargetMachine::getTargetTransformInfo(const Function &F) {
+ return TargetTransformInfo(NVPTXTTIImpl(this, F));
}
void NVPTXPassConfig::addEarlyCSEOrGVNPass() {
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h
index 54a72a688ee3..eeebf64d39c3 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -63,7 +63,7 @@ public:
void adjustPassManager(PassManagerBuilder &) override;
- TargetIRAnalysis getTargetIRAnalysis() override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) override;
bool isMachineVerifierClean() const override {
return false;
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index c870a2256691..7902da20a010 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -1531,11 +1531,11 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
- DebugLoc dl;
- if (MBBI != MBB.end())
- dl = MBBI->getDebugLoc();
+ // If we got this far, a first terminator should exist.
+ assert(MBBI != MBB.end() && "Failed to find the first terminator.");
+ DebugLoc dl = MBBI->getDebugLoc();
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
// Create branch instruction for pseudo tail call return instruction
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 18e567fa589c..cea59de3e8a9 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11882,6 +11882,12 @@ SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
SDLoc dl(N);
SDValue Op(N, 0);
+ // Don't handle ppc_fp128 here or i1 conversions.
+ if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
+ return SDValue();
+ if (Op.getOperand(0).getValueType() == MVT::i1)
+ return SDValue();
+
SDValue FirstOperand(Op.getOperand(0));
bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
(FirstOperand.getValueType() == MVT::i8 ||
@@ -11910,11 +11916,6 @@ SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
}
- // Don't handle ppc_fp128 here or i1 conversions.
- if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
- return SDValue();
- if (Op.getOperand(0).getValueType() == MVT::i1)
- return SDValue();
// For i32 intermediate values, unfortunately, the conversion functions
// leave the upper 32 bits of the value undefined. Within the set of
diff --git a/lib/Target/PowerPC/PPCMIPeephole.cpp b/lib/Target/PowerPC/PPCMIPeephole.cpp
index a2640727f813..474661aaaee8 100644
--- a/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -1025,9 +1025,6 @@ bool PPCMIPeephole::eliminateRedundantTOCSaves(
// bge 0, .LBB0_4
bool PPCMIPeephole::eliminateRedundantCompare(void) {
- // FIXME: this transformation is causing miscompiles. Disabling it for now
- // until we can resolve the issue.
- return false;
bool Simplified = false;
for (MachineBasicBlock &MBB2 : *MF) {
@@ -1087,10 +1084,21 @@ bool PPCMIPeephole::eliminateRedundantCompare(void) {
// we replace it with a signed comparison if the comparison
// to be merged is a signed comparison.
// In other cases of opcode mismatch, we cannot optimize this.
- if (isEqOrNe(BI2) &&
+
+ // We cannot change opcode when comparing against an immediate
+ // if the most significant bit of the immediate is one
+ // due to the difference in sign extension.
+ auto CmpAgainstImmWithSignBit = [](MachineInstr *I) {
+ if (!I->getOperand(2).isImm())
+ return false;
+ int16_t Imm = (int16_t)I->getOperand(2).getImm();
+ return Imm < 0;
+ };
+
+ if (isEqOrNe(BI2) && !CmpAgainstImmWithSignBit(CMPI2) &&
CMPI1->getOpcode() == getSignedCmpOpCode(CMPI2->getOpcode()))
NewOpCode = CMPI1->getOpcode();
- else if (isEqOrNe(BI1) &&
+ else if (isEqOrNe(BI1) && !CmpAgainstImmWithSignBit(CMPI1) &&
getSignedCmpOpCode(CMPI1->getOpcode()) == CMPI2->getOpcode())
NewOpCode = CMPI2->getOpcode();
else continue;
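// Illustrative example of why the guard is needed: take the 16-bit
// immediate 0x8000. A signed compare (cmpwi) sign-extends it to -32768,
// while an unsigned compare (cmplwi) zero-extends it to 32768, so the
// two opcodes compare against different values whenever the immediate's
// sign bit is set; the lambda above rejects exactly those cases.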
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 491f25ca2c64..20a83c973026 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -451,8 +451,7 @@ void PPCPassConfig::addPreEmitPass() {
addPass(createPPCBranchSelectionPass(), false);
}
-TargetIRAnalysis PPCTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis([this](const Function &F) {
- return TargetTransformInfo(PPCTTIImpl(this, F));
- });
+TargetTransformInfo
+PPCTargetMachine::getTargetTransformInfo(const Function &F) {
+ return TargetTransformInfo(PPCTTIImpl(this, F));
}
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 102bf7ca59c2..75b98a815ab4 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -49,7 +49,7 @@ public:
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- TargetIRAnalysis getTargetIRAnalysis() override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) override;
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index e74d68182949..3a167a6d452a 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -257,8 +257,7 @@ TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) {
return new SystemZPassConfig(*this, PM);
}
-TargetIRAnalysis SystemZTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis([this](const Function &F) {
- return TargetTransformInfo(SystemZTTIImpl(this, F));
- });
+TargetTransformInfo
+SystemZTargetMachine::getTargetTransformInfo(const Function &F) {
+ return TargetTransformInfo(SystemZTTIImpl(this, F));
}
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h
index 95ad5e339e0b..52bf8bba55de 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -44,7 +44,7 @@ public:
// Override LLVMTargetMachine
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- TargetIRAnalysis getTargetIRAnalysis() override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) override;
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index ad63c7a9cb30..c4c0dd22ee0c 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -219,10 +219,8 @@ CodeGenOpt::Level TargetMachine::getOptLevel() const { return OptLevel; }
void TargetMachine::setOptLevel(CodeGenOpt::Level Level) { OptLevel = Level; }
-TargetIRAnalysis TargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis([](const Function &F) {
- return TargetTransformInfo(F.getParent()->getDataLayout());
- });
+TargetTransformInfo TargetMachine::getTargetTransformInfo(const Function &F) {
+ return TargetTransformInfo(F.getParent()->getDataLayout());
}
void TargetMachine::getNameWithPrefix(SmallVectorImpl<char> &Name,
@@ -244,3 +242,10 @@ MCSymbol *TargetMachine::getSymbol(const GlobalValue *GV) const {
getNameWithPrefix(NameStr, GV, TLOF->getMangler());
return TLOF->getContext().getOrCreateSymbol(NameStr);
}
+
+TargetIRAnalysis TargetMachine::getTargetIRAnalysis() {
+ // Since Analysis can't depend on Target, use a std::function to invert the
+ // dependency.
+ return TargetIRAnalysis(
+ [this](const Function &F) { return this->getTargetTransformInfo(F); });
+}
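// Illustrative sketch of how the inverted dependency is consumed; the
// analysis side only ever sees the std::function, never a TargetMachine
// subclass (names follow the new pass manager's TargetIRAnalysis; FAM is
// an assumed in-scope FunctionAnalysisManager):
//   TargetIRAnalysis TIRA = TM->getTargetIRAnalysis();
//   // ... later, per function, inside the analysis manager:
//   TargetTransformInfo TTI = TIRA.run(F, FAM);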
diff --git a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
index 2bdba96ab674..a4bb967f36f6 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -746,6 +746,14 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+ // Disable the TEE optimization if we aren't doing direct wasm object
+ // emission, because lowering TEE to TEE_LOCAL is done in the ExplicitLocals
+ // pass, which is also disabled.
+ bool UseTee = true;
+ if (MF.getSubtarget<WebAssemblySubtarget>()
+ .getTargetTriple().isOSBinFormatELF())
+ UseTee = false;
+
// Walk the instructions from the bottom up. Currently we don't look past
// block boundaries, and the blocks aren't ordered so the block visitation
// order isn't significant, but we may want to change this in the future.
@@ -811,7 +819,7 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
Insert =
RematerializeCheapDef(Reg, Op, *Def, MBB, Insert->getIterator(),
LIS, MFI, MRI, TII, TRI);
- } else if (CanMove &&
+ } else if (UseTee && CanMove &&
OneUseDominatesOtherUses(Reg, Op, MBB, MRI, MDT, LIS, MFI)) {
Insert = MoveAndTeeForMultiUse(Reg, Op, Def, MBB, Insert, LIS, MFI,
MRI, TII);
diff --git a/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
index 2599064334ee..f808c063d7e4 100644
--- a/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
@@ -223,6 +223,8 @@ RuntimeLibcallSignatures[RTLIB::UNKNOWN_LIBCALL] = {
/* SINCOS_F80 */ unsupported,
/* SINCOS_F128 */ func_i64_i64_iPTR_iPTR,
/* SINCOS_PPCF128 */ unsupported,
+/* SINCOS_STRET_F32 */ unsupported,
+/* SINCOS_STRET_F64 */ unsupported,
/* POW_F32 */ f32_func_f32_f32,
/* POW_F64 */ f64_func_f64_f64,
/* POW_F80 */ unsupported,
@@ -390,8 +392,9 @@ RuntimeLibcallSignatures[RTLIB::UNKNOWN_LIBCALL] = {
// MEMORY
/* MEMCPY */ iPTR_func_iPTR_iPTR_iPTR,
-/* MEMSET */ iPTR_func_iPTR_i32_iPTR,
/* MEMMOVE */ iPTR_func_iPTR_iPTR_iPTR,
+/* MEMSET */ iPTR_func_iPTR_i32_iPTR,
+/* BZERO */ unsupported,
// ELEMENT-WISE ATOMIC MEMORY
/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_1 */ unsupported,
@@ -687,6 +690,8 @@ RuntimeLibcallNames[RTLIB::UNKNOWN_LIBCALL] = {
/* SINCOS_F80 */ nullptr,
/* SINCOS_F128 */ "sincosl",
/* SINCOS_PPCF128 */ nullptr,
+/* SINCOS_STRET_F32 */ nullptr,
+/* SINCOS_STRET_F64 */ nullptr,
/* POW_F32 */ "powf",
/* POW_F64 */ "pow",
/* POW_F80 */ nullptr,
@@ -850,6 +855,7 @@ RuntimeLibcallNames[RTLIB::UNKNOWN_LIBCALL] = {
/* MEMCPY */ "memcpy",
/* MEMMOVE */ "memset",
/* MEMSET */ "memmove",
+/* BZERO */ nullptr,
/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_1 */ nullptr,
/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_2 */ nullptr,
/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_4 */ nullptr,
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index 3cc19ef5fbab..d38cde74d2ec 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -146,10 +146,9 @@ public:
};
} // end anonymous namespace
-TargetIRAnalysis WebAssemblyTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis([this](const Function &F) {
- return TargetTransformInfo(WebAssemblyTTIImpl(this, F));
- });
+TargetTransformInfo
+WebAssemblyTargetMachine::getTargetTransformInfo(const Function &F) {
+ return TargetTransformInfo(WebAssemblyTTIImpl(this, F));
}
TargetPassConfig *
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.h b/lib/Target/WebAssembly/WebAssemblyTargetMachine.h
index 224849526514..dd826befd117 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetMachine.h
+++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.h
@@ -43,8 +43,7 @@ public:
return TLOF.get();
}
- /// \brief Get the TargetIRAnalysis for this target.
- TargetIRAnalysis getTargetIRAnalysis() override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) override;
bool usesPhysRegsForPEI() const override { return false; }
};
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 78385ae1877b..239db2a74b24 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -78,7 +78,7 @@ public:
CPU != "i586" && CPU != "pentium" && CPU != "pentium-mmx" &&
CPU != "i686" && CPU != "k6" && CPU != "k6-2" && CPU != "k6-3" &&
CPU != "geode" && CPU != "winchip-c6" && CPU != "winchip2" &&
- CPU != "c3" && CPU != "c3-2" && CPU != "lakemont";
+ CPU != "c3" && CPU != "c3-2" && CPU != "lakemont" && CPU != "";
}
unsigned getNumFixupKinds() const override {
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 08731cd0204c..7e7c35569093 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -137,7 +137,7 @@ def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ",
def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true",
"Enable AVX-512 PreFetch Instructions",
[FeatureAVX512]>;
-def FeaturePREFETCHWT1 : SubtargetFeature<"prefetchwt1", "HasPFPREFETCHWT1",
+def FeaturePREFETCHWT1 : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1",
"true",
"Prefetch with Intent to Write and T1 Hint">;
def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true",
@@ -263,6 +263,12 @@ def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
def FeatureSoftFloat
: SubtargetFeature<"soft-float", "UseSoftFloat", "true",
"Use software floating point features.">;
+// On recent X86 (port-bound) processors, it's preferable to combine to a single
+// shuffle using a variable mask over multiple fixed shuffles.
+def FeatureFastVariableShuffle
+ : SubtargetFeature<"fast-variable-shuffle",
+ "HasFastVariableShuffle",
+ "true", "Shuffles with variable masks are fast">;
// On some X86 processors, there is no performance hazard to writing only the
// lower parts of a YMM or ZMM register without clearing the upper part.
def FeatureFastPartialYMMorZMMWrite
@@ -620,7 +626,8 @@ def HSWFeatures : ProcessorFeatures<IVBFeatures.Value, [
FeatureERMSB,
FeatureFMA,
FeatureLZCNT,
- FeatureMOVBE
+ FeatureMOVBE,
+ FeatureFastVariableShuffle
]>;
class HaswellProc<string Name> : ProcModel<Name, HaswellModel,
@@ -632,7 +639,8 @@ def : HaswellProc<"core-avx2">; // Legacy alias.
def BDWFeatures : ProcessorFeatures<HSWFeatures.Value, [
FeatureADX,
- FeatureRDSEED
+ FeatureRDSEED,
+ FeaturePRFCHW
]>;
class BroadwellProc<string Name> : ProcModel<Name, BroadwellModel,
BDWFeatures.Value, [
@@ -669,7 +677,8 @@ def KNLFeatures : ProcessorFeatures<IVBFeatures.Value, [
FeatureLZCNT,
FeatureBMI,
FeatureBMI2,
- FeatureFMA
+ FeatureFMA,
+ FeaturePRFCHW
]>;
// FIXME: define KNL model
diff --git a/lib/Target/X86/X86DomainReassignment.cpp b/lib/Target/X86/X86DomainReassignment.cpp
index 0a87fb4533c2..ba7280c29cc9 100644
--- a/lib/Target/X86/X86DomainReassignment.cpp
+++ b/lib/Target/X86/X86DomainReassignment.cpp
@@ -301,60 +301,21 @@ typedef DenseMap<InstrConverterBaseKeyTy, InstrConverterBase *>
/// different closure that manipulates the loaded or stored value.
class Closure {
private:
- const TargetInstrInfo *TII;
- MachineRegisterInfo *MRI;
-
/// Virtual registers in the closure.
DenseSet<unsigned> Edges;
/// Instructions in the closure.
SmallVector<MachineInstr *, 8> Instrs;
- /// A map of available Instruction Converters.
- const InstrConverterBaseMap &Converters;
-
- /// The register domain of this closure.
- RegDomain Domain;
-
/// Domains which this closure can legally be reassigned to.
std::bitset<NumDomains> LegalDstDomains;
- /// Enqueue \p Reg to be considered for addition to the closure.
- void visitRegister(unsigned Reg, SmallVectorImpl<unsigned> &Worklist);
-
- /// Add \p MI to this closure.
- void encloseInstr(MachineInstr *MI);
-
- /// Calculate the total cost of reassigning the closure to \p Domain.
- double calculateCost(RegDomain Domain) const;
-
- /// All edges that are included in some closure.
- DenseSet<unsigned> &EnclosedEdges;
-
- /// All instructions that are included in some closure.
- DenseMap<MachineInstr *, Closure *> &EnclosedInstrs;
-
public:
- Closure(const TargetInstrInfo *TII, MachineRegisterInfo *MRI,
- const InstrConverterBaseMap &Converters,
- std::initializer_list<RegDomain> LegalDstDomainList,
- DenseSet<unsigned> &EnclosedEdges,
- DenseMap<MachineInstr *, Closure *> &EnclosedInstrs)
- : TII(TII), MRI(MRI), Converters(Converters), Domain(NoDomain),
- EnclosedEdges(EnclosedEdges), EnclosedInstrs(EnclosedInstrs) {
+ Closure(std::initializer_list<RegDomain> LegalDstDomainList) {
for (RegDomain D : LegalDstDomainList)
LegalDstDomains.set(D);
}
- /// Starting from \Reg, expand the closure as much as possible.
- void buildClosure(unsigned E);
-
- /// /returns true if it is profitable to reassign the closure to \p Domain.
- bool isReassignmentProfitable(RegDomain Domain) const;
-
- /// Reassign the closure to \p Domain.
- void Reassign(RegDomain Domain) const;
-
/// Mark this closure as illegal for reassignment to all domains.
void setAllIllegal() { LegalDstDomains.reset(); }
@@ -364,10 +325,41 @@ public:
/// \returns true if is legal to reassign this closure to domain \p RD.
bool isLegal(RegDomain RD) const { return LegalDstDomains[RD]; }
+ /// Mark this closure as illegal for reassignment to domain \p RD.
+ void setIllegal(RegDomain RD) { LegalDstDomains[RD] = false; }
+
bool empty() const { return Edges.empty(); }
+
+ bool insertEdge(unsigned Reg) {
+ return Edges.insert(Reg).second;
+ }
+
+ using const_edge_iterator = DenseSet<unsigned>::const_iterator;
+ iterator_range<const_edge_iterator> edges() const {
+ return iterator_range<const_edge_iterator>(Edges.begin(), Edges.end());
+ }
+
+ void addInstruction(MachineInstr *I) {
+ Instrs.push_back(I);
+ }
+
+ ArrayRef<MachineInstr *> instructions() const {
+ return Instrs;
+ }
+
};
class X86DomainReassignment : public MachineFunctionPass {
+ const X86Subtarget *STI;
+ MachineRegisterInfo *MRI;
+ const X86InstrInfo *TII;
+
+ /// All edges that are included in some closure
+ DenseSet<unsigned> EnclosedEdges;
+
+ /// All instructions that are included in some closure.
+ DenseMap<MachineInstr *, Closure *> EnclosedInstrs;
+
public:
static char ID;
@@ -387,22 +379,39 @@ public:
}
private:
- const X86Subtarget *STI;
- MachineRegisterInfo *MRI;
- const X86InstrInfo *TII;
-
/// A map of available Instruction Converters.
InstrConverterBaseMap Converters;
/// Initialize Converters map.
void initConverters();
+
+ /// Starting from \p Reg, expand the closure as much as possible.
+ void buildClosure(Closure &, unsigned Reg);
+
+ /// Enqueue \p Reg to be considered for addition to the closure.
+ void visitRegister(Closure &, unsigned Reg, RegDomain &Domain,
+ SmallVectorImpl<unsigned> &Worklist);
+
+ /// Reassign the closure to \p Domain.
+ void reassign(const Closure &C, RegDomain Domain) const;
+
+ /// Add \p MI to the closure.
+ void encloseInstr(Closure &C, MachineInstr *MI);
+
+ /// \returns true if it is profitable to reassign the closure to \p Domain.
+ bool isReassignmentProfitable(const Closure &C, RegDomain Domain) const;
+
+ /// Calculate the total cost of reassigning the closure to \p Domain.
+ double calculateCost(const Closure &C, RegDomain Domain) const;
};
char X86DomainReassignment::ID = 0;
} // End anonymous namespace.
-void Closure::visitRegister(unsigned Reg, SmallVectorImpl<unsigned> &Worklist) {
+void X86DomainReassignment::visitRegister(Closure &C, unsigned Reg,
+ RegDomain &Domain,
+ SmallVectorImpl<unsigned> &Worklist) {
if (EnclosedEdges.count(Reg))
return;
@@ -423,59 +432,61 @@ void Closure::visitRegister(unsigned Reg, SmallVectorImpl<unsigned> &Worklist) {
Worklist.push_back(Reg);
}
-void Closure::encloseInstr(MachineInstr *MI) {
+void X86DomainReassignment::encloseInstr(Closure &C, MachineInstr *MI) {
auto I = EnclosedInstrs.find(MI);
if (I != EnclosedInstrs.end()) {
- if (I->second != this)
+ if (I->second != &C)
// Instruction already belongs to another closure, avoid conflicts between
// closures, and mark this closure as illegal.
- setAllIllegal();
+ C.setAllIllegal();
return;
}
- EnclosedInstrs[MI] = this;
- Instrs.push_back(MI);
+ EnclosedInstrs[MI] = &C;
+ C.addInstruction(MI);
// Mark closure as illegal for reassignment to domains, if there is no
// converter for the instruction or if the converter cannot convert the
// instruction.
- for (unsigned i = 0; i != LegalDstDomains.size(); ++i) {
- if (LegalDstDomains[i]) {
+ for (int i = 0; i != NumDomains; ++i) {
+ if (C.isLegal((RegDomain)i)) {
InstrConverterBase *IC = Converters.lookup({i, MI->getOpcode()});
if (!IC || !IC->isLegal(MI, TII))
- LegalDstDomains[i] = false;
+ C.setIllegal((RegDomain)i);
}
}
}
-double Closure::calculateCost(RegDomain DstDomain) const {
- assert(isLegal(DstDomain) && "Cannot calculate cost for illegal closure");
+double X86DomainReassignment::calculateCost(const Closure &C,
+ RegDomain DstDomain) const {
+ assert(C.isLegal(DstDomain) && "Cannot calculate cost for illegal closure");
double Cost = 0.0;
- for (auto MI : Instrs)
+ for (auto *MI : C.instructions())
Cost +=
Converters.lookup({DstDomain, MI->getOpcode()})->getExtraCost(MI, MRI);
return Cost;
}
-bool Closure::isReassignmentProfitable(RegDomain Domain) const {
- return calculateCost(Domain) < 0.0;
+bool X86DomainReassignment::isReassignmentProfitable(const Closure &C,
+ RegDomain Domain) const {
+ return calculateCost(C, Domain) < 0.0;
}
-void Closure::Reassign(RegDomain Domain) const {
- assert(isLegal(Domain) && "Cannot convert illegal closure");
+void X86DomainReassignment::reassign(const Closure &C, RegDomain Domain) const {
+ assert(C.isLegal(Domain) && "Cannot convert illegal closure");
// Iterate all instructions in the closure, convert each one using the
// appropriate converter.
SmallVector<MachineInstr *, 8> ToErase;
- for (auto MI : Instrs)
+ for (auto *MI : C.instructions())
if (Converters.lookup({Domain, MI->getOpcode()})
->convertInstr(MI, TII, MRI))
ToErase.push_back(MI);
// Iterate all registers in the closure, replace them with registers in the
// destination domain.
- for (unsigned Reg : Edges) {
+ for (unsigned Reg : C.edges()) {
MRI->setRegClass(Reg, getDstRC(MRI->getRegClass(Reg), Domain));
for (auto &MO : MRI->use_operands(Reg)) {
if (MO.isReg())
@@ -512,18 +523,19 @@ static bool usedAsAddr(const MachineInstr &MI, unsigned Reg,
return false;
}
-void Closure::buildClosure(unsigned Reg) {
+void X86DomainReassignment::buildClosure(Closure &C, unsigned Reg) {
SmallVector<unsigned, 4> Worklist;
- visitRegister(Reg, Worklist);
+ RegDomain Domain = NoDomain;
+ visitRegister(C, Reg, Domain, Worklist);
while (!Worklist.empty()) {
unsigned CurReg = Worklist.pop_back_val();
// Register already in this closure.
- if (!Edges.insert(CurReg).second)
+ if (!C.insertEdge(CurReg))
continue;
MachineInstr *DefMI = MRI->getVRegDef(CurReg);
- encloseInstr(DefMI);
+ encloseInstr(C, DefMI);
// Add register used by the defining MI to the worklist.
// Do not add registers which are used in address calculation, they will be
@@ -542,7 +554,7 @@ void Closure::buildClosure(unsigned Reg) {
auto &Op = DefMI->getOperand(OpIdx);
if (!Op.isReg() || !Op.isUse())
continue;
- visitRegister(Op.getReg(), Worklist);
+ visitRegister(C, Op.getReg(), Domain, Worklist);
}
// Expand closure through register uses.
@@ -550,10 +562,10 @@ void Closure::buildClosure(unsigned Reg) {
// We would like to avoid converting closures which calculate addresses,
// as this should remain in GPRs.
if (usedAsAddr(UseMI, CurReg, TII)) {
- setAllIllegal();
+ C.setAllIllegal();
continue;
}
- encloseInstr(&UseMI);
+ encloseInstr(C, &UseMI);
for (auto &DefOp : UseMI.defs()) {
if (!DefOp.isReg())
@@ -561,10 +573,10 @@ void Closure::buildClosure(unsigned Reg) {
unsigned DefReg = DefOp.getReg();
if (!TargetRegisterInfo::isVirtualRegister(DefReg)) {
- setAllIllegal();
+ C.setAllIllegal();
continue;
}
- visitRegister(DefReg, Worklist);
+ visitRegister(C, DefReg, Domain, Worklist);
}
}
}
@@ -701,8 +713,8 @@ bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) {
initConverters();
bool Changed = false;
- DenseSet<unsigned> EnclosedEdges;
- DenseMap<MachineInstr *, Closure *> EnclosedInstrs;
+ EnclosedEdges.clear();
+ EnclosedInstrs.clear();
std::vector<Closure> Closures;
@@ -719,9 +731,8 @@ bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) {
continue;
// Calculate closure starting with Reg.
- Closure C(TII, MRI, Converters, {MaskDomain}, EnclosedEdges,
- EnclosedInstrs);
- C.buildClosure(Reg);
+ Closure C({MaskDomain});
+ buildClosure(C, Reg);
// Collect all closures that can potentially be converted.
if (!C.empty() && C.isLegal(MaskDomain))
@@ -729,8 +740,8 @@ bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) {
}
for (Closure &C : Closures)
- if (C.isReassignmentProfitable(MaskDomain)) {
- C.Reassign(MaskDomain);
+ if (isReassignmentProfitable(C, MaskDomain)) {
+ reassign(C, MaskDomain);
++NumClosuresConverted;
Changed = true;
}
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index a6c7c5f22a3a..660c1eff3c4b 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -106,14 +106,15 @@ namespace {
if (Base_Reg.getNode())
Base_Reg.getNode()->dump();
else
- dbgs() << "nul";
- dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n'
- << " Scale" << Scale << '\n'
+ dbgs() << "nul\n";
+ if (BaseType == FrameIndexBase)
+ dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n';
+ dbgs() << " Scale " << Scale << '\n'
<< "IndexReg ";
if (IndexReg.getNode())
IndexReg.getNode()->dump();
else
- dbgs() << "nul";
+ dbgs() << "nul\n";
dbgs() << " Disp " << Disp << '\n'
<< "GV ";
if (GV)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index a72f4daa5e11..5ac5d0348f8a 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -461,7 +461,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SRL_PARTS, VT, Custom);
}
- if (Subtarget.hasSSE1())
+ if (Subtarget.hasSSEPrefetch() || Subtarget.has3DNow())
setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
@@ -1622,16 +1622,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setLibcallName(RTLIB::MUL_I128, nullptr);
}
- // Combine sin / cos into one node or libcall if possible.
- if (Subtarget.hasSinCos()) {
- setLibcallName(RTLIB::SINCOS_F32, "sincosf");
- setLibcallName(RTLIB::SINCOS_F64, "sincos");
- if (Subtarget.isTargetDarwin()) {
- // For MacOSX, we don't want the normal expansion of a libcall to sincos.
- // We want to issue a libcall to __sincos_stret to avoid memory traffic.
- setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
- setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
- }
+ // Combine sin / cos into _sincos_stret if it is available.
+ if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
+ getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
+ setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
}
if (Subtarget.isTargetWin64()) {
@@ -7480,9 +7475,9 @@ static bool isAddSub(const BuildVectorSDNode *BV,
}
/// Returns true if is possible to fold MUL and an idiom that has already been
-/// recognized as ADDSUB(\p Opnd0, \p Opnd1) into FMADDSUB(x, y, \p Opnd1).
-/// If (and only if) true is returned, the operands of FMADDSUB are written to
-/// parameters \p Opnd0, \p Opnd1, \p Opnd2.
+/// recognized as ADDSUB/SUBADD(\p Opnd0, \p Opnd1) into
+/// FMADDSUB/FMSUBADD(x, y, \p Opnd1). If (and only if) true is returned, the
+/// operands of FMADDSUB/FMSUBADD are written to parameters \p Opnd0, \p Opnd1, \p Opnd2.
///
/// Prior to calling this function it should be known that there is some
/// SDNode that potentially can be replaced with an X86ISD::ADDSUB operation
@@ -7505,12 +7500,12 @@ static bool isAddSub(const BuildVectorSDNode *BV,
/// recognized ADDSUB idiom with ADDSUB operation is that such replacement
/// is illegal sometimes. E.g. 512-bit ADDSUB is not available, while 512-bit
/// FMADDSUB is.
-static bool isFMAddSub(const X86Subtarget &Subtarget, SelectionDAG &DAG,
- SDValue &Opnd0, SDValue &Opnd1, SDValue &Opnd2,
- unsigned ExpectedUses) {
+static bool isFMAddSubOrFMSubAdd(const X86Subtarget &Subtarget,
+ SelectionDAG &DAG,
+ SDValue &Opnd0, SDValue &Opnd1, SDValue &Opnd2,
+ unsigned ExpectedUses) {
if (Opnd0.getOpcode() != ISD::FMUL ||
- !Opnd0->hasNUsesOfValue(ExpectedUses, 0) ||
- !Subtarget.hasAnyFMA())
+ !Opnd0->hasNUsesOfValue(ExpectedUses, 0) || !Subtarget.hasAnyFMA())
return false;
// FIXME: These checks must match the similar ones in
@@ -7547,7 +7542,7 @@ static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV,
SDValue Opnd2;
// TODO: According to coverage reports, the FMADDSUB transform is not
// triggered by any tests.
- if (isFMAddSub(Subtarget, DAG, Opnd0, Opnd1, Opnd2, NumExtracts))
+ if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, NumExtracts))
return DAG.getNode(X86ISD::FMADDSUB, DL, VT, Opnd0, Opnd1, Opnd2);
// Do not generate X86ISD::ADDSUB node for 512-bit types even though
@@ -11958,6 +11953,19 @@ static int canLowerByDroppingEvenElements(ArrayRef<int> Mask,
return 0;
}
+static SDValue lowerVectorShuffleWithPERMV(const SDLoc &DL, MVT VT,
+ ArrayRef<int> Mask, SDValue V1,
+ SDValue V2, SelectionDAG &DAG) {
+ MVT MaskEltVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
+ MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, VT.getVectorNumElements());
+
+ SDValue MaskNode = getConstVector(Mask, MaskVecVT, DAG, DL, true);
+ if (V2.isUndef())
+ return DAG.getNode(X86ISD::VPERMV, DL, VT, MaskNode, V1);
+
+ return DAG.getNode(X86ISD::VPERMV3, DL, VT, V1, MaskNode, V2);
+}
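// Illustrative example (v16i8, a sketch): a cross-input mask such as
//   {0, 16, 1, 17, ...}   // even lanes from V1, odd lanes from V2
// is materialized as the constant MaskNode and selects through
//   VPERMV3 V1, MaskNode, V2
// whereas a single-input shuffle (V2 undef) uses VPERMV MaskNode, V1.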
+
/// \brief Generic lowering of v16i8 shuffles.
///
/// This is a hybrid strategy to lower v16i8 vectors. It first attempts to
@@ -12148,6 +12156,10 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(
DL, MVT::v16i8, V1, V2, Mask, DAG))
return Unpack;
+
+ // If we have VBMI we can use one VPERM instead of multiple PSHUFBs.
+ if (Subtarget.hasVBMI() && Subtarget.hasVLX())
+ return lowerVectorShuffleWithPERMV(DL, MVT::v16i8, Mask, V1, V2, DAG);
}
return PSHUFB;
@@ -13048,19 +13060,6 @@ static SDValue lowerVectorShuffleWithSHUFPD(const SDLoc &DL, MVT VT,
DAG.getConstant(Immediate, DL, MVT::i8));
}
-static SDValue lowerVectorShuffleWithPERMV(const SDLoc &DL, MVT VT,
- ArrayRef<int> Mask, SDValue V1,
- SDValue V2, SelectionDAG &DAG) {
- MVT MaskEltVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
- MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, VT.getVectorNumElements());
-
- SDValue MaskNode = getConstVector(Mask, MaskVecVT, DAG, DL, true);
- if (V2.isUndef())
- return DAG.getNode(X86ISD::VPERMV, DL, VT, MaskNode, V1);
-
- return DAG.getNode(X86ISD::VPERMV3, DL, VT, V1, MaskNode, V2);
-}
-
/// \brief Handle lowering of 4-lane 64-bit floating point shuffles.
///
/// Also ends up handling lowering of 4-lane 64-bit integer shuffles when AVX2
@@ -13615,6 +13614,10 @@ static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
DL, MVT::v32i8, Mask, V1, V2, Zeroable, Subtarget, DAG))
return PSHUFB;
+ // AVX512VBMIVL can lower to VPERMB.
+ if (Subtarget.hasVBMI() && Subtarget.hasVLX())
+ return lowerVectorShuffleWithPERMV(DL, MVT::v32i8, Mask, V1, V2, DAG);
+
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle.
if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
@@ -14077,6 +14080,10 @@ static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
Zeroable, Subtarget, DAG))
return Blend;
+ if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB(
+ DL, MVT::v32i16, Mask, V1, V2, Zeroable, Subtarget, DAG))
+ return PSHUFB;
+
return lowerVectorShuffleWithPERMV(DL, MVT::v32i16, Mask, V1, V2, DAG);
}
@@ -14212,7 +14219,9 @@ static SDValue lower1BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
ExtVT = MVT::v4i32;
break;
case MVT::v8i1:
- ExtVT = MVT::v8i64; // Take 512-bit type, more shuffles on KNL
+ // Take 512-bit type, more shuffles on KNL. If we have VLX, use a 256-bit
+ // shuffle.
+ ExtVT = Subtarget.hasVLX() ? MVT::v8i32 : MVT::v8i64;
break;
case MVT::v16i1:
ExtVT = MVT::v16i32;
@@ -14569,11 +14578,10 @@ static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG,
unsigned NumElts = VecVT.getVectorNumElements();
// Extending v8i1/v16i1 to 512-bit gets better performance on KNL
// than extending to 128/256bit.
- unsigned VecSize = (NumElts <= 4 ? 128 : 512);
- MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(VecSize / NumElts), NumElts);
- SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVT, Vec);
- SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- ExtVT.getVectorElementType(), Ext, Idx);
+ MVT ExtEltVT = (NumElts <= 8) ? MVT::getIntegerVT(128 / NumElts) : MVT::i8;
+ MVT ExtVecVT = MVT::getVectorVT(ExtEltVT, NumElts);
+ SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVecVT, Vec);
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ExtEltVT, Ext, Idx);
return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
}
@@ -14768,12 +14776,11 @@ static SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG,
// Non constant index. Extend source and destination,
// insert element and then truncate the result.
unsigned NumElts = VecVT.getVectorNumElements();
- unsigned VecSize = (NumElts <= 4 ? 128 : 512);
- MVT ExtVecVT = MVT::getVectorVT(MVT::getIntegerVT(VecSize/NumElts), NumElts);
- MVT ExtEltVT = ExtVecVT.getVectorElementType();
+ MVT ExtEltVT = (NumElts <= 8) ? MVT::getIntegerVT(128 / NumElts) : MVT::i8;
+ MVT ExtVecVT = MVT::getVectorVT(ExtEltVT, NumElts);
SDValue ExtOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtVecVT,
- DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVecVT, Vec),
- DAG.getNode(ISD::ZERO_EXTEND, dl, ExtEltVT, Elt), Idx);
+ DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVecVT, Vec),
+ DAG.getNode(ISD::SIGN_EXTEND, dl, ExtEltVT, Elt), Idx);
return DAG.getNode(ISD::TRUNCATE, dl, VecVT, ExtOp);
}
@@ -16287,21 +16294,6 @@ static SDValue LowerZERO_EXTEND_Mask(SDValue Op,
return SelectedVal;
}
-static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
- SDValue In = Op->getOperand(0);
- MVT InVT = In.getSimpleValueType();
-
- if (InVT.getVectorElementType() == MVT::i1)
- return LowerZERO_EXTEND_Mask(Op, Subtarget, DAG);
-
- if (Subtarget.hasFp256())
- if (SDValue Res = LowerAVXExtend(Op, DAG, Subtarget))
- return Res;
-
- return SDValue();
-}
-
static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDValue In = Op.getOperand(0);
@@ -16440,7 +16432,8 @@ static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG,
assert((InVT.is256BitVector() || InVT.is128BitVector()) &&
"Unexpected vector type.");
unsigned NumElts = InVT.getVectorNumElements();
- MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(512/NumElts), NumElts);
+ MVT EltVT = Subtarget.hasVLX() ? MVT::i32 : MVT::getIntegerVT(512/NumElts);
+ MVT ExtVT = MVT::getVectorVT(EltVT, NumElts);
In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In);
InVT = ExtVT;
ShiftInx = InVT.getScalarSizeInBits() - 1;
@@ -18446,6 +18439,21 @@ static SDValue LowerSIGN_EXTEND_Mask(SDValue Op,
return V;
}
+static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ SDValue In = Op->getOperand(0);
+ MVT InVT = In.getSimpleValueType();
+
+ if (InVT.getVectorElementType() == MVT::i1)
+ return LowerSIGN_EXTEND_Mask(Op, Subtarget, DAG);
+
+ if (Subtarget.hasFp256())
+ if (SDValue Res = LowerAVXExtend(Op, DAG, Subtarget))
+ return Res;
+
+ return SDValue();
+}
+
// Lowering for SIGN_EXTEND_VECTOR_INREG and ZERO_EXTEND_VECTOR_INREG.
// For sign extend this needs to handle all vector sizes and SSE4.1 and
// non-SSE4.1 targets. For zero extend this should only handle inputs of
@@ -21128,7 +21136,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
// ADC/ADCX/SBB
case ADX: {
SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
- SDVTList VTs = DAG.getVTList(Op.getOperand(3)->getValueType(0), MVT::i32);
+ SDVTList VTs = DAG.getVTList(Op.getOperand(3).getValueType(), MVT::i32);
SDValue GenCF = DAG.getNode(X86ISD::ADD, dl, CFVTs, Op.getOperand(2),
DAG.getConstant(-1, dl, MVT::i8));
SDValue Res = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(3),
@@ -22231,6 +22239,8 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
DAG.getVectorShuffle(MVT::v16i16, dl, Lo, Hi, HiMask));
}
+ assert(VT == MVT::v16i8 && "Unexpected VT");
+
SDValue ExA = DAG.getNode(ExAVX, dl, MVT::v16i16, A);
SDValue ExB = DAG.getNode(ExAVX, dl, MVT::v16i16, B);
SDValue Mul = DAG.getNode(ISD::MUL, dl, MVT::v16i16, ExA, ExB);
@@ -22989,12 +22999,14 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
(Subtarget.hasAVX512() && VT == MVT::v16i16) ||
(Subtarget.hasAVX512() && VT == MVT::v16i8) ||
(Subtarget.hasBWI() && VT == MVT::v32i8)) {
- MVT EvtSVT = (VT == MVT::v32i8 ? MVT::i16 : MVT::i32);
+ assert((!Subtarget.hasBWI() || VT == MVT::v32i8 || VT == MVT::v16i8) &&
+ "Unexpected vector type");
+ MVT EvtSVT = Subtarget.hasBWI() ? MVT::i16 : MVT::i32;
MVT ExtVT = MVT::getVectorVT(EvtSVT, VT.getVectorNumElements());
unsigned ExtOpc =
Op.getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
R = DAG.getNode(ExtOpc, dl, ExtVT, R);
- Amt = DAG.getNode(ISD::ANY_EXTEND, dl, ExtVT, Amt);
+ Amt = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, Amt);
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getNode(Op.getOpcode(), dl, ExtVT, R, Amt));
}
@@ -24101,8 +24113,9 @@ static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget &Subtarget,
// Only optimize x86_64 for now. i386 is a bit messy. For f32,
// the small struct {f32, f32} is returned in (eax, edx). For f64,
// the results are returned via SRet in memory.
- const char *LibcallName = isF64 ? "__sincos_stret" : "__sincosf_stret";
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ RTLIB::Libcall LC = isF64 ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32;
+ const char *LibcallName = TLI.getLibcallName(LC);
SDValue Callee =
DAG.getExternalSymbol(LibcallName, TLI.getPointerTy(DAG.getDataLayout()));
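// Illustrative sketch of the libcall shapes resolved above (a hedged
// approximation, not a header quote; the exact return-value lowering
// follows the Darwin convention described in the preceding comment):
//   struct { float  sinval, cosval; } __sincosf_stret(float);
//   struct { double sinval, cosval; } __sincos_stret(double);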
@@ -24928,7 +24941,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::BITCAST: {
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
EVT DstVT = N->getValueType(0);
- EVT SrcVT = N->getOperand(0)->getValueType(0);
+ EVT SrcVT = N->getOperand(0).getValueType();
if (SrcVT != MVT::f64 ||
(DstVT != MVT::v2i32 && DstVT != MVT::v4i16 && DstVT != MVT::v8i8))
@@ -28407,8 +28420,6 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// TODO - attempt to narrow Mask back to writemask size.
bool IsEVEXShuffle =
RootSizeInBits == 512 || (Subtarget.hasVLX() && RootSizeInBits >= 128);
- if (IsEVEXShuffle && (RootVT.getScalarSizeInBits() != BaseMaskEltSizeInBits))
- return SDValue();
// TODO - handle 128/256-bit lane shuffles of 512-bit vectors.
@@ -28491,11 +28502,10 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
if (matchUnaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
V1, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
- ShuffleVT)) {
+ ShuffleVT) &&
+ (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
- if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
- return SDValue(); // AVX512 Writemask clash.
Res = DAG.getBitcast(ShuffleSrcVT, V1);
DCI.AddToWorklist(Res.getNode());
Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res);
@@ -28505,11 +28515,10 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
AllowIntDomain, Subtarget, Shuffle,
- ShuffleVT, PermuteImm)) {
+ ShuffleVT, PermuteImm) &&
+ (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
- if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
- return SDValue(); // AVX512 Writemask clash.
Res = DAG.getBitcast(ShuffleVT, V1);
DCI.AddToWorklist(Res.getNode());
Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res,
@@ -28520,12 +28529,11 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
}
if (matchBinaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
- V1, V2, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
- ShuffleVT, UnaryShuffle)) {
+ V1, V2, DL, DAG, Subtarget, Shuffle,
+ ShuffleSrcVT, ShuffleVT, UnaryShuffle) &&
+ (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
- if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
- return SDValue(); // AVX512 Writemask clash.
V1 = DAG.getBitcast(ShuffleSrcVT, V1);
DCI.AddToWorklist(V1.getNode());
V2 = DAG.getBitcast(ShuffleSrcVT, V2);
@@ -28538,11 +28546,10 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
AllowIntDomain, V1, V2, DL, DAG,
Subtarget, Shuffle, ShuffleVT,
- PermuteImm)) {
+ PermuteImm) &&
+ (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
- if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
- return SDValue(); // AVX512 Writemask clash.
V1 = DAG.getBitcast(ShuffleVT, V1);
DCI.AddToWorklist(V1.getNode());
V2 = DAG.getBitcast(ShuffleVT, V2);
@@ -28594,8 +28601,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
return SDValue();
// Depth threshold above which we can efficiently use variable mask shuffles.
- // TODO This should probably be target specific.
- bool AllowVariableMask = (Depth >= 3) || HasVariableMask;
+ int VariableShuffleDepth = Subtarget.hasFastVariableShuffle() ? 2 : 3;
+ bool AllowVariableMask = (Depth >= VariableShuffleDepth) || HasVariableMask;
bool MaskContainsZeros =
any_of(Mask, [](int M) { return M == SM_SentinelZero; });
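// Illustrative example: on a target with fast-variable-shuffle, a chain
// of just two fixed shuffles (Depth == 2), e.g.
//   pshufd + punpcklwd
// may now be replaced by a single variable-mask shuffle such as PSHUFB,
// while other targets keep the previous Depth >= 3 threshold.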
@@ -29698,17 +29705,18 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
return SDValue();
}
-/// Returns true iff the shuffle node \p N can be replaced with ADDSUB
-/// operation. If true is returned then the operands of ADDSUB operation
+/// Returns true iff the shuffle node \p N can be replaced with an ADDSUB (or
+/// SUBADD) operation. If true is returned, the operands of that operation
/// are written to the parameters \p Opnd0 and \p Opnd1.
///
-/// We combine shuffle to ADDSUB directly on the abstract vector shuffle nodes
+/// We combine a shuffle to ADDSUB (or SUBADD) directly on the abstract vector shuffle nodes
/// so it is easier to generically match. We also insert dummy vector shuffle
/// nodes for the operands which explicitly discard the lanes which are unused
/// by this operation to try to flow through the rest of the combiner
/// the fact that they're unused.
-static bool isAddSub(SDNode *N, const X86Subtarget &Subtarget,
- SDValue &Opnd0, SDValue &Opnd1) {
+static bool isAddSubOrSubAdd(SDNode *N, const X86Subtarget &Subtarget,
+ SDValue &Opnd0, SDValue &Opnd1,
+ bool matchSubAdd = false) {
EVT VT = N->getValueType(0);
if ((!Subtarget.hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
@@ -29728,12 +29736,15 @@ static bool isAddSub(SDNode *N, const X86Subtarget &Subtarget,
SDValue V1 = N->getOperand(0);
SDValue V2 = N->getOperand(1);
- // We require the first shuffle operand to be the FSUB node, and the second to
- // be the FADD node.
- if (V1.getOpcode() == ISD::FADD && V2.getOpcode() == ISD::FSUB) {
+ unsigned ExpectedOpcode = matchSubAdd ? ISD::FADD : ISD::FSUB;
+ unsigned NextExpectedOpcode = matchSubAdd ? ISD::FSUB : ISD::FADD;
+
+ // We require the first shuffle operand to be the ExpectedOpcode node,
+ // and the second to be the NextExpectedOpcode node.
+ if (V1.getOpcode() == NextExpectedOpcode && V2.getOpcode() == ExpectedOpcode) {
ShuffleVectorSDNode::commuteMask(Mask);
std::swap(V1, V2);
- } else if (V1.getOpcode() != ISD::FSUB || V2.getOpcode() != ISD::FADD)
+ } else if (V1.getOpcode() != ExpectedOpcode || V2.getOpcode() != NextExpectedOpcode)
return false;
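// Illustrative example (v4f32, lane by lane):
//   ADDSUB: { A0-B0, A1+B1, A2-B2, A3+B3 }  // FSUB in even lanes
//   SUBADD: { A0+B0, A1-B1, A2+B2, A3-B3 }  // matchSubAdd == true
// The commute branch above swaps V1/V2 and the mask so the FSUB/FADD
// (or FADD/FSUB) pair always arrives in the expected order.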
// If there are other uses of these operations we can't fold them.
@@ -29767,7 +29778,7 @@ static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDValue Opnd0, Opnd1;
- if (!isAddSub(N, Subtarget, Opnd0, Opnd1))
+ if (!isAddSubOrSubAdd(N, Subtarget, Opnd0, Opnd1))
return SDValue();
EVT VT = N->getValueType(0);
@@ -29775,7 +29786,7 @@ static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N,
// Try to generate X86ISD::FMADDSUB node here.
SDValue Opnd2;
- if (isFMAddSub(Subtarget, DAG, Opnd0, Opnd1, Opnd2, 2))
+ if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, 2))
return DAG.getNode(X86ISD::FMADDSUB, DL, VT, Opnd0, Opnd1, Opnd2);
// Do not generate X86ISD::ADDSUB node for 512-bit types even though
@@ -29787,6 +29798,26 @@ static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N,
return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1);
}
+/// \brief Try to combine a shuffle into a target-specific
+/// mul-sub-add node.
+static SDValue combineShuffleToFMSubAdd(SDNode *N,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ SDValue Opnd0, Opnd1;
+ if (!isAddSubOrSubAdd(N, Subtarget, Opnd0, Opnd1, true))
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ // Try to generate X86ISD::FMSUBADD node here.
+ SDValue Opnd2;
+ if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, 2))
+ return DAG.getNode(X86ISD::FMSUBADD, DL, VT, Opnd0, Opnd1, Opnd2);
+
+ return SDValue();
+}
+
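For reference, a scalar model of the two fused forms the renamed helper now distinguishes. Only the lane parity differs; this is a standalone sketch, not the DAG combiner code:

#include <array>
#include <cassert>

// Scalar model of the two fused forms matched above (4-lane sketch):
// FMADDSUB: even lanes (a*b) - c, odd lanes (a*b) + c.
// FMSUBADD: even lanes (a*b) + c, odd lanes (a*b) - c.
using V4 = std::array<float, 4>;

static V4 fmaddsub(V4 A, V4 B, V4 C) {
  V4 R;
  for (int I = 0; I != 4; ++I)
    R[I] = (I & 1) ? A[I] * B[I] + C[I] : A[I] * B[I] - C[I];
  return R;
}

static V4 fmsubadd(V4 A, V4 B, V4 C) {
  V4 R;
  for (int I = 0; I != 4; ++I)
    R[I] = (I & 1) ? A[I] * B[I] - C[I] : A[I] * B[I] + C[I];
  return R;
}

int main() {
  V4 A{1, 2, 3, 4}, B{1, 1, 1, 1}, C{10, 10, 10, 10};
  assert((fmaddsub(A, B, C) == V4{-9, 12, -7, 14}));
  assert((fmsubadd(A, B, C) == V4{11, -8, 13, -6}));
}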
// We are looking for a shuffle where both sources are concatenated with undef
// and have a width that is half of the output's width. AVX2 has VPERMD/Q, so
// if we can express this as a single-source shuffle, that's preferable.
@@ -29873,11 +29904,14 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// If we have legalized the vector types, look for blends of FADD and FSUB
- // nodes that we can fuse into an ADDSUB node.
+ // nodes that we can fuse into an ADDSUB, FMADDSUB, or FMSUBADD node.
if (TLI.isTypeLegal(VT)) {
if (SDValue AddSub = combineShuffleToAddSubOrFMAddSub(N, Subtarget, DAG))
return AddSub;
+ if (SDValue FMSubAdd = combineShuffleToFMSubAdd(N, Subtarget, DAG))
+ return FMSubAdd;
+
if (SDValue HAddSub = foldShuffleOfHorizOp(N))
return HAddSub;
}
@@ -30181,7 +30215,7 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
// For cases such as (i4 bitcast (v4i1 setcc v4i64 v1, v2))
// sign-extend to a 256-bit operation to avoid truncation.
if (N0->getOpcode() == ISD::SETCC && Subtarget.hasAVX() &&
- N0->getOperand(0)->getValueType(0).is256BitVector()) {
+ N0->getOperand(0).getValueType().is256BitVector()) {
SExtVT = MVT::v4i64;
FPCastVT = MVT::v4f64;
}
@@ -30194,8 +30228,8 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
// 256-bit because the shuffle is cheaper than sign extending the result of
// the compare.
if (N0->getOpcode() == ISD::SETCC && Subtarget.hasAVX() &&
- (N0->getOperand(0)->getValueType(0).is256BitVector() ||
- N0->getOperand(0)->getValueType(0).is512BitVector())) {
+ (N0->getOperand(0).getValueType().is256BitVector() ||
+ N0->getOperand(0).getValueType().is512BitVector())) {
SExtVT = MVT::v8i32;
FPCastVT = MVT::v8f32;
}
@@ -30484,7 +30518,8 @@ static SDValue createPSADBW(SelectionDAG &DAG, const SDValue &Zext0,
return DAG.getNode(X86ISD::PSADBW, DL, SadVT, SadOp0, SadOp1);
}
-// Attempt to replace an min/max v8i16 horizontal reduction with PHMINPOSUW.
+// Attempt to replace a min/max v8i16/v16i8 horizontal reduction with
+// PHMINPOSUW.
static SDValue combineHorizontalMinMaxResult(SDNode *Extract, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// Bail without SSE41.
@@ -30492,7 +30527,7 @@ static SDValue combineHorizontalMinMaxResult(SDNode *Extract, SelectionDAG &DAG,
return SDValue();
EVT ExtractVT = Extract->getValueType(0);
- if (ExtractVT != MVT::i16)
+ if (ExtractVT != MVT::i16 && ExtractVT != MVT::i8)
return SDValue();
// Check for SMAX/SMIN/UMAX/UMIN horizontal reduction patterns.
@@ -30504,7 +30539,7 @@ static SDValue combineHorizontalMinMaxResult(SDNode *Extract, SelectionDAG &DAG,
EVT SrcVT = Src.getValueType();
EVT SrcSVT = SrcVT.getScalarType();
- if (SrcSVT != MVT::i16 || (SrcVT.getSizeInBits() % 128) != 0)
+ if (SrcSVT != ExtractVT || (SrcVT.getSizeInBits() % 128) != 0)
return SDValue();
SDLoc DL(Extract);
@@ -30520,22 +30555,39 @@ static SDValue combineHorizontalMinMaxResult(SDNode *Extract, SelectionDAG &DAG,
SDValue Hi = extractSubVector(MinPos, NumSubElts, DAG, DL, SubSizeInBits);
MinPos = DAG.getNode(BinOp, DL, SrcVT, Lo, Hi);
}
- assert(SrcVT == MVT::v8i16 && "Unexpected value type");
+ assert(((SrcVT == MVT::v8i16 && ExtractVT == MVT::i16) ||
+ (SrcVT == MVT::v16i8 && ExtractVT == MVT::i8)) &&
+ "Unexpected value type");
// PHMINPOSUW applies to UMIN(v8i16), for SMIN/SMAX/UMAX we must apply a mask
// to flip the value accordingly.
SDValue Mask;
+ unsigned MaskEltsBits = ExtractVT.getSizeInBits();
if (BinOp == ISD::SMAX)
- Mask = DAG.getConstant(APInt::getSignedMaxValue(16), DL, SrcVT);
+ Mask = DAG.getConstant(APInt::getSignedMaxValue(MaskEltsBits), DL, SrcVT);
else if (BinOp == ISD::SMIN)
- Mask = DAG.getConstant(APInt::getSignedMinValue(16), DL, SrcVT);
+ Mask = DAG.getConstant(APInt::getSignedMinValue(MaskEltsBits), DL, SrcVT);
else if (BinOp == ISD::UMAX)
- Mask = DAG.getConstant(APInt::getAllOnesValue(16), DL, SrcVT);
+ Mask = DAG.getConstant(APInt::getAllOnesValue(MaskEltsBits), DL, SrcVT);
if (Mask)
MinPos = DAG.getNode(ISD::XOR, DL, SrcVT, Mask, MinPos);
- MinPos = DAG.getNode(X86ISD::PHMINPOS, DL, SrcVT, MinPos);
+ // For v16i8 cases we need to perform UMIN on pairs of byte elements,
+ // shuffling each upper element down and inserting zeros. This means that
+ // the v16i8 UMIN will leave the upper elements as zero, performing the
+ // zero-extension needed for the PHMINPOS.
+ if (ExtractVT == MVT::i8) {
+ SDValue Upper = DAG.getVectorShuffle(
+ SrcVT, DL, MinPos, getZeroVector(MVT::v16i8, Subtarget, DAG, DL),
+ {1, 16, 3, 16, 5, 16, 7, 16, 9, 16, 11, 16, 13, 16, 15, 16});
+ MinPos = DAG.getNode(ISD::UMIN, DL, SrcVT, MinPos, Upper);
+ }
+
+ // Perform the PHMINPOS on a v8i16 vector.
+ MinPos = DAG.getBitcast(MVT::v8i16, MinPos);
+ MinPos = DAG.getNode(X86ISD::PHMINPOS, DL, MVT::v8i16, MinPos);
+ MinPos = DAG.getBitcast(SrcVT, MinPos);
if (Mask)
MinPos = DAG.getNode(ISD::XOR, DL, SrcVT, Mask, MinPos);
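The XOR masks above implement standard identities that reduce SMAX/SMIN/UMAX to the unsigned-min that PHMINPOSUW provides. A scalar check of the 8-bit case (illustrative helper, not an LLVM API):

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <initializer_list>

// The XOR trick maps each reduction onto unsigned-min. For 8-bit lanes:
//   SMAX(x) = 0x7f ^ UMIN(x ^ 0x7f)  (signed order -> reversed unsigned order)
//   SMIN(x) = 0x80 ^ UMIN(x ^ 0x80)  (bias flip: signed order -> unsigned order)
//   UMAX(x) = 0xff ^ UMIN(x ^ 0xff)  (complement reverses unsigned order)
static uint8_t uminWithMask(std::initializer_list<uint8_t> Xs, uint8_t Mask) {
  uint8_t Min = 0xff;
  for (uint8_t X : Xs)
    Min = std::min<uint8_t>(Min, X ^ Mask);
  return Min ^ Mask;
}

int main() {
  std::initializer_list<uint8_t> Xs = {0x05, 0x80, 0x7f, 0xf0};
  assert(uminWithMask(Xs, 0x7f) == 0x7f); // SMAX: 127 beats the negatives
  assert(uminWithMask(Xs, 0x80) == 0x80); // SMIN: -128 is the smallest
  assert(uminWithMask(Xs, 0xff) == 0xf0); // UMAX
}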
@@ -30851,7 +30903,7 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
if (SDValue Cmp = combineHorizontalPredicateResult(N, DAG, Subtarget))
return Cmp;
- // Attempt to replace min/max v8i16 reductions with PHMINPOSUW.
+ // Attempt to replace min/max v8i16/v16i8 reductions with PHMINPOSUW.
if (SDValue MinMax = combineHorizontalMinMaxResult(N, DAG, Subtarget))
return MinMax;
@@ -32555,7 +32607,7 @@ static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG) {
// 1. MOVs can write to a register that differs from source
// 2. MOVs accept memory operands
- if (!VT.isInteger() || VT.isVector() || N1.getOpcode() != ISD::Constant ||
+ if (VT.isVector() || N1.getOpcode() != ISD::Constant ||
N0.getOpcode() != ISD::SHL || !N0.hasOneUse() ||
N0.getOperand(1).getOpcode() != ISD::Constant)
return SDValue();
@@ -32569,11 +32621,11 @@ static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG) {
if (SarConst.isNegative())
return SDValue();
- for (MVT SVT : MVT::integer_valuetypes()) {
+ for (MVT SVT : { MVT::i8, MVT::i16, MVT::i32 }) {
unsigned ShiftSize = SVT.getSizeInBits();
// skipping types without corresponding sext/zext and
// ShlConst that is not one of [56,48,32,24,16]
- if (ShiftSize < 8 || ShiftSize > 64 || ShlConst != Size - ShiftSize)
+ if (ShiftSize >= Size || ShlConst != Size - ShiftSize)
continue;
SDLoc DL(N);
SDValue NN =
@@ -32626,37 +32678,6 @@ static SDValue combineShiftRightLogical(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
-/// \brief Returns a vector of 0s if the node in input is a vector logical
-/// shift by a constant amount which is known to be bigger than or equal
-/// to the vector element size in bits.
-static SDValue performShiftToAllZeros(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
- EVT VT = N->getValueType(0);
-
- if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 &&
- (!Subtarget.hasInt256() ||
- (VT != MVT::v4i64 && VT != MVT::v8i32 && VT != MVT::v16i16)))
- return SDValue();
-
- SDValue Amt = N->getOperand(1);
- SDLoc DL(N);
- if (auto *AmtBV = dyn_cast<BuildVectorSDNode>(Amt))
- if (auto *AmtSplat = AmtBV->getConstantSplatNode()) {
- const APInt &ShiftAmt = AmtSplat->getAPIntValue();
- unsigned MaxAmount =
- VT.getSimpleVT().getScalarSizeInBits();
-
- // SSE2/AVX2 logical shifts always return a vector of 0s
- // if the shift amount is bigger than or equal to
- // the element size. The constant shift amount will be
- // encoded as a 8-bit immediate.
- if (ShiftAmt.trunc(8).uge(MaxAmount))
- return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, DL);
- }
-
- return SDValue();
-}
-
static SDValue combineShift(SDNode* N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -32672,11 +32693,6 @@ static SDValue combineShift(SDNode* N, SelectionDAG &DAG,
if (SDValue V = combineShiftRightLogical(N, DAG))
return V;
- // Try to fold this logical shift into a zero vector.
- if (N->getOpcode() != ISD::SRA)
- if (SDValue V = performShiftToAllZeros(N, DAG, Subtarget))
- return V;
-
return SDValue();
}
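For the record, the semantic the deleted fold relied on, as its comment states: an SSE2/AVX2 vector logical shift by a constant of at least the element width produces zero, with the count encoded as an 8-bit immediate. A scalar model of that semantic, illustrative only:

#include <cassert>
#include <cstdint>

// Scalar model of one v8i16 lane of PSRLW with an immediate count: counts of
// 16 or more yield zero, instead of the undefined behavior a plain C++ shift
// of a 16-bit value would have.
static uint16_t psrlw1(uint16_t X, uint8_t Imm) {
  return Imm >= 16 ? 0 : uint16_t(X >> Imm);
}

int main() {
  assert(psrlw1(0xabcd, 4) == 0x0abc);
  assert(psrlw1(0xabcd, 16) == 0);  // count == element width: all zeros
  assert(psrlw1(0xabcd, 200) == 0); // oversized 8-bit immediate: all zeros
}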
@@ -32996,21 +33012,20 @@ static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG) {
// register. In most cases we actually compare or select YMM-sized registers
// and mixing the two types creates horrible code. This method optimizes
// some of the transition sequences.
+// Even with AVX-512 this is still useful for removing casts around logical
+// operations on vXi1 mask types.
static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
- if (!VT.is256BitVector())
- return SDValue();
+ assert(VT.isVector() && "Expected vector type");
assert((N->getOpcode() == ISD::ANY_EXTEND ||
N->getOpcode() == ISD::ZERO_EXTEND ||
N->getOpcode() == ISD::SIGN_EXTEND) && "Invalid Node");
SDValue Narrow = N->getOperand(0);
- EVT NarrowVT = Narrow->getValueType(0);
- if (!NarrowVT.is128BitVector())
- return SDValue();
+ EVT NarrowVT = Narrow.getValueType();
if (Narrow->getOpcode() != ISD::XOR &&
Narrow->getOpcode() != ISD::AND &&
@@ -33026,12 +33041,12 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
return SDValue();
// The type of the truncated inputs.
- EVT WideVT = N0->getOperand(0)->getValueType(0);
- if (WideVT != VT)
+ if (N0->getOperand(0).getValueType() != VT)
return SDValue();
// The right side has to be a 'trunc' or a constant vector.
- bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE;
+ bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getValueType() == VT;
ConstantSDNode *RHSConstSplat = nullptr;
if (auto *RHSBV = dyn_cast<BuildVectorSDNode>(N1))
RHSConstSplat = RHSBV->getConstantSplatNode();
@@ -33040,37 +33055,31 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (!TLI.isOperationLegalOrPromote(Narrow->getOpcode(), WideVT))
+ if (!TLI.isOperationLegalOrPromote(Narrow->getOpcode(), VT))
return SDValue();
// Set N0 and N1 to hold the inputs to the new wide operation.
N0 = N0->getOperand(0);
if (RHSConstSplat) {
- N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT.getVectorElementType(),
+ N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, VT.getVectorElementType(),
SDValue(RHSConstSplat, 0));
- N1 = DAG.getSplatBuildVector(WideVT, DL, N1);
+ N1 = DAG.getSplatBuildVector(VT, DL, N1);
} else if (RHSTrunc) {
N1 = N1->getOperand(0);
}
// Generate the wide operation.
- SDValue Op = DAG.getNode(Narrow->getOpcode(), DL, WideVT, N0, N1);
+ SDValue Op = DAG.getNode(Narrow->getOpcode(), DL, VT, N0, N1);
unsigned Opcode = N->getOpcode();
switch (Opcode) {
+ default: llvm_unreachable("Unexpected opcode");
case ISD::ANY_EXTEND:
return Op;
- case ISD::ZERO_EXTEND: {
- unsigned InBits = NarrowVT.getScalarSizeInBits();
- APInt Mask = APInt::getAllOnesValue(InBits);
- Mask = Mask.zext(VT.getScalarSizeInBits());
- return DAG.getNode(ISD::AND, DL, VT,
- Op, DAG.getConstant(Mask, DL, VT));
- }
+ case ISD::ZERO_EXTEND:
+ return DAG.getZeroExtendInReg(Op, DL, NarrowVT.getScalarType());
case ISD::SIGN_EXTEND:
return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT,
Op, DAG.getValueType(NarrowVT));
- default:
- llvm_unreachable("Unexpected opcode");
}
}
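getZeroExtendInReg is equivalent to the hand-built all-ones mask the old ZERO_EXTEND arm constructed: it keeps the low narrow bits of each lane. A scalar equivalence check (illustrative helper):

#include <cassert>
#include <cstdint>

// zero-extend-in-reg == AND with an all-ones mask of the narrow width.
static uint32_t zeroExtendInReg(uint32_t X, unsigned NarrowBits) {
  uint32_t Mask = NarrowBits >= 32 ? ~0u : ((1u << NarrowBits) - 1);
  return X & Mask;
}

int main() {
  assert(zeroExtendInReg(0xdeadbeef, 8) == 0xef);
  assert(zeroExtendInReg(0xdeadbeef, 16) == 0xbeef);
}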
@@ -33882,16 +33891,6 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
if (!Subtarget.hasSSE2())
return SDValue();
- if (Subtarget.hasBWI()) {
- if (VT.getSizeInBits() > 512)
- return SDValue();
- } else if (Subtarget.hasAVX2()) {
- if (VT.getSizeInBits() > 256)
- return SDValue();
- } else {
- if (VT.getSizeInBits() > 128)
- return SDValue();
- }
// Detect the following pattern:
//
@@ -33903,7 +33902,6 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
// %6 = trunc <N x i32> %5 to <N x i8>
//
// In AVX512, the last instruction can also be a trunc store.
-
if (In.getOpcode() != ISD::SRL)
return SDValue();
@@ -33924,6 +33922,35 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
return true;
};
+ // Split vectors to legal target size and apply AVG.
+ auto LowerToAVG = [&](SDValue Op0, SDValue Op1) {
+ unsigned NumSubs = 1;
+ if (Subtarget.hasBWI()) {
+ if (VT.getSizeInBits() > 512)
+ NumSubs = VT.getSizeInBits() / 512;
+ } else if (Subtarget.hasAVX2()) {
+ if (VT.getSizeInBits() > 256)
+ NumSubs = VT.getSizeInBits() / 256;
+ } else {
+ if (VT.getSizeInBits() > 128)
+ NumSubs = VT.getSizeInBits() / 128;
+ }
+
+ if (NumSubs == 1)
+ return DAG.getNode(X86ISD::AVG, DL, VT, Op0, Op1);
+
+ SmallVector<SDValue, 4> Subs;
+ EVT SubVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
+ VT.getVectorNumElements() / NumSubs);
+ for (unsigned i = 0; i != NumSubs; ++i) {
+ unsigned Idx = i * SubVT.getVectorNumElements();
+ SDValue LHS = extractSubVector(Op0, Idx, DAG, DL, SubVT.getSizeInBits());
+ SDValue RHS = extractSubVector(Op1, Idx, DAG, DL, SubVT.getSizeInBits());
+ Subs.push_back(DAG.getNode(X86ISD::AVG, DL, SubVT, LHS, RHS));
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs);
+ };
+
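A scalar model of what one lane of X86ISD::AVG (PAVGB) computes; the lambda above additionally splits e.g. a v64i8 request into two v32i8 AVG nodes when only 256-bit registers are legal:

#include <cassert>
#include <cstdint>

// Rounding-up average, computed with one extra bit of headroom so the +1
// cannot overflow.
static uint8_t avgByte(uint8_t A, uint8_t B) {
  return uint8_t((unsigned(A) + unsigned(B) + 1) >> 1);
}

int main() {
  assert(avgByte(1, 2) == 2);       // rounds up
  assert(avgByte(255, 255) == 255); // 9-bit intermediate avoids wraparound
}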
// Check if each element of the vector is left-shifted by one.
auto LHS = In.getOperand(0);
auto RHS = In.getOperand(1);
@@ -33947,8 +33974,7 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
SDValue VecOnes = DAG.getConstant(1, DL, InVT);
Operands[1] = DAG.getNode(ISD::SUB, DL, InVT, Operands[1], VecOnes);
Operands[1] = DAG.getNode(ISD::TRUNCATE, DL, VT, Operands[1]);
- return DAG.getNode(X86ISD::AVG, DL, VT, Operands[0].getOperand(0),
- Operands[1]);
+ return LowerToAVG(Operands[0].getOperand(0), Operands[1]);
}
if (Operands[0].getOpcode() == ISD::ADD)
@@ -33972,8 +33998,7 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
return SDValue();
// The pattern is detected, emit X86ISD::AVG instruction.
- return DAG.getNode(X86ISD::AVG, DL, VT, Operands[0].getOperand(0),
- Operands[1].getOperand(0));
+ return LowerToAVG(Operands[0].getOperand(0), Operands[1].getOperand(0));
}
return SDValue();
@@ -35872,14 +35897,8 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
if (SDValue NewCMov = combineToExtendCMOV(N, DAG))
return NewCMov;
- if (!DCI.isBeforeLegalizeOps()) {
- if (InVT == MVT::i1) {
- SDValue Zero = DAG.getConstant(0, DL, VT);
- SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
- return DAG.getSelect(DL, VT, N0, AllOnes, Zero);
- }
+ if (!DCI.isBeforeLegalizeOps())
return SDValue();
- }
if (InVT == MVT::i1 && N0.getOpcode() == ISD::XOR &&
isAllOnesConstant(N0.getOperand(1)) && N0.hasOneUse()) {
@@ -35897,7 +35916,7 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget))
return V;
- if (Subtarget.hasAVX() && VT.is256BitVector())
+ if (VT.isVector())
if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget))
return R;
@@ -36089,7 +36108,7 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget))
return V;
- if (VT.is256BitVector())
+ if (VT.isVector())
if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget))
return R;
@@ -36244,39 +36263,54 @@ static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDLoc DL(N);
- // Pre-shrink oversized index elements to avoid triggering scalarization.
- if (DCI.isBeforeLegalize()) {
+ if (DCI.isBeforeLegalizeOps()) {
SDValue Index = N->getOperand(4);
- if (Index.getScalarValueSizeInBits() > 64) {
- EVT IndexVT = EVT::getVectorVT(*DAG.getContext(), MVT::i64,
+ // Remove any sign extends from 32 bits or smaller to larger than 32 bits.
+ // Only do this before LegalizeOps in case we need the sign extend for
+ // legalization.
+ if (Index.getOpcode() == ISD::SIGN_EXTEND) {
+ if (Index.getScalarValueSizeInBits() > 32 &&
+ Index.getOperand(0).getScalarValueSizeInBits() <= 32) {
+ SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
+ NewOps[4] = Index.getOperand(0);
+ DAG.UpdateNodeOperands(N, NewOps);
+ // The original sign extend has fewer users; add it back to the worklist
+ // in case it needs to be removed.
+ DCI.AddToWorklist(Index.getNode());
+ DCI.AddToWorklist(N);
+ return SDValue(N, 0);
+ }
+ }
+
+ // Make sure the index is either i32 or i64
+ unsigned ScalarSize = Index.getScalarValueSizeInBits();
+ if (ScalarSize != 32 && ScalarSize != 64) {
+ MVT EltVT = ScalarSize > 32 ? MVT::i64 : MVT::i32;
+ EVT IndexVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
Index.getValueType().getVectorNumElements());
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
+ Index = DAG.getSExtOrTrunc(Index, DL, IndexVT);
SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
- NewOps[4] = Trunc;
+ NewOps[4] = Index;
DAG.UpdateNodeOperands(N, NewOps);
DCI.AddToWorklist(N);
return SDValue(N, 0);
}
- }
- // Try to remove sign extends from i32 to i64 on the index.
- // Only do this before legalize in case we are relying on it for
- // legalization.
- // TODO: We should maybe remove any sign extend once we learn how to sign
- // extend narrow index during lowering.
- if (DCI.isBeforeLegalizeOps()) {
- SDValue Index = N->getOperand(4);
- if (Index.getScalarValueSizeInBits() == 64 &&
- Index.getOpcode() == ISD::SIGN_EXTEND &&
+ // Try to remove zero extends from 32->64 if we know the sign bit of
+ // the input is zero.
+ if (Index.getOpcode() == ISD::ZERO_EXTEND &&
+ Index.getScalarValueSizeInBits() == 64 &&
Index.getOperand(0).getScalarValueSizeInBits() == 32) {
- SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
- NewOps[4] = Index.getOperand(0);
- DAG.UpdateNodeOperands(N, NewOps);
- // The original sign extend has less users, add back to worklist in case
- // it needs to be removed.
- DCI.AddToWorklist(Index.getNode());
- DCI.AddToWorklist(N);
- return SDValue(N, 0);
+ if (DAG.SignBitIsZero(Index.getOperand(0))) {
+ SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
+ NewOps[4] = Index.getOperand(0);
+ DAG.UpdateNodeOperands(N, NewOps);
+ // The original zero extend has fewer users; add it back to the worklist
+ // in case it needs to be removed.
+ DCI.AddToWorklist(Index.getNode());
+ DCI.AddToWorklist(N);
+ return SDValue(N, 0);
+ }
}
}
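The DAG.SignBitIsZero guard is what makes dropping the zero extend sound: with a clear sign bit, sign- and zero-extension of the i32 index agree. A quick scalar check:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t Idx = 0x7fffffff; // sign bit clear: zext and sext to i64 agree
  assert(uint64_t(Idx) == uint64_t(int64_t(int32_t(Idx))));
  uint32_t Neg = 0x80000000; // sign bit set: they differ
  assert(uint64_t(Neg) != uint64_t(int64_t(int32_t(Neg))));
}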
@@ -36288,6 +36322,7 @@ static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG,
SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
NewOps[2] = Mask.getOperand(0);
DAG.UpdateNodeOperands(N, NewOps);
+ return SDValue(N, 0);
}
// With AVX2 we only demand the upper bit of the mask.
@@ -36356,7 +36391,7 @@ static SDValue combineVectorCompareAndMaskUnaryOp(SDNode *N,
EVT VT = N->getValueType(0);
if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND ||
N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC ||
- VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits())
+ VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
return SDValue();
// Now check that the other operand of the AND is a constant. We could
diff --git a/lib/Target/X86/X86Instr3DNow.td b/lib/Target/X86/X86Instr3DNow.td
index 2acd8d17beb2..0d30b7d47f3e 100644
--- a/lib/Target/X86/X86Instr3DNow.td
+++ b/lib/Target/X86/X86Instr3DNow.td
@@ -116,14 +116,30 @@ defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw", I3DNOW_MISC_FUNC_ITINS, 1>;
def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms",
[(int_x86_mmx_femms)], IIC_MMX_EMMS>;
+// If PREFETCHWT1 is supported, we want to use it for everything but T0.
+def PrefetchWLevel : PatFrag<(ops), (i32 imm), [{
+ return N->getSExtValue() == 3 || !Subtarget->hasPREFETCHWT1();
+}]>;
+
+// Use PREFETCHWT1 for NTA, T2, T1.
+def PrefetchWT1Level : ImmLeaf<i32, [{
+ return Imm < 3;
+}]>;
+
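Taken together, the two fragments above partition write prefetches by the llvm.prefetch locality hint (3 = T0 down to 0 = NTA). A standalone C++ sketch of that partition, assuming PREFETCHW itself is available (names are illustrative, not TableGen entities):

#include <cassert>
#include <string>

// PREFETCHWT1 takes every level except T0 when the subtarget has it;
// PREFETCHW covers the rest.
static std::string selectWritePrefetch(int Locality, bool HasPREFETCHWT1) {
  if (HasPREFETCHWT1 && Locality < 3) // PrefetchWT1Level: Imm < 3
    return "prefetchwt1";
  return "prefetchw";                 // PrefetchWLevel: level 3 or no WT1
}

int main() {
  assert(selectWritePrefetch(3, true) == "prefetchw");   // T0 stays on W
  assert(selectWritePrefetch(1, true) == "prefetchwt1"); // T2 uses WT1
  assert(selectWritePrefetch(1, false) == "prefetchw");  // no WT1: fall back
}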
let SchedRW = [WriteLoad] in {
+let Predicates = [Has3DNow, NoSSEPrefetch] in
def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i8mem:$addr),
"prefetch\t$addr",
- [(prefetch addr:$addr, (i32 0), imm, (i32 1))],
+ [(prefetch addr:$addr, imm, imm, (i32 1))],
IIC_SSE_PREFETCH>;
+
def PREFETCHW : I<0x0D, MRM1m, (outs), (ins i8mem:$addr), "prefetchw\t$addr",
- [(prefetch addr:$addr, (i32 1), (i32 3), (i32 1))],
+ [(prefetch addr:$addr, (i32 1), (i32 PrefetchWLevel), (i32 1))],
IIC_SSE_PREFETCH>, TB, Requires<[HasPrefetchW]>;
+
+def PREFETCHWT1 : I<0x0D, MRM2m, (outs), (ins i8mem:$addr), "prefetchwt1\t$addr",
+ [(prefetch addr:$addr, (i32 1), (i32 PrefetchWT1Level), (i32 1))],
+ IIC_SSE_PREFETCH>, TB, Requires<[HasPREFETCHWT1]>;
}
// "3DNowA" instructions
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 2a6ed02fadab..0b266e5591b4 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -349,8 +349,9 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
let TSFlags{54} = hasEVEX_RC;
}
-class PseudoI<dag oops, dag iops, list<dag> pattern>
- : X86Inst<0, Pseudo, NoImm, oops, iops, "", NoItinerary> {
+class PseudoI<dag oops, dag iops, list<dag> pattern,
+ InstrItinClass itin = NoItinerary>
+ : X86Inst<0, Pseudo, NoImm, oops, iops, "", itin> {
let Pattern = pattern;
}
@@ -423,9 +424,8 @@ class FPI<bits<8> o, Format F, dag outs, dag ins, string asm,
// FpI_ - Floating Point Pseudo Instruction template. Not Predicated.
class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern,
InstrItinClass itin = NoItinerary>
- : X86Inst<0, Pseudo, NoImm, outs, ins, "", itin> {
+ : PseudoI<outs, ins, pattern, itin> {
let FPForm = fp;
- let Pattern = pattern;
}
// Templates for instructions that use a 16- or 32-bit segmented address as
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 42e89cb4831d..fdf3e73e4fcd 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -874,7 +874,10 @@ def HasADX : Predicate<"Subtarget->hasADX()">;
def HasSHA : Predicate<"Subtarget->hasSHA()">;
def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">;
def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">;
+def HasSSEPrefetch : Predicate<"Subtarget->hasSSEPrefetch()">;
+def NoSSEPrefetch : Predicate<"!Subtarget->hasSSEPrefetch()">;
def HasPrefetchW : Predicate<"Subtarget->hasPRFCHW()">;
+def HasPREFETCHWT1 : Predicate<"Subtarget->hasPREFETCHWT1()">;
def HasLAHFSAHF : Predicate<"Subtarget->hasLAHFSAHF()">;
def HasMWAITX : Predicate<"Subtarget->hasMWAITX()">;
def HasCLZERO : Predicate<"Subtarget->hasCLZERO()">;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index a86a0bfc168d..b48fa1841979 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3487,7 +3487,7 @@ let Predicates = [UseSSE2] in {
//===----------------------------------------------------------------------===//
// Prefetch intrinsic.
-let Predicates = [HasSSE1], SchedRW = [WriteLoad] in {
+let Predicates = [HasSSEPrefetch], SchedRW = [WriteLoad] in {
def PREFETCHT0 : I<0x18, MRM1m, (outs), (ins i8mem:$src),
"prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))],
IIC_SSE_PREFETCH>, TB;
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index 1e04997ad294..e131f1a1e4bd 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -89,8 +89,9 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
// Check to see if there is a specialized entry-point for memory zeroing.
ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Val);
- if (const char *bzeroEntry = ValC &&
- ValC->isNullValue() ? Subtarget.getBZeroEntry() : nullptr) {
+ if (const char *bzeroName = (ValC && ValC->isNullValue())
+ ? DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO)
+ : nullptr) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout());
Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
@@ -106,7 +107,7 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
CLI.setDebugLoc(dl)
.setChain(Chain)
.setLibCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol(bzeroEntry, IntPtr),
+ DAG.getExternalSymbol(bzeroName, IntPtr),
std::move(Args))
.setDiscardResult();
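The subtarget hook is replaced by a lookup in the runtime-library call table; a target that wants a bzero-style entry point registers a name for RTLIB::BZERO (Darwin's __bzero, per the hook removed below). A minimal model of that lookup, with illustrative types rather than the real RTLIB machinery:

#include <cassert>
#include <map>
#include <string>

// Targets that have a bzero-style entry point register a name for the BZERO
// libcall; everyone else leaves it unset and plain memset is used.
enum Libcall { BZERO };

struct LibcallTable {
  std::map<Libcall, std::string> Names;
  const char *getLibcallName(Libcall LC) const {
    auto It = Names.find(LC);
    return It == Names.end() ? nullptr : It->second.c_str();
  }
};

int main() {
  LibcallTable Generic; // no bzero registered: fall back to memset
  assert(Generic.getLibcallName(BZERO) == nullptr);
  LibcallTable Darwin{{{BZERO, "__bzero"}}}; // e.g. Darwin registers one
  assert(std::string(Darwin.getLibcallName(BZERO)) == "__bzero");
}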
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 8b08766b6171..ad023623142f 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -174,28 +174,6 @@ X86Subtarget::classifyGlobalFunctionReference(const GlobalValue *GV,
return X86II::MO_NO_FLAG;
}
-/// This function returns the name of a function which has an interface like
-/// the non-standard bzero function, if such a function exists on the
-/// current subtarget and it is considered preferable over memset with zero
-/// passed as the second argument. Otherwise it returns null.
-const char *X86Subtarget::getBZeroEntry() const {
- // Darwin 10 has a __bzero entry point for this purpose.
- if (getTargetTriple().isMacOSX() &&
- !getTargetTriple().isMacOSXVersionLT(10, 6))
- return "__bzero";
-
- return nullptr;
-}
-
-bool X86Subtarget::hasSinCos() const {
- if (getTargetTriple().isMacOSX()) {
- return !getTargetTriple().isMacOSXVersionLT(10, 9) && is64Bit();
- } else if (getTargetTriple().isOSFuchsia()) {
- return true;
- }
- return false;
-}
-
/// Return true if the subtarget allows calls to immediate address.
bool X86Subtarget::isLegalToCallImmediateAddr() const {
// FIXME: I386 PE/COFF supports PC relative calls using IMAGE_REL_I386_REL32
@@ -324,6 +302,7 @@ void X86Subtarget::initializeEnvironment() {
HasVNNI = false;
HasBITALG = false;
HasSHA = false;
+ HasPREFETCHWT1 = false;
HasPRFCHW = false;
HasRDSEED = false;
HasLAHFSAHF = false;
@@ -342,6 +321,7 @@ void X86Subtarget::initializeEnvironment() {
HasSSEUnalignedMem = false;
HasCmpxchg16b = false;
UseLeaForSP = false;
+ HasFastVariableShuffle = false;
HasFastPartialYMMorZMMWrite = false;
HasFastGather = false;
HasFastScalarFSQRT = false;
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index be4d46c470de..c9435890fc1f 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -201,7 +201,7 @@ protected:
bool HasCLZERO;
/// Processor has Prefetch with intent to Write instruction
- bool HasPFPREFETCHWT1;
+ bool HasPREFETCHWT1;
/// True if SHLD instructions are slow.
bool IsSHLDSlow;
@@ -228,6 +228,10 @@ protected:
/// the stack pointer. This is an optimization for Intel Atom processors.
bool UseLeaForSP;
+ /// True if it is preferable to combine to a single shuffle using a variable
+ /// mask over multiple fixed shuffles.
+ bool HasFastVariableShuffle;
+
/// True if there is no performance penalty to writing only the lower parts
/// of a YMM or ZMM register without clearing the upper part.
bool HasFastPartialYMMorZMMWrite;
@@ -513,7 +517,14 @@ public:
bool hasRTM() const { return HasRTM; }
bool hasADX() const { return HasADX; }
bool hasSHA() const { return HasSHA; }
- bool hasPRFCHW() const { return HasPRFCHW; }
+ bool hasPRFCHW() const { return HasPRFCHW || HasPREFETCHWT1; }
+ bool hasPREFETCHWT1() const { return HasPREFETCHWT1; }
+ bool hasSSEPrefetch() const {
+ // We implicitly enable these when we have a write prefetch supporting a
+ // cache level (PREFETCHWT1) OR if we have prfchw, but don't already have
+ // a read prefetch from 3dnow.
+ return hasSSE1() || (hasPRFCHW() && !has3DNow()) || hasPREFETCHWT1();
+ }
bool hasRDSEED() const { return HasRDSEED; }
bool hasLAHFSAHF() const { return HasLAHFSAHF; }
bool hasMWAITX() const { return HasMWAITX; }
@@ -527,6 +538,9 @@ public:
bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }
bool hasCmpxchg16b() const { return HasCmpxchg16b; }
bool useLeaForSP() const { return UseLeaForSP; }
+ bool hasFastVariableShuffle() const {
+ return HasFastVariableShuffle;
+ }
bool hasFastPartialYMMorZMMWrite() const {
return HasFastPartialYMMorZMMWrite;
}
@@ -682,17 +696,6 @@ public:
/// Return true if the subtarget allows calls to immediate address.
bool isLegalToCallImmediateAddr() const;
- /// This function returns the name of a function which has an interface
- /// like the non-standard bzero function, if such a function exists on
- /// the current subtarget and it is considered prefereable over
- /// memset with zero passed as the second argument. Otherwise it
- /// returns null.
- const char *getBZeroEntry() const;
-
- /// This function returns true if the target has sincos() routine in its
- /// compiler runtime or math libraries.
- bool hasSinCos() const;
-
/// Enable the MachineScheduler pass for all X86 subtargets.
bool enableMachineScheduler() const override { return true; }
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index ea8c9862230e..e95e6ecae091 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -281,10 +281,9 @@ UseVZeroUpper("x86-use-vzeroupper", cl::Hidden,
// X86 TTI query.
//===----------------------------------------------------------------------===//
-TargetIRAnalysis X86TargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis([this](const Function &F) {
- return TargetTransformInfo(X86TTIImpl(this, F));
- });
+TargetTransformInfo
+X86TargetMachine::getTargetTransformInfo(const Function &F) {
+ return TargetTransformInfo(X86TTIImpl(this, F));
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index 952bd1321ff9..5b21cd82b5b1 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -45,7 +45,7 @@ public:
// attributes of each function.
const X86Subtarget *getSubtargetImpl() const = delete;
- TargetIRAnalysis getTargetIRAnalysis() override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) override;
// Set up the pass pipeline.
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index 3aa7187e0cd1..38925bfd51b0 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -108,8 +108,7 @@ extern "C" void LLVMInitializeXCoreTarget() {
RegisterTargetMachine<XCoreTargetMachine> X(getTheXCoreTarget());
}
-TargetIRAnalysis XCoreTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis([this](const Function &F) {
- return TargetTransformInfo(XCoreTTIImpl(this, F));
- });
+TargetTransformInfo
+XCoreTargetMachine::getTargetTransformInfo(const Function &F) {
+ return TargetTransformInfo(XCoreTTIImpl(this, F));
}
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index 5baa3524d2a6..965b9b2c4d65 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -43,7 +43,7 @@ public:
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- TargetIRAnalysis getTargetIRAnalysis() override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) override;
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index 683655f1f68b..a9cfd8ded6fb 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -710,7 +710,7 @@ PartialInlinerImpl::computeOutliningInfo(Function *F) {
// Check if there is PGO data or user annotated branch data:
static bool hasProfileData(Function *F, FunctionOutliningInfo *OI) {
- if (F->getEntryCount())
+ if (F->hasProfileData())
return true;
// Now check if any of the entry block has MD_prof data:
for (auto *E : OI->Entries) {
@@ -863,6 +863,7 @@ int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
case Instruction::GetElementPtr:
if (cast<GetElementPtrInst>(I)->hasAllZeroIndices())
continue;
+ break;
default:
break;
}
@@ -1273,7 +1274,7 @@ std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) {
// Only try to outline cold regions if we have a profile summary, which
// implies we have profiling information.
- if (PSI->hasProfileSummary() && F->getEntryCount().hasValue() &&
+ if (PSI->hasProfileSummary() && F->hasProfileData() &&
!DisableMultiRegionPartialInline) {
std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
computeOutliningColdRegionsInfo(F);
@@ -1379,10 +1380,10 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
Cloner.ClonedFunc->user_end());
DenseMap<User *, uint64_t> CallSiteToProfCountMap;
- if (Cloner.OrigFunc->getEntryCount())
+ auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
+ if (CalleeEntryCount)
computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
- auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
uint64_t CalleeEntryCountV = (CalleeEntryCount ? *CalleeEntryCount : 0);
bool AnyInline = false;
diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp
index f0e781b9d923..7086c2eb52c4 100644
--- a/lib/Transforms/IPO/SampleProfile.cpp
+++ b/lib/Transforms/IPO/SampleProfile.cpp
@@ -1583,7 +1583,10 @@ bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
}
bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) {
- F.setEntryCount(0);
+ // Initialize the entry count to -1, which will be treated conservatively
+ // by getEntryCount as the same as unknown (None). If we have samples this
+ // will be overwritten in emitAnnotations.
+ F.setEntryCount(-1);
std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
if (AM) {
auto &FAM =
diff --git a/lib/Transforms/IPO/WholeProgramDevirt.cpp b/lib/Transforms/IPO/WholeProgramDevirt.cpp
index ec56f0cde25d..5fbb001216a3 100644
--- a/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -1346,6 +1346,7 @@ void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) {
Constant *Bit = importConstant(Slot, CSByConstantArg.first, "bit", Int8Ty,
ResByArg.Bit);
applyVirtualConstProp(CSByConstantArg.second, "", Byte, Bit);
+ break;
}
default:
break;
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index aa055121e710..a088d447337f 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -4394,6 +4394,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
cast<CallInst>(Caller)->getCallingConv());
cast<CallInst>(NewCaller)->setAttributes(NewPAL);
}
+ NewCaller->setDebugLoc(Caller->getDebugLoc());
return NewCaller;
}
diff --git a/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index 2a25423e04bd..8e2833d22032 100644
--- a/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -80,6 +80,11 @@ static cl::opt<bool> ClInstrumentAtomics(
cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
cl::init(true));
+static cl::opt<bool> ClRecover(
+ "hwasan-recover",
+ cl::desc("Enable recovery mode (continue-after-error)."),
+ cl::Hidden, cl::init(false));
+
namespace {
/// \brief An instrumentation pass implementing detection of addressability bugs
@@ -89,7 +94,8 @@ public:
// Pass identification, replacement for typeid.
static char ID;
- HWAddressSanitizer() : FunctionPass(ID) {}
+ HWAddressSanitizer(bool Recover = false)
+ : FunctionPass(ID), Recover(Recover || ClRecover) {}
StringRef getPassName() const override { return "HWAddressSanitizer"; }
@@ -109,6 +115,8 @@ private:
LLVMContext *C;
Type *IntptrTy;
+ bool Recover;
+
Function *HwasanCtorFunction;
Function *HwasanMemoryAccessCallback[2][kNumberOfAccessSizes];
@@ -126,8 +134,8 @@ INITIALIZE_PASS_END(
HWAddressSanitizer, "hwasan",
"HWAddressSanitizer: detect memory bugs using tagged addressing.", false, false)
-FunctionPass *llvm::createHWAddressSanitizerPass() {
- return new HWAddressSanitizer();
+FunctionPass *llvm::createHWAddressSanitizerPass(bool Recover) {
+ return new HWAddressSanitizer(Recover);
}
/// \brief Module-level initialization.
@@ -156,10 +164,11 @@ void HWAddressSanitizer::initializeCallbacks(Module &M) {
IRBuilder<> IRB(*C);
for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
const std::string TypeStr = AccessIsWrite ? "store" : "load";
+ const std::string EndingStr = Recover ? "_noabort" : "";
HwasanMemoryAccessCallbackSized[AccessIsWrite] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- ClMemoryAccessCallbackPrefix + TypeStr,
+ ClMemoryAccessCallbackPrefix + TypeStr + EndingStr,
FunctionType::get(IRB.getVoidTy(), {IntptrTy, IntptrTy}, false)));
for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
@@ -167,7 +176,7 @@ void HWAddressSanitizer::initializeCallbacks(Module &M) {
HwasanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
ClMemoryAccessCallbackPrefix + TypeStr +
- itostr(1ULL << AccessSizeIndex),
+ itostr(1ULL << AccessSizeIndex) + EndingStr,
FunctionType::get(IRB.getVoidTy(), {IntptrTy}, false)));
}
}
@@ -246,14 +255,16 @@ void HWAddressSanitizer::instrumentMemAccessInline(Value *PtrLong, bool IsWrite,
Value *TagMismatch = IRB.CreateICmpNE(PtrTag, MemTag);
TerminatorInst *CheckTerm =
- SplitBlockAndInsertIfThen(TagMismatch, InsertBefore, false,
+ SplitBlockAndInsertIfThen(TagMismatch, InsertBefore, !Recover,
MDBuilder(*C).createBranchWeights(1, 100000));
IRB.SetInsertPoint(CheckTerm);
// The signal handler will find the data address in x0.
InlineAsm *Asm = InlineAsm::get(
FunctionType::get(IRB.getVoidTy(), {PtrLong->getType()}, false),
- "hlt #" + itostr(0x100 + IsWrite * 0x10 + AccessSizeIndex), "{x0}",
+ "hlt #" +
+ itostr(0x100 + Recover * 0x20 + IsWrite * 0x10 + AccessSizeIndex),
+ "{x0}",
/*hasSideEffects=*/true);
IRB.CreateCall(Asm, PtrLong);
}
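The hlt immediate now encodes the recover bit alongside the access kind and size; a quick check of the packing arithmetic used above:

#include <cassert>

// 0x100 base, +0x20 for recoverable checks, +0x10 for writes, plus the
// log2 access-size index.
static int hltImm(bool Recover, bool IsWrite, unsigned AccessSizeIndex) {
  return 0x100 + Recover * 0x20 + IsWrite * 0x10 + AccessSizeIndex;
}

int main() {
  assert(hltImm(false, false, 0) == 0x100); // 1-byte load, aborting
  assert(hltImm(false, true, 3) == 0x113);  // 8-byte store, aborting
  assert(hltImm(true, false, 2) == 0x122);  // 4-byte load, recoverable
}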
diff --git a/lib/Transforms/Scalar/CallSiteSplitting.cpp b/lib/Transforms/Scalar/CallSiteSplitting.cpp
index d8c408035038..207243231aad 100644
--- a/lib/Transforms/Scalar/CallSiteSplitting.cpp
+++ b/lib/Transforms/Scalar/CallSiteSplitting.cpp
@@ -13,10 +13,11 @@
// threading, or IPA-CP based function cloning, etc.).
// As of now we support two cases :
//
-// 1) If a call site is dominated by an OR condition and if any of its arguments
-// are predicated on this OR condition, try to split the condition with more
-// constrained arguments. For example, in the code below, we try to split the
-// call site since we can predicate the argument(ptr) based on the OR condition.
+// 1) Try to split a call-site with constrained arguments, if any constraints
+// on any argument can be found by following the single predecessors of all
+// the call-site's predecessors. Currently this pass only handles call-sites
+// with 2 predecessors. For example, in the code below, we try to split the
+// call-site since we can predicate the argument (ptr) based on the OR
+// condition.
//
// Split from :
// if (!ptr || c)
@@ -200,16 +201,15 @@ static bool canSplitCallSite(CallSite CS) {
}
/// Return true if the CS is split into its new predecessors which are directly
-/// hooked to each of its orignial predecessors pointed by PredBB1 and PredBB2.
-/// In OR predicated case, PredBB1 will point the header, and PredBB2 will point
-/// to the second compare block. CallInst1 and CallInst2 will be the new
-/// call-sites placed in the new predecessors split for PredBB1 and PredBB2,
-/// repectively. Therefore, CallInst1 will be the call-site placed
-/// between Header and Tail, and CallInst2 will be the call-site between TBB and
-/// Tail. For example, in the IR below with an OR condition, the call-site can
-/// be split
+/// hooked to each of its original predecessors pointed by PredBB1 and PredBB2.
+/// CallInst1 and CallInst2 will be the new call-sites placed in the new
+/// predecessors split for PredBB1 and PredBB2, respectively.
+/// For example, in the IR below with an OR condition, the call-site can
+/// be split. Assuming PredBB1=Header and PredBB2=TBB, CallInst1 will be the
+/// call-site placed between Header and Tail, and CallInst2 will be the
+/// call-site between TBB and Tail.
///
-/// from :
+/// From :
///
/// Header:
/// %c = icmp eq i32* %a, null
@@ -237,9 +237,9 @@ static bool canSplitCallSite(CallSite CS) {
/// Tail:
/// %p = phi i1 [%ca1, %Tail-split1],[%ca2, %Tail-split2]
///
-/// Note that for an OR predicated case, CallInst1 and CallInst2 should be
-/// created with more constrained arguments in
-/// createCallSitesOnOrPredicatedArgument().
+/// Note that in case any arguments at the call-site are constrained by its
+/// predecessors, new call-sites with more constrained arguments will be
+/// created in createCallSitesOnPredicatedArgument().
static void splitCallSite(CallSite CS, BasicBlock *PredBB1, BasicBlock *PredBB2,
Instruction *CallInst1, Instruction *CallInst2) {
Instruction *Instr = CS.getInstruction();
@@ -332,18 +332,10 @@ static bool tryToSplitOnPHIPredicatedArgument(CallSite CS) {
splitCallSite(CS, Preds[0], Preds[1], nullptr, nullptr);
return true;
}
-// Check if one of the predecessors is a single predecessors of the other.
-// This is a requirement for control flow modeling an OR. HeaderBB points to
-// the single predecessor and OrBB points to other node. HeaderBB potentially
-// contains the first compare of the OR and OrBB the second.
-static bool isOrHeader(BasicBlock *HeaderBB, BasicBlock *OrBB) {
- return OrBB->getSinglePredecessor() == HeaderBB &&
- HeaderBB->getTerminator()->getNumSuccessors() == 2;
-}
-static bool tryToSplitOnOrPredicatedArgument(CallSite CS) {
+static bool tryToSplitOnPredicatedArgument(CallSite CS) {
auto Preds = getTwoPredecessors(CS.getInstruction()->getParent());
- if (!isOrHeader(Preds[0], Preds[1]) && !isOrHeader(Preds[1], Preds[0]))
+ if (Preds[0] == Preds[1])
return false;
SmallVector<std::pair<ICmpInst *, unsigned>, 2> C1, C2;
@@ -362,7 +354,7 @@ static bool tryToSplitOnOrPredicatedArgument(CallSite CS) {
static bool tryToSplitCallSite(CallSite CS) {
if (!CS.arg_size() || !canSplitCallSite(CS))
return false;
- return tryToSplitOnOrPredicatedArgument(CS) ||
+ return tryToSplitOnPredicatedArgument(CS) ||
tryToSplitOnPHIPredicatedArgument(CS);
}
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 6b0377e0ecb3..1476f7850cf0 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -282,7 +282,7 @@ bool JumpThreading::runOnFunction(Function &F) {
auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
std::unique_ptr<BlockFrequencyInfo> BFI;
std::unique_ptr<BranchProbabilityInfo> BPI;
- bool HasProfileData = F.getEntryCount().hasValue();
+ bool HasProfileData = F.hasProfileData();
if (HasProfileData) {
LoopInfo LI{DominatorTree(F)};
BPI.reset(new BranchProbabilityInfo(F, LI, TLI));
@@ -307,8 +307,7 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
std::unique_ptr<BlockFrequencyInfo> BFI;
std::unique_ptr<BranchProbabilityInfo> BPI;
- bool HasProfileData = F.getEntryCount().hasValue();
- if (HasProfileData) {
+ if (F.hasProfileData()) {
LoopInfo LI{DominatorTree(F)};
BPI.reset(new BranchProbabilityInfo(F, LI, &TLI));
BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
@@ -1333,6 +1332,20 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// code size.
BasicBlock *UnavailablePred = nullptr;
+ // If the value is unavailable in one of the predecessors, we will end up
+ // inserting a new instruction into it. This is only valid if all the
+ // instructions before LI are guaranteed to transfer execution to their
+ // successors, or if LI is safe to speculate.
+ // TODO: If this logic becomes more complex, and we need to perform PRE
+ // insertion farther than to a predecessor, we should reuse the code from
+ // GVN's PRE. It requires dominator tree analysis, so for this simple case
+ // it would be overkill.
+ if (PredsScanned.size() != AvailablePreds.size() &&
+ !isSafeToSpeculativelyExecute(LI))
+ for (auto I = LoadBB->begin(); &*I != LI; ++I)
+ if (!isGuaranteedToTransferExecutionToSuccessor(&*I))
+ return false;
+
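The new guard only permits PRE insertion when executing the load early cannot introduce a fault on a path where it would not have run. A small model of the predicate, with illustrative types rather than the LLVM helpers:

#include <cassert>
#include <vector>

// PRE insertion is allowed only if the load is speculatable, or every
// instruction before it in the block is guaranteed to transfer execution to
// its successor (no calls that may throw or diverge).
struct Inst { bool TransfersToSuccessor; };

static bool canPREInsertLoad(const std::vector<Inst> &Before,
                             bool LoadIsSpeculatable, bool AllPredsAvailable) {
  if (AllPredsAvailable || LoadIsSpeculatable)
    return true;
  for (const Inst &I : Before)
    if (!I.TransfersToSuccessor)
      return false;
  return true;
}

int main() {
  std::vector<Inst> Throwing{{false}};
  assert(!canPREInsertLoad(Throwing, false, false)); // rejected
  assert(canPREInsertLoad(Throwing, true, false));   // speculatable load: ok
  assert(canPREInsertLoad({{true}}, false, false));  // benign prefix: ok
}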
// If there is exactly one predecessor where the value is unavailable, the
// already computed 'OneUnavailablePred' block is it. If it ends in an
// unconditional branch, we know that it isn't a critical edge.
diff --git a/lib/Transforms/Scalar/LoopSink.cpp b/lib/Transforms/Scalar/LoopSink.cpp
index c9d55b4594fe..430a7085d93f 100644
--- a/lib/Transforms/Scalar/LoopSink.cpp
+++ b/lib/Transforms/Scalar/LoopSink.cpp
@@ -247,7 +247,7 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
// Enable LoopSink only when runtime profile is available.
// With static profile, the sinking decision may be sub-optimal.
- if (!Preheader->getParent()->getEntryCount())
+ if (!Preheader->getParent()->hasProfileData())
return false;
const BlockFrequency PreheaderFreq = BFI.getBlockFreq(Preheader);
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 7b1d6446a24a..15e7da5e1a7a 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -882,7 +882,7 @@ static bool computeUnrollCount(
}
// Check if the runtime trip count is too small when profile is available.
- if (L->getHeader()->getParent()->getEntryCount()) {
+ if (L->getHeader()->getParent()->hasProfileData()) {
if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
if (*ProfileTripCount < FlatLoopTripCountThreshold)
return false;
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 9c870b42a747..6af3fef963dc 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -476,22 +476,33 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
Alignment = DL.getABITypeAlignment(EltType);
}
- AMemSet =
- Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
+ // Remember the debug location.
+ DebugLoc Loc;
+ if (!Range.TheStores.empty())
+ Loc = Range.TheStores[0]->getDebugLoc();
DEBUG(dbgs() << "Replace stores:\n";
for (Instruction *SI : Range.TheStores)
- dbgs() << *SI << '\n';
- dbgs() << "With: " << *AMemSet << '\n');
-
- if (!Range.TheStores.empty())
- AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
+ dbgs() << *SI << '\n');
// Zap all the stores.
for (Instruction *SI : Range.TheStores) {
MD->removeInstruction(SI);
SI->eraseFromParent();
}
+
+ // Create the memset after removing the stores, so that if there are any
+ // cached non-local dependencies on the removed instructions in
+ // MemoryDependenceAnalysis, the cache entries are updated to "dirty"
+ // entries pointing below the memset, so subsequent queries include the
+ // memset.
+ AMemSet =
+ Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
+ if (!Range.TheStores.empty())
+ AMemSet->setDebugLoc(Loc);
+
+ DEBUG(dbgs() << "With: " << *AMemSet << '\n');
+
++NumMemSetInfer;
}
@@ -1031,9 +1042,22 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
//
// NOTE: This is conservative, it will stop on any read from the source loc,
// not just the defining memcpy.
- MemDepResult SourceDep =
- MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
- M->getIterator(), M->getParent());
+ MemoryLocation SourceLoc = MemoryLocation::getForSource(MDep);
+ MemDepResult SourceDep = MD->getPointerDependencyFrom(SourceLoc, false,
+ M->getIterator(), M->getParent());
+
+ if (SourceDep.isNonLocal()) {
+ SmallVector<NonLocalDepResult, 2> NonLocalDepResults;
+ MD->getNonLocalPointerDependencyFrom(M, SourceLoc, /*isLoad=*/false,
+ NonLocalDepResults);
+ if (NonLocalDepResults.size() == 1) {
+ SourceDep = NonLocalDepResults[0].getResult();
+ assert((!SourceDep.getInst() ||
+ LookupDomTree().dominates(SourceDep.getInst(), M)) &&
+ "when memdep returns exactly one result, it should dominate");
+ }
+ }
+
if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
return false;
@@ -1235,6 +1259,18 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M) {
MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(
SrcLoc, true, M->getIterator(), M->getParent());
+ if (SrcDepInfo.isNonLocal()) {
+ SmallVector<NonLocalDepResult, 2> NonLocalDepResults;
+ MD->getNonLocalPointerDependencyFrom(M, SrcLoc, /*isLoad=*/true,
+ NonLocalDepResults);
+ if (NonLocalDepResults.size() == 1) {
+ SrcDepInfo = NonLocalDepResults[0].getResult();
+ assert((!SrcDepInfo.getInst() ||
+ LookupDomTree().dominates(SrcDepInfo.getInst(), M)) &&
+ "when memdep returns exactly one result, it should dominate");
+ }
+ }
+
if (SrcDepInfo.isClobber()) {
if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
return processMemCpyMemCpyDependence(M, MDep);
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index e5866b4718da..66608ec631f6 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -1929,9 +1929,32 @@ static bool runIPSCCP(Module &M, const DataLayout &DL,
if (!I) continue;
bool Folded = ConstantFoldTerminator(I->getParent());
- assert(Folded &&
- "Expect TermInst on constantint or blockaddress to be folded");
- (void) Folded;
+ if (!Folded) {
+ // The constant folder may not have been able to fold the terminator
+ // if this is a branch or switch on undef. Fold it manually as a
+ // branch to the first successor.
+#ifndef NDEBUG
+ if (auto *BI = dyn_cast<BranchInst>(I)) {
+ assert(BI->isConditional() && isa<UndefValue>(BI->getCondition()) &&
+ "Branch should be foldable!");
+ } else if (auto *SI = dyn_cast<SwitchInst>(I)) {
+ assert(isa<UndefValue>(SI->getCondition()) && "Switch should fold");
+ } else {
+ llvm_unreachable("Didn't fold away reference to block!");
+ }
+#endif
+
+ // Make this an uncond branch to the first successor.
+ TerminatorInst *TI = I->getParent()->getTerminator();
+ BranchInst::Create(TI->getSuccessor(0), TI);
+
+ // Remove entries in successor phi nodes to remove edges.
+ for (unsigned i = 1, e = TI->getNumSuccessors(); i != e; ++i)
+ TI->getSuccessor(i)->removePredecessor(TI->getParent());
+
+ // Remove the old terminator.
+ TI->eraseFromParent();
+ }
}
// Finally, delete the basic block.
diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 209821ff21d7..8fa9ffb6d014 100644
--- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -97,7 +97,7 @@
// load %p2
// ...
//
-// We can not do CSE for to the common part related to index "i64 %i". Lowering
+// We cannot apply CSE to the common part related to index "i64 %i". Lowering
// GEPs can achieve such goals.
// If the target does not use alias analysis in codegen, this pass will
// lower a GEP with multiple indices into arithmetic operations:
diff --git a/lib/Transforms/Utils/CallPromotionUtils.cpp b/lib/Transforms/Utils/CallPromotionUtils.cpp
index eb3139ce4293..8825f77555e7 100644
--- a/lib/Transforms/Utils/CallPromotionUtils.cpp
+++ b/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -23,10 +23,30 @@ using namespace llvm;
/// Fix-up phi nodes in an invoke instruction's normal destination.
///
/// After versioning an invoke instruction, values coming from the original
-/// block will now either be coming from the original block or the "else" block.
+/// block will now be coming from the "merge" block. For example, in the code
+/// below:
+///
+/// then_bb:
+/// %t0 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
+///
+/// else_bb:
+/// %t1 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
+///
+/// merge_bb:
+/// %t2 = phi i32 [ %t0, %then_bb ], [ %t1, %else_bb ]
+/// br %normal_dst
+///
+/// normal_dst:
+/// %t3 = phi i32 [ %x, %orig_bb ], ...
+///
+/// "orig_bb" is no longer a predecessor of "normal_dst", so the phi nodes in
+/// "normal_dst" must be fixed to refer to "merge_bb":
+///
+/// normal_dst:
+/// %t3 = phi i32 [ %x, %merge_bb ], ...
+///
static void fixupPHINodeForNormalDest(InvokeInst *Invoke, BasicBlock *OrigBlock,
- BasicBlock *ElseBlock,
- Instruction *NewInst) {
+ BasicBlock *MergeBlock) {
for (auto &I : *Invoke->getNormalDest()) {
auto *Phi = dyn_cast<PHINode>(&I);
if (!Phi)
@@ -34,13 +54,7 @@ static void fixupPHINodeForNormalDest(InvokeInst *Invoke, BasicBlock *OrigBlock,
int Idx = Phi->getBasicBlockIndex(OrigBlock);
if (Idx == -1)
continue;
- Value *V = Phi->getIncomingValue(Idx);
- if (dyn_cast<Instruction>(V) == Invoke) {
- Phi->setIncomingBlock(Idx, ElseBlock);
- Phi->addIncoming(NewInst, OrigBlock);
- continue;
- }
- Phi->addIncoming(V, ElseBlock);
+ Phi->setIncomingBlock(Idx, MergeBlock);
}
}
@@ -48,6 +62,23 @@ static void fixupPHINodeForNormalDest(InvokeInst *Invoke, BasicBlock *OrigBlock,
///
/// After versioning an invoke instruction, values coming from the original
/// block will now be coming from either the "then" block or the "else" block.
+/// For example, in the code below:
+///
+/// then_bb:
+/// %t0 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
+///
+/// else_bb:
+/// %t1 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
+///
+/// unwind_dst:
+/// %t3 = phi i32 [ %x, %orig_bb ], ...
+///
+/// "orig_bb" is no longer a predecessor of "unwind_dst", so the phi nodes in
+/// "unwind_dst" must be fixed to refer to "then_bb" and "else_bb":
+///
+/// unwind_dst:
+/// %t3 = phi i32 [ %x, %then_bb ], [ %x, %else_bb ], ...
+///
static void fixupPHINodeForUnwindDest(InvokeInst *Invoke, BasicBlock *OrigBlock,
BasicBlock *ThenBlock,
BasicBlock *ElseBlock) {
@@ -64,44 +95,26 @@ static void fixupPHINodeForUnwindDest(InvokeInst *Invoke, BasicBlock *OrigBlock,
}
}
-/// Get the phi node having the returned value of a call or invoke instruction
-/// as it's operand.
-static bool getRetPhiNode(Instruction *Inst, BasicBlock *Block) {
- BasicBlock *FromBlock = Inst->getParent();
- for (auto &I : *Block) {
- PHINode *PHI = dyn_cast<PHINode>(&I);
- if (!PHI)
- break;
- int Idx = PHI->getBasicBlockIndex(FromBlock);
- if (Idx == -1)
- continue;
- auto *V = PHI->getIncomingValue(Idx);
- if (V == Inst)
- return true;
- }
- return false;
-}
-
/// Create a phi node for the returned value of a call or invoke instruction.
///
/// After versioning a call or invoke instruction that returns a value, we have
/// to merge the value of the original and new instructions. We do this by
/// creating a phi node and replacing uses of the original instruction with this
/// phi node.
-static void createRetPHINode(Instruction *OrigInst, Instruction *NewInst) {
+///
+/// For example, if \p OrigInst is defined in "else_bb" and \p NewInst is
+/// defined in "then_bb", we create the following phi node:
+///
+/// ; Uses of the original instruction are replaced by uses of the phi node.
+/// %t0 = phi i32 [ %orig_inst, %else_bb ], [ %new_inst, %then_bb ],
+///
+static void createRetPHINode(Instruction *OrigInst, Instruction *NewInst,
+ BasicBlock *MergeBlock, IRBuilder<> &Builder) {
if (OrigInst->getType()->isVoidTy() || OrigInst->use_empty())
return;
- BasicBlock *RetValBB = NewInst->getParent();
- if (auto *Invoke = dyn_cast<InvokeInst>(NewInst))
- RetValBB = Invoke->getNormalDest();
- BasicBlock *PhiBB = RetValBB->getSingleSuccessor();
-
- if (getRetPhiNode(OrigInst, PhiBB))
- return;
-
- IRBuilder<> Builder(&PhiBB->front());
+ Builder.SetInsertPoint(&MergeBlock->front());
PHINode *Phi = Builder.CreatePHI(OrigInst->getType(), 0);
SmallVector<User *, 16> UsersToUpdate;
for (User *U : OrigInst->users())
@@ -109,7 +122,7 @@ static void createRetPHINode(Instruction *OrigInst, Instruction *NewInst) {
for (User *U : UsersToUpdate)
U->replaceUsesOfWith(OrigInst, Phi);
Phi->addIncoming(OrigInst, OrigInst->getParent());
- Phi->addIncoming(NewInst, RetValBB);
+ Phi->addIncoming(NewInst, NewInst->getParent());
}
/// Cast a call or invoke instruction to the given type.
@@ -118,7 +131,41 @@ static void createRetPHINode(Instruction *OrigInst, Instruction *NewInst) {
/// that of the callee. If this is the case, we have to cast the returned value
/// to the correct type. The location of the cast depends on if we have a call
/// or invoke instruction.
-Instruction *createRetBitCast(CallSite CS, Type *RetTy) {
+///
+/// For example, if the call instruction below requires a bitcast after
+/// promotion:
+///
+/// orig_bb:
+/// %t0 = call i32 @func()
+/// ...
+///
+/// The bitcast is placed after the call instruction:
+///
+/// orig_bb:
+/// ; Uses of the original return value are replaced by uses of the bitcast.
+/// %t0 = call i32 @func()
+/// %t1 = bitcast i32 %t0 to ...
+/// ...
+///
+/// A similar transformation is performed for invoke instructions. However,
+/// since invoke instructions are terminators, a new block is created for the
+/// bitcast. For example, if the invoke instruction below requires a bitcast
+/// after promotion:
+///
+/// orig_bb:
+/// %t0 = invoke i32 @func() to label %normal_dst unwind label %unwind_dst
+///
+/// The edge between the original block and the invoke's normal destination is
+/// split, and the bitcast is placed there:
+///
+/// orig_bb:
+/// %t0 = invoke i32 @func() to label %split_bb unwind label %unwind_dst
+///
+/// split_bb:
+/// ; Uses of the original return value are replaced by uses of the bitcast.
+/// %t1 = bitcast i32 %t0 to ...
+/// br label %normal_dst
+///
+static void createRetBitCast(CallSite CS, Type *RetTy, CastInst **RetBitCast) {
// Save the users of the calling instruction. These uses will be changed to
// use the bitcast after we create it.
@@ -130,19 +177,20 @@ Instruction *createRetBitCast(CallSite CS, Type *RetTy) {
// value. The location depends on if we have a call or invoke instruction.
Instruction *InsertBefore = nullptr;
if (auto *Invoke = dyn_cast<InvokeInst>(CS.getInstruction()))
- InsertBefore = &*Invoke->getNormalDest()->getFirstInsertionPt();
+ InsertBefore =
+ &SplitEdge(Invoke->getParent(), Invoke->getNormalDest())->front();
else
InsertBefore = &*std::next(CS.getInstruction()->getIterator());
// Bitcast the return value to the correct type.
auto *Cast = CastInst::Create(Instruction::BitCast, CS.getInstruction(),
RetTy, "", InsertBefore);
+ if (RetBitCast)
+ *RetBitCast = Cast;
// Replace all the original uses of the calling instruction with the bitcast.
for (User *U : UsersToUpdate)
U->replaceUsesOfWith(CS.getInstruction(), Cast);
-
- return Cast;
}
/// Predicate and clone the given call site.
@@ -152,21 +200,78 @@ Instruction *createRetBitCast(CallSite CS, Type *RetTy) {
/// callee. The original call site is moved into the "else" block, and a clone
/// of the call site is placed in the "then" block. The cloned instruction is
/// returned.
+///
+/// For example, the call instruction below:
+///
+/// orig_bb:
+/// %t0 = call i32 %ptr()
+/// ...
+///
+/// Is replaced by the following:
+///
+/// orig_bb:
+/// %cond = icmp eq i32 ()* %ptr, @func
+/// br i1 %cond, label %then_bb, label %else_bb
+///
+/// then_bb:
+/// ; The clone of the original call instruction is placed in the "then"
+/// ; block. It is not yet promoted.
+/// %t1 = call i32 %ptr()
+/// br label %merge_bb
+///
+/// else_bb:
+/// ; The original call instruction is moved to the "else" block.
+/// %t0 = call i32 %ptr()
+/// br label %merge_bb
+///
+/// merge_bb:
+/// ; Uses of the original call instruction are replaced by uses of the phi
+/// ; node.
+/// %t2 = phi i32 [ %t0, %else_bb ], [ %t1, %then_bb ]
+/// ...
+///
+/// A similar transformation is performed for invoke instructions. However,
+/// since invoke instructions are terminators, more work is required. For
+/// example, the invoke instruction below:
+///
+/// orig_bb:
+/// %t0 = invoke i32 %ptr() to label %normal_dst unwind label %unwind_dst
+///
+/// Is replaced by the following:
+///
+/// orig_bb:
+/// %cond = icmp eq i32 ()* %ptr, @func
+/// br i1 %cond, label %then_bb, label %else_bb
+///
+/// then_bb:
+/// ; The clone of the original invoke instruction is placed in the "then"
+/// ; block, and its normal destination is set to the "merge" block. It is
+/// ; not yet promoted.
+/// %t1 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
+///
+/// else_bb:
+/// ; The original invoke instruction is moved into the "else" block, and
+/// ; its normal destination is set to the "merge" block.
+/// %t0 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
+///
+/// merge_bb:
+/// ; Uses of the original invoke instruction are replaced by uses of the
+/// ; phi node, and the merge block branches to the normal destination.
+/// %t2 = phi i32 [ %t0, %else_bb ], [ %t1, %then_bb ]
+/// br label %normal_dst
+///
static Instruction *versionCallSite(CallSite CS, Value *Callee,
- MDNode *BranchWeights,
- BasicBlock *&ThenBlock,
- BasicBlock *&ElseBlock,
- BasicBlock *&MergeBlock) {
+ MDNode *BranchWeights) {
IRBuilder<> Builder(CS.getInstruction());
Instruction *OrigInst = CS.getInstruction();
+ BasicBlock *OrigBlock = OrigInst->getParent();
// Create the compare. The called value and callee must have the same type to
// be compared.
- auto *LHS =
- Builder.CreateBitCast(CS.getCalledValue(), Builder.getInt8PtrTy());
- auto *RHS = Builder.CreateBitCast(Callee, Builder.getInt8PtrTy());
- auto *Cond = Builder.CreateICmpEQ(LHS, RHS);
+ if (CS.getCalledValue()->getType() != Callee->getType())
+ Callee = Builder.CreateBitCast(Callee, CS.getCalledValue()->getType());
+ auto *Cond = Builder.CreateICmpEQ(CS.getCalledValue(), Callee);
// Create an if-then-else structure. The original instruction is moved into
// the "else" block, and a clone of the original instruction is placed in the
@@ -175,9 +280,9 @@ static Instruction *versionCallSite(CallSite CS, Value *Callee,
TerminatorInst *ElseTerm = nullptr;
SplitBlockAndInsertIfThenElse(Cond, CS.getInstruction(), &ThenTerm, &ElseTerm,
BranchWeights);
- ThenBlock = ThenTerm->getParent();
- ElseBlock = ElseTerm->getParent();
- MergeBlock = OrigInst->getParent();
+ BasicBlock *ThenBlock = ThenTerm->getParent();
+ BasicBlock *ElseBlock = ElseTerm->getParent();
+ BasicBlock *MergeBlock = OrigInst->getParent();
ThenBlock->setName("if.true.direct_targ");
ElseBlock->setName("if.false.orig_indirect");
@@ -188,7 +293,8 @@ static Instruction *versionCallSite(CallSite CS, Value *Callee,
NewInst->insertBefore(ThenTerm);
// If the original call site is an invoke instruction, we have extra work to
- // do since invoke instructions are terminating.
+  // do since invoke instructions are terminators. We have to fix up phi nodes
+ // in the invoke's normal and unwind destinations.
if (auto *OrigInvoke = dyn_cast<InvokeInst>(OrigInst)) {
auto *NewInvoke = cast<InvokeInst>(NewInst);
@@ -201,11 +307,19 @@ static Instruction *versionCallSite(CallSite CS, Value *Callee,
Builder.SetInsertPoint(MergeBlock);
Builder.CreateBr(OrigInvoke->getNormalDest());
- // Now set the normal destination of new the invoke instruction to be the
+  // Fix up phi nodes in the original invoke's normal and unwind destinations.
+ fixupPHINodeForNormalDest(OrigInvoke, OrigBlock, MergeBlock);
+ fixupPHINodeForUnwindDest(OrigInvoke, MergeBlock, ThenBlock, ElseBlock);
+
+ // Now set the normal destinations of the invoke instructions to be the
// "merge" block.
+ OrigInvoke->setNormalDest(MergeBlock);
NewInvoke->setNormalDest(MergeBlock);
}
+ // Create a phi node for the returned value of the call site.
+ createRetPHINode(OrigInst, NewInst, MergeBlock, Builder);
+
return NewInst;
}
@@ -253,7 +367,8 @@ bool llvm::isLegalToPromote(CallSite CS, Function *Callee,
return true;
}
-static void promoteCall(CallSite CS, Function *Callee, Instruction *&Cast) {
+Instruction *llvm::promoteCall(CallSite CS, Function *Callee,
+ CastInst **RetBitCast) {
assert(!CS.getCalledFunction() && "Only indirect call sites can be promoted");
// Set the called function of the call site to be the given callee.
@@ -268,7 +383,7 @@ static void promoteCall(CallSite CS, Function *Callee, Instruction *&Cast) {
// If the function type of the call site matches that of the callee, no
// additional work is required.
if (CS.getFunctionType() == Callee->getFunctionType())
- return;
+ return CS.getInstruction();
// Save the return types of the call site and callee.
Type *CallSiteRetTy = CS.getInstruction()->getType();
@@ -294,7 +409,9 @@ static void promoteCall(CallSite CS, Function *Callee, Instruction *&Cast) {
// If the return type of the call site doesn't match that of the callee, cast
// the returned value to the appropriate type.
if (!CallSiteRetTy->isVoidTy() && CallSiteRetTy != CalleeRetTy)
- Cast = createRetBitCast(CS, CallSiteRetTy);
+ createRetBitCast(CS, CallSiteRetTy, RetBitCast);
+
+ return CS.getInstruction();
}
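+
+// Usage sketch (illustrative only; CS and DirectCallee are hypothetical
+// values supplied by a caller): promote an indirect call site in place once
+// its target is known, capturing any return-value bitcast that was needed.
+//
+//   CastInst *RetCast = nullptr;
+//   if (isLegalToPromote(CS, DirectCallee))
+//     promoteCall(CS, DirectCallee, &RetCast);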
Instruction *llvm::promoteCallWithIfThenElse(CallSite CS, Function *Callee,
@@ -303,26 +420,10 @@ Instruction *llvm::promoteCallWithIfThenElse(CallSite CS, Function *Callee,
// Version the indirect call site. If the called value is equal to the given
// callee, 'NewInst' will be executed, otherwise the original call site will
// be executed.
- BasicBlock *ThenBlock, *ElseBlock, *MergeBlock;
- Instruction *NewInst = versionCallSite(CS, Callee, BranchWeights, ThenBlock,
- ElseBlock, MergeBlock);
+ Instruction *NewInst = versionCallSite(CS, Callee, BranchWeights);
// Promote 'NewInst' so that it directly calls the desired function.
- Instruction *Cast = NewInst;
- promoteCall(CallSite(NewInst), Callee, Cast);
-
- // If the original call site is an invoke instruction, we have to fix-up phi
- // nodes in the invoke's normal and unwind destinations.
- if (auto *OrigInvoke = dyn_cast<InvokeInst>(CS.getInstruction())) {
- fixupPHINodeForNormalDest(OrigInvoke, MergeBlock, ElseBlock, Cast);
- fixupPHINodeForUnwindDest(OrigInvoke, MergeBlock, ThenBlock, ElseBlock);
- }
-
- // Create a phi node for the returned value of the call site.
- createRetPHINode(CS.getInstruction(), Cast ? Cast : NewInst);
-
- // Return the new direct call.
- return NewInst;
+ return promoteCall(CallSite(NewInst), Callee);
}
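+
+// Usage sketch (illustrative only; names are hypothetical): conditionally
+// promote an indirect call site toward a profile-preferred target, keeping
+// the original indirect call on the fallthrough path.
+//
+//   Instruction *Direct =
+//       promoteCallWithIfThenElse(CS, DirectCallee, /*BranchWeights=*/nullptr);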
#undef DEBUG_TYPE
diff --git a/lib/Transforms/Utils/LoopUnrollPeel.cpp b/lib/Transforms/Utils/LoopUnrollPeel.cpp
index 4273ce0b6200..c84ae7d693d7 100644
--- a/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -203,7 +203,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
// hit the peeled section.
// We only do this in the presence of profile information, since otherwise
// our estimates of the trip count are not reliable enough.
- if (UP.AllowPeeling && L->getHeader()->getParent()->getEntryCount()) {
+ if (UP.AllowPeeling && L->getHeader()->getParent()->hasProfileData()) {
Optional<unsigned> PeelCount = getLoopEstimatedTripCount(L);
if (!PeelCount)
return;
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index f02f80cc1b78..b3c80424c8b9 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -127,6 +127,16 @@ static cl::opt<unsigned> MaxSpeculationDepth(
cl::desc("Limit maximum recursion depth when calculating costs of "
"speculatively executed instructions"));
+static cl::opt<unsigned> DependenceChainLatency(
+ "dependence-chain-latency", cl::Hidden, cl::init(8),
+ cl::desc("Limit the maximum latency of dependence chain containing cmp "
+ "for if conversion"));
+
+static cl::opt<unsigned> SmallBBSize(
+ "small-bb-size", cl::Hidden, cl::init(40),
+ cl::desc("Check dependence chain latency only in basic block smaller than "
+ "this number"));
+
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
STATISTIC(NumLinearMaps,
"Number of switch instructions turned into linear mapping");
@@ -395,6 +405,166 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
return true;
}
+/// Estimate the code size of the specified BB.
+static unsigned CountBBCodeSize(BasicBlock *BB,
+ const TargetTransformInfo &TTI) {
+ unsigned Size = 0;
+ for (auto II = BB->begin(); !isa<TerminatorInst>(II); ++II)
+ Size += TTI.getInstructionCost(&(*II), TargetTransformInfo::TCK_CodeSize);
+ return Size;
+}
+
+/// Compute the latency of the longest dependence chain in BB if LongestChain
+/// is true; otherwise, compute the latency of the dependence chain containing
+/// the compare instruction that feeds the block's conditional branch.
+static unsigned FindDependenceChainLatency(BasicBlock *BB,
+ DenseMap<Instruction *, unsigned> &Instructions,
+ const TargetTransformInfo &TTI,
+ bool LongestChain) {
+ unsigned MaxLatency = 0;
+
+ BasicBlock::iterator II;
+ for (II = BB->begin(); !isa<TerminatorInst>(II); ++II) {
+ unsigned Latency = 0;
+ for (unsigned O = 0, E = II->getNumOperands(); O != E; ++O) {
+ Instruction *Op = dyn_cast<Instruction>(II->getOperand(O));
+ if (Op && Instructions.count(Op)) {
+ auto OpLatency = Instructions[Op];
+ if (OpLatency > Latency)
+ Latency = OpLatency;
+ }
+ }
+ Latency += TTI.getInstructionCost(&(*II), TargetTransformInfo::TCK_Latency);
+ Instructions[&(*II)] = Latency;
+
+ if (Latency > MaxLatency)
+ MaxLatency = Latency;
+ }
+
+ if (LongestChain)
+ return MaxLatency;
+
+ // The length of the dependence chain containing the compare instruction is
+ // wanted, so the terminator must be a BranchInst.
+ assert(isa<BranchInst>(II));
+  BranchInst *Br = cast<BranchInst>(II);
+ Instruction *Cmp = dyn_cast<Instruction>(Br->getCondition());
+ if (Cmp && Instructions.count(Cmp))
+ return Instructions[Cmp];
+  return 0;
+}
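+
+// For example, assuming a hypothetical unit latency for every instruction:
+//
+//   %a = add i32 %x, 1        ; accumulated latency 1
+//   %b = mul i32 %a, %y       ; accumulated latency 2 (depends on %a)
+//   %c = icmp slt i32 %b, 0   ; accumulated latency 3 (depends on %b)
+//   br i1 %c, label %t, label %f
+//
+// With LongestChain == false, the function returns 3, the accumulated
+// latency of the chain feeding the compare; with LongestChain == true, it
+// returns the maximum accumulated latency over all instructions (also 3
+// here).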
+
+/// Instructions in BB2 may depend on instructions in BB1, and instructions
+/// in BB1 may have users in BB2. If I is the last such instruction in BB1
+/// (in terms of accumulated latency), then the instructions after I can
+/// execute in parallel with the instructions in BB2.
+/// This function returns the latency of I.
+static unsigned LatencyAdjustment(BasicBlock *BB1, BasicBlock *BB2,
+ BasicBlock *IfBlock1, BasicBlock *IfBlock2,
+ DenseMap<Instruction *, unsigned> &BB1Instructions) {
+ unsigned LastLatency = 0;
+ SmallVector<Instruction *, 16> Worklist;
+ BasicBlock::iterator II;
+ for (II = BB2->begin(); !isa<TerminatorInst>(II); ++II) {
+ if (PHINode *PN = dyn_cast<PHINode>(II)) {
+ // Look for users in BB2.
+ bool InBBUser = false;
+ for (User *U : PN->users()) {
+ if (cast<Instruction>(U)->getParent() == BB2) {
+ InBBUser = true;
+ break;
+ }
+ }
+      // No such user; we don't care about this instruction or its operands.
+ if (!InBBUser)
+ break;
+ }
+ Worklist.push_back(&(*II));
+ }
+
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
+ for (unsigned O = 0, E = I->getNumOperands(); O != E; ++O) {
+ if (Instruction *Op = dyn_cast<Instruction>(I->getOperand(O))) {
+ if (Op->getParent() == IfBlock1 || Op->getParent() == IfBlock2)
+ Worklist.push_back(Op);
+ else if (Op->getParent() == BB1 && BB1Instructions.count(Op)) {
+ if (BB1Instructions[Op] > LastLatency)
+ LastLatency = BB1Instructions[Op];
+ }
+ }
+ }
+ }
+
+ return LastLatency;
+}
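+
+// For example (hypothetical latencies): if %p is defined in BB1 with an
+// accumulated latency of 5 and is the last BB1 value consumed, directly or
+// through the if blocks, by BB2, this returns 5; only the portion of BB1's
+// chain after %p can overlap with BB2's execution.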
+
+/// If, after if conversion, most of the instructions in the new BB form a
+/// long and slow dependence chain, the converted code may be slower than the
+/// original cmp/branch, even when the branch has a high miss rate. This is
+/// because if conversion turns a control dependence into a data dependence:
+/// a control dependence can be speculated, so on a modern OOO processor the
+/// second part can execute in parallel with the first part.
+///
+/// To check this condition, this function computes the latency of the
+/// dependence chain in BB1 that contains the compare, counting only the part
+/// that could otherwise execute in parallel with BB2 after the branch. If
+/// that latency exceeds a threshold, the if conversion is not performed.
+///
+/// BB1, BB2, IfBlock1 and IfBlock2 are candidate BBs for if conversion.
+/// SpeculationSize contains the code size of IfBlock1 and IfBlock2.
+static bool FindLongDependenceChain(BasicBlock *BB1, BasicBlock *BB2,
+ BasicBlock *IfBlock1, BasicBlock *IfBlock2,
+ unsigned SpeculationSize,
+ const TargetTransformInfo &TTI) {
+ // Accumulated latency of each instruction in their BBs.
+ DenseMap<Instruction *, unsigned> BB1Instructions;
+ DenseMap<Instruction *, unsigned> BB2Instructions;
+
+ if (!TTI.isOutOfOrder())
+ return false;
+
+  unsigned NewBBSize =
+      CountBBCodeSize(BB1, TTI) + CountBBCodeSize(BB2, TTI) + SpeculationSize;
+
+  // We only check small BBs, since it is harder to find unrelated
+  // instructions to fill the functional units when a basic block is small.
+ if (NewBBSize > SmallBBSize)
+ return false;
+
+ auto BB1Chain =
+ FindDependenceChainLatency(BB1, BB1Instructions, TTI, false);
+ auto BB2Chain =
+ FindDependenceChainLatency(BB2, BB2Instructions, TTI, true);
+
+  // If there are many unrelated instructions in the new BB, the processor
+  // will have other instructions to issue regardless of the length of this
+  // new dependence chain. Modern processors can issue 3 or more instructions
+  // per cycle, but in real-world applications an IPC of 2 is already very
+  // good for non-loop code with small basic blocks; higher IPC is usually
+  // found in programs with small, hot kernels. So an IPC of 2 is a more
+  // reasonable assumption for most applications.
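+  // For example, with hypothetical numbers: if NewBBSize is 30 and the two
+  // chain latencies are 7 and 5, then (7 + 5) * 2 == 24 <= 30, so there is
+  // enough unrelated work to hide the chain, and the conversion is allowed.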
+ if ((BB1Chain + BB2Chain) * 2 <= NewBBSize)
+ return false;
+
+ // We only care about part of the dependence chain in BB1 that can be
+ // executed in parallel with BB2, so adjust the latency.
+ BB1Chain -=
+ LatencyAdjustment(BB1, BB2, IfBlock1, IfBlock2, BB1Instructions);
+
+  // A correctly predicted branch can skip the dependence chain in BB1, but a
+  // misprediction has a penalty, so a branch is better than a select only
+  // when the dependence chain is longer than DependenceChainLatency. Besides
+  // the misprediction penalty, the threshold value DependenceChainLatency
+  // also depends on the branch misprediction rate, the taken-branch latency,
+  // and the cmov latency.
+  return BB1Chain >= DependenceChainLatency;
+}
+
/// Extract ConstantInt from value, looking through IntToPtr
/// and PointerNullValue. Return NULL if value is not a constant int.
static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) {
@@ -1654,14 +1824,11 @@ namespace {
} // end anonymous namespace
-/// Given an unconditional branch that goes to BBEnd,
-/// check whether BBEnd has only two predecessors and the other predecessor
-/// ends with an unconditional branch. If it is true, sink any common code
-/// in the two predecessors to BBEnd.
-static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
- assert(BI1->isUnconditional());
- BasicBlock *BBEnd = BI1->getSuccessor(0);
-
+/// Check whether BB's predecessors end with unconditional branches. If so,
+/// sink any common code from the predecessors to BB.
+/// We also allow at most one predecessor to end with a conditional branch.
+static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) {
// We support two situations:
// (1) all incoming arcs are unconditional
// (2) one incoming arc is conditional
@@ -1705,7 +1872,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
//
SmallVector<BasicBlock*,4> UnconditionalPreds;
Instruction *Cond = nullptr;
- for (auto *B : predecessors(BBEnd)) {
+ for (auto *B : predecessors(BB)) {
auto *T = B->getTerminator();
if (isa<BranchInst>(T) && cast<BranchInst>(T)->isUnconditional())
UnconditionalPreds.push_back(B);
@@ -1773,8 +1940,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
DEBUG(dbgs() << "SINK: Splitting edge\n");
// We have a conditional edge and we're going to sink some instructions.
// Insert a new block postdominating all blocks we're going to sink from.
- if (!SplitBlockPredecessors(BI1->getSuccessor(0), UnconditionalPreds,
- ".sink.split"))
+ if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split"))
// Edges couldn't be split.
return false;
Changed = true;
@@ -2048,6 +2214,11 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
if (!HaveRewritablePHIs && !(HoistCondStores && SpeculatedStoreValue))
return false;
+  // Don't perform if conversion if it would create a long dependence chain.
+ if (FindLongDependenceChain(BB, EndBB, ThenBB, nullptr,
+ CountBBCodeSize(ThenBB, TTI), TTI))
+ return false;
+
// If we get here, we can hoist the instruction and if-convert.
DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
@@ -2355,6 +2526,10 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
}
}
+ if (FindLongDependenceChain(DomBlock, BB, IfBlock1, IfBlock2,
+ AggressiveInsts.size(), TTI))
+ return false;
+
DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: "
<< IfTrue->getName() << " F: " << IfFalse->getName() << "\n");
@@ -5728,9 +5903,6 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI,
BasicBlock *BB = BI->getParent();
BasicBlock *Succ = BI->getSuccessor(0);
- if (SinkCommon && Options.SinkCommonInsts && SinkThenElseCodeToEnd(BI))
- return true;
-
// If the Terminator is the only non-phi instruction, simplify the block.
// If LoopHeader is provided, check if the block or its successor is a loop
// header. (This is for early invocations before loop simplify and
@@ -6008,6 +6180,9 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
if (MergeBlockIntoPredecessor(BB))
return true;
+ if (SinkCommon && Options.SinkCommonInsts)
+ Changed |= SinkCommonCodeFromPredecessors(BB);
+
IRBuilder<> Builder(BB);
// If there is a trivial two-entry PHI node in this basic block, and we can
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index fbcdc0df0f1c..52f32cda2609 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5049,13 +5049,13 @@ bool LoopVectorizationLegality::canVectorize() {
bool Result = true;
bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
- if (DoExtraAnalysis)
// We must have a loop in canonical form. Loops with indirectbr in them cannot
// be canonicalized.
if (!TheLoop->getLoopPreheader()) {
+ DEBUG(dbgs() << "LV: Loop doesn't have a legal pre-header.\n");
ORE->emit(createMissedAnalysis("CFGNotUnderstood")
<< "loop control flow is not understood by vectorizer");
- if (DoExtraAnalysis)
+ if (DoExtraAnalysis)
Result = false;
else
return false;
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 76ba62f5d596..a7ccd3faec44 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -646,23 +646,17 @@ private:
int getEntryCost(TreeEntry *E);
/// This is the recursive part of buildTree.
- void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth, int UserIndx = -1,
- int OpdNum = 0);
+ void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth, int);
/// \returns True if the ExtractElement/ExtractValue instructions in VL can
/// be vectorized to use the original vector (or aggregate "bitcast" to a vector).
bool canReuseExtract(ArrayRef<Value *> VL, Value *OpValue) const;
- /// Vectorize a single entry in the tree.\p OpdNum indicate the ordinality of
- /// operand corrsponding to this tree entry \p E for the user tree entry
- /// indicated by \p UserIndx.
- // In other words, "E == TreeEntry[UserIndx].getOperand(OpdNum)".
- Value *vectorizeTree(TreeEntry *E, int OpdNum = 0, int UserIndx = -1);
+ /// Vectorize a single entry in the tree.
+ Value *vectorizeTree(TreeEntry *E);
- /// Vectorize a single entry in the tree, starting in \p VL.\p OpdNum indicate
- /// the ordinality of operand corrsponding to the \p VL of scalar values for the
- /// user indicated by \p UserIndx this \p VL feeds into.
- Value *vectorizeTree(ArrayRef<Value *> VL, int OpdNum = 0, int UserIndx = -1);
+ /// Vectorize a single entry in the tree, starting in \p VL.
+ Value *vectorizeTree(ArrayRef<Value *> VL);
/// \returns the pointer to the vectorized value if \p VL is already
/// vectorized, or NULL. They may happen in cycles.
@@ -708,16 +702,6 @@ private:
return std::equal(VL.begin(), VL.end(), Scalars.begin());
}
- /// \returns true if the scalars in VL are found in this tree entry.
- bool isFoundJumbled(ArrayRef<Value *> VL, const DataLayout &DL,
- ScalarEvolution &SE) const {
- assert(VL.size() == Scalars.size() && "Invalid size");
- SmallVector<Value *, 8> List;
- if (!sortLoadAccesses(VL, DL, SE, List))
- return false;
- return std::equal(List.begin(), List.end(), Scalars.begin());
- }
-
/// A vector of scalars.
ValueList Scalars;
@@ -727,14 +711,6 @@ private:
/// Do we need to gather this sequence ?
bool NeedToGather = false;
- /// Records optional shuffle mask for the uses of jumbled memory accesses.
- /// For example, a non-empty ShuffleMask[1] represents the permutation of
- /// lanes that operand #1 of this vectorized instruction should undergo
- /// before feeding this vectorized instruction, whereas an empty
- /// ShuffleMask[0] indicates that the lanes of operand #0 of this vectorized
- /// instruction need not be permuted at all.
- SmallVector<SmallVector<unsigned, 4>, 2> ShuffleMask;
-
/// Points back to the VectorizableTree.
///
/// Only used for Graphviz right now. Unfortunately GraphTrait::NodeRef has
@@ -750,31 +726,12 @@ private:
/// Create a new VectorizableTree entry.
TreeEntry *newTreeEntry(ArrayRef<Value *> VL, bool Vectorized,
- int &UserTreeIdx, const InstructionsState &S,
- ArrayRef<unsigned> ShuffleMask = None,
- int OpdNum = 0) {
- assert((!Vectorized || S.Opcode != 0) &&
- "Vectorized TreeEntry without opcode");
+ int &UserTreeIdx) {
VectorizableTree.emplace_back(VectorizableTree);
-
int idx = VectorizableTree.size() - 1;
TreeEntry *Last = &VectorizableTree[idx];
Last->Scalars.insert(Last->Scalars.begin(), VL.begin(), VL.end());
Last->NeedToGather = !Vectorized;
-
- TreeEntry *UserTreeEntry = nullptr;
- if (UserTreeIdx != -1)
- UserTreeEntry = &VectorizableTree[UserTreeIdx];
-
- if (UserTreeEntry && !ShuffleMask.empty()) {
- if ((unsigned)OpdNum >= UserTreeEntry->ShuffleMask.size())
- UserTreeEntry->ShuffleMask.resize(OpdNum + 1);
- assert(UserTreeEntry->ShuffleMask[OpdNum].empty() &&
- "Mask already present");
- using mask = SmallVector<unsigned, 4>;
- mask tempMask(ShuffleMask.begin(), ShuffleMask.end());
- UserTreeEntry->ShuffleMask[OpdNum] = tempMask;
- }
if (Vectorized) {
for (int i = 0, e = VL.size(); i != e; ++i) {
assert(!getTreeEntry(VL[i]) && "Scalar already in tree!");
@@ -1427,34 +1384,34 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
}
void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
- int UserTreeIdx, int OpdNum) {
+ int UserTreeIdx) {
assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
InstructionsState S = getSameOpcode(VL);
if (Depth == RecursionMaxDepth) {
DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n");
- newTreeEntry(VL, false, UserTreeIdx, S);
+ newTreeEntry(VL, false, UserTreeIdx);
return;
}
// Don't handle vectors.
if (S.OpValue->getType()->isVectorTy()) {
DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
- newTreeEntry(VL, false, UserTreeIdx, S);
+ newTreeEntry(VL, false, UserTreeIdx);
return;
}
if (StoreInst *SI = dyn_cast<StoreInst>(S.OpValue))
if (SI->getValueOperand()->getType()->isVectorTy()) {
DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n");
- newTreeEntry(VL, false, UserTreeIdx, S);
+ newTreeEntry(VL, false, UserTreeIdx);
return;
}
// If all of the operands are identical or constant we have a simple solution.
if (allConstant(VL) || isSplat(VL) || !allSameBlock(VL) || !S.Opcode) {
DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n");
- newTreeEntry(VL, false, UserTreeIdx, S);
+ newTreeEntry(VL, false, UserTreeIdx);
return;
}
@@ -1466,7 +1423,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
if (EphValues.count(VL[i])) {
DEBUG(dbgs() << "SLP: The instruction (" << *VL[i] <<
") is ephemeral.\n");
- newTreeEntry(VL, false, UserTreeIdx, S);
+ newTreeEntry(VL, false, UserTreeIdx);
return;
}
}
@@ -1477,7 +1434,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
DEBUG(dbgs() << "SLP: \tChecking bundle: " << *VL[i] << ".\n");
if (E->Scalars[i] != VL[i]) {
DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
- newTreeEntry(VL, false, UserTreeIdx, S);
+ newTreeEntry(VL, false, UserTreeIdx);
return;
}
}
@@ -1496,7 +1453,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
if (getTreeEntry(I)) {
DEBUG(dbgs() << "SLP: The instruction (" << *VL[i] <<
") is already in tree.\n");
- newTreeEntry(VL, false, UserTreeIdx, S);
+ newTreeEntry(VL, false, UserTreeIdx);
return;
}
}
@@ -1506,7 +1463,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
for (unsigned i = 0, e = VL.size(); i != e; ++i) {
if (MustGather.count(VL[i])) {
DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n");
- newTreeEntry(VL, false, UserTreeIdx, S);
+ newTreeEntry(VL, false, UserTreeIdx);
return;
}
}
@@ -1520,7 +1477,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Don't go into unreachable blocks. They may contain instructions with
// dependency cycles which confuse the final scheduling.
DEBUG(dbgs() << "SLP: bundle in unreachable block.\n");
- newTreeEntry(VL, false, UserTreeIdx, S);
+ newTreeEntry(VL, false, UserTreeIdx);
return;
}
@@ -1529,7 +1486,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
for (unsigned j = i + 1; j < e; ++j)
if (VL[i] == VL[j]) {
DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
- newTreeEntry(VL, false, UserTreeIdx, S);
+ newTreeEntry(VL, false, UserTreeIdx);
return;
}
@@ -1544,7 +1501,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
assert((!BS.getScheduleData(VL0) ||
!BS.getScheduleData(VL0)->isPartOfBundle()) &&
"tryScheduleBundle should cancelScheduling on failure");
- newTreeEntry(VL, false, UserTreeIdx, S);
+ newTreeEntry(VL, false, UserTreeIdx);
return;
}
DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n");
@@ -1563,12 +1520,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
if (Term) {
DEBUG(dbgs() << "SLP: Need to swizzle PHINodes (TerminatorInst use).\n");
BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, S);
+ newTreeEntry(VL, false, UserTreeIdx);
return;
}
}
- newTreeEntry(VL, true, UserTreeIdx, S);
+ newTreeEntry(VL, true, UserTreeIdx);
DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n");
for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
@@ -1578,7 +1535,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
Operands.push_back(cast<PHINode>(j)->getIncomingValueForBlock(
PH->getIncomingBlock(i)));
- buildTree_rec(Operands, Depth + 1, UserTreeIdx, i);
+ buildTree_rec(Operands, Depth + 1, UserTreeIdx);
}
return;
}
@@ -1590,7 +1547,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
} else {
BS.cancelScheduling(VL, VL0);
}
- newTreeEntry(VL, Reuse, UserTreeIdx, S);
+ newTreeEntry(VL, Reuse, UserTreeIdx);
return;
}
case Instruction::Load: {
@@ -1605,7 +1562,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
if (DL->getTypeSizeInBits(ScalarTy) !=
DL->getTypeAllocSizeInBits(ScalarTy)) {
BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, S);
+ newTreeEntry(VL, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n");
return;
}
@@ -1616,13 +1573,15 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
LoadInst *L = cast<LoadInst>(VL[i]);
if (!L->isSimple()) {
BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, S);
+ newTreeEntry(VL, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
return;
}
}
// Check if the loads are consecutive, reversed, or neither.
+ // TODO: What we really want is to sort the loads, but for now, check
+ // the two likely directions.
bool Consecutive = true;
bool ReverseConsecutive = true;
for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) {
@@ -1636,7 +1595,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
if (Consecutive) {
++NumLoadsWantToKeepOrder;
- newTreeEntry(VL, true, UserTreeIdx, S);
+ newTreeEntry(VL, true, UserTreeIdx);
DEBUG(dbgs() << "SLP: added a vector of loads.\n");
return;
}
@@ -1650,41 +1609,15 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
break;
}
+ BS.cancelScheduling(VL, VL0);
+ newTreeEntry(VL, false, UserTreeIdx);
+
if (ReverseConsecutive) {
- DEBUG(dbgs() << "SLP: Gathering reversed loads.\n");
++NumLoadsWantToChangeOrder;
- BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, S);
- return;
- }
-
- if (VL.size() > 2) {
- bool ShuffledLoads = true;
- SmallVector<Value *, 8> Sorted;
- SmallVector<unsigned, 4> Mask;
- if (sortLoadAccesses(VL, *DL, *SE, Sorted, &Mask)) {
- auto NewVL = makeArrayRef(Sorted.begin(), Sorted.end());
- for (unsigned i = 0, e = NewVL.size() - 1; i < e; ++i) {
- if (!isConsecutiveAccess(NewVL[i], NewVL[i + 1], *DL, *SE)) {
- ShuffledLoads = false;
- break;
- }
- }
- // TODO: Tracking how many load wants to have arbitrary shuffled order
- // would be usefull.
- if (ShuffledLoads) {
- DEBUG(dbgs() << "SLP: added a vector of loads which needs "
- "permutation of loaded lanes.\n");
- newTreeEntry(NewVL, true, UserTreeIdx, S,
- makeArrayRef(Mask.begin(), Mask.end()), OpdNum);
- return;
- }
- }
+ DEBUG(dbgs() << "SLP: Gathering reversed loads.\n");
+ } else {
+ DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n");
}
-
- DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n");
- BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, S);
return;
}
case Instruction::ZExt:
@@ -1704,12 +1637,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
Type *Ty = cast<Instruction>(VL[i])->getOperand(0)->getType();
if (Ty != SrcTy || !isValidElementType(Ty)) {
BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, S);
+ newTreeEntry(VL, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: Gathering casts with different src types.\n");
return;
}
}
- newTreeEntry(VL, true, UserTreeIdx, S);
+ newTreeEntry(VL, true, UserTreeIdx);
DEBUG(dbgs() << "SLP: added a vector of casts.\n");
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
@@ -1718,7 +1651,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(i));
- buildTree_rec(Operands, Depth + 1, UserTreeIdx, i);
+ buildTree_rec(Operands, Depth + 1, UserTreeIdx);
}
return;
}
@@ -1732,13 +1665,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
if (Cmp->getPredicate() != P0 ||
Cmp->getOperand(0)->getType() != ComparedTy) {
BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, S);
+ newTreeEntry(VL, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: Gathering cmp with different predicate.\n");
return;
}
}
- newTreeEntry(VL, true, UserTreeIdx, S);
+ newTreeEntry(VL, true, UserTreeIdx);
DEBUG(dbgs() << "SLP: added a vector of compares.\n");
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
@@ -1747,7 +1680,7 @@ void BoUpSLP::buildTree_rec(Ar