Diffstat (limited to 'lib')
-rw-r--r--  lib/Analysis/AliasSetTracker.cpp | 16
-rw-r--r--  lib/Analysis/BasicAliasAnalysis.cpp | 92
-rw-r--r--  lib/Analysis/CFGPrinter.cpp | 2
-rw-r--r--  lib/Analysis/CallGraph.cpp | 2
-rw-r--r--  lib/Analysis/CallGraphSCCPass.cpp | 98
-rw-r--r--  lib/Analysis/DemandedBits.cpp | 4
-rw-r--r--  lib/Analysis/GlobalsModRef.cpp | 12
-rw-r--r--  lib/Analysis/InstructionSimplify.cpp | 325
-rw-r--r--  lib/Analysis/LazyValueInfo.cpp | 2
-rw-r--r--  lib/Analysis/LoopAccessAnalysis.cpp | 52
-rw-r--r--  lib/Analysis/MemDepPrinter.cpp | 2
-rw-r--r--  lib/Analysis/MemoryDependenceAnalysis.cpp | 17
-rw-r--r--  lib/Analysis/MustExecute.cpp | 6
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp | 10
-rw-r--r--  lib/Analysis/TargetTransformInfo.cpp | 18
-rw-r--r--  lib/Analysis/ValueTracking.cpp | 12
-rw-r--r--  lib/AsmParser/LLParser.cpp | 4
-rw-r--r--  lib/Bitcode/Writer/BitcodeWriter.cpp | 2
-rw-r--r--  lib/CodeGen/AntiDepBreaker.h | 2
-rw-r--r--  lib/CodeGen/AsmPrinter/AddressPool.cpp | 18
-rw-r--r--  lib/CodeGen/AsmPrinter/AddressPool.h | 3
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 15
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.h | 3
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfExpression.h | 2
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfFile.cpp | 2
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 4
-rw-r--r--  lib/CodeGen/AtomicExpandPass.cpp | 16
-rw-r--r--  lib/CodeGen/BuiltinGCs.cpp | 2
-rw-r--r--  lib/CodeGen/CriticalAntiDepBreaker.cpp | 2
-rw-r--r--  lib/CodeGen/GCMetadata.cpp | 8
-rw-r--r--  lib/CodeGen/GlobalISel/IRTranslator.cpp | 15
-rw-r--r--  lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 9
-rw-r--r--  lib/CodeGen/GlobalMerge.cpp | 2
-rw-r--r--  lib/CodeGen/IntrinsicLowering.cpp | 28
-rw-r--r--  lib/CodeGen/LiveDebugValues.cpp | 4
-rw-r--r--  lib/CodeGen/MachineModuleInfo.cpp | 4
-rw-r--r--  lib/CodeGen/MachineOutliner.cpp | 47
-rw-r--r--  lib/CodeGen/MachineRegisterInfo.cpp | 2
-rw-r--r--  lib/CodeGen/MachineSSAUpdater.cpp | 4
-rw-r--r--  lib/CodeGen/MachineSink.cpp | 2
-rw-r--r--  lib/CodeGen/MachineTraceMetrics.cpp | 4
-rw-r--r--  lib/CodeGen/MachineVerifier.cpp | 4
-rw-r--r--  lib/CodeGen/RegisterScavenging.cpp | 3
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 231
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp | 8
-rw-r--r--  lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 8
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 9
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.h | 2
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 18
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 14
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 246
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 10
-rw-r--r--  lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 11
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp | 39
-rw-r--r--  lib/CodeGen/ShadowStackGCLowering.cpp | 4
-rw-r--r--  lib/CodeGen/SplitKit.h | 2
-rw-r--r--  lib/CodeGen/TargetLoweringBase.cpp | 13
-rw-r--r--  lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 67
-rw-r--r--  lib/CodeGen/TargetPassConfig.cpp | 2
-rw-r--r--  lib/CodeGen/WinEHPrepare.cpp | 2
-rw-r--r--  lib/DebugInfo/CodeView/RecordName.cpp | 3
-rw-r--r--  lib/DebugInfo/CodeView/SymbolDumper.cpp | 6
-rw-r--r--  lib/DebugInfo/CodeView/SymbolRecordMapping.cpp | 8
-rw-r--r--  lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp | 3
-rw-r--r--  lib/DebugInfo/CodeView/TypeStreamMerger.cpp | 5
-rw-r--r--  lib/DebugInfo/DWARF/CMakeLists.txt | 1
-rw-r--r--  lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp | 2
-rw-r--r--  lib/DebugInfo/DWARF/DWARFContext.cpp | 65
-rw-r--r--  lib/DebugInfo/DWARF/DWARFDebugAddr.cpp | 198
-rw-r--r--  lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp | 2
-rw-r--r--  lib/Demangle/ItaniumDemangle.cpp | 8
-rw-r--r--  lib/Demangle/MicrosoftDemangle.cpp | 1048
-rw-r--r--  lib/Demangle/StringView.h | 24
-rw-r--r--  lib/ExecutionEngine/ExecutionEngineBindings.cpp | 16
-rw-r--r--  lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h | 6
-rw-r--r--  lib/ExecutionEngine/IntelJITEvents/jitprofiling.h | 86
-rw-r--r--  lib/ExecutionEngine/Interpreter/Execution.cpp | 42
-rw-r--r--  lib/ExecutionEngine/Interpreter/Interpreter.h | 6
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp | 6
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp | 2
-rw-r--r--  lib/FuzzMutate/FuzzerCLI.cpp | 6
-rw-r--r--  lib/IR/Attributes.cpp | 9
-rw-r--r--  lib/IR/AutoUpgrade.cpp | 2
-rw-r--r--  lib/IR/Function.cpp | 2
-rw-r--r--  lib/IR/InlineAsm.cpp | 32
-rw-r--r--  lib/IR/Instructions.cpp | 182
-rw-r--r--  lib/IR/LLVMContextImpl.h | 16
-rw-r--r--  lib/IR/SymbolTableListTraitsImpl.h | 10
-rw-r--r--  lib/IR/ValueSymbolTable.cpp | 4
-rw-r--r--  lib/LTO/ThinLTOCodeGenerator.cpp | 10
-rw-r--r--  lib/MC/MCAsmStreamer.cpp | 4
-rw-r--r--  lib/MC/MCAssembler.cpp | 44
-rw-r--r--  lib/MC/MCDisassembler/Disassembler.cpp | 2
-rw-r--r--  lib/MC/MCDisassembler/Disassembler.h | 4
-rw-r--r--  lib/MC/MCDwarf.cpp | 53
-rw-r--r--  lib/MC/MCInstrAnalysis.cpp | 5
-rw-r--r--  lib/MC/MCObjectFileInfo.cpp | 15
-rw-r--r--  lib/MC/MCParser/ELFAsmParser.cpp | 2
-rw-r--r--  lib/MC/MCStreamer.cpp | 2
-rw-r--r--  lib/MC/MachObjectWriter.cpp | 2
-rw-r--r--  lib/Object/COFFObjectFile.cpp | 2
-rw-r--r--  lib/ObjectYAML/CodeViewYAMLSymbols.cpp | 4
-rw-r--r--  lib/Support/APFloat.cpp | 2
-rw-r--r--  lib/Support/ConvertUTF.cpp | 28
-rw-r--r--  lib/Support/CrashRecoveryContext.cpp | 4
-rw-r--r--  lib/Support/DAGDeltaAlgorithm.cpp | 6
-rw-r--r--  lib/Support/Errno.cpp | 2
-rw-r--r--  lib/Support/FoldingSet.cpp | 40
-rw-r--r--  lib/Support/FormattedStream.cpp | 2
-rw-r--r--  lib/Support/ManagedStatic.cpp | 6
-rw-r--r--  lib/Support/MemoryBuffer.cpp | 5
-rw-r--r--  lib/Support/Path.cpp | 10
-rw-r--r--  lib/Support/PrettyStackTrace.cpp | 20
-rw-r--r--  lib/Support/SourceMgr.cpp | 24
-rw-r--r--  lib/Support/StringPool.cpp | 4
-rw-r--r--  lib/Support/StringRef.cpp | 2
-rw-r--r--  lib/Support/TargetRegistry.cpp | 2
-rw-r--r--  lib/Support/Windows/Path.inc | 2
-rw-r--r--  lib/Support/YAMLParser.cpp | 2
-rw-r--r--  lib/Support/regex_impl.h | 2
-rw-r--r--  lib/Support/xxhash.cpp | 4
-rw-r--r--  lib/TableGen/StringMatcher.cpp | 38
-rw-r--r--  lib/Target/AArch64/AArch64FastISel.cpp | 2
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.cpp | 13
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.h | 2
-rw-r--r--  lib/Target/AArch64/AArch64InstrFormats.td | 15
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.cpp | 254
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.h | 31
-rw-r--r--  lib/Target/AArch64/AArch64InstructionSelector.cpp | 96
-rw-r--r--  lib/Target/AArch64/AArch64LegalizerInfo.cpp | 2
-rw-r--r--  lib/Target/AArch64/AArch64MachineFunctionInfo.h | 2
-rw-r--r--  lib/Target/AArch64/AArch64RegisterInfo.td | 69
-rw-r--r--  lib/Target/AArch64/AArch64SVEInstrInfo.td | 106
-rw-r--r--  lib/Target/AArch64/AArch64TargetMachine.cpp | 3
-rw-r--r--  lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 2
-rw-r--r--  lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 161
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp | 14
-rw-r--r--  lib/Target/AArch64/SVEInstrFormats.td | 365
-rw-r--r--  lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 17
-rw-r--r--  lib/Target/AMDGPU/AMDGPUInstrInfo.td | 5
-rw-r--r--  lib/Target/AMDGPU/AMDGPUInstructions.td | 1
-rw-r--r--  lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp | 26
-rw-r--r--  lib/Target/AMDGPU/MIMGInstructions.td | 26
-rw-r--r--  lib/Target/AMDGPU/SIISelLowering.cpp | 147
-rw-r--r--  lib/Target/AMDGPU/SIISelLowering.h | 13
-rw-r--r--  lib/Target/AMDGPU/SIInsertSkips.cpp | 22
-rw-r--r--  lib/Target/AMDGPU/SIInstrInfo.cpp | 30
-rw-r--r--  lib/Target/AMDGPU/SIInstrInfo.h | 3
-rw-r--r--  lib/Target/AMDGPU/SIInstructions.td | 10
-rw-r--r--  lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 1
-rw-r--r--  lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 9
-rw-r--r--  lib/Target/AMDGPU/VOP3PInstructions.td | 31
-rw-r--r--  lib/Target/ARM/ARMAsmPrinter.cpp | 9
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMCallingConv.h | 11
-rw-r--r--  lib/Target/ARM/ARMConstantIslandPass.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMConstantPoolValue.h | 2
-rw-r--r--  lib/Target/ARM/ARMFastISel.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMFrameLowering.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 8
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 10
-rw-r--r--  lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMMachineFunctionInfo.h | 2
-rw-r--r--  lib/Target/ARM/ARMSelectionDAGInfo.cpp | 8
-rw-r--r--  lib/Target/ARM/ARMTargetTransformInfo.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMTargetTransformInfo.h | 2
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 51
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 10
-rw-r--r--  lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 2
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp | 2
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 6
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp | 38
-rw-r--r--  lib/Target/ARM/MLxExpansionPass.cpp | 2
-rw-r--r--  lib/Target/ARM/Thumb1FrameLowering.cpp | 2
-rw-r--r--  lib/Target/AVR/AVRISelLowering.cpp | 7
-rw-r--r--  lib/Target/Hexagon/HexagonBitSimplify.cpp | 123
-rw-r--r--  lib/Target/Hexagon/HexagonBitTracker.cpp | 36
-rw-r--r--  lib/Target/Hexagon/HexagonBitTracker.h | 1
-rw-r--r--  lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h | 2
-rw-r--r--  lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 6
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h | 2
-rw-r--r--  lib/Target/Mips/MipsAsmPrinter.cpp | 2
-rw-r--r--  lib/Target/Mips/MipsCallLowering.cpp | 3
-rw-r--r--  lib/Target/Mips/MipsConstantIslandPass.cpp | 12
-rw-r--r--  lib/Target/Mips/MipsFastISel.cpp | 2
-rw-r--r--  lib/Target/Mips/MipsISelLowering.cpp | 6
-rw-r--r--  lib/Target/Mips/MipsISelLowering.h | 5
-rw-r--r--  lib/Target/Mips/MipsInstructionSelector.cpp | 27
-rw-r--r--  lib/Target/Mips/MipsLegalizerInfo.cpp | 3
-rw-r--r--  lib/Target/Mips/MipsRegisterBankInfo.cpp | 1
-rw-r--r--  lib/Target/Mips/MipsSubtarget.h | 2
-rw-r--r--  lib/Target/NVPTX/NVPTXAsmPrinter.h | 2
-rw-r--r--  lib/Target/NVPTX/NVPTXImageOptimizer.cpp | 2
-rw-r--r--  lib/Target/NVPTX/NVPTXMachineFunctionInfo.h | 2
-rw-r--r--  lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp | 2
-rw-r--r--  lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h | 4
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 4
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 22
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h | 2
-rw-r--r--  lib/Target/PowerPC/PPC.h | 4
-rw-r--r--  lib/Target/PowerPC/PPCBranchSelector.cpp | 16
-rw-r--r--  lib/Target/PowerPC/PPCEarlyReturn.cpp | 2
-rw-r--r--  lib/Target/PowerPC/PPCFastISel.cpp | 2
-rw-r--r--  lib/Target/PowerPC/PPCFrameLowering.cpp | 6
-rw-r--r--  lib/Target/PowerPC/PPCHazardRecognizers.cpp | 4
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 12
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.h | 6
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp | 4
-rw-r--r--  lib/Target/PowerPC/PPCLoopPreIncPrep.cpp | 2
-rw-r--r--  lib/Target/PowerPC/PPCMCInstLower.cpp | 6
-rw-r--r--  lib/Target/PowerPC/PPCMIPeephole.cpp | 2
-rw-r--r--  lib/Target/PowerPC/PPCMachineFunctionInfo.h | 4
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.cpp | 2
-rw-r--r--  lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 2
-rw-r--r--  lib/Target/PowerPC/PPCVSXSwapRemoval.cpp | 2
-rw-r--r--  lib/Target/Sparc/AsmParser/SparcAsmParser.cpp | 8
-rw-r--r--  lib/Target/Sparc/Disassembler/SparcDisassembler.cpp | 8
-rw-r--r--  lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp | 6
-rw-r--r--  lib/Target/Sparc/Sparc.h | 2
-rw-r--r--  lib/Target/Sparc/SparcISelLowering.h | 4
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.cpp | 2
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.cpp | 4
-rw-r--r--  lib/Target/SystemZ/SystemZHazardRecognizer.cpp | 45
-rw-r--r--  lib/Target/SystemZ/SystemZHazardRecognizer.h | 17
-rw-r--r--  lib/Target/SystemZ/SystemZISelLowering.cpp | 80
-rw-r--r--  lib/Target/SystemZ/SystemZISelLowering.h | 1
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.td | 48
-rw-r--r--  lib/Target/SystemZ/SystemZMachineScheduler.cpp | 5
-rw-r--r--  lib/Target/SystemZ/SystemZMachineScheduler.h | 4
-rw-r--r--  lib/Target/SystemZ/SystemZOperands.td | 1
-rw-r--r--  lib/Target/SystemZ/SystemZOperators.td | 10
-rw-r--r--  lib/Target/SystemZ/SystemZTargetTransformInfo.cpp | 8
-rw-r--r--  lib/Target/Target.cpp | 2
-rw-r--r--  lib/Target/TargetLoweringObjectFile.cpp | 4
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp | 6
-rw-r--r--  lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp | 2
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp | 74
-rw-r--r--  lib/Target/X86/X86CallingConv.h | 2
-rw-r--r--  lib/Target/X86/X86CmovConversion.cpp | 2
-rw-r--r--  lib/Target/X86/X86FastISel.cpp | 6
-rw-r--r--  lib/Target/X86/X86FixupLEAs.cpp | 2
-rw-r--r--  lib/Target/X86/X86FlagsCopyLowering.cpp | 7
-rw-r--r--  lib/Target/X86/X86FloatingPoint.cpp | 6
-rw-r--r--  lib/Target/X86/X86FrameLowering.cpp | 65
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 317
-rw-r--r--  lib/Target/X86/X86ISelLowering.h | 7
-rw-r--r--  lib/Target/X86/X86InstrFoldTables.cpp | 2
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 2
-rw-r--r--  lib/Target/X86/X86InstrInfo.td | 16
-rw-r--r--  lib/Target/X86/X86InstrShiftRotate.td | 18
-rwxr-xr-x  lib/Target/X86/X86SchedBroadwell.td | 46
-rw-r--r--  lib/Target/X86/X86SchedHaswell.td | 52
-rw-r--r--  lib/Target/X86/X86SchedSandyBridge.td | 53
-rw-r--r--  lib/Target/X86/X86SchedSkylakeClient.td | 48
-rwxr-xr-x  lib/Target/X86/X86SchedSkylakeServer.td | 48
-rw-r--r--  lib/Target/X86/X86Schedule.td | 10
-rw-r--r--  lib/Target/X86/X86ScheduleAtom.td | 20
-rw-r--r--  lib/Target/X86/X86ScheduleBtVer2.td | 37
-rw-r--r--  lib/Target/X86/X86ScheduleSLM.td | 12
-rw-r--r--  lib/Target/X86/X86ScheduleZnver1.td | 12
-rw-r--r--  lib/Target/X86/X86Subtarget.h | 2
-rw-r--r--  lib/Target/X86/X86TargetTransformInfo.cpp | 14
-rw-r--r--  lib/Target/XCore/XCoreAsmPrinter.cpp | 6
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.cpp | 36
-rw-r--r--  lib/Target/XCore/XCoreMachineFunctionInfo.h | 6
-rw-r--r--  lib/Target/XCore/XCoreRegisterInfo.cpp | 4
-rw-r--r--  lib/Target/XCore/XCoreRegisterInfo.h | 2
-rw-r--r--  lib/Target/XCore/XCoreSubtarget.h | 2
-rw-r--r--  lib/Transforms/IPO/DeadArgumentElimination.cpp | 10
-rw-r--r--  lib/Transforms/IPO/FunctionAttrs.cpp | 2
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp | 118
-rw-r--r--  lib/Transforms/IPO/IPConstantPropagation.cpp | 16
-rw-r--r--  lib/Transforms/IPO/MergeFunctions.cpp | 10
-rw-r--r--  lib/Transforms/IPO/PruneEH.cpp | 4
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAddSub.cpp | 18
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 32
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCasts.cpp | 6
-rw-r--r--  lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 2
-rw-r--r--  lib/Transforms/InstCombine/InstCombineSelect.cpp | 33
-rw-r--r--  lib/Transforms/InstCombine/InstCombineShifts.cpp | 2
-rw-r--r--  lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 2
-rw-r--r--  lib/Transforms/InstCombine/InstructionCombining.cpp | 2
-rw-r--r--  lib/Transforms/Instrumentation/AddressSanitizer.cpp | 4
-rw-r--r--  lib/Transforms/Instrumentation/GCOVProfiling.cpp | 2
-rw-r--r--  lib/Transforms/Instrumentation/InstrProfiling.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/AlignmentFromAssumptions.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/ConstantHoisting.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/CorrelatedValuePropagation.cpp | 8
-rw-r--r--  lib/Transforms/Scalar/DeadStoreElimination.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/EarlyCSE.cpp | 6
-rw-r--r--  lib/Transforms/Scalar/GVNSink.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/GuardWidening.cpp | 96
-rw-r--r--  lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp | 6
-rw-r--r--  lib/Transforms/Scalar/LICM.cpp | 8
-rw-r--r--  lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/LoopPredication.cpp | 6
-rw-r--r--  lib/Transforms/Scalar/LoopUnrollPass.cpp | 4
-rw-r--r--  lib/Transforms/Scalar/LoopUnswitch.cpp | 10
-rw-r--r--  lib/Transforms/Scalar/NewGVN.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/Reassociate.cpp | 16
-rw-r--r--  lib/Transforms/Scalar/RewriteStatepointsForGC.cpp | 22
-rw-r--r--  lib/Transforms/Scalar/SROA.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/SimpleLoopUnswitch.cpp | 6
-rw-r--r--  lib/Transforms/Utils/BuildLibCalls.cpp | 2
-rw-r--r--  lib/Transforms/Utils/CallPromotionUtils.cpp | 2
-rw-r--r--  lib/Transforms/Utils/CloneFunction.cpp | 38
-rw-r--r--  lib/Transforms/Utils/CloneModule.cpp | 4
-rw-r--r--  lib/Transforms/Utils/CodeExtractor.cpp | 6
-rw-r--r--  lib/Transforms/Utils/InlineFunction.cpp | 14
-rw-r--r--  lib/Transforms/Utils/IntegerDivision.cpp | 10
-rw-r--r--  lib/Transforms/Utils/LCSSA.cpp | 9
-rw-r--r--  lib/Transforms/Utils/LoopUnrollPeel.cpp | 4
-rw-r--r--  lib/Transforms/Utils/MetaRenamer.cpp | 2
-rw-r--r--  lib/Transforms/Utils/SSAUpdater.cpp | 38
-rw-r--r--  lib/Transforms/Utils/SimplifyIndVar.cpp | 2
-rw-r--r--  lib/Transforms/Utils/SimplifyLibCalls.cpp | 237
-rw-r--r--  lib/Transforms/Utils/SymbolRewriter.cpp | 2
-rw-r--r--  lib/Transforms/Utils/UnifyFunctionExitNodes.cpp | 2
-rw-r--r--  lib/Transforms/Vectorize/LoopVectorize.cpp | 28
-rw-r--r--  lib/Transforms/Vectorize/SLPVectorizer.cpp | 8
-rw-r--r--  lib/Transforms/Vectorize/VPlan.cpp | 5
-rw-r--r--  lib/Transforms/Vectorize/VPlan.h | 121
-rw-r--r--  lib/Transforms/Vectorize/VPlanDominatorTree.h | 41
-rw-r--r--  lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp | 21
-rw-r--r--  lib/Transforms/Vectorize/VPlanHCFGBuilder.h | 23
-rw-r--r--  lib/Transforms/Vectorize/VPlanLoopInfo.h | 45
326 files changed, 5312 insertions, 2754 deletions
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index 8aee81b1f1d8..8f903fa4f1e8 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -142,7 +142,7 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
Alias = SetMayAlias;
AST.TotalMayAliasSetSize += size();
} else {
- // First entry of must alias must have maximum size!
+ // First entry of must alias must have maximum size!
P->updateSizeAndAAInfo(Size, AAInfo);
}
assert(Result != NoAlias && "Cannot be part of must set!");
@@ -251,9 +251,9 @@ void AliasSetTracker::clear() {
for (PointerMapType::iterator I = PointerMap.begin(), E = PointerMap.end();
I != E; ++I)
I->second->eraseFromList();
-
+
PointerMap.clear();
-
+
// The alias sets should all be clear now.
AliasSets.clear();
}
@@ -269,7 +269,7 @@ AliasSet *AliasSetTracker::mergeAliasSetsForPointer(const Value *Ptr,
for (iterator I = begin(), E = end(); I != E;) {
iterator Cur = I++;
if (Cur->Forward || !Cur->aliasesPointer(Ptr, Size, AAInfo, AA)) continue;
-
+
if (!FoundSet) { // If this is the first alias set ptr can go into.
FoundSet = &*Cur; // Remember it.
} else { // Otherwise, we must merge the sets.
@@ -336,13 +336,13 @@ AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer,
// Return the set!
return *Entry.getAliasSet(*this)->getForwardedTarget(*this);
}
-
+
if (AliasSet *AS = mergeAliasSetsForPointer(Pointer, Size, AAInfo)) {
// Add it to the alias set it aliases.
AS->addPointer(*this, Entry, Size, AAInfo);
return *AS;
}
-
+
// Otherwise create a new alias set to hold the loaded pointer.
AliasSets.push_back(new AliasSet());
AliasSets.back().addPointer(*this, Entry, Size, AAInfo);
@@ -526,10 +526,10 @@ void AliasSetTracker::deleteValue(Value *PtrVal) {
AS->SetSize--;
TotalMayAliasSetSize--;
}
-
+
// Stop using the alias set.
AS->dropRef(*this);
-
+
PointerMap.erase(I);
}
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 96326347b712..1a24ae3dba15 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -28,6 +28,7 @@
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/PhiValues.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallSite.h"
@@ -93,7 +94,8 @@ bool BasicAAResult::invalidate(Function &Fn, const PreservedAnalyses &PA,
// depend on them.
if (Inv.invalidate<AssumptionAnalysis>(Fn, PA) ||
(DT && Inv.invalidate<DominatorTreeAnalysis>(Fn, PA)) ||
- (LI && Inv.invalidate<LoopAnalysis>(Fn, PA)))
+ (LI && Inv.invalidate<LoopAnalysis>(Fn, PA)) ||
+ (PV && Inv.invalidate<PhiValuesAnalysis>(Fn, PA)))
return true;
// Otherwise this analysis result remains valid.
@@ -1527,34 +1529,70 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize,
return Alias;
}
- SmallPtrSet<Value *, 4> UniqueSrc;
SmallVector<Value *, 4> V1Srcs;
bool isRecursive = false;
- for (Value *PV1 : PN->incoming_values()) {
- if (isa<PHINode>(PV1))
- // If any of the source itself is a PHI, return MayAlias conservatively
- // to avoid compile time explosion. The worst possible case is if both
- // sides are PHI nodes. In which case, this is O(m x n) time where 'm'
- // and 'n' are the number of PHI sources.
+ if (PV) {
+ // If we have PhiValues then use it to get the underlying phi values.
+ const PhiValues::ValueSet &PhiValueSet = PV->getValuesForPhi(PN);
+ // If we have more phi values than the search depth then return MayAlias
+ // conservatively to avoid compile time explosion. The worst possible case
+ // is if both sides are PHI nodes. In which case, this is O(m x n) time
+ // where 'm' and 'n' are the number of PHI sources.
+ if (PhiValueSet.size() > MaxLookupSearchDepth)
return MayAlias;
-
- if (EnableRecPhiAnalysis)
- if (GEPOperator *PV1GEP = dyn_cast<GEPOperator>(PV1)) {
- // Check whether the incoming value is a GEP that advances the pointer
- // result of this PHI node (e.g. in a loop). If this is the case, we
- // would recurse and always get a MayAlias. Handle this case specially
- // below.
- if (PV1GEP->getPointerOperand() == PN && PV1GEP->getNumIndices() == 1 &&
- isa<ConstantInt>(PV1GEP->idx_begin())) {
- isRecursive = true;
- continue;
+ // Add the values to V1Srcs
+ for (Value *PV1 : PhiValueSet) {
+ if (EnableRecPhiAnalysis) {
+ if (GEPOperator *PV1GEP = dyn_cast<GEPOperator>(PV1)) {
+ // Check whether the incoming value is a GEP that advances the pointer
+ // result of this PHI node (e.g. in a loop). If this is the case, we
+ // would recurse and always get a MayAlias. Handle this case specially
+ // below.
+ if (PV1GEP->getPointerOperand() == PN && PV1GEP->getNumIndices() == 1 &&
+ isa<ConstantInt>(PV1GEP->idx_begin())) {
+ isRecursive = true;
+ continue;
+ }
}
}
-
- if (UniqueSrc.insert(PV1).second)
V1Srcs.push_back(PV1);
+ }
+ } else {
+ // If we don't have PhiInfo then just look at the operands of the phi itself
+ // FIXME: Remove this once we can guarantee that we have PhiInfo always
+ SmallPtrSet<Value *, 4> UniqueSrc;
+ for (Value *PV1 : PN->incoming_values()) {
+ if (isa<PHINode>(PV1))
+ // If any of the source itself is a PHI, return MayAlias conservatively
+ // to avoid compile time explosion. The worst possible case is if both
+ // sides are PHI nodes. In which case, this is O(m x n) time where 'm'
+ // and 'n' are the number of PHI sources.
+ return MayAlias;
+
+ if (EnableRecPhiAnalysis)
+ if (GEPOperator *PV1GEP = dyn_cast<GEPOperator>(PV1)) {
+ // Check whether the incoming value is a GEP that advances the pointer
+ // result of this PHI node (e.g. in a loop). If this is the case, we
+ // would recurse and always get a MayAlias. Handle this case specially
+ // below.
+ if (PV1GEP->getPointerOperand() == PN && PV1GEP->getNumIndices() == 1 &&
+ isa<ConstantInt>(PV1GEP->idx_begin())) {
+ isRecursive = true;
+ continue;
+ }
+ }
+
+ if (UniqueSrc.insert(PV1).second)
+ V1Srcs.push_back(PV1);
+ }
}
+ // If V1Srcs is empty then that means that the phi has no underlying non-phi
+ // value. This should only be possible in blocks unreachable from the entry
+ // block, but return MayAlias just in case.
+ if (V1Srcs.empty())
+ return MayAlias;
+
// If this PHI node is recursive, set the size of the accessed memory to
// unknown to represent all the possible values the GEP could advance the
// pointer to.
@@ -1879,7 +1917,8 @@ BasicAAResult BasicAA::run(Function &F, FunctionAnalysisManager &AM) {
AM.getResult<TargetLibraryAnalysis>(F),
AM.getResult<AssumptionAnalysis>(F),
&AM.getResult<DominatorTreeAnalysis>(F),
- AM.getCachedResult<LoopAnalysis>(F));
+ AM.getCachedResult<LoopAnalysis>(F),
+ AM.getCachedResult<PhiValuesAnalysis>(F));
}
BasicAAWrapperPass::BasicAAWrapperPass() : FunctionPass(ID) {
@@ -1891,12 +1930,12 @@ char BasicAAWrapperPass::ID = 0;
void BasicAAWrapperPass::anchor() {}
INITIALIZE_PASS_BEGIN(BasicAAWrapperPass, "basicaa",
- "Basic Alias Analysis (stateless AA impl)", true, true)
+ "Basic Alias Analysis (stateless AA impl)", false, true)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(BasicAAWrapperPass, "basicaa",
- "Basic Alias Analysis (stateless AA impl)", true, true)
+ "Basic Alias Analysis (stateless AA impl)", false, true)
FunctionPass *llvm::createBasicAAWrapperPass() {
return new BasicAAWrapperPass();
@@ -1907,10 +1946,12 @@ bool BasicAAWrapperPass::runOnFunction(Function &F) {
auto &TLIWP = getAnalysis<TargetLibraryInfoWrapperPass>();
auto &DTWP = getAnalysis<DominatorTreeWrapperPass>();
auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
+ auto *PVWP = getAnalysisIfAvailable<PhiValuesWrapperPass>();
Result.reset(new BasicAAResult(F.getParent()->getDataLayout(), F, TLIWP.getTLI(),
ACT.getAssumptionCache(F), &DTWP.getDomTree(),
- LIWP ? &LIWP->getLoopInfo() : nullptr));
+ LIWP ? &LIWP->getLoopInfo() : nullptr,
+ PVWP ? &PVWP->getResult() : nullptr));
return false;
}
@@ -1920,6 +1961,7 @@ void BasicAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addUsedIfAvailable<PhiValuesWrapperPass>();
}
BasicAAResult llvm::createLegacyPMBasicAAResult(Pass &P, Function &F) {
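[Editor's note: the patch above threads the PhiValues analysis through BasicAA so that aliasPHI can consult the transitive set of non-phi underlying values of a PHI instead of bailing to MayAlias whenever an incoming value is itself a PHI. The sketch below is a minimal illustration of that usage pattern; it assumes only the PhiValues calls visible in the patch (getValuesForPhi returning an iterable ValueSet), and the wrapper function itself is hypothetical, not part of the patch.]

```cpp
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/PhiValues.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Illustrative helper (not in the patch): gather the underlying non-phi
// values of PN, giving up when there are more than MaxValues of them,
// which mirrors the MaxLookupSearchDepth guard in aliasPHI above.
static bool collectUnderlyingValues(const PHINode *PN, PhiValues &PV,
                                    unsigned MaxValues,
                                    SmallVectorImpl<Value *> &Out) {
  const PhiValues::ValueSet &Set = PV.getValuesForPhi(PN);
  if (Set.size() > MaxValues)
    return false; // Too many sources; the caller should return MayAlias.
  for (Value *V : Set)
    Out.push_back(V); // Unlike incoming_values(), these are never PHIs.
  return true;
}
```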
diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp
index fc25cef8ddca..5b170dfa7903 100644
--- a/lib/Analysis/CFGPrinter.cpp
+++ b/lib/Analysis/CFGPrinter.cpp
@@ -124,7 +124,7 @@ namespace {
}
char CFGPrinterLegacyPass::ID = 0;
-INITIALIZE_PASS(CFGPrinterLegacyPass, "dot-cfg", "Print CFG of function to 'dot' file",
+INITIALIZE_PASS(CFGPrinterLegacyPass, "dot-cfg", "Print CFG of function to 'dot' file",
false, true)
PreservedAnalyses CFGPrinterPass::run(Function &F,
diff --git a/lib/Analysis/CallGraph.cpp b/lib/Analysis/CallGraph.cpp
index 7d5d2d2e4496..cbdf5f63c557 100644
--- a/lib/Analysis/CallGraph.cpp
+++ b/lib/Analysis/CallGraph.cpp
@@ -166,7 +166,7 @@ void CallGraphNode::print(raw_ostream &OS) const {
OS << "Call graph node for function: '" << F->getName() << "'";
else
OS << "Call graph node <<null function>>";
-
+
OS << "<<" << this << ">> #uses=" << getNumReferences() << '\n';
for (const auto &I : *this) {
diff --git a/lib/Analysis/CallGraphSCCPass.cpp b/lib/Analysis/CallGraphSCCPass.cpp
index f2211edba216..4c33c420b65d 100644
--- a/lib/Analysis/CallGraphSCCPass.cpp
+++ b/lib/Analysis/CallGraphSCCPass.cpp
@@ -41,7 +41,7 @@ using namespace llvm;
#define DEBUG_TYPE "cgscc-passmgr"
-static cl::opt<unsigned>
+static cl::opt<unsigned>
MaxIterations("max-cg-scc-iterations", cl::ReallyHidden, cl::init(4));
STATISTIC(MaxSCCIterations, "Maximum CGSCCPassMgr iterations on one SCC");
@@ -97,13 +97,13 @@ public:
}
PassManagerType getPassManagerType() const override {
- return PMT_CallGraphPassManager;
+ return PMT_CallGraphPassManager;
}
-
+
private:
bool RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
bool &DevirtualizedCall);
-
+
bool RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
CallGraph &CG, bool &CallGraphUpToDate,
bool &DevirtualizedCall);
@@ -142,21 +142,21 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
if (EmitICRemark)
emitInstrCountChangedRemark(P, M, InstrCount);
}
-
+
// After the CGSCCPass is done, when assertions are enabled, use
// RefreshCallGraph to verify that the callgraph was correctly updated.
#ifndef NDEBUG
if (Changed)
RefreshCallGraph(CurSCC, CG, true);
#endif
-
+
return Changed;
}
-
+
assert(PM->getPassManagerType() == PMT_FunctionPassManager &&
"Invalid CGPassManager member");
FPPassManager *FPP = (FPPassManager*)P;
-
+
// Run pass P on all functions in the current SCC.
for (CallGraphNode *CGN : CurSCC) {
if (Function *F = CGN->getFunction()) {
@@ -168,7 +168,7 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
F->getContext().yield();
}
}
-
+
// The function pass(es) modified the IR, they may have clobbered the
// callgraph.
if (Changed && CallGraphUpToDate) {
@@ -199,7 +199,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
bool MadeChange = false;
bool DevirtualizedCall = false;
-
+
// Scan all functions in the SCC.
unsigned FunctionNo = 0;
for (CallGraphSCC::iterator SCCIdx = CurSCC.begin(), E = CurSCC.end();
@@ -207,14 +207,14 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
CallGraphNode *CGN = *SCCIdx;
Function *F = CGN->getFunction();
if (!F || F->isDeclaration()) continue;
-
+
// Walk the function body looking for call sites. Sync up the call sites in
// CGN with those actually in the function.
// Keep track of the number of direct and indirect calls that were
// invalidated and removed.
unsigned NumDirectRemoved = 0, NumIndirectRemoved = 0;
-
+
// Get the set of call sites currently in the function.
for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) {
// If this call site is null, then the function pass deleted the call
@@ -226,7 +226,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
CallSites.count(I->first) ||
// If the call edge is not from a call or invoke, or it is a
- // instrinsic call, then the function pass RAUW'd a call with
+ // instrinsic call, then the function pass RAUW'd a call with
// another value. This can happen when constant folding happens
// of well known functions etc.
!CallSite(I->first) ||
@@ -236,18 +236,18 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
CallSite(I->first).getCalledFunction()->getIntrinsicID()))) {
assert(!CheckingMode &&
"CallGraphSCCPass did not update the CallGraph correctly!");
-
+
// If this was an indirect call site, count it.
if (!I->second->getFunction())
++NumIndirectRemoved;
- else
+ else
++NumDirectRemoved;
-
+
// Just remove the edge from the set of callees, keep track of whether
// I points to the last element of the vector.
bool WasLast = I + 1 == E;
CGN->removeCallEdge(I);
-
+
// If I pointed to the last element of the vector, we have to bail out:
// iterator checking rejects comparisons of the resultant pointer with
// end.
@@ -256,10 +256,10 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
E = CGN->end();
continue;
}
-
+
assert(!CallSites.count(I->first) &&
"Call site occurs in node multiple times");
-
+
CallSite CS(I->first);
if (CS) {
Function *Callee = CS.getCalledFunction();
@@ -269,7 +269,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
}
++I;
}
-
+
// Loop over all of the instructions in the function, getting the callsites.
// Keep track of the number of direct/indirect calls added.
unsigned NumDirectAdded = 0, NumIndirectAdded = 0;
@@ -280,7 +280,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
if (!CS) continue;
Function *Callee = CS.getCalledFunction();
if (Callee && Callee->isIntrinsic()) continue;
-
+
// If this call site already existed in the callgraph, just verify it
// matches up to expectations and remove it from CallSites.
DenseMap<Value*, CallGraphNode*>::iterator ExistingIt =
@@ -290,11 +290,11 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
// Remove from CallSites since we have now seen it.
CallSites.erase(ExistingIt);
-
+
// Verify that the callee is right.
if (ExistingNode->getFunction() == CS.getCalledFunction())
continue;
-
+
// If we are in checking mode, we are not allowed to actually mutate
// the callgraph. If this is a case where we can infer that the
// callgraph is less precise than it could be (e.g. an indirect call
@@ -303,10 +303,10 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
if (CheckingMode && CS.getCalledFunction() &&
ExistingNode->getFunction() == nullptr)
continue;
-
+
assert(!CheckingMode &&
"CallGraphSCCPass did not update the CallGraph correctly!");
-
+
// If not, we either went from a direct call to indirect, indirect to
// direct, or direct to different direct.
CallGraphNode *CalleeNode;
@@ -328,7 +328,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
MadeChange = true;
continue;
}
-
+
assert(!CheckingMode &&
"CallGraphSCCPass did not update the CallGraph correctly!");
@@ -341,11 +341,11 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
CalleeNode = CG.getCallsExternalNode();
++NumIndirectAdded;
}
-
+
CGN->addCalledFunction(CS, CalleeNode);
MadeChange = true;
}
-
+
// We scanned the old callgraph node, removing invalidated call sites and
// then added back newly found call sites. One thing that can happen is
// that an old indirect call site was deleted and replaced with a new direct
@@ -359,13 +359,13 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
if (NumIndirectRemoved > NumIndirectAdded &&
NumDirectRemoved < NumDirectAdded)
DevirtualizedCall = true;
-
+
// After scanning this function, if we still have entries in callsites, then
// they are dangling pointers. WeakTrackingVH should save us for this, so
// abort if
// this happens.
assert(CallSites.empty() && "Dangling pointers found in call sites map");
-
+
// Periodically do an explicit clear to remove tombstones when processing
// large scc's.
if ((FunctionNo & 15) == 15)
@@ -392,7 +392,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
bool &DevirtualizedCall) {
bool Changed = false;
-
+
// Keep track of whether the callgraph is known to be up-to-date or not.
// The CGSSC pass manager runs two types of passes:
// CallGraphSCC Passes and other random function passes. Because other
@@ -406,7 +406,7 @@ bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
for (unsigned PassNo = 0, e = getNumContainedPasses();
PassNo != e; ++PassNo) {
Pass *P = getContainedPass(PassNo);
-
+
// If we're in -debug-pass=Executions mode, construct the SCC node list,
// otherwise avoid constructing this string as it is expensive.
if (isPassDebuggingExecutionsOrMore()) {
@@ -423,23 +423,23 @@ bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
dumpPassInfo(P, EXECUTION_MSG, ON_CG_MSG, Functions);
}
dumpRequiredSet(P);
-
+
initializeAnalysisImpl(P);
-
+
// Actually run this pass on the current SCC.
Changed |= RunPassOnSCC(P, CurSCC, CG,
CallGraphUpToDate, DevirtualizedCall);
-
+
if (Changed)
dumpPassInfo(P, MODIFICATION_MSG, ON_CG_MSG, "");
dumpPreservedSet(P);
-
- verifyPreservedAnalysis(P);
+
+ verifyPreservedAnalysis(P);
removeNotPreservedAnalysis(P);
recordAvailableAnalysis(P);
removeDeadPasses(P, "", ON_CG_MSG);
}
-
+
// If the callgraph was left out of date (because the last pass run was a
// functionpass), refresh it before we move on to the next SCC.
if (!CallGraphUpToDate)
@@ -452,7 +452,7 @@ bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
bool CGPassManager::runOnModule(Module &M) {
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
bool Changed = doInitialization(CG);
-
+
// Walk the callgraph in bottom-up SCC order.
scc_iterator<CallGraph*> CGI = scc_begin(&CG);
@@ -485,7 +485,7 @@ bool CGPassManager::runOnModule(Module &M) {
DevirtualizedCall = false;
Changed |= RunAllPassesOnSCC(CurSCC, CG, DevirtualizedCall);
} while (Iteration++ < MaxIterations && DevirtualizedCall);
-
+
if (DevirtualizedCall)
LLVM_DEBUG(dbgs() << " CGSCCPASSMGR: Stopped iteration after "
<< Iteration
@@ -500,7 +500,7 @@ bool CGPassManager::runOnModule(Module &M) {
/// Initialize CG
bool CGPassManager::doInitialization(CallGraph &CG) {
bool Changed = false;
- for (unsigned i = 0, e = getNumContainedPasses(); i != e; ++i) {
+ for (unsigned i = 0, e = getNumContainedPasses(); i != e; ++i) {
if (PMDataManager *PM = getContainedPass(i)->getAsPMDataManager()) {
assert(PM->getPassManagerType() == PMT_FunctionPassManager &&
"Invalid CGPassManager member");
@@ -515,7 +515,7 @@ bool CGPassManager::doInitialization(CallGraph &CG) {
/// Finalize CG
bool CGPassManager::doFinalization(CallGraph &CG) {
bool Changed = false;
- for (unsigned i = 0, e = getNumContainedPasses(); i != e; ++i) {
+ for (unsigned i = 0, e = getNumContainedPasses(); i != e; ++i) {
if (PMDataManager *PM = getContainedPass(i)->getAsPMDataManager()) {
assert(PM->getPassManagerType() == PMT_FunctionPassManager &&
"Invalid CGPassManager member");
@@ -541,7 +541,7 @@ void CallGraphSCC::ReplaceNode(CallGraphNode *Old, CallGraphNode *New) {
Nodes[i] = New;
break;
}
-
+
// Update the active scc_iterator so that it doesn't contain dangling
// pointers to the old CallGraphNode.
scc_iterator<CallGraph*> *CGI = (scc_iterator<CallGraph*>*)Context;
@@ -555,18 +555,18 @@ void CallGraphSCC::ReplaceNode(CallGraphNode *Old, CallGraphNode *New) {
/// Assign pass manager to manage this pass.
void CallGraphSCCPass::assignPassManager(PMStack &PMS,
PassManagerType PreferredType) {
- // Find CGPassManager
+ // Find CGPassManager
while (!PMS.empty() &&
PMS.top()->getPassManagerType() > PMT_CallGraphPassManager)
PMS.pop();
assert(!PMS.empty() && "Unable to handle Call Graph Pass");
CGPassManager *CGP;
-
+
if (PMS.top()->getPassManagerType() == PMT_CallGraphPassManager)
CGP = (CGPassManager*)PMS.top();
else {
- // Create new Call Graph SCC Pass Manager if it does not exist.
+ // Create new Call Graph SCC Pass Manager if it does not exist.
assert(!PMS.empty() && "Unable to create Call Graph Pass Manager");
PMDataManager *PMD = PMS.top();
@@ -608,7 +608,7 @@ namespace {
class PrintCallGraphPass : public CallGraphSCCPass {
std::string Banner;
raw_ostream &OS; // raw_ostream to print on.
-
+
public:
static char ID;
@@ -640,10 +640,10 @@ namespace {
}
return false;
}
-
+
StringRef getPassName() const override { return "Print CallGraph IR"; }
};
-
+
} // end anonymous namespace.
char PrintCallGraphPass::ID = 0;
diff --git a/lib/Analysis/DemandedBits.cpp b/lib/Analysis/DemandedBits.cpp
index 58c5bccff65d..e7637cd88327 100644
--- a/lib/Analysis/DemandedBits.cpp
+++ b/lib/Analysis/DemandedBits.cpp
@@ -272,7 +272,7 @@ void DemandedBits::performAnalysis() {
// Analysis already completed for this function.
return;
Analyzed = true;
-
+
Visited.clear();
AliveBits.clear();
@@ -367,7 +367,7 @@ void DemandedBits::performAnalysis() {
APInt DemandedBits::getDemandedBits(Instruction *I) {
performAnalysis();
-
+
const DataLayout &DL = I->getModule()->getDataLayout();
auto Found = AliveBits.find(I);
if (Found != AliveBits.end())
diff --git a/lib/Analysis/GlobalsModRef.cpp b/lib/Analysis/GlobalsModRef.cpp
index 197aee9dacb7..2c503609d96b 100644
--- a/lib/Analysis/GlobalsModRef.cpp
+++ b/lib/Analysis/GlobalsModRef.cpp
@@ -409,7 +409,7 @@ bool GlobalsAAResult::AnalyzeIndirectGlobalMemory(GlobalVariable *GV) {
if (Constant *C = GV->getInitializer())
if (!C->isNullValue())
return false;
-
+
// Walk the user list of the global. If we find anything other than a direct
// load or store, bail out.
for (User *U : GV->users()) {
@@ -464,7 +464,7 @@ bool GlobalsAAResult::AnalyzeIndirectGlobalMemory(GlobalVariable *GV) {
return true;
}
-void GlobalsAAResult::CollectSCCMembership(CallGraph &CG) {
+void GlobalsAAResult::CollectSCCMembership(CallGraph &CG) {
// We do a bottom-up SCC traversal of the call graph. In other words, we
// visit all callees before callers (leaf-first).
unsigned SCCID = 0;
@@ -633,7 +633,7 @@ static bool isNonEscapingGlobalNoAliasWithLoad(const GlobalValue *GV,
Inputs.push_back(V);
do {
const Value *Input = Inputs.pop_back_val();
-
+
if (isa<GlobalValue>(Input) || isa<Argument>(Input) || isa<CallInst>(Input) ||
isa<InvokeInst>(Input))
// Arguments to functions or returns from functions are inherently
@@ -654,7 +654,7 @@ static bool isNonEscapingGlobalNoAliasWithLoad(const GlobalValue *GV,
if (auto *LI = dyn_cast<LoadInst>(Input)) {
Inputs.push_back(GetUnderlyingObject(LI->getPointerOperand(), DL));
continue;
- }
+ }
if (auto *SI = dyn_cast<SelectInst>(Input)) {
const Value *LHS = GetUnderlyingObject(SI->getTrueValue(), DL);
const Value *RHS = GetUnderlyingObject(SI->getFalseValue(), DL);
@@ -672,7 +672,7 @@ static bool isNonEscapingGlobalNoAliasWithLoad(const GlobalValue *GV,
}
continue;
}
-
+
return false;
} while (!Inputs.empty());
@@ -754,7 +754,7 @@ bool GlobalsAAResult::isNonEscapingGlobalNoAlias(const GlobalValue *GV,
// non-addr-taken globals.
continue;
}
-
+
// Recurse through a limited number of selects, loads and PHIs. This is an
// arbitrary depth of 4, lower numbers could be used to fix compile time
// issues if needed, but this is generally expected to be only be important
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 519d6d67be51..7fc7c15a0c25 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -65,6 +65,48 @@ static Value *SimplifyCastInst(unsigned, Value *, Type *,
static Value *SimplifyGEPInst(Type *, ArrayRef<Value *>, const SimplifyQuery &,
unsigned);
+static Value *foldSelectWithBinaryOp(Value *Cond, Value *TrueVal,
+ Value *FalseVal) {
+ BinaryOperator::BinaryOps BinOpCode;
+ if (auto *BO = dyn_cast<BinaryOperator>(Cond))
+ BinOpCode = BO->getOpcode();
+ else
+ return nullptr;
+
+ CmpInst::Predicate ExpectedPred, Pred1, Pred2;
+ if (BinOpCode == BinaryOperator::Or) {
+ ExpectedPred = ICmpInst::ICMP_NE;
+ } else if (BinOpCode == BinaryOperator::And) {
+ ExpectedPred = ICmpInst::ICMP_EQ;
+ } else
+ return nullptr;
+
+ // %A = icmp eq %TV, %FV
+ // %B = icmp eq %X, %Y (and one of these is a select operand)
+ // %C = and %A, %B
+ // %D = select %C, %TV, %FV
+ // -->
+ // %FV
+
+ // %A = icmp ne %TV, %FV
+ // %B = icmp ne %X, %Y (and one of these is a select operand)
+ // %C = or %A, %B
+ // %D = select %C, %TV, %FV
+ // -->
+ // %TV
+ Value *X, *Y;
+ if (!match(Cond, m_c_BinOp(m_c_ICmp(Pred1, m_Specific(TrueVal),
+ m_Specific(FalseVal)),
+ m_ICmp(Pred2, m_Value(X), m_Value(Y)))) ||
+ Pred1 != Pred2 || Pred1 != ExpectedPred)
+ return nullptr;
+
+ if (X == TrueVal || X == FalseVal || Y == TrueVal || Y == FalseVal)
+ return BinOpCode == BinaryOperator::Or ? TrueVal : FalseVal;
+
+ return nullptr;
+}
+
/// For a boolean type or a vector of boolean type, return false or a vector
/// with every element false.
static Constant *getFalse(Type *Ty) {
@@ -1283,6 +1325,23 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
if (match(Op0, m_NUWShl(m_Value(X), m_Specific(Op1))))
return X;
+ // ((X << A) | Y) >> A -> X if effective width of Y is not larger than A.
+ // We can return X as we do in the above case since OR alters no bits in X.
+ // SimplifyDemandedBits in InstCombine can do more general optimization for
+ // bit manipulation. This pattern aims to provide opportunities for other
+ // optimizers by supporting a simple but common case in InstSimplify.
+ Value *Y;
+ const APInt *ShRAmt, *ShLAmt;
+ if (match(Op1, m_APInt(ShRAmt)) &&
+ match(Op0, m_c_Or(m_NUWShl(m_Value(X), m_APInt(ShLAmt)), m_Value(Y))) &&
+ *ShRAmt == *ShLAmt) {
+ const KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ const unsigned Width = Op0->getType()->getScalarSizeInBits();
+ const unsigned EffWidthY = Width - YKnown.countMinLeadingZeros();
+ if (EffWidthY <= ShRAmt->getZExtValue())
+ return X;
+ }
+
return nullptr;
}
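[Editor's note: the identity this hunk introduces can be checked concretely. With a nuw left shift (no bits of X are shifted out) and a Y whose effective width is no larger than the shift amount A, the OR can only touch the low A bits, which the logical right shift discards. A standalone C++ demonstration, not part of the patch:]

```cpp
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t X = 0x00ABCDEF; // fits in 24 bits: X << 8 loses no bits (nuw)
  const uint32_t Y = 0xCC;       // effective width 8, not larger than A
  const uint32_t A = 8;
  // ((X << A) | Y) >> A == X: the OR only altered the low A bits,
  // and the logical right shift throws exactly those bits away.
  assert((((X << A) | Y) >> A) == X);
  return 0;
}
```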
@@ -3752,6 +3811,9 @@ static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
simplifySelectWithICmpCond(Cond, TrueVal, FalseVal, Q, MaxRecurse))
return V;
+ if (Value *V = foldSelectWithBinaryOp(Cond, TrueVal, FalseVal))
+ return V;
+
return nullptr;
}
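[Editor's note: the foldSelectWithBinaryOp hook wired in above rests on a short case analysis. For the and/eq form, a true condition forces TrueVal == FalseVal, so either arm equals FalseVal; a false condition selects FalseVal anyway; hence the result is always FalseVal. The or/ne form is the dual and always yields TrueVal. A brute-force check of that reasoning over all 8-bit values, as a standalone sketch:]

```cpp
#include <cassert>
#include <cstdint>

// Exhaustively verify the "and + eq" form on 8-bit values:
//   %C = and (icmp eq TV, FV), (icmp eq TV, Y) ; one cmp operand is TV
//   select %C, TV, FV  -->  FV
int main() {
  for (unsigned TV = 0; TV < 256; ++TV)
    for (unsigned FV = 0; FV < 256; ++FV)
      for (unsigned Y = 0; Y < 256; ++Y) {
        bool Cond = (TV == FV) && (TV == Y);
        uint8_t Sel = Cond ? uint8_t(TV) : uint8_t(FV);
        assert(Sel == uint8_t(FV)); // the fold's claimed result
      }
  return 0;
}
```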
@@ -4604,149 +4666,131 @@ static bool maskIsAllZeroOrUndef(Value *Mask) {
return true;
}
-template <typename IterTy>
-static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
- const SimplifyQuery &Q, unsigned MaxRecurse) {
+static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0,
+ const SimplifyQuery &Q) {
+ // Idempotent functions return the same result when called repeatedly.
Intrinsic::ID IID = F->getIntrinsicID();
- unsigned NumOperands = std::distance(ArgBegin, ArgEnd);
-
- // Unary Ops
- if (NumOperands == 1) {
- // Perform idempotent optimizations
- if (IsIdempotent(IID)) {
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*ArgBegin)) {
- if (II->getIntrinsicID() == IID)
- return II;
- }
- }
+ if (IsIdempotent(IID))
+ if (auto *II = dyn_cast<IntrinsicInst>(Op0))
+ if (II->getIntrinsicID() == IID)
+ return II;
- Value *IIOperand = *ArgBegin;
- Value *X;
- switch (IID) {
- case Intrinsic::fabs: {
- if (SignBitMustBeZero(IIOperand, Q.TLI))
- return IIOperand;
- return nullptr;
- }
- case Intrinsic::bswap: {
- // bswap(bswap(x)) -> x
- if (match(IIOperand, m_BSwap(m_Value(X))))
- return X;
- return nullptr;
- }
- case Intrinsic::bitreverse: {
- // bitreverse(bitreverse(x)) -> x
- if (match(IIOperand, m_BitReverse(m_Value(X))))
- return X;
- return nullptr;
- }
- case Intrinsic::exp: {
- // exp(log(x)) -> x
- if (Q.CxtI->hasAllowReassoc() &&
- match(IIOperand, m_Intrinsic<Intrinsic::log>(m_Value(X))))
- return X;
- return nullptr;
- }
- case Intrinsic::exp2: {
- // exp2(log2(x)) -> x
- if (Q.CxtI->hasAllowReassoc() &&
- match(IIOperand, m_Intrinsic<Intrinsic::log2>(m_Value(X))))
- return X;
- return nullptr;
- }
- case Intrinsic::log: {
- // log(exp(x)) -> x
- if (Q.CxtI->hasAllowReassoc() &&
- match(IIOperand, m_Intrinsic<Intrinsic::exp>(m_Value(X))))
- return X;
- return nullptr;
- }
- case Intrinsic::log2: {
- // log2(exp2(x)) -> x
- if (Q.CxtI->hasAllowReassoc() &&
- match(IIOperand, m_Intrinsic<Intrinsic::exp2>(m_Value(X)))) {
- return X;
- }
- return nullptr;
- }
- default:
- return nullptr;
- }
+ Value *X;
+ switch (IID) {
+ case Intrinsic::fabs:
+ if (SignBitMustBeZero(Op0, Q.TLI)) return Op0;
+ break;
+ case Intrinsic::bswap:
+ // bswap(bswap(x)) -> x
+ if (match(Op0, m_BSwap(m_Value(X)))) return X;
+ break;
+ case Intrinsic::bitreverse:
+ // bitreverse(bitreverse(x)) -> x
+ if (match(Op0, m_BitReverse(m_Value(X)))) return X;
+ break;
+ case Intrinsic::exp:
+ // exp(log(x)) -> x
+ if (Q.CxtI->hasAllowReassoc() &&
+ match(Op0, m_Intrinsic<Intrinsic::log>(m_Value(X)))) return X;
+ break;
+ case Intrinsic::exp2:
+ // exp2(log2(x)) -> x
+ if (Q.CxtI->hasAllowReassoc() &&
+ match(Op0, m_Intrinsic<Intrinsic::log2>(m_Value(X)))) return X;
+ break;
+ case Intrinsic::log:
+ // log(exp(x)) -> x
+ if (Q.CxtI->hasAllowReassoc() &&
+ match(Op0, m_Intrinsic<Intrinsic::exp>(m_Value(X)))) return X;
+ break;
+ case Intrinsic::log2:
+ // log2(exp2(x)) -> x
+ if (Q.CxtI->hasAllowReassoc() &&
+ match(Op0, m_Intrinsic<Intrinsic::exp2>(m_Value(X)))) return X;
+ break;
+ default:
+ break;
}
- // Binary Ops
- if (NumOperands == 2) {
- Value *LHS = *ArgBegin;
- Value *RHS = *(ArgBegin + 1);
- Type *ReturnType = F->getReturnType();
+ return nullptr;
+}
- switch (IID) {
- case Intrinsic::usub_with_overflow:
- case Intrinsic::ssub_with_overflow: {
- // X - X -> { 0, false }
- if (LHS == RHS)
- return Constant::getNullValue(ReturnType);
+static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
+ const SimplifyQuery &Q) {
+ Intrinsic::ID IID = F->getIntrinsicID();
+ Type *ReturnType = F->getReturnType();
+ switch (IID) {
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ // X - X -> { 0, false }
+ if (Op0 == Op1)
+ return Constant::getNullValue(ReturnType);
+ // X - undef -> undef
+ // undef - X -> undef
+ if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1))
+ return UndefValue::get(ReturnType);
+ break;
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ // X + undef -> undef
+ if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1))
+ return UndefValue::get(ReturnType);
+ break;
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ // 0 * X -> { 0, false }
+ // X * 0 -> { 0, false }
+ if (match(Op0, m_Zero()) || match(Op1, m_Zero()))
+ return Constant::getNullValue(ReturnType);
+ // undef * X -> { 0, false }
+ // X * undef -> { 0, false }
+ if (match(Op0, m_Undef()) || match(Op1, m_Undef()))
+ return Constant::getNullValue(ReturnType);
+ break;
+ case Intrinsic::load_relative:
+ if (auto *C0 = dyn_cast<Constant>(Op0))
+ if (auto *C1 = dyn_cast<Constant>(Op1))
+ return SimplifyRelativeLoad(C0, C1, Q.DL);
+ break;
+ case Intrinsic::powi:
+ if (auto *Power = dyn_cast<ConstantInt>(Op1)) {
+ // powi(x, 0) -> 1.0
+ if (Power->isZero())
+ return ConstantFP::get(Op0->getType(), 1.0);
+ // powi(x, 1) -> x
+ if (Power->isOne())
+ return Op0;
+ }
+ break;
+ case Intrinsic::maxnum:
+ case Intrinsic::minnum:
+ // If one argument is NaN, return the other argument.
+ if (match(Op0, m_NaN())) return Op1;
+ if (match(Op1, m_NaN())) return Op0;
+ break;
+ default:
+ break;
+ }
- // X - undef -> undef
- // undef - X -> undef
- if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS))
- return UndefValue::get(ReturnType);
+ return nullptr;
+}
- return nullptr;
- }
- case Intrinsic::uadd_with_overflow:
- case Intrinsic::sadd_with_overflow: {
- // X + undef -> undef
- if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS))
- return UndefValue::get(ReturnType);
+template <typename IterTy>
+static Value *simplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
+ const SimplifyQuery &Q) {
+ // Intrinsics with no operands have some kind of side effect. Don't simplify.
+ unsigned NumOperands = std::distance(ArgBegin, ArgEnd);
+ if (NumOperands == 0)
+ return nullptr;
- return nullptr;
- }
- case Intrinsic::umul_with_overflow:
- case Intrinsic::smul_with_overflow: {
- // 0 * X -> { 0, false }
- // X * 0 -> { 0, false }
- if (match(LHS, m_Zero()) || match(RHS, m_Zero()))
- return Constant::getNullValue(ReturnType);
-
- // undef * X -> { 0, false }
- // X * undef -> { 0, false }
- if (match(LHS, m_Undef()) || match(RHS, m_Undef()))
- return Constant::getNullValue(ReturnType);
+ Intrinsic::ID IID = F->getIntrinsicID();
+ if (NumOperands == 1)
+ return simplifyUnaryIntrinsic(F, ArgBegin[0], Q);
- return nullptr;
- }
- case Intrinsic::load_relative: {
- Constant *C0 = dyn_cast<Constant>(LHS);
- Constant *C1 = dyn_cast<Constant>(RHS);
- if (C0 && C1)
- return SimplifyRelativeLoad(C0, C1, Q.DL);
- return nullptr;
- }
- case Intrinsic::powi:
- if (ConstantInt *Power = dyn_cast<ConstantInt>(RHS)) {
- // powi(x, 0) -> 1.0
- if (Power->isZero())
- return ConstantFP::get(LHS->getType(), 1.0);
- // powi(x, 1) -> x
- if (Power->isOne())
- return LHS;
- }
- return nullptr;
- case Intrinsic::maxnum:
- case Intrinsic::minnum:
- // If one argument is NaN, return the other argument.
- if (match(LHS, m_NaN()))
- return RHS;
- if (match(RHS, m_NaN()))
- return LHS;
- return nullptr;
- default:
- return nullptr;
- }
- }
+ if (NumOperands == 2)
+ return simplifyBinaryIntrinsic(F, ArgBegin[0], ArgBegin[1], Q);
- // Simplify calls to llvm.masked.load.*
+ // Handle intrinsics with 3 or more arguments.
switch (IID) {
case Intrinsic::masked_load: {
Value *MaskArg = ArgBegin[2];
@@ -4756,6 +4800,19 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
return PassthruArg;
return nullptr;
}
+ case Intrinsic::fshl:
+ case Intrinsic::fshr: {
+ Value *ShAmtArg = ArgBegin[2];
+ const APInt *ShAmtC;
+ if (match(ShAmtArg, m_APInt(ShAmtC))) {
+ // If there's effectively no shift, return the 1st arg or 2nd arg.
+ // TODO: For vectors, we could check each element of a non-splat constant.
+ APInt BitWidth = APInt(ShAmtC->getBitWidth(), ShAmtC->getBitWidth());
+ if (ShAmtC->urem(BitWidth).isNullValue())
+ return ArgBegin[IID == Intrinsic::fshl ? 0 : 1];
+ }
+ return nullptr;
+ }
default:
return nullptr;
}
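[Editor's note: the funnel-shift fold added here follows from the LangRef semantics: fshl concatenates the first operand above the second, shifts left by the amount modulo the bit width, and keeps the high half, so a shift amount that is a multiple of the width returns the first operand unchanged (fshr, symmetrically, the second). A standalone sketch with a reference implementation for 32 bits:]

```cpp
#include <cassert>
#include <cstdint>

// Reference semantics of llvm.fshl/llvm.fshr on i32 (illustrative only).
static uint32_t fshl32(uint32_t A, uint32_t B, uint32_t S) {
  S %= 32;                 // shift amount is taken modulo the bit width
  if (S == 0) return A;    // the case the new fold recognizes
  return (A << S) | (B >> (32 - S));
}
static uint32_t fshr32(uint32_t A, uint32_t B, uint32_t S) {
  S %= 32;
  if (S == 0) return B;    // fshr with zero shift yields the 2nd operand
  return (A << (32 - S)) | (B >> S);
}

int main() {
  assert(fshl32(0xDEADBEEF, 0x12345678, 0)  == 0xDEADBEEF);
  assert(fshl32(0xDEADBEEF, 0x12345678, 64) == 0xDEADBEEF); // 64 % 32 == 0
  assert(fshr32(0xDEADBEEF, 0x12345678, 32) == 0x12345678);
  return 0;
}
```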
@@ -4780,7 +4837,7 @@ static Value *SimplifyCall(ImmutableCallSite CS, Value *V, IterTy ArgBegin,
return nullptr;
if (F->isIntrinsic())
- if (Value *Ret = SimplifyIntrinsic(F, ArgBegin, ArgEnd, Q, MaxRecurse))
+ if (Value *Ret = simplifyIntrinsic(F, ArgBegin, ArgEnd, Q))
return Ret;
if (!canConstantFoldCallTo(CS, F))
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index 435b6f205199..ee0148e0d795 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -725,7 +725,7 @@ bool LazyValueInfoImpl::solveBlockValueNonLocal(ValueLatticeElement &BBLV,
// frequently arranged such that dominating ones come first and we quickly
// find a path to function entry. TODO: We should consider explicitly
// canonicalizing to make this true rather than relying on this happy
- // accident.
+ // accident.
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
ValueLatticeElement EdgeResult;
if (!getEdgeValue(Val, *PI, BB, EdgeResult))
diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp
index c6175bf9bee9..a24d66011b8d 100644
--- a/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/lib/Analysis/LoopAccessAnalysis.cpp
@@ -176,8 +176,8 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
/// Calculate Start and End points of memory access.
/// Let's assume A is the first access and B is a memory access on N-th loop
-/// iteration. Then B is calculated as:
-/// B = A + Step*N .
+/// iteration. Then B is calculated as:
+/// B = A + Step*N .
/// Step value may be positive or negative.
/// N is a calculated back-edge taken count:
/// N = (TripCount > 0) ? RoundDown(TripCount -1 , VF) : 0
@@ -1317,7 +1317,7 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
return false;
}
-/// Given a non-constant (unknown) dependence-distance \p Dist between two
+/// Given a non-constant (unknown) dependence-distance \p Dist between two
/// memory accesses, that have the same stride whose absolute value is given
/// in \p Stride, and that have the same type size \p TypeByteSize,
/// in a loop whose takenCount is \p BackedgeTakenCount, check if it is
@@ -1336,19 +1336,19 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
// If we can prove that
// (**) |Dist| > BackedgeTakenCount * Step
- // where Step is the absolute stride of the memory accesses in bytes,
+ // where Step is the absolute stride of the memory accesses in bytes,
// then there is no dependence.
//
- // Ratioanle:
- // We basically want to check if the absolute distance (|Dist/Step|)
- // is >= the loop iteration count (or > BackedgeTakenCount).
- // This is equivalent to the Strong SIV Test (Practical Dependence Testing,
- // Section 4.2.1); Note, that for vectorization it is sufficient to prove
+ // Ratioanle:
+ // We basically want to check if the absolute distance (|Dist/Step|)
+ // is >= the loop iteration count (or > BackedgeTakenCount).
+ // This is equivalent to the Strong SIV Test (Practical Dependence Testing,
+ // Section 4.2.1); Note, that for vectorization it is sufficient to prove
// that the dependence distance is >= VF; This is checked elsewhere.
- // But in some cases we can prune unknown dependence distances early, and
- // even before selecting the VF, and without a runtime test, by comparing
- // the distance against the loop iteration count. Since the vectorized code
- // will be executed only if LoopCount >= VF, proving distance >= LoopCount
+ // But in some cases we can prune unknown dependence distances early, and
+ // even before selecting the VF, and without a runtime test, by comparing
+ // the distance against the loop iteration count. Since the vectorized code
+ // will be executed only if LoopCount >= VF, proving distance >= LoopCount
// also guarantees that distance >= VF.
//
const uint64_t ByteStride = Stride * TypeByteSize;
@@ -1360,8 +1360,8 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
uint64_t DistTypeSize = DL.getTypeAllocSize(Dist.getType());
uint64_t ProductTypeSize = DL.getTypeAllocSize(Product->getType());
- // The dependence distance can be positive/negative, so we sign extend Dist;
- // The multiplication of the absolute stride in bytes and the
+ // The dependence distance can be positive/negative, so we sign extend Dist;
+ // The multiplication of the absolute stride in bytes and the
// backdgeTakenCount is non-negative, so we zero extend Product.
if (DistTypeSize > ProductTypeSize)
CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
@@ -2212,24 +2212,24 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
"versioning:");
LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n");
- // Avoid adding the "Stride == 1" predicate when we know that
+ // Avoid adding the "Stride == 1" predicate when we know that
// Stride >= Trip-Count. Such a predicate will effectively optimize a single
// or zero iteration loop, as Trip-Count <= Stride == 1.
- //
+ //
// TODO: We are currently not making a very informed decision on when it is
// beneficial to apply stride versioning. It might make more sense that the
- // users of this analysis (such as the vectorizer) will trigger it, based on
- // their specific cost considerations; For example, in cases where stride
+ // users of this analysis (such as the vectorizer) will trigger it, based on
+ // their specific cost considerations; For example, in cases where stride
// versioning does not help resolving memory accesses/dependences, the
- // vectorizer should evaluate the cost of the runtime test, and the benefit
- // of various possible stride specializations, considering the alternatives
- // of using gather/scatters (if available).
-
+ // vectorizer should evaluate the cost of the runtime test, and the benefit
+ // of various possible stride specializations, considering the alternatives
+ // of using gather/scatters (if available).
+
const SCEV *StrideExpr = PSE->getSCEV(Stride);
- const SCEV *BETakenCount = PSE->getBackedgeTakenCount();
+ const SCEV *BETakenCount = PSE->getBackedgeTakenCount();
// Match the types so we can compare the stride and the BETakenCount.
- // The Stride can be positive/negative, so we sign extend Stride;
+ // The Stride can be positive/negative, so we sign extend Stride;
// The backedgeTakenCount is non-negative, so we zero extend BETakenCount.
const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
uint64_t StrideTypeSize = DL.getTypeAllocSize(StrideExpr->getType());
@@ -2243,7 +2243,7 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
CastedBECount = SE->getZeroExtendExpr(BETakenCount, StrideExpr->getType());
const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount);
// Since TripCount == BackEdgeTakenCount + 1, checking:
- // "Stride >= TripCount" is equivalent to checking:
+ // "Stride >= TripCount" is equivalent to checking:
// Stride - BETakenCount > 0
if (SE->isKnownPositive(StrideMinusBETaken)) {
LLVM_DEBUG(
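
The comparison above rests on a small identity. A plain-C++ sketch with illustrative names follows; the real code performs this check on SCEVs, with Stride sign extended and BETakenCount zero extended to a common type.

#include <cstdint>

static bool strideCoversLoop(int64_t Stride, uint64_t BackedgeTakenCount) {
  // TripCount == BackedgeTakenCount + 1, so "Stride >= TripCount" holds
  // exactly when Stride - BackedgeTakenCount > 0.
  return Stride > 0 && uint64_t(Stride) > BackedgeTakenCount;
}
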
diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp
index 5c0cbb26484c..5a6bbd7b2ac6 100644
--- a/lib/Analysis/MemDepPrinter.cpp
+++ b/lib/Analysis/MemDepPrinter.cpp
@@ -118,7 +118,7 @@ bool MemDepPrinter::runOnFunction(Function &F) {
} else {
SmallVector<NonLocalDepResult, 4> NLDI;
assert( (isa<LoadInst>(Inst) || isa<StoreInst>(Inst) ||
- isa<VAArgInst>(Inst)) && "Unknown memory instruction!");
+ isa<VAArgInst>(Inst)) && "Unknown memory instruction!");
MDA.getNonLocalPointerDependency(Inst, NLDI);
DepSet &InstDeps = Deps[Inst];
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 7eeefd54f007..feae53c54ecb 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -26,6 +26,7 @@
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OrderedBasicBlock.h"
#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Analysis/PhiValues.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
@@ -1513,6 +1514,8 @@ void MemoryDependenceResults::invalidateCachedPointerInfo(Value *Ptr) {
RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
// Flush load info for the pointer.
RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
+ // Invalidate phis that use the pointer.
+ PV.invalidateValue(Ptr);
}
void MemoryDependenceResults::invalidateCachedPredecessors() {
@@ -1671,6 +1674,9 @@ void MemoryDependenceResults::removeInstruction(Instruction *RemInst) {
}
}
+ // Invalidate phis that use the removed instruction.
+ PV.invalidateValue(RemInst);
+
assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?");
LLVM_DEBUG(verifyRemoved(RemInst));
}
@@ -1730,7 +1736,8 @@ MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
auto &AC = AM.getResult<AssumptionAnalysis>(F);
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
- return MemoryDependenceResults(AA, AC, TLI, DT);
+ auto &PV = AM.getResult<PhiValuesAnalysis>(F);
+ return MemoryDependenceResults(AA, AC, TLI, DT, PV);
}
char MemoryDependenceWrapperPass::ID = 0;
@@ -1741,6 +1748,7 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PhiValuesWrapperPass)
INITIALIZE_PASS_END(MemoryDependenceWrapperPass, "memdep",
"Memory Dependence Analysis", false, true)
@@ -1758,6 +1766,7 @@ void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<PhiValuesWrapperPass>();
AU.addRequiredTransitive<AAResultsWrapperPass>();
AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
}
@@ -1773,7 +1782,8 @@ bool MemoryDependenceResults::invalidate(Function &F, const PreservedAnalyses &P
// Check whether the analyses we depend on became invalid for any reason.
if (Inv.invalidate<AAManager>(F, PA) ||
Inv.invalidate<AssumptionAnalysis>(F, PA) ||
- Inv.invalidate<DominatorTreeAnalysis>(F, PA))
+ Inv.invalidate<DominatorTreeAnalysis>(F, PA) ||
+ Inv.invalidate<PhiValuesAnalysis>(F, PA))
return true;
// Otherwise this analysis result remains valid.
@@ -1789,6 +1799,7 @@ bool MemoryDependenceWrapperPass::runOnFunction(Function &F) {
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- MemDep.emplace(AA, AC, TLI, DT);
+ auto &PV = getAnalysis<PhiValuesWrapperPass>().getResult();
+ MemDep.emplace(AA, AC, TLI, DT, PV);
return false;
}
diff --git a/lib/Analysis/MustExecute.cpp b/lib/Analysis/MustExecute.cpp
index fc4049874622..8e85366b4618 100644
--- a/lib/Analysis/MustExecute.cpp
+++ b/lib/Analysis/MustExecute.cpp
@@ -235,7 +235,7 @@ public:
}
- void printInfoComment(const Value &V, formatted_raw_ostream &OS) override {
+ void printInfoComment(const Value &V, formatted_raw_ostream &OS) override {
if (!MustExec.count(&V))
return;
@@ -245,7 +245,7 @@ public:
OS << " ; (mustexec in " << NumLoops << " loops: ";
else
OS << " ; (mustexec in: ";
-
+
bool first = true;
for (const Loop *L : Loops) {
if (!first)
@@ -264,6 +264,6 @@ bool MustExecutePrinter::runOnFunction(Function &F) {
MustExecuteAnnotatedWriter Writer(F, DT, LI);
F.print(dbgs(), &Writer);
-
+
return false;
}
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index aa95ace93014..0e715b8814ff 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -4839,7 +4839,7 @@ ScalarEvolution::createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI
// Construct the extended SCEV: (Ext ix (Trunc iy (Expr) to ix) to iy)
// for each of StartVal and Accum
- auto getExtendedExpr = [&](const SCEV *Expr,
+ auto getExtendedExpr = [&](const SCEV *Expr,
bool CreateSignExtend) -> const SCEV * {
assert(isLoopInvariant(Expr, L) && "Expr is expected to be invariant");
const SCEV *TruncatedExpr = getTruncateExpr(Expr, TruncTy);
@@ -4935,11 +4935,11 @@ ScalarEvolution::createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI) {
return Rewrite;
}
-// FIXME: This utility is currently required because the Rewriter currently
-// does not rewrite this expression:
-// {0, +, (sext ix (trunc iy to ix) to iy)}
+// FIXME: This utility is currently required because the Rewriter currently
+// does not rewrite this expression:
+// {0, +, (sext ix (trunc iy to ix) to iy)}
// into {0, +, %step},
-// even when the following Equal predicate exists:
+// even when the following Equal predicate exists:
// "%step == (sext ix (trunc iy to ix) to iy)".
bool PredicatedScalarEvolution::areAddRecsEqualWithPreds(
const SCEVAddRecExpr *AR1, const SCEVAddRecExpr *AR2) const {
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 9de2f789c89c..7233a86e5daf 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -721,7 +721,7 @@ struct ReductionData {
static Optional<ReductionData> getReductionData(Instruction *I) {
Value *L, *R;
if (m_BinOp(m_Value(L), m_Value(R)).match(I))
- return ReductionData(RK_Arithmetic, I->getOpcode(), L, R);
+ return ReductionData(RK_Arithmetic, I->getOpcode(), L, R);
if (auto *SI = dyn_cast<SelectInst>(I)) {
if (m_SMin(m_Value(L), m_Value(R)).match(SI) ||
m_SMax(m_Value(L), m_Value(R)).match(SI) ||
@@ -730,8 +730,8 @@ static Optional<ReductionData> getReductionData(Instruction *I) {
m_UnordFMin(m_Value(L), m_Value(R)).match(SI) ||
m_UnordFMax(m_Value(L), m_Value(R)).match(SI)) {
auto *CI = cast<CmpInst>(SI->getCondition());
- return ReductionData(RK_MinMax, CI->getOpcode(), L, R);
- }
+ return ReductionData(RK_MinMax, CI->getOpcode(), L, R);
+ }
if (m_UMin(m_Value(L), m_Value(R)).match(SI) ||
m_UMax(m_Value(L), m_Value(R)).match(SI)) {
auto *CI = cast<CmpInst>(SI->getCondition());
@@ -851,11 +851,11 @@ static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot,
// We look for a sequence of shuffle,shuffle,add triples like the following
// that builds a pairwise reduction tree.
- //
+ //
// (X0, X1, X2, X3)
// (X0 + X1, X2 + X3, undef, undef)
// ((X0 + X1) + (X2 + X3), undef, undef, undef)
- //
+ //
// %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
// <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
// %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
@@ -916,7 +916,7 @@ matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
// We look for a sequence of shuffles and adds like the following matching one
// fadd, shuffle vector pair at a time.
- //
+ //
// %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef,
// <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
// %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
@@ -927,7 +927,7 @@ matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
unsigned MaskStart = 1;
Instruction *RdxOp = RdxStart;
- SmallVector<int, 32> ShuffleMask(NumVecElems, 0);
+ SmallVector<int, 32> ShuffleMask(NumVecElems, 0);
unsigned NumVecElemsRemain = NumVecElems;
while (NumVecElemsRemain - 1) {
// Check for the right reduction operation.
@@ -1093,7 +1093,7 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
case Instruction::InsertElement: {
const InsertElementInst * IE = cast<InsertElementInst>(I);
ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
- unsigned Idx = -1;
+ unsigned Idx = -1;
if (CI)
Idx = CI->getZExtValue();
return getVectorInstrCost(I->getOpcode(),
@@ -1104,7 +1104,7 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
// TODO: Identify and add costs for insert/extract subvector, etc.
if (Shuffle->changesLength())
return -1;
-
+
if (Shuffle->isIdentity())
return 0;
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 04a7b73c22bf..0ef39163bda3 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -71,7 +71,7 @@
#include <cassert>
#include <cstdint>
#include <iterator>
-#include <utility>
+#include <utility>
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -3828,7 +3828,7 @@ static bool checkRippleForSignedAdd(const KnownBits &LHSKnown,
// If either of the values is known to be non-negative, adding them can only
// overflow if the second is also non-negative, so we can assume that.
- // Two non-negative numbers will only overflow if there is a carry to the
+ // Two non-negative numbers will only overflow if there is a carry to the
// sign bit, so we can check if, even when the values are as big as possible,
// there is no overflow to the sign bit.
if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative()) {
@@ -3855,7 +3855,7 @@ static bool checkRippleForSignedAdd(const KnownBits &LHSKnown,
}
// If we reached here it means that we know nothing about the sign bits.
- // In this case we can't know if there will be an overflow, since by
+ // In this case we can't know if there will be an overflow, since by
// changing the sign bits any two values can be made to overflow.
return false;
}
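
A plain-integer sketch of the both-non-negative case discussed above; the fixed 32-bit width and the names are assumptions for illustration. MaxLHS and MaxRHS stand for the largest values consistent with the known bits of each operand.

#include <cstdint>

static bool addCannotOverflowSigned(uint32_t MaxLHS, uint32_t MaxRHS) {
  // With both sign bits clear, signed overflow happens only via a carry
  // into bit 31, i.e. only if the maximal sum exceeds INT32_MAX.
  return (uint64_t(MaxLHS) + MaxRHS) <= uint64_t(INT32_MAX);
}
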
@@ -3905,7 +3905,7 @@ static OverflowResult computeOverflowForSignedAdd(const Value *LHS,
// operands.
bool LHSOrRHSKnownNonNegative =
(LHSKnown.isNonNegative() || RHSKnown.isNonNegative());
- bool LHSOrRHSKnownNegative =
+ bool LHSOrRHSKnownNegative =
(LHSKnown.isNegative() || RHSKnown.isNegative());
if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) {
KnownBits AddKnown = computeKnownBits(Add, DL, /*Depth=*/0, AC, CxtI, DT);
@@ -4454,7 +4454,7 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
SPR = matchMinMaxOfMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, Depth);
if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN)
return SPR;
-
+
if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT)
return {SPF_UNKNOWN, SPNB_NA, false};
@@ -4630,7 +4630,7 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
case FCmpInst::FCMP_OLE: return {SPF_FMINNUM, NaNBehavior, Ordered};
}
}
-
+
if (isKnownNegation(TrueVal, FalseVal)) {
// Sign-extending LHS does not change its sign, so TrueVal/FalseVal can
// match against either LHS or sext(LHS).
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 599b59bf61e8..7cf74dd16f5a 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -842,7 +842,7 @@ static void maybeSetDSOLocal(bool DSOLocal, GlobalValue &GV) {
}
/// parseIndirectSymbol:
-/// ::= GlobalVar '=' OptionalLinkage OptionalPreemptionSpecifier
+/// ::= GlobalVar '=' OptionalLinkage OptionalPreemptionSpecifier
/// OptionalVisibility OptionalDLLStorageClass
/// OptionalThreadLocal OptionalUnnamedAddr
// 'alias|ifunc' IndirectSymbol
@@ -3935,7 +3935,7 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name, EmissionKindField &Result
Lex.Lex();
return false;
}
-
+
template <>
bool LLParser::ParseMDField(LocTy Loc, StringRef Name,
DwarfAttEncodingField &Result) {
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index be75df0820d9..87b47dc354b5 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -3809,7 +3809,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
continue;
// The mapping from OriginalId to GUID may return a GUID
// that corresponds to a static variable. Filter it out here.
- // This can happen when
+ // This can happen when
// 1) There is a call to a library function which does not have
// a CallValidId;
// 2) There is a static variable with the OriginalGUID identical
diff --git a/lib/CodeGen/AntiDepBreaker.h b/lib/CodeGen/AntiDepBreaker.h
index 181da83dc88b..d93716287981 100644
--- a/lib/CodeGen/AntiDepBreaker.h
+++ b/lib/CodeGen/AntiDepBreaker.h
@@ -46,7 +46,7 @@ public:
MachineBasicBlock::iterator End,
unsigned InsertPosIndex,
DbgValueVector &DbgValues) = 0;
-
+
/// Update liveness information to account for the current
/// instruction, which will not be scheduled.
virtual void Observe(MachineInstr &MI, unsigned Count,
diff --git a/lib/CodeGen/AsmPrinter/AddressPool.cpp b/lib/CodeGen/AsmPrinter/AddressPool.cpp
index 4a226527cb5b..c8305ad9c547 100644
--- a/lib/CodeGen/AsmPrinter/AddressPool.cpp
+++ b/lib/CodeGen/AsmPrinter/AddressPool.cpp
@@ -24,8 +24,26 @@ unsigned AddressPool::getIndex(const MCSymbol *Sym, bool TLS) {
return IterBool.first->second.Number;
}
+
+void AddressPool::emitHeader(AsmPrinter &Asm, MCSection *Section) {
+ const uint8_t AddrSize = Asm.getDataLayout().getPointerSize();
+ Asm.OutStreamer->SwitchSection(Section);
+
+ uint64_t Length = sizeof(uint16_t) // version
+ + sizeof(uint8_t) // address_size
+ + sizeof(uint8_t) // segment_selector_size
+ + AddrSize * Pool.size(); // entries
+ Asm.emitInt32(Length); // TODO: Support DWARF64 format.
+ Asm.emitInt16(Asm.getDwarfVersion());
+ Asm.emitInt8(AddrSize);
+ Asm.emitInt8(0); // TODO: Support non-zero segment_selector_size.
+}
+
// Emit addresses into the section given.
void AddressPool::emit(AsmPrinter &Asm, MCSection *AddrSection) {
+ if (Asm.getDwarfVersion() >= 5)
+ emitHeader(Asm, AddrSection);
+
if (Pool.empty())
return;
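
The length emitted above follows the DWARF v5 .debug_addr header layout, where the 4-byte DWARF32 unit-length field does not count itself. A standalone sketch of the same computation (debugAddrUnitLength is an illustrative name):

#include <cstdint>

static uint64_t debugAddrUnitLength(uint64_t NumEntries, uint8_t AddrSize) {
  return sizeof(uint16_t)                   // version
         + sizeof(uint8_t)                  // address_size
         + sizeof(uint8_t)                  // segment_selector_size
         + uint64_t(AddrSize) * NumEntries; // the address entries themselves
}
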
diff --git a/lib/CodeGen/AsmPrinter/AddressPool.h b/lib/CodeGen/AsmPrinter/AddressPool.h
index 5350006bf744..d5008fab5563 100644
--- a/lib/CodeGen/AsmPrinter/AddressPool.h
+++ b/lib/CodeGen/AsmPrinter/AddressPool.h
@@ -50,6 +50,9 @@ public:
bool hasBeenUsed() const { return HasBeenUsed; }
void resetUsedFlag() { HasBeenUsed = false; }
+
+private:
+ void emitHeader(AsmPrinter &Asm, MCSection *Section);
};
} // end namespace llvm
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 8761fae9dd22..500e7a00196f 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -364,7 +364,9 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
else
UseSectionsAsReferences = DwarfSectionsAsReferences == Enable;
- GenerateTypeUnits = GenerateDwarfTypeUnits;
+ // Don't generate type units for unsupported object file formats.
+ GenerateTypeUnits =
+ A->TM.getTargetTriple().isOSBinFormatELF() && GenerateDwarfTypeUnits;
TheAccelTableKind = computeAccelTableKind(
DwarfVersion, GenerateTypeUnits, DebuggerTuning, A->TM.getTargetTriple());
@@ -886,8 +888,7 @@ void DwarfDebug::endModule() {
emitDebugInfoDWO();
emitDebugAbbrevDWO();
emitDebugLineDWO();
- // Emit DWO addresses.
- AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection());
+ emitDebugAddr();
}
// Emit info into the dwarf accelerator table sections.
@@ -2136,7 +2137,7 @@ void DwarfDebug::emitDebugRanges() {
return;
}
- if (getDwarfVersion() >= 5 && NoRangesPresent())
+ if (NoRangesPresent())
return;
// Start the dwarf ranges section.
@@ -2297,6 +2298,12 @@ void DwarfDebug::emitDebugStrDWO() {
OffSec, /* UseRelativeOffsets = */ false);
}
+// Emit DWO addresses.
+void DwarfDebug::emitDebugAddr() {
+ assert(useSplitDwarf() && "No split dwarf?");
+ AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection());
+}
+
MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) {
if (!useSplitDwarf())
return nullptr;
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 0c7be5d27dfe..abf2e43b1312 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -447,6 +447,9 @@ class DwarfDebug : public DebugHandlerBase {
/// Emit the debug str dwo section.
void emitDebugStrDWO();
+ /// Emit DWO addresses.
+ void emitDebugAddr();
+
/// Flags to let the linker know we have emitted new style pubnames. Only
/// emit it here if we don't have a skeleton CU for split dwarf.
void addGnuPubAttributes(DwarfCompileUnit &U, DIE &D) const;
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.h b/lib/CodeGen/AsmPrinter/DwarfExpression.h
index 952b0d99a95a..0637d952eba4 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -112,7 +112,7 @@ protected:
uint64_t OffsetInBits = 0;
unsigned DwarfVersion;
- /// Sometimes we need to add a DW_OP_bit_piece to describe a subregister.
+ /// Sometimes we need to add a DW_OP_bit_piece to describe a subregister.
unsigned SubRegisterSizeInBits = 0;
unsigned SubRegisterOffsetInBits = 0;
diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/lib/CodeGen/AsmPrinter/DwarfFile.cpp
index c90bd568162d..049f349b009a 100644
--- a/lib/CodeGen/AsmPrinter/DwarfFile.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -95,6 +95,6 @@ bool DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
}
} else {
ScopeVars.Locals.push_back(Var);
- }
+ }
return true;
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 43b835b2c4aa..600f4a78fda0 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -1182,7 +1182,7 @@ DIE *DwarfUnit::getOrCreateModule(const DIModule *M) {
addString(MDie, dwarf::DW_AT_LLVM_include_path, M->getIncludePath());
if (!M->getISysRoot().empty())
addString(MDie, dwarf::DW_AT_LLVM_isysroot, M->getISysRoot());
-
+
return &MDie;
}
@@ -1691,7 +1691,7 @@ void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) {
}
void DwarfTypeUnit::emitHeader(bool UseOffsets) {
- DwarfUnit::emitCommonHeader(UseOffsets,
+ DwarfUnit::emitCommonHeader(UseOffsets,
DD->useSplitDwarf() ? dwarf::DW_UT_split_type
: dwarf::DW_UT_type);
Asm->OutStreamer->AddComment("Type Signature");
diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp
index f2615edaece2..e28fc6fb9d4f 100644
--- a/lib/CodeGen/AtomicExpandPass.cpp
+++ b/lib/CodeGen/AtomicExpandPass.cpp
@@ -362,19 +362,19 @@ IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
/// Convert an atomic load of a non-integral type to an integer load of the
/// equivalent bitwidth. See the function comment on
-/// convertAtomicStoreToIntegerType for background.
+/// convertAtomicStoreToIntegerType for background.
LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
auto *M = LI->getModule();
Type *NewTy = getCorrespondingIntegerType(LI->getType(),
M->getDataLayout());
IRBuilder<> Builder(LI);
-
+
Value *Addr = LI->getPointerOperand();
Type *PT = PointerType::get(NewTy,
Addr->getType()->getPointerAddressSpace());
Value *NewAddr = Builder.CreateBitCast(Addr, PT);
-
+
auto *NewLI = Builder.CreateLoad(NewAddr);
NewLI->setAlignment(LI->getAlignment());
NewLI->setVolatile(LI->isVolatile());
@@ -452,7 +452,7 @@ StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
M->getDataLayout());
Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
-
+
Value *Addr = SI->getPointerOperand();
Type *PT = PointerType::get(NewTy,
Addr->getType()->getPointerAddressSpace());
@@ -920,14 +920,14 @@ Value *AtomicExpand::insertRMWLLSCLoop(
/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
/// IR. As a migration step, we convert back to what used to be the standard
/// way to represent a pointer cmpxchg so that we can update backends one by
-/// one.
+/// one.
AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
auto *M = CI->getModule();
Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
M->getDataLayout());
IRBuilder<> Builder(CI);
-
+
Value *Addr = CI->getPointerOperand();
Type *PT = PointerType::get(NewTy,
Addr->getType()->getPointerAddressSpace());
@@ -935,8 +935,8 @@ AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *
Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
-
-
+
+
auto *NewCI = Builder.CreateAtomicCmpXchg(NewAddr, NewCmp, NewNewVal,
CI->getSuccessOrdering(),
CI->getFailureOrdering(),
diff --git a/lib/CodeGen/BuiltinGCs.cpp b/lib/CodeGen/BuiltinGCs.cpp
index abac555d6602..3a9b20aa661d 100644
--- a/lib/CodeGen/BuiltinGCs.cpp
+++ b/lib/CodeGen/BuiltinGCs.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
//
// This file contains the boilerplate required to define our various built in
-// gc lowering strategies.
+// gc lowering strategies.
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 840e5ede6444..5a5960b16130 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -530,7 +530,7 @@ BreakAntiDependencies(const std::vector<SUnit> &SUnits,
// Kill instructions can define registers but are really nops, and there
// might be a real definition earlier that needs to be paired with uses
// dominated by this kill.
-
+
// FIXME: It may be possible to remove the isKill() restriction once PR18663
// has been properly fixed. There can be value in processing kills as seen
// in the AggressiveAntiDepBreaker class.
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
index 456fa799e8e1..fe3d29657942 100644
--- a/lib/CodeGen/GCMetadata.cpp
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -159,7 +159,7 @@ GCStrategy *GCModuleInfo::getGCStrategy(const StringRef Name) {
auto NMI = GCStrategyMap.find(Name);
if (NMI != GCStrategyMap.end())
return NMI->getValue();
-
+
for (auto& Entry : GCRegistry::entries()) {
if (Name == Entry.getName()) {
std::unique_ptr<GCStrategy> S = Entry.instantiate();
@@ -171,11 +171,11 @@ GCStrategy *GCModuleInfo::getGCStrategy(const StringRef Name) {
}
if (GCRegistry::begin() == GCRegistry::end()) {
- // In normal operation, the registry should not be empty. There should
+ // In normal operation, the registry should not be empty. There should
// be the builtin GCs if nothing else. The most likely scenario here is
- // that we got here without running the initializers used by the Registry
+ // that we got here without running the initializers used by the Registry
// itself and its registration mechanism.
- const std::string error = ("unsupported GC: " + Name).str() +
+ const std::string error = ("unsupported GC: " + Name).str() +
" (did you remember to link and initialize the CodeGen library?)";
report_fatal_error(error);
} else
diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp
index bafb7a05536d..80da50562d32 100644
--- a/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallSet.h"
@@ -33,6 +34,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -1503,6 +1505,8 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {
Ops.push_back(getOrCreateVReg(*CV->getOperand(i)));
}
EntryBuilder.buildMerge(Reg, Ops);
+ } else if (auto *BA = dyn_cast<BlockAddress>(&C)) {
+ EntryBuilder.buildBlockAddress(Reg, BA);
} else
return false;
@@ -1611,19 +1615,20 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
ArgIt++;
}
- // And translate the function!
- for (const BasicBlock &BB : F) {
- MachineBasicBlock &MBB = getMBB(BB);
+ // Need to visit defs before uses when translating instructions.
+ ReversePostOrderTraversal<const Function *> RPOT(&F);
+ for (const BasicBlock *BB : RPOT) {
+ MachineBasicBlock &MBB = getMBB(*BB);
// Set the insertion point of all the following translations to
// the end of this basic block.
CurBuilder.setMBB(MBB);
- for (const Instruction &Inst : BB) {
+ for (const Instruction &Inst : *BB) {
if (translate(Inst))
continue;
OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
- Inst.getDebugLoc(), &BB);
+ Inst.getDebugLoc(), BB);
R << "unable to translate instruction: " << ore::NV("Opcode", &Inst);
if (ORE->allowExtraAnalysis("gisel-irtranslator")) {
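
A toy, non-LLVM sketch of why a reverse post-order traversal sees definitions before uses: depth-first search finishes a block only after everything reachable from it, so reversing the post-order gives a topological order on an acyclic CFG, and in general a block's dominators appear before it. All names here are illustrative.

#include <algorithm>
#include <functional>
#include <vector>

// Toy graph: Succ[N] lists the successors of block N.
static std::vector<int>
reversePostOrder(const std::vector<std::vector<int>> &Succ, int Entry) {
  std::vector<char> Seen(Succ.size(), 0);
  std::vector<int> Post;
  std::function<void(int)> DFS = [&](int N) {
    Seen[N] = 1;
    for (int S : Succ[N])
      if (!Seen[S])
        DFS(S);
    Post.push_back(N); // finished only after everything reachable from N
  };
  DFS(Entry);
  std::reverse(Post.begin(), Post.end());
  return Post; // diamond 0->{1,2}, 1->3, 2->3 yields 0 first and 3 last
}
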
diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 9df931eb81b3..3271b54aa830 100644
--- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -809,6 +809,15 @@ MachineIRBuilderBase::buildAtomicRMWUmin(unsigned OldValRes, unsigned Addr,
MMO);
}
+MachineInstrBuilder
+MachineIRBuilderBase::buildBlockAddress(unsigned Res, const BlockAddress *BA) {
+#ifndef NDEBUG
+ assert(getMRI()->getType(Res).isPointer() && "invalid res type");
+#endif
+
+ return buildInstr(TargetOpcode::G_BLOCK_ADDR).addDef(Res).addBlockAddress(BA);
+}
+
void MachineIRBuilderBase::validateTruncExt(unsigned Dst, unsigned Src,
bool IsExtend) {
#ifndef NDEBUG
diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp
index ca56f4e0c4f1..9f7f5e392a9a 100644
--- a/lib/CodeGen/GlobalMerge.cpp
+++ b/lib/CodeGen/GlobalMerge.cpp
@@ -56,7 +56,7 @@
// - it makes linker optimizations less useful (order files, LOHs, ...)
// - it forces usage of indexed addressing (which isn't necessarily "free")
// - it can increase register pressure when the uses are disparate enough.
-//
+//
// We use heuristics to discover the best global grouping we can (cf cl::opts).
//
// ===---------------------------------------------------------------------===//
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index eb4099964242..707113bd973b 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -113,22 +113,22 @@ void IntrinsicLowering::AddPrototypes(Module &M) {
case Intrinsic::memcpy:
M.getOrInsertFunction("memcpy",
Type::getInt8PtrTy(Context),
- Type::getInt8PtrTy(Context),
- Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
DL.getIntPtrType(Context));
break;
case Intrinsic::memmove:
M.getOrInsertFunction("memmove",
Type::getInt8PtrTy(Context),
- Type::getInt8PtrTy(Context),
- Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
DL.getIntPtrType(Context));
break;
case Intrinsic::memset:
M.getOrInsertFunction("memset",
Type::getInt8PtrTy(Context),
- Type::getInt8PtrTy(Context),
- Type::getInt32Ty(M.getContext()),
+ Type::getInt8PtrTy(Context),
+ Type::getInt32Ty(M.getContext()),
DL.getIntPtrType(Context));
break;
case Intrinsic::sqrt:
@@ -210,13 +210,13 @@ static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) {
"bswap.5");
Value* Tmp4 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
"bswap.4");
- Value* Tmp3 = Builder.CreateLShr(V,
+ Value* Tmp3 = Builder.CreateLShr(V,
ConstantInt::get(V->getType(), 24),
"bswap.3");
- Value* Tmp2 = Builder.CreateLShr(V,
+ Value* Tmp2 = Builder.CreateLShr(V,
ConstantInt::get(V->getType(), 40),
"bswap.2");
- Value* Tmp1 = Builder.CreateLShr(V,
+ Value* Tmp1 = Builder.CreateLShr(V,
ConstantInt::get(V->getType(), 56),
"bswap.1");
Tmp7 = Builder.CreateAnd(Tmp7,
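
For reference, the value the emitted IR computes is an ordinary 64-bit byte swap: eight single-byte extractions recombined in reverse order. A plain-C++ sketch of the result, not the generated code:

#include <cstdint>

static uint64_t bswap64(uint64_t V) {
  return ((V & 0x00000000000000ffULL) << 56) |
         ((V & 0x000000000000ff00ULL) << 40) |
         ((V & 0x0000000000ff0000ULL) << 24) |
         ((V & 0x00000000ff000000ULL) << 8)  |
         ((V & 0x000000ff00000000ULL) >> 8)  |
         ((V & 0x0000ff0000000000ULL) >> 24) |
         ((V & 0x00ff000000000000ULL) >> 40) |
         ((V & 0xff00000000000000ULL) >> 56);
}
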
@@ -274,7 +274,7 @@ static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) {
for (unsigned n = 0; n < WordSize; ++n) {
Value *PartValue = V;
- for (unsigned i = 1, ct = 0; i < (BitSize>64 ? 64 : BitSize);
+ for (unsigned i = 1, ct = 0; i < (BitSize>64 ? 64 : BitSize);
i <<= 1, ++ct) {
Value *MaskCst = ConstantInt::get(V->getType(), MaskValues[ct]);
Value *LHS = Builder.CreateAnd(PartValue, MaskCst, "cppop.and1");
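
The loop above emits the classic divide-and-conquer population count: each step adds adjacent groups of bits, doubling the group size until the whole word is summed. A plain-C++ sketch for a single 64-bit word, using the same mask progression:

#include <cstdint>

static unsigned ctpop64(uint64_t V) {
  V = (V & 0x5555555555555555ULL) + ((V >> 1)  & 0x5555555555555555ULL);
  V = (V & 0x3333333333333333ULL) + ((V >> 2)  & 0x3333333333333333ULL);
  V = (V & 0x0f0f0f0f0f0f0f0fULL) + ((V >> 4)  & 0x0f0f0f0f0f0f0f0fULL);
  V = (V & 0x00ff00ff00ff00ffULL) + ((V >> 8)  & 0x00ff00ff00ff00ffULL);
  V = (V & 0x0000ffff0000ffffULL) + ((V >> 16) & 0x0000ffff0000ffffULL);
  V = (V & 0x00000000ffffffffULL) + ((V >> 32) & 0x00000000ffffffffULL);
  return unsigned(V);
}
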
@@ -381,7 +381,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
case Intrinsic::siglongjmp: {
// Insert the call to abort
- ReplaceCallWith("abort", CI, CS.arg_end(), CS.arg_end(),
+ ReplaceCallWith("abort", CI, CS.arg_end(), CS.arg_end(),
Type::getVoidTy(Context));
break;
}
@@ -392,7 +392,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
case Intrinsic::bswap:
CI->replaceAllUsesWith(LowerBSWAP(Context, CI->getArgOperand(0), CI));
break;
-
+
case Intrinsic::ctlz:
CI->replaceAllUsesWith(LowerCTLZ(Context, CI->getArgOperand(0), CI));
break;
@@ -420,7 +420,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
break;
}
-
+
case Intrinsic::get_dynamic_area_offset:
errs() << "WARNING: this target does not support the custom llvm.get."
"dynamic.area.offset. It is being lowered to a constant 0\n";
@@ -473,7 +473,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
case Intrinsic::assume:
case Intrinsic::var_annotation:
break; // Strip out these intrinsics
-
+
case Intrinsic::memcpy: {
Type *IntPtr = DL.getIntPtrType(Context);
Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
diff --git a/lib/CodeGen/LiveDebugValues.cpp b/lib/CodeGen/LiveDebugValues.cpp
index fea83e92de8f..417bd9d5aebe 100644
--- a/lib/CodeGen/LiveDebugValues.cpp
+++ b/lib/CodeGen/LiveDebugValues.cpp
@@ -340,7 +340,7 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF,
/// address the spill location in a target independent way.
int LiveDebugValues::extractSpillBaseRegAndOffset(const MachineInstr &MI,
unsigned &Reg) {
- assert(MI.hasOneMemOperand() &&
+ assert(MI.hasOneMemOperand() &&
"Spill instruction does not have exactly one memory operand?");
auto MMOI = MI.memoperands_begin();
const PseudoSourceValue *PVal = (*MMOI)->getPseudoValue();
@@ -472,7 +472,7 @@ bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI,
int FI;
const MachineMemOperand *MMO;
- // TODO: Handle multiple stores folded into one.
+ // TODO: Handle multiple stores folded into one.
if (!MI.hasOneMemOperand())
return false;
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index 054cc97f8374..639cd80768fc 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -314,10 +314,10 @@ public:
MMI.deleteMachineFunctionFor(F);
return true;
}
-
+
StringRef getPassName() const override {
return "Free MachineFunction";
- }
+ }
};
} // end anonymous namespace
diff --git a/lib/CodeGen/MachineOutliner.cpp b/lib/CodeGen/MachineOutliner.cpp
index 28e4e2c6c87a..a712afec0959 100644
--- a/lib/CodeGen/MachineOutliner.cpp
+++ b/lib/CodeGen/MachineOutliner.cpp
@@ -620,10 +620,8 @@ struct InstructionMapper {
/// queried for candidates.
///
/// \param MBB The \p MachineBasicBlock to be translated into integers.
- /// \param TRI \p TargetRegisterInfo for the module.
- /// \param TII \p TargetInstrInfo for the module.
+ /// \param TII \p TargetInstrInfo for the function.
void convertToUnsignedVec(MachineBasicBlock &MBB,
- const TargetRegisterInfo &TRI,
const TargetInstrInfo &TII) {
unsigned Flags = TII.getMachineOutlinerMBBFlags(MBB);
@@ -729,7 +727,6 @@ struct MachineOutliner : public ModulePass {
/// its leaf children to find the locations of its substring.
///
/// \param ST A suffix tree to query.
- /// \param TII TargetInstrInfo for the target.
/// \param Mapper Contains outlining mapping information.
/// \param[out] CandidateList Filled with candidates representing each
/// beneficial substring.
@@ -738,7 +735,7 @@ struct MachineOutliner : public ModulePass {
///
/// \returns The length of the longest candidate found.
unsigned
- findCandidates(SuffixTree &ST, const TargetInstrInfo &TII,
+ findCandidates(SuffixTree &ST,
InstructionMapper &Mapper,
std::vector<std::shared_ptr<Candidate>> &CandidateList,
std::vector<OutlinedFunction> &FunctionList);
@@ -770,14 +767,12 @@ struct MachineOutliner : public ModulePass {
/// \param[out] FunctionList Filled with functions corresponding to each type
/// of \p Candidate.
/// \param ST The suffix tree for the module.
- /// \param TII TargetInstrInfo for the module.
///
/// \returns The length of the longest candidate found. 0 if there are none.
unsigned
buildCandidateList(std::vector<std::shared_ptr<Candidate>> &CandidateList,
std::vector<OutlinedFunction> &FunctionList,
- SuffixTree &ST, InstructionMapper &Mapper,
- const TargetInstrInfo &TII);
+ SuffixTree &ST, InstructionMapper &Mapper);
/// Helper function for pruneOverlaps.
/// Removes \p C from the candidate list, and updates its \p OutlinedFunction.
@@ -795,11 +790,9 @@ struct MachineOutliner : public ModulePass {
/// \param[in,out] FunctionList A list of functions to be outlined.
/// \param Mapper Contains instruction mapping info for outlining.
/// \param MaxCandidateLen The length of the longest candidate.
- /// \param TII TargetInstrInfo for the module.
void pruneOverlaps(std::vector<std::shared_ptr<Candidate>> &CandidateList,
std::vector<OutlinedFunction> &FunctionList,
- InstructionMapper &Mapper, unsigned MaxCandidateLen,
- const TargetInstrInfo &TII);
+ InstructionMapper &Mapper, unsigned MaxCandidateLen);
/// Construct a suffix tree on the instructions in \p M and outline repeated
/// strings from that tree.
@@ -892,7 +885,7 @@ void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) {
}
unsigned MachineOutliner::findCandidates(
- SuffixTree &ST, const TargetInstrInfo &TII, InstructionMapper &Mapper,
+ SuffixTree &ST, InstructionMapper &Mapper,
std::vector<std::shared_ptr<Candidate>> &CandidateList,
std::vector<OutlinedFunction> &FunctionList) {
CandidateList.clear();
@@ -945,7 +938,7 @@ unsigned MachineOutliner::findCandidates(
// AA (where each "A" is an instruction).
//
// We might have some portion of the module that looks like this:
- // AAAAAA (6 A's)
+ // AAAAAA (6 A's)
//
// In this case, there are 5 different copies of "AA" in this range, but
// at most 3 can be outlined. If only outlining 3 of these is going to
@@ -979,8 +972,16 @@ unsigned MachineOutliner::findCandidates(
// We've found something we might want to outline.
// Create an OutlinedFunction to store it and check if it'd be beneficial
// to outline.
+ if (CandidatesForRepeatedSeq.empty())
+ continue;
+
+ // Arbitrarily choose a TII from the first candidate.
+ // FIXME: Should getOutliningCandidateInfo move to TargetMachine?
+ const TargetInstrInfo *TII =
+ CandidatesForRepeatedSeq[0].getMF()->getSubtarget().getInstrInfo();
+
OutlinedFunction OF =
- TII.getOutliningCandidateInfo(CandidatesForRepeatedSeq);
+ TII->getOutliningCandidateInfo(CandidatesForRepeatedSeq);
// If we deleted every candidate, then there's nothing to outline.
if (OF.Candidates.empty())
@@ -1036,7 +1037,7 @@ void MachineOutliner::prune(Candidate &C,
void MachineOutliner::pruneOverlaps(
std::vector<std::shared_ptr<Candidate>> &CandidateList,
std::vector<OutlinedFunction> &FunctionList, InstructionMapper &Mapper,
- unsigned MaxCandidateLen, const TargetInstrInfo &TII) {
+ unsigned MaxCandidateLen) {
// Return true if this candidate became unbeneficial for outlining in a
// previous step.
@@ -1127,13 +1128,13 @@ void MachineOutliner::pruneOverlaps(
unsigned MachineOutliner::buildCandidateList(
std::vector<std::shared_ptr<Candidate>> &CandidateList,
std::vector<OutlinedFunction> &FunctionList, SuffixTree &ST,
- InstructionMapper &Mapper, const TargetInstrInfo &TII) {
+ InstructionMapper &Mapper) {
std::vector<unsigned> CandidateSequence; // Current outlining candidate.
unsigned MaxCandidateLen = 0; // Length of the longest candidate.
MaxCandidateLen =
- findCandidates(ST, TII, Mapper, CandidateList, FunctionList);
+ findCandidates(ST, Mapper, CandidateList, FunctionList);
// Sort the candidates in descending order. This will simplify the outlining
// process when we have to remove the candidates from the mapping by
@@ -1339,10 +1340,6 @@ bool MachineOutliner::runOnModule(Module &M) {
return false;
MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
- const TargetSubtargetInfo &STI =
- MMI.getOrCreateMachineFunction(*M.begin()).getSubtarget();
- const TargetRegisterInfo *TRI = STI.getRegisterInfo();
- const TargetInstrInfo *TII = STI.getInstrInfo();
// If the user passed -enable-machine-outliner=always or
// -enable-machine-outliner, the pass will run on all functions in the module.
@@ -1382,6 +1379,8 @@ bool MachineOutliner::runOnModule(Module &M) {
if (!MF)
continue;
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+
if (!RunOnAllFunctions && !TII->shouldOutlineFromFunctionByDefault(*MF))
continue;
@@ -1405,7 +1404,7 @@ bool MachineOutliner::runOnModule(Module &M) {
continue;
// MBB is suitable for outlining. Map it to a list of unsigneds.
- Mapper.convertToUnsignedVec(MBB, *TRI, *TII);
+ Mapper.convertToUnsignedVec(MBB, *TII);
}
}
@@ -1416,10 +1415,10 @@ bool MachineOutliner::runOnModule(Module &M) {
// Find all of the outlining candidates.
unsigned MaxCandidateLen =
- buildCandidateList(CandidateList, FunctionList, ST, Mapper, *TII);
+ buildCandidateList(CandidateList, FunctionList, ST, Mapper);
// Remove candidates that overlap with other candidates.
- pruneOverlaps(CandidateList, FunctionList, Mapper, MaxCandidateLen, *TII);
+ pruneOverlaps(CandidateList, FunctionList, Mapper, MaxCandidateLen);
// Outline each of the candidates and return true if something was outlined.
bool OutlinedSomething = outline(M, CandidateList, FunctionList, Mapper);
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 6095bdd06b69..f632a9bd457f 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -383,7 +383,7 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) {
assert(FromReg != ToReg && "Cannot replace a reg with itself");
const TargetRegisterInfo *TRI = getTargetRegisterInfo();
-
+
// TODO: This could be more efficient by bulk changing the operands.
for (reg_iterator I = reg_begin(FromReg), E = reg_end(); I != E; ) {
MachineOperand &O = *I;
diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp
index 773661965f18..542491eabbf2 100644
--- a/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/lib/CodeGen/MachineSSAUpdater.cpp
@@ -254,14 +254,14 @@ public:
private:
MachineInstr *PHI;
unsigned idx;
-
+
public:
explicit PHI_iterator(MachineInstr *P) // begin iterator
: PHI(P), idx(1) {}
PHI_iterator(MachineInstr *P, bool) // end iterator
: PHI(P), idx(PHI->getNumOperands()) {}
- PHI_iterator &operator++() { idx += 2; return *this; }
+ PHI_iterator &operator++() { idx += 2; return *this; }
bool operator==(const PHI_iterator& x) const { return idx == x.idx; }
bool operator!=(const PHI_iterator& x) const { return !operator==(x); }
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index 354f46e9e625..1fd40f757351 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -509,7 +509,7 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
}
ToSplit.insert(std::make_pair(FromBB, ToBB));
-
+
return true;
}
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp
index b444cd31eba2..79ca6adf95c4 100644
--- a/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@@ -655,7 +655,7 @@ static bool getDataDeps(const MachineInstr &UseMI,
// Debug values should not be included in any calculations.
if (UseMI.isDebugInstr())
return false;
-
+
bool HasPhysRegs = false;
for (MachineInstr::const_mop_iterator I = UseMI.operands_begin(),
E = UseMI.operands_end(); I != E; ++I) {
@@ -1167,7 +1167,7 @@ MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) {
computeInstrDepths(MBB);
if (!TBI.HasValidInstrHeights)
computeInstrHeights(MBB);
-
+
return Trace(*this, TBI);
}
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index d644e41abc5b..318776136e24 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -1077,8 +1077,8 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
auto VerifyStackMapConstant = [&](unsigned Offset) {
if (!MI->getOperand(Offset).isImm() ||
- MI->getOperand(Offset).getImm() != StackMaps::ConstantOp ||
- !MI->getOperand(Offset + 1).isImm())
+ MI->getOperand(Offset).getImm() != StackMaps::ConstantOp ||
+ !MI->getOperand(Offset + 1).isImm())
report("stack map constant to STATEPOINT not well formed!", MI);
};
const unsigned VarStart = StatepointOpers(MI).getVarIdx();
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index a878c34f9aa4..3660586c1358 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -594,7 +594,8 @@ unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
MachineBasicBlock::iterator ReloadAfter =
RestoreAfter ? std::next(MBBI) : MBBI;
MachineBasicBlock::iterator ReloadBefore = std::next(ReloadAfter);
- LLVM_DEBUG(dbgs() << "Reload before: " << *ReloadBefore << '\n');
+ if (ReloadBefore != MBB.end())
+ LLVM_DEBUG(dbgs() << "Reload before: " << *ReloadBefore << '\n');
ScavengedInfo &Scavenged = spill(Reg, RC, SPAdj, SpillBefore, ReloadBefore);
Scavenged.Restore = &*std::prev(SpillBefore);
LiveUnits.removeReg(Reg);
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 7a99687757f8..a8c4b85df321 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -72,7 +72,6 @@
#include <string>
#include <tuple>
#include <utility>
-#include <vector>
using namespace llvm;
@@ -483,9 +482,6 @@ namespace {
/// returns false.
bool findBetterNeighborChains(StoreSDNode *St);
- /// Match "(X shl/srl V1) & V2" where V2 may not be present.
- bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask);
-
/// Holds a pointer to an LSBaseSDNode as well as information on where it
/// is located in a sequence of memory operations connected by a chain.
struct MemOpLink {
@@ -2671,6 +2667,12 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
N0.getOperand(1).getOperand(0));
+ // fold (A-(B-C)) -> A+(C-B)
+ if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
+ return DAG.getNode(ISD::ADD, DL, VT, N0,
+ DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
+ N1.getOperand(0)));
+
// fold (X - (-Y * Z)) -> (X + (Y * Z))
if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
if (N1.getOperand(0).getOpcode() == ISD::SUB &&
@@ -2740,6 +2742,17 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
}
}
+ // Prefer an add for more folding potential and possibly better codegen:
+ // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
+ if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
+ SDValue ShAmt = N1.getOperand(1);
+ ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
+ if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) {
+ SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
+ return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
+ }
+ }
+
return SDValue();
}
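
A scalar illustration of the preference above: the logical shift by width-1 produces 0 or 1, while the arithmetic shift produces 0 or -1, so the subtraction and the addition agree. The function names are illustrative, and the ashr form assumes arithmetic right shift of a signed value, which C++ only guarantees from C++20 (targets emit it directly).

#include <cstdint>

static int32_t originalForm(int32_t N0, int32_t X) {
  return N0 - int32_t(uint32_t(X) >> 31); // sub N0, (lshr X, 31)
}

static int32_t foldedForm(int32_t N0, int32_t X) {
  return N0 + (X >> 31);                  // add N0, (ashr X, 31)
}
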
@@ -4205,8 +4218,8 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
// Allow one node which will be masked along with any loads found.
if (NodeToMask)
return false;
-
- // Also ensure that the node to be masked only produces one data result.
+
+ // Also ensure that the node to be masked only produces one data result.
NodeToMask = Op.getNode();
if (NodeToMask->getNumValues() > 1) {
bool HasValue = false;
@@ -5148,25 +5161,140 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return SDValue();
}
-/// Match "(X shl/srl V1) & V2" where V2 may not be present.
-bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
- if (Op.getOpcode() == ISD::AND) {
- if (DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
- Mask = Op.getOperand(1);
- Op = Op.getOperand(0);
- } else {
- return false;
- }
+static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
+ if (Op.getOpcode() == ISD::AND &&
+ DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
+ Mask = Op.getOperand(1);
+ return Op.getOperand(0);
}
+ return Op;
+}
+/// Match "(X shl/srl V1) & V2" where V2 may not be present.
+static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
+ SDValue &Mask) {
+ Op = stripConstantMask(DAG, Op, Mask);
if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
Shift = Op;
return true;
}
-
return false;
}
+/// Helper function for visitOR to extract the needed side of a rotate idiom
+/// from a shl/srl/mul/udiv. This is meant to handle cases where
+/// InstCombine merged some outside op with one of the shifts from
+/// the rotate pattern.
+/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
+/// Otherwise, returns an expansion of \p ExtractFrom based on the following
+/// patterns:
+///
+/// (or (mul v c0) (shrl (mul v c1) c2)):
+/// expands (mul v c0) -> (shl (mul v c1) c3)
+///
+/// (or (udiv v c0) (shl (udiv v c1) c2)):
+/// expands (udiv v c0) -> (shrl (udiv v c1) c3)
+///
+/// (or (shl v c0) (shrl (shl v c1) c2)):
+/// expands (shl v c0) -> (shl (shl v c1) c3)
+///
+/// (or (shrl v c0) (shl (shrl v c1) c2)):
+/// expands (shrl v c0) -> (shrl (shrl v c1) c3)
+///
+/// Such that in all cases, c3+c2==bitwidth(op v c1).
+static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
+ SDValue ExtractFrom, SDValue &Mask,
+ const SDLoc &DL) {
+ assert(OppShift && ExtractFrom && "Empty SDValue");
+ assert(
+ (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
+ "Existing shift must be valid as a rotate half");
+
+ ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
+ // Preconditions:
+ // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
+ //
+ // Find opcode of the needed shift to be extracted from (op0 v c0).
+ unsigned Opcode = ISD::DELETED_NODE;
+ bool IsMulOrDiv = false;
+ // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
+ // opcode or its arithmetic (mul or udiv) variant.
+ auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
+ IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
+ if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
+ return false;
+ Opcode = NeededShift;
+ return true;
+ };
+ // op0 must be either the needed shift opcode or the mul/udiv equivalent
+ // that the needed shift can be extracted from.
+ if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
+ (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
+ return SDValue();
+
+ // op0 must be the same opcode on both sides, have the same LHS argument,
+ // and produce the same value type.
+ SDValue OppShiftLHS = OppShift.getOperand(0);
+ EVT ShiftedVT = OppShiftLHS.getValueType();
+ if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
+ OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
+ ShiftedVT != ExtractFrom.getValueType())
+ return SDValue();
+
+ // Amount of the existing shift.
+ ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
+ // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
+ ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
+ // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
+ ConstantSDNode *ExtractFromCst =
+ isConstOrConstSplat(ExtractFrom.getOperand(1));
+ // TODO: We should be able to handle non-uniform constant vectors for these
+ // values.
+ // Check that we have constant values.
+ if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
+ !OppLHSCst || !OppLHSCst->getAPIntValue() ||
+ !ExtractFromCst || !ExtractFromCst->getAPIntValue())
+ return SDValue();
+
+ // Compute the shift amount we need to extract to complete the rotate.
+ const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
+ APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
+ if (NeededShiftAmt.isNegative())
+ return SDValue();
+ // Normalize the bitwidth of the two mul/udiv/shift constant operands.
+ APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
+ APInt OppLHSAmt = OppLHSCst->getAPIntValue();
+ zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
+
+ // Now try extract the needed shift from the ExtractFrom op and see if the
+ // result matches up with the existing shift's LHS op.
+ if (IsMulOrDiv) {
+ // Op to extract from is a mul or udiv by a constant.
+ // Check:
+ // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
+ // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
+ const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
+ NeededShiftAmt.getZExtValue());
+ APInt ResultAmt;
+ APInt Rem;
+ APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
+ if (Rem != 0 || ResultAmt != OppLHSAmt)
+ return SDValue();
+ } else {
+ // Op to extract from is a shift by a constant.
+ // Check:
+ // c2 - (bitwidth(op0 v c0) - c1) == c0
+ if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
+ ExtractFromAmt.getBitWidth()))
+ return SDValue();
+ }
+
+ // Return the expanded shift op that should allow a rotate to be formed.
+ EVT ShiftVT = OppShift.getOperand(1).getValueType();
+ EVT ResVT = ExtractFrom.getValueType();
+ SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
+ return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
+}
+
// Return true if we can prove that, whenever Neg and Pos are both in the
// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
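
A worked example of the mul case the new helper handles, with uint32_t standing in for the DAG values and all names illustrative. InstCombine may have merged the shl half of a rotate into a multiply; the helper checks that the mul constant equals the other mul constant scaled by the missing shift (with zero remainder) and that the two shift amounts sum to the bit width before rebuilding the shl.

#include <cstdint>

static uint32_t rotlMul3(uint32_t V) {
  uint32_t M = V * 3u;
  return (M << 8) | (M >> 24); // rotl(V * 3, 8)
}

static uint32_t rotlMul3Combined(uint32_t V) {
  // After InstCombine merged the shl into the mul: 768 == 3 << 8, and the
  // shift amounts satisfy 8 + 24 == 32, so the shl can be extracted from
  // the mul and the rotate match can proceed.
  return (V * 768u) | ((V * 3u) >> 24);
}
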
@@ -5333,13 +5461,40 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
// Match "(X shl/srl V1) & V2" where V2 may not be present.
SDValue LHSShift; // The shift.
SDValue LHSMask; // AND value if any.
- if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
- return nullptr; // Not part of a rotate.
+ matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
SDValue RHSShift; // The shift.
SDValue RHSMask; // AND value if any.
- if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
- return nullptr; // Not part of a rotate.
+ matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
+
+ // If neither side matched a rotate half, bail
+ if (!LHSShift && !RHSShift)
+ return nullptr;
+
+ // InstCombine may have combined a constant shl, srl, mul, or udiv with one
+ // side of the rotate, so try to handle that here. In all cases we need to
+ // pass the matched shift from the opposite side to compute the opcode and
+ // needed shift amount to extract. We still want to do this if both sides
+ // matched a rotate half because one half may be a potential overshift that
+ // can be broken down (ie if InstCombine merged two shl or srl ops into a
+ // single one).
+
+ // Have LHS side of the rotate, try to extract the needed shift from the RHS.
+ if (LHSShift)
+ if (SDValue NewRHSShift =
+ extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
+ RHSShift = NewRHSShift;
+ // Have RHS side of the rotate, try to extract the needed shift from the LHS.
+ if (RHSShift)
+ if (SDValue NewLHSShift =
+ extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
+ LHSShift = NewLHSShift;
+
+ // If a side is still missing, nothing else we can do.
+ if (!RHSShift || !LHSShift)
+ return nullptr;
+
+ // At this point we've matched or extracted a shift op on each side.
if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
return nullptr; // Not shifting the same value.
@@ -10270,7 +10425,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N10.getOperand(0))),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N10.getOperand(1)),
- N0, Flags);
+ N0, Flags);
}
}
@@ -10333,7 +10488,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N0.getOperand(2).getOperand(0),
N0.getOperand(2).getOperand(1),
DAG.getNode(ISD::FNEG, SL, VT,
- N1), Flags), Flags);
+ N1), Flags), Flags);
}
// fold (fsub x, (fma y, z, (fmul u, v)))
@@ -10348,7 +10503,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N1.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, N20),
- N21, N0, Flags), Flags);
+ N21, N0, Flags), Flags);
}
@@ -10368,7 +10523,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N020.getOperand(1)),
DAG.getNode(ISD::FNEG, SL, VT,
- N1), Flags), Flags);
+ N1), Flags), Flags);
}
}
}
@@ -10396,7 +10551,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N002.getOperand(1)),
DAG.getNode(ISD::FNEG, SL, VT,
- N1), Flags), Flags);
+ N1), Flags), Flags);
}
}
}
@@ -10419,7 +10574,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
VT, N1200)),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N1201),
- N0, Flags), Flags);
+ N0, Flags), Flags);
}
}
@@ -10450,7 +10605,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
VT, N1020)),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N1021),
- N0, Flags), Flags);
+ N0, Flags), Flags);
}
}
}
@@ -10506,7 +10661,7 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
Y, Flags);
if (XC1 && XC1->isExactlyValue(-1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
- DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
+ DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
}
return SDValue();
};
@@ -10530,7 +10685,7 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
if (XC0 && XC0->isExactlyValue(-1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
- DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
+ DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
if (XC1 && XC1->isExactlyValue(+1.0))
@@ -10838,12 +10993,12 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
- if (Options.UnsafeFPMath ||
+ if (Options.UnsafeFPMath ||
(Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
// fold (fmul A, 0) -> 0
if (N1CFP && N1CFP->isZero())
return N1;
- }
+ }
if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
// fmul (fmul X, C1), C2 -> fmul X, C1 * C2
@@ -11258,7 +11413,7 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
SDValue DAGCombiner::visitFSQRT(SDNode *N) {
SDNodeFlags Flags = N->getFlags();
- if (!DAG.getTarget().Options.UnsafeFPMath &&
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
!Flags.hasApproximateFuncs())
return SDValue();
@@ -17913,9 +18068,9 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) {
if (C->isNullValue())
return SDValue();
- std::vector<SDNode *> Built;
+ SmallVector<SDNode *, 8> Built;
SDValue S =
- TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
+ TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, Built);
for (SDNode *N : Built)
AddToWorklist(N);
@@ -17933,8 +18088,8 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
if (C->isNullValue())
return SDValue();
- std::vector<SDNode *> Built;
- SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built);
+ SmallVector<SDNode *, 8> Built;
+ SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built);
for (SDNode *N : Built)
AddToWorklist(N);
@@ -17959,9 +18114,9 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
if (C->isNullValue())
return SDValue();
- std::vector<SDNode *> Built;
+ SmallVector<SDNode *, 8> Built;
SDValue S =
- TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
+ TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, Built);
for (SDNode *N : Built)
AddToWorklist(N);
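The three hunks above switch the Created list from std::vector<SDNode *> to
SmallVector<SDNode *, 8>, passed by reference: the handful of nodes these
expansions create now live in inline storage rather than a heap allocation,
and the callee no longer needs a null-pointer assert. The caller-side pattern,
as it now appears in BuildSDIV:

  SmallVector<SDNode *, 8> Built;   // inline storage for up to 8 nodes
  SDValue S =
      TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, Built);
  for (SDNode *NewNode : Built)
    AddToWorklist(NewNode);         // queue the new nodes for recombining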
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index e4a9d557d386..795ade588b8f 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1130,7 +1130,7 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
ComputeValueVTs(TLI, DL, CLI.RetTy, RetTys);
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, TLI, DL);
+ GetReturnInfo(CLI.CallConv, CLI.RetTy, getReturnAttrs(CLI), Outs, TLI, DL);
bool CanLowerReturn = TLI.CanLowerReturn(
CLI.CallConv, *FuncInfo.MF, CLI.IsVarArg, Outs, CLI.RetTy->getContext());
@@ -1548,7 +1548,7 @@ void FastISel::removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue)
{
MachineInstr *CurLastLocalValue = getLastLocalValue();
if (CurLastLocalValue != SavedLastLocalValue) {
- // Find the first local value instruction to be deleted.
+ // Find the first local value instruction to be deleted.
// This is the instruction after SavedLastLocalValue if it is non-NULL.
// Otherwise it's the first instruction in the block.
MachineBasicBlock::iterator FirstDeadInst(SavedLastLocalValue);
@@ -1569,7 +1569,7 @@ bool FastISel::selectInstruction(const Instruction *I) {
if (!handlePHINodesInSuccessorBlocks(I->getParent())) {
// PHI node handling may have generated local value instructions,
// even though it failed to handle all PHI nodes.
- // We remove these instructions because SelectionDAGISel will generate
+ // We remove these instructions because SelectionDAGISel will generate
// them again.
removeDeadLocalValueCode(SavedLastLocalValue);
return false;
@@ -1630,7 +1630,7 @@ bool FastISel::selectInstruction(const Instruction *I) {
DbgLoc = DebugLoc();
// Undo phi node updates, because they will be added again by SelectionDAG.
if (isa<TerminatorInst>(I)) {
- // PHI node handling may have generated local value instructions.
+ // PHI node handling may have generated local value instructions.
// We remove them because SelectionDAGISel will generate them again.
removeDeadLocalValueCode(SavedLastLocalValue);
FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate);
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 42c7181dac41..d3c31911d677 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -89,10 +89,12 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
// Check whether the function can return without sret-demotion.
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(Fn->getReturnType(), Fn->getAttributes(), Outs, *TLI,
+ CallingConv::ID CC = Fn->getCallingConv();
+
+ GetReturnInfo(CC, Fn->getReturnType(), Fn->getAttributes(), Outs, *TLI,
mf.getDataLayout());
- CanLowerReturn = TLI->CanLowerReturn(Fn->getCallingConv(), *MF,
- Fn->isVarArg(), Outs, Fn->getContext());
+ CanLowerReturn =
+ TLI->CanLowerReturn(CC, *MF, Fn->isVarArg(), Outs, Fn->getContext());
// If this personality uses funclets, we need to do a bit more work.
DenseMap<const AllocaInst *, TinyPtrVector<int *>> CatchObjects;
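Both call sites above gain a CallingConv::ID argument to GetReturnInfo, so
return-value legalization can consult the calling convention (the
TargetLoweringBase.cpp hunk later in this patch forwards it to
getNumRegistersForCallingConv and getRegisterTypeForCallingConv). The updated
call shape, condensed from the hunk above:

  CallingConv::ID CC = Fn->getCallingConv();
  SmallVector<ISD::OutputArg, 4> Outs;
  GetReturnInfo(CC, Fn->getReturnType(), Fn->getAttributes(), Outs, *TLI,
                MF.getDataLayout());
  bool OK = TLI->CanLowerReturn(CC, *MF, Fn->isVarArg(), Outs,
                                Fn->getContext());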
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index b0ae1e0399fb..9aa0ea15f3b7 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -153,7 +153,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) {
// of Endianness. LLVM's APFloat representation is not Endian sensitive,
// and so always converts into a 128-bit APInt in a non-Endian-sensitive
  // way. However, APInts are serialized in an Endian-sensitive fashion,
- // so on big-Endian targets, the two doubles are output in the wrong
+ // so on big-Endian targets, the two doubles are output in the wrong
// order. Fix this by manually flipping the order of the high 64 bits
// and the low 64 bits here.
if (DAG.getDataLayout().isBigEndian() &&
@@ -815,7 +815,7 @@ bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) {
switch (N->getOpcode()) {
case ISD::ConstantFP: // Leaf node.
- case ISD::CopyFromReg: // Operand is a register that we know to be left
+ case ISD::CopyFromReg: // Operand is a register that we know to be left
// unchanged by SoftenFloatResult().
case ISD::Register: // Leaf node.
return true;
@@ -838,7 +838,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_COPY_TO_REG(SDNode *N) {
if (N->getNumOperands() == 3)
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2), 0);
- return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2,
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2,
N->getOperand(3)),
0);
}
@@ -1898,7 +1898,8 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FROUND:
case ISD::FSIN:
case ISD::FSQRT:
- case ISD::FTRUNC: R = PromoteFloatRes_UnaryOp(N); break;
+ case ISD::FTRUNC:
+ case ISD::FCANONICALIZE: R = PromoteFloatRes_UnaryOp(N); break;
// Binary FP Operations
case ISD::FADD:
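Since ISD::FCANONICALIZE takes a single FP operand, promoting it can share
PromoteFloatRes_UnaryOp with FSQRT and friends. Roughly (paraphrased, not part
of this patch), that helper promotes the operand and re-emits the same opcode
at the wider type:

  // Paraphrased shape of the shared unary promotion path:
  SDValue Op = GetPromotedFloat(N->getOperand(0));  // e.g. f16 -> f32
  return DAG.getNode(N->getOpcode(), SDLoc(N),
                     TLI.getTypeToTransformTo(*DAG.getContext(),
                                              N->getValueType(0)),
                     Op);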
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 2c6b1ee7900f..135922d6f267 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -510,7 +510,7 @@ private:
SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N);
// Return true if we can skip softening the given operand or SDNode because
- // either it was soften before by SoftenFloatResult and references to the
+  // either it was softened before by SoftenFloatResult and references to the
  // operand were replaced by ReplaceValueWith or its value type is legal in HW
// registers and the operand can be left unchanged.
bool CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 67928d4bdbd5..3a98a7a904cb 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -131,7 +131,7 @@ class VectorLegalizer {
SDValue ExpandCTLZ(SDValue Op);
SDValue ExpandCTTZ_ZERO_UNDEF(SDValue Op);
SDValue ExpandStrictFPOp(SDValue Op);
-
+
/// Implements vector promotion.
///
/// This is essentially just bitcasting the operands to a different type and
@@ -315,7 +315,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
// equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
// is also legal, but if ISD::FSQRT requires expansion then so does
// ISD::STRICT_FSQRT.
- Action = TLI.getStrictFPOperationAction(Node->getOpcode(),
+ Action = TLI.getStrictFPOperationAction(Node->getOpcode(),
Node->getValueType(0));
break;
case ISD::ADD:
@@ -397,12 +397,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
break;
case ISD::FP_ROUND_INREG:
- Action = TLI.getOperationAction(Node->getOpcode(),
+ Action = TLI.getOperationAction(Node->getOpcode(),
cast<VTSDNode>(Node->getOperand(1))->getVT());
break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
- Action = TLI.getOperationAction(Node->getOpcode(),
+ Action = TLI.getOperationAction(Node->getOpcode(),
Node->getOperand(0).getValueType());
break;
case ISD::MSCATTER:
@@ -736,7 +736,7 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
case ISD::CTTZ_ZERO_UNDEF:
return ExpandCTTZ_ZERO_UNDEF(Op);
case ISD::STRICT_FADD:
- case ISD::STRICT_FSUB:
+ case ISD::STRICT_FSUB:
case ISD::STRICT_FMUL:
case ISD::STRICT_FDIV:
case ISD::STRICT_FSQRT:
@@ -1153,24 +1153,24 @@ SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
SmallVector<SDValue, 32> OpChains;
for (unsigned i = 0; i < NumElems; ++i) {
SmallVector<SDValue, 4> Opers;
- SDValue Idx = DAG.getConstant(i, dl,
+ SDValue Idx = DAG.getConstant(i, dl,
TLI.getVectorIdxTy(DAG.getDataLayout()));
// The Chain is the first operand.
Opers.push_back(Chain);
- // Now process the remaining operands.
+ // Now process the remaining operands.
for (unsigned j = 1; j < NumOpers; ++j) {
SDValue Oper = Op.getOperand(j);
EVT OperVT = Oper.getValueType();
if (OperVT.isVector())
- Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
EltVT, Oper, Idx);
Opers.push_back(Oper);
}
-
+
SDValue ScalarOp = DAG.getNode(Op->getOpcode(), dl, ValueVTs, Opers);
OpValues.push_back(ScalarOp.getValue(0));
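The loop above scalarizes a strict FP op: each element's scalar operation
takes the incoming chain as operand 0 and yields its own chain result, and
the chains are later merged so the side effects stay ordered relative to the
rest of the DAG. The shape of the expansion, for STRICT_FADD on <4 x float>
(illustrative):

  // %r.i, %ch.i = STRICT_FADD %ch.in, (extractelt %a, i), (extractelt %b, i)
  //   ... repeated for i = 0..3, collecting OpValues and OpChains ...
  // %ch.out = TokenFactor %ch.0, %ch.1, %ch.2, %ch.3  // merge the chains
  // %r      = BUILD_VECTOR %r.0, %r.1, %r.2, %r.3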
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 1cd43ace48f3..f5d9dd234afd 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1068,14 +1068,14 @@ void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo,
OpsLo.push_back(Chain);
OpsHi.push_back(Chain);
- // Now process the remaining operands.
+ // Now process the remaining operands.
for (unsigned i = 1; i < NumOps; ++i) {
- SDValue Op = N->getOperand(i);
- SDValue OpLo = Op;
- SDValue OpHi = Op;
+ SDValue Op = N->getOperand(i);
+ SDValue OpLo = Op;
+ SDValue OpHi = Op;
EVT InVT = Op.getValueType();
- if (InVT.isVector()) {
+ if (InVT.isVector()) {
// If the input also splits, handle it directly for a
// compile time speedup. Otherwise split it by hand.
if (getTypeAction(InVT) == TargetLowering::TypeSplitVector)
@@ -1092,10 +1092,10 @@ void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo,
EVT HiValueVTs[] = {HiVT, MVT::Other};
Lo = DAG.getNode(N->getOpcode(), dl, LoValueVTs, OpsLo);
Hi = DAG.getNode(N->getOpcode(), dl, HiValueVTs, OpsHi);
-
+
// Build a factor node to remember that this Op is independent of the
// other one.
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
Lo.getValue(1), Hi.getValue(1));
// Legalize the chain result - switch anything that used the old chain to
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 1aa8df29af3b..5f6b6010cae2 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -157,31 +157,36 @@ static cl::opt<unsigned> SwitchPeelThreshold(
// store [4096 x i8] %data, [4096 x i8]* %buffer
static const unsigned MaxParallelChains = 64;
-// True if the Value passed requires ABI mangling as it is a parameter to a
-// function or a return value from a function which is not an intrinsic.
-static bool isABIRegCopy(const Value *V) {
- const bool IsRetInst = V && isa<ReturnInst>(V);
- const bool IsCallInst = V && isa<CallInst>(V);
- const bool IsInLineAsm =
- IsCallInst && static_cast<const CallInst *>(V)->isInlineAsm();
- const bool IsIndirectFunctionCall =
- IsCallInst && !IsInLineAsm &&
- !static_cast<const CallInst *>(V)->getCalledFunction();
- // It is possible that the call instruction is an inline asm statement or an
- // indirect function call in which case the return value of
- // getCalledFunction() would be nullptr.
- const bool IsInstrinsicCall =
- IsCallInst && !IsInLineAsm && !IsIndirectFunctionCall &&
- static_cast<const CallInst *>(V)->getCalledFunction()->getIntrinsicID() !=
- Intrinsic::not_intrinsic;
-
- return IsRetInst || (IsCallInst && (!IsInLineAsm && !IsInstrinsicCall));
+// Return the calling convention if the Value passed requires ABI mangling as it
+// is a parameter to a function or a return value from a function which is not
+// an intrinsic.
+static Optional<CallingConv::ID> getABIRegCopyCC(const Value *V) {
+ if (auto *R = dyn_cast<ReturnInst>(V))
+ return R->getParent()->getParent()->getCallingConv();
+
+ if (auto *CI = dyn_cast<CallInst>(V)) {
+ const bool IsInlineAsm = CI->isInlineAsm();
+ const bool IsIndirectFunctionCall =
+ !IsInlineAsm && !CI->getCalledFunction();
+
+ // It is possible that the call instruction is an inline asm statement or an
+ // indirect function call in which case the return value of
+ // getCalledFunction() would be nullptr.
+    const bool IsIntrinsicCall =
+ !IsInlineAsm && !IsIndirectFunctionCall &&
+ CI->getCalledFunction()->getIntrinsicID() != Intrinsic::not_intrinsic;
+
+    if (!IsInlineAsm && !IsIntrinsicCall)
+ return CI->getCallingConv();
+ }
+
+ return None;
}
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, const Value *V,
- bool IsABIRegCopy);
+ Optional<CallingConv::ID> CC);
/// getCopyFromParts - Create a value that contains the specified legal parts
/// combined into the value they represent. If the parts combine to a type
@@ -191,11 +196,11 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, const Value *V,
- Optional<ISD::NodeType> AssertOp = None,
- bool IsABIRegCopy = false) {
+ Optional<CallingConv::ID> CC = None,
+ Optional<ISD::NodeType> AssertOp = None) {
if (ValueVT.isVector())
- return getCopyFromPartsVector(DAG, DL, Parts, NumParts,
- PartVT, ValueVT, V, IsABIRegCopy);
+ return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT, V,
+ CC);
assert(NumParts > 0 && "No parts to assemble!");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -236,8 +241,8 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
// Assemble the trailing non-power-of-2 part.
unsigned OddParts = NumParts - RoundParts;
EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
- Hi = getCopyFromParts(DAG, DL,
- Parts + RoundParts, OddParts, PartVT, OddVT, V);
+ Hi = getCopyFromParts(DAG, DL, Parts + RoundParts, OddParts, PartVT,
+ OddVT, V, CC);
// Combine the round and odd parts.
Lo = Val;
@@ -267,7 +272,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
!PartVT.isVector() && "Unexpected split");
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
- Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V);
+ Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V, CC);
}
}
@@ -340,9 +345,11 @@ static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, const Value *V,
- bool IsABIRegCopy) {
+ Optional<CallingConv::ID> CallConv) {
assert(ValueVT.isVector() && "Not a vector value");
assert(NumParts > 0 && "No parts to assemble!");
+ const bool IsABIRegCopy = CallConv.hasValue();
+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Val = Parts[0];
@@ -355,8 +362,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
if (IsABIRegCopy) {
NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
- *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates,
- RegisterVT);
+ *DAG.getContext(), CallConv.getValue(), ValueVT, IntermediateVT,
+ NumIntermediates, RegisterVT);
} else {
NumRegs =
TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
@@ -470,7 +477,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
SDValue Val, SDValue *Parts, unsigned NumParts,
- MVT PartVT, const Value *V, bool IsABIRegCopy);
+ MVT PartVT, const Value *V,
+ Optional<CallingConv::ID> CallConv);
/// getCopyToParts - Create a series of nodes that contain the specified value
/// split into legal parts. If the parts contain more bits than Val, then, for
@@ -478,14 +486,14 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
SDValue *Parts, unsigned NumParts, MVT PartVT,
const Value *V,
- ISD::NodeType ExtendKind = ISD::ANY_EXTEND,
- bool IsABIRegCopy = false) {
+ Optional<CallingConv::ID> CallConv = None,
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
EVT ValueVT = Val.getValueType();
// Handle the vector case separately.
if (ValueVT.isVector())
return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V,
- IsABIRegCopy);
+ CallConv);
unsigned PartBits = PartVT.getSizeInBits();
unsigned OrigNumParts = NumParts;
@@ -564,7 +572,8 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
unsigned OddParts = NumParts - RoundParts;
SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
DAG.getIntPtrConstant(RoundBits, DL));
- getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V);
+ getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V,
+ CallConv);
if (DAG.getDataLayout().isBigEndian())
// The odd parts were reversed by getCopyToParts - unreverse them.
@@ -605,16 +614,16 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
std::reverse(Parts, Parts + OrigNumParts);
}
-
/// getCopyToPartsVector - Create a series of nodes that contain the specified
/// value split into legal parts.
static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
SDValue Val, SDValue *Parts, unsigned NumParts,
MVT PartVT, const Value *V,
- bool IsABIRegCopy) {
+ Optional<CallingConv::ID> CallConv) {
EVT ValueVT = Val.getValueType();
assert(ValueVT.isVector() && "Not a vector");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const bool IsABIRegCopy = CallConv.hasValue();
if (NumParts == 1) {
EVT PartEVT = PartVT;
@@ -679,8 +688,8 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
unsigned NumRegs;
if (IsABIRegCopy) {
NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
- *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates,
- RegisterVT);
+ *DAG.getContext(), CallConv.getValue(), ValueVT, IntermediateVT,
+ NumIntermediates, RegisterVT);
} else {
NumRegs =
TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
@@ -720,7 +729,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
// If the register was not expanded, promote or copy the value,
// as appropriate.
for (unsigned i = 0; i != NumParts; ++i)
- getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V);
+ getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V, CallConv);
} else if (NumParts > 0) {
// If the intermediate type was expanded, split each the value into
// legal parts.
@@ -729,29 +738,32 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
"Must expand into a divisible number of parts!");
unsigned Factor = NumParts / NumIntermediates;
for (unsigned i = 0; i != NumIntermediates; ++i)
- getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT, V);
+ getCopyToParts(DAG, DL, Ops[i], &Parts[i * Factor], Factor, PartVT, V,
+ CallConv);
}
}
RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
- EVT valuevt, bool IsABIMangledValue)
+ EVT valuevt, Optional<CallingConv::ID> CC)
: ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs),
- RegCount(1, regs.size()), IsABIMangled(IsABIMangledValue) {}
+ RegCount(1, regs.size()), CallConv(CC) {}
RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
const DataLayout &DL, unsigned Reg, Type *Ty,
- bool IsABIMangledValue) {
+ Optional<CallingConv::ID> CC) {
ComputeValueVTs(TLI, DL, Ty, ValueVTs);
- IsABIMangled = IsABIMangledValue;
+ CallConv = CC;
for (EVT ValueVT : ValueVTs) {
- unsigned NumRegs = IsABIMangledValue
- ? TLI.getNumRegistersForCallingConv(Context, ValueVT)
- : TLI.getNumRegisters(Context, ValueVT);
- MVT RegisterVT = IsABIMangledValue
- ? TLI.getRegisterTypeForCallingConv(Context, ValueVT)
- : TLI.getRegisterType(Context, ValueVT);
+ unsigned NumRegs =
+ isABIMangled()
+ ? TLI.getNumRegistersForCallingConv(Context, CC.getValue(), ValueVT)
+ : TLI.getNumRegisters(Context, ValueVT);
+ MVT RegisterVT =
+ isABIMangled()
+ ? TLI.getRegisterTypeForCallingConv(Context, CC.getValue(), ValueVT)
+ : TLI.getRegisterType(Context, ValueVT);
for (unsigned i = 0; i != NumRegs; ++i)
Regs.push_back(Reg + i);
RegVTs.push_back(RegisterVT);
@@ -777,9 +789,10 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
// Copy the legal parts from the registers.
EVT ValueVT = ValueVTs[Value];
unsigned NumRegs = RegCount[Value];
- MVT RegisterVT = IsABIMangled
- ? TLI.getRegisterTypeForCallingConv(*DAG.getContext(), RegVTs[Value])
- : RegVTs[Value];
+ MVT RegisterVT = isABIMangled() ? TLI.getRegisterTypeForCallingConv(
+ *DAG.getContext(),
+ CallConv.getValue(), RegVTs[Value])
+ : RegVTs[Value];
Parts.resize(NumRegs);
for (unsigned i = 0; i != NumRegs; ++i) {
@@ -837,8 +850,8 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
RegisterVT, P, DAG.getValueType(FromVT));
}
- Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
- NumRegs, RegisterVT, ValueVT, V);
+ Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), NumRegs,
+ RegisterVT, ValueVT, V, CallConv);
Part += NumRegs;
Parts.clear();
}
@@ -859,15 +872,16 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
unsigned NumParts = RegCount[Value];
- MVT RegisterVT = IsABIMangled
- ? TLI.getRegisterTypeForCallingConv(*DAG.getContext(), RegVTs[Value])
- : RegVTs[Value];
+ MVT RegisterVT = isABIMangled() ? TLI.getRegisterTypeForCallingConv(
+ *DAG.getContext(),
+ CallConv.getValue(), RegVTs[Value])
+ : RegVTs[Value];
if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
ExtendKind = ISD::ZERO_EXTEND;
- getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
- &Parts[Part], NumParts, RegisterVT, V, ExtendKind);
+ getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), &Parts[Part],
+ NumParts, RegisterVT, V, CallConv, ExtendKind);
Part += NumParts;
}
@@ -1164,7 +1178,7 @@ SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
unsigned InReg = It->second;
RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
- DAG.getDataLayout(), InReg, Ty, isABIRegCopy(V));
+ DAG.getDataLayout(), InReg, Ty, getABIRegCopyCC(V));
SDValue Chain = DAG.getEntryNode();
Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr,
V);
@@ -1355,7 +1369,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg,
- Inst->getType(), isABIRegCopy(V));
+ Inst->getType(), getABIRegCopyCC(V));
SDValue Chain = DAG.getEntryNode();
return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
}
@@ -1589,12 +1603,14 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind);
- unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, VT);
- MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, VT);
+ CallingConv::ID CC = F->getCallingConv();
+
+ unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, CC, VT);
+ MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, CC, VT);
SmallVector<SDValue, 4> Parts(NumParts);
getCopyToParts(DAG, getCurSDLoc(),
SDValue(RetOp.getNode(), RetOp.getResNo() + j),
- &Parts[0], NumParts, PartVT, &I, ExtendKind, true);
+ &Parts[0], NumParts, PartVT, &I, CC, ExtendKind);
// 'inreg' on function refers to return value
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
@@ -4929,7 +4945,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
if (VMI != FuncInfo.ValueMap.end()) {
const auto &TLI = DAG.getTargetLoweringInfo();
RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second,
- V->getType(), isABIRegCopy(V));
+ V->getType(), getABIRegCopyCC(V));
if (RFV.occupiesMultipleRegs()) {
unsigned Offset = 0;
for (auto RegAndSize : RFV.getRegsAndSizes()) {
@@ -4971,7 +4987,7 @@ SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
unsigned DbgSDNodeOrder) {
if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
// Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe
- // stack slot locations.
+ // stack slot locations.
//
// Consider "int x = 0; int *px = &x;". There are two kinds of interesting
// debug values here after optimization:
@@ -5288,7 +5304,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// The PHI node may be split up into several MI PHI nodes (in
// FunctionLoweringInfo::set).
RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
- V->getType(), false);
+ V->getType(), None);
if (RFV.occupiesMultipleRegs()) {
unsigned Offset = 0;
unsigned BitsToDescribe = 0;
@@ -7182,10 +7198,11 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
/// uses features that we can't model on machineinstrs, we have SDISel do the
/// allocation. This produces generally horrible, but correct, code.
///
-/// OpInfo describes the operand.
+/// OpInfo describes the operand.
+/// RefOpInfo describes the matching operand if any, or the operand itself
+/// otherwise.
static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
- const SDLoc &DL,
- SDISelAsmOperandInfo &OpInfo) {
+ const SDLoc &DL, SDISelAsmOperandInfo &OpInfo,
+ SDISelAsmOperandInfo &RefOpInfo) {
LLVMContext &Context = *DAG.getContext();
MachineFunction &MF = DAG.getMachineFunction();
@@ -7195,8 +7212,8 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
// If this is a constraint for a single physreg, or a constraint for a
// register class, find it.
std::pair<unsigned, const TargetRegisterClass *> PhysReg =
- TLI.getRegForInlineAsmConstraint(&TRI, OpInfo.ConstraintCode,
- OpInfo.ConstraintVT);
+ TLI.getRegForInlineAsmConstraint(&TRI, RefOpInfo.ConstraintCode,
+ RefOpInfo.ConstraintVT);
unsigned NumRegs = 1;
if (OpInfo.ConstraintVT != MVT::Other) {
@@ -7238,6 +7255,11 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);
}
+  // No need to allocate a matching input constraint since the constraint it is
+  // matched to has already been allocated.
+ if (OpInfo.isMatchingInputConstraint())
+ return;
+
MVT RegVT;
EVT ValueVT = OpInfo.ConstraintVT;
@@ -7486,19 +7508,27 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// If this constraint is for a specific register, allocate it before
// anything else.
- if (OpInfo.ConstraintType == TargetLowering::C_Register)
- GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo);
+ SDISelAsmOperandInfo &RefOpInfo =
+ OpInfo.isMatchingInputConstraint()
+ ? ConstraintOperands[OpInfo.getMatchedOperand()]
+ : ConstraintOperands[i];
+ if (RefOpInfo.ConstraintType == TargetLowering::C_Register)
+ GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo, RefOpInfo);
}
// Third pass - Loop over all of the operands, assigning virtual or physregs
// to register class operands.
for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+ SDISelAsmOperandInfo &RefOpInfo =
+ OpInfo.isMatchingInputConstraint()
+ ? ConstraintOperands[OpInfo.getMatchedOperand()]
+ : ConstraintOperands[i];
// C_Register operands have already been allocated, Other/Memory don't need
// to be.
- if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
- GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo);
+ if (RefOpInfo.ConstraintType == TargetLowering::C_RegisterClass)
+ GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo, RefOpInfo);
}
// AsmNodeOperands - The operands for the ISD::INLINEASM node.
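The second and third passes above now resolve a matching input constraint
against the operand it refers to (RefOpInfo) and skip allocating registers for
the input itself, since it must reuse the matched operand's registers anyway.
Matching constraints are the inline-asm digit constraints, e.g. on x86:

  int v = 41;
  asm("incl %0" : "+r"(v));           // "+r" expands to a matched in/out pair
  asm("incl %0" : "=r"(v) : "0"(v));  // explicit form: input "0" is tied to
                                      // output operand 0's register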
@@ -8289,7 +8319,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
}
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL);
+ GetReturnInfo(CLI.CallConv, CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL);
bool CanLowerReturn =
this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(),
@@ -8305,7 +8335,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
unsigned Align = DL.getPrefTypeAlignment(CLI.RetTy);
MachineFunction &MF = CLI.DAG.getMachineFunction();
DemoteStackIdx = MF.getFrameInfo().CreateStackObject(TySize, Align, false);
- Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy);
+ Type *StackSlotPtrType = PointerType::get(CLI.RetTy,
+ DL.getAllocaAddrSpace());
DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getFrameIndexTy(DL));
ArgListEntry Entry;
@@ -8331,10 +8362,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
} else {
for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
EVT VT = RetTys[I];
- MVT RegisterVT =
- getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT);
- unsigned NumRegs =
- getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT);
+ MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
+ CLI.CallConv, VT);
+ unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
+ CLI.CallConv, VT);
for (unsigned i = 0; i != NumRegs; ++i) {
ISD::InputArg MyFlags;
MyFlags.VT = RegisterVT;
@@ -8443,9 +8474,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Flags.setInConsecutiveRegs();
Flags.setOrigAlign(OriginalAlignment);
- MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT);
- unsigned NumParts =
- getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT);
+ MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
+ CLI.CallConv, VT);
+ unsigned NumParts = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
+ CLI.CallConv, VT);
SmallVector<SDValue, 4> Parts(NumParts);
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
@@ -8477,7 +8509,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
}
getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT,
- CLI.CS.getInstruction(), ExtendKind, true);
+ CLI.CS.getInstruction(), CLI.CallConv, ExtendKind);
for (unsigned j = 0; j != NumParts; ++j) {
// if it isn't first piece, alignment must be 1
@@ -8577,14 +8609,14 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
unsigned CurReg = 0;
for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
EVT VT = RetTys[I];
- MVT RegisterVT =
- getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT);
- unsigned NumRegs =
- getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT);
+ MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
+ CLI.CallConv, VT);
+ unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
+ CLI.CallConv, VT);
ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
NumRegs, RegisterVT, VT, nullptr,
- AssertOp, true));
+ CLI.CallConv, AssertOp));
CurReg += NumRegs;
}
@@ -8623,8 +8655,8 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
// If this is an InlineAsm we have to match the registers required, not the
// notional registers required by the type.
- RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
- V->getType(), isABIRegCopy(V));
+ RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, V->getType(),
+ getABIRegCopyCC(V));
SDValue Chain = DAG.getEntryNode();
ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) ==
@@ -8937,10 +8969,10 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
if (ArgCopyElisionCandidates.count(&Arg))
Flags.setCopyElisionCandidate();
- MVT RegisterVT =
- TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), VT);
- unsigned NumRegs =
- TLI->getNumRegistersForCallingConv(*CurDAG->getContext(), VT);
+ MVT RegisterVT = TLI->getRegisterTypeForCallingConv(
+ *CurDAG->getContext(), F.getCallingConv(), VT);
+ unsigned NumRegs = TLI->getNumRegistersForCallingConv(
+ *CurDAG->getContext(), F.getCallingConv(), VT);
for (unsigned i = 0; i != NumRegs; ++i) {
ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed,
ArgNo, PartBase+i*RegisterVT.getStoreSize());
@@ -8995,8 +9027,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
MVT VT = ValueVTs[0].getSimpleVT();
MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
Optional<ISD::NodeType> AssertOp = None;
- SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1,
- RegVT, VT, nullptr, AssertOp);
+ SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT,
+ nullptr, F.getCallingConv(), AssertOp);
MachineFunction& MF = SDB->DAG.getMachineFunction();
MachineRegisterInfo& RegInfo = MF.getRegInfo();
@@ -9046,10 +9078,10 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
for (unsigned Val = 0; Val != NumValues; ++Val) {
EVT VT = ValueVTs[Val];
- MVT PartVT =
- TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), VT);
- unsigned NumParts =
- TLI->getNumRegistersForCallingConv(*CurDAG->getContext(), VT);
+ MVT PartVT = TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(),
+ F.getCallingConv(), VT);
+ unsigned NumParts = TLI->getNumRegistersForCallingConv(
+ *CurDAG->getContext(), F.getCallingConv(), VT);
      // Even an apparent 'unused' swifterror argument needs to be returned. So
// we do generate a copy for it that can be used on return from the
@@ -9062,8 +9094,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
AssertOp = ISD::AssertZext;
ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
- PartVT, VT, nullptr, AssertOp,
- true));
+ PartVT, VT, nullptr,
+ F.getCallingConv(), AssertOp));
}
i += NumParts;
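The common thread in this file: the IsABIRegCopy boolean becomes an
Optional<CallingConv::ID>, so "this value needs ABI-dependent treatment" and
"under which convention" travel as one piece of state. Condensed from the
hunks above (names from the patch, body paraphrased):

  Optional<CallingConv::ID> CC = getABIRegCopyCC(V);  // None => no mangling
  if (CC)  // ABI-mangled: use the per-convention breakdown
    NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
        Ctx, CC.getValue(), ValueVT, IntermediateVT, NumIntermediates,
        RegisterVT);
  else     // otherwise fall back to generic type legalization
    NumRegs = TLI.getVectorTypeBreakdown(Ctx, ValueVT, IntermediateVT,
                                         NumIntermediates, RegisterVT);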
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index e421984b8af2..4b5dda982f1b 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -1015,14 +1015,18 @@ struct RegsForValue {
   /// Records if this value needs to be treated in an ABI dependent manner,
/// different to normal type legalization.
- bool IsABIMangled = false;
+ Optional<CallingConv::ID> CallConv;
RegsForValue() = default;
RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt, EVT valuevt,
- bool IsABIMangledValue = false);
+ Optional<CallingConv::ID> CC = None);
RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
const DataLayout &DL, unsigned Reg, Type *Ty,
- bool IsABIMangledValue = false);
+ Optional<CallingConv::ID> CC);
+
+ bool isABIMangled() const {
+ return CallConv.hasValue();
+ }
/// Add the specified values to this one.
void append(const RegsForValue &RHS) {
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 5cf06e62b80c..54cbd6859f70 100644
--- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -419,10 +419,10 @@ static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly,
Builder.getFrameIndexTy()));
} else if (LiveInOnly) {
// If this value is live in (not live-on-return, or live-through), we can
- // treat it the same way patchpoint treats it's "live in" values. We'll
- // end up folding some of these into stack references, but they'll be
+    // treat it the same way patchpoint treats its "live in" values. We'll
+ // end up folding some of these into stack references, but they'll be
// handled by the register allocator. Note that we do not have the notion
- // of a late use so these values might be placed in registers which are
+ // of a late use so these values might be placed in registers which are
// clobbered by the call. This is fine for live-in.
Ops.push_back(Incoming);
} else {
@@ -498,7 +498,7 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
   auto isGCValue = [&](const Value *V) {
return is_contained(SI.Ptrs, V) || is_contained(SI.Bases, V);
};
-
+
// Before we actually start lowering (and allocating spill slots for values),
// reserve any stack slots which we judge to be profitable to reuse for a
// particular value. This is purely an optimization over the code below and
@@ -861,7 +861,8 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP,
// completely and make statepoint call to return a tuple.
unsigned Reg = FuncInfo.CreateRegs(RetTy);
RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
- DAG.getDataLayout(), Reg, RetTy, true);
+ DAG.getDataLayout(), Reg, RetTy,
+ ISP.getCallSite().getCallingConv());
SDValue Chain = DAG.getEntryNode();
RFV.getCopyToRegs(ReturnValue, DAG, getCurSDLoc(), Chain, nullptr);
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index fa867fcec366..e317268fa5f4 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3421,7 +3421,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
/// with the multiplicative inverse of the constant.
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d,
const SDLoc &dl, SelectionDAG &DAG,
- std::vector<SDNode *> &Created) {
+ SmallVectorImpl<SDNode *> &Created) {
assert(d != 0 && "Division by zero!");
// Shift the value upfront if it is even, so the LSB is one.
@@ -3450,8 +3450,8 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d,
}
SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
- SelectionDAG &DAG,
- std::vector<SDNode *> *Created) const {
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const {
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.isIntDivCheap(N->getValueType(0), Attr))
@@ -3465,9 +3465,7 @@ SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG, bool IsAfterLegalization,
- std::vector<SDNode *> *Created) const {
- assert(Created && "No vector to hold sdiv ops.");
-
+ SmallVectorImpl<SDNode *> &Created) const {
EVT VT = N->getValueType(0);
SDLoc dl(N);
@@ -3478,7 +3476,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,
// If the sdiv has an 'exact' bit we can use a simpler lowering.
if (N->getFlags().hasExact())
- return BuildExactSDIV(*this, N->getOperand(0), Divisor, dl, DAG, *Created);
+ return BuildExactSDIV(*this, N->getOperand(0), Divisor, dl, DAG, Created);
APInt::ms magics = Divisor.magic();
@@ -3496,15 +3494,18 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,
DAG.getConstant(magics.m, dl, VT)).getNode(), 1);
else
     return SDValue(); // No mulhs or equivalent
+
+ Created.push_back(Q.getNode());
+
// If d > 0 and m < 0, add the numerator
if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {
Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0));
- Created->push_back(Q.getNode());
+ Created.push_back(Q.getNode());
}
// If d < 0 and m > 0, subtract the numerator.
if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {
Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0));
- Created->push_back(Q.getNode());
+ Created.push_back(Q.getNode());
}
auto &DL = DAG.getDataLayout();
// Shift right algebraic if shift value is nonzero
@@ -3512,14 +3513,14 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,
Q = DAG.getNode(
ISD::SRA, dl, VT, Q,
DAG.getConstant(magics.s, dl, getShiftAmountTy(Q.getValueType(), DL)));
- Created->push_back(Q.getNode());
+ Created.push_back(Q.getNode());
}
// Extract the sign bit and add it to the quotient
SDValue T =
DAG.getNode(ISD::SRL, dl, VT, Q,
DAG.getConstant(VT.getScalarSizeInBits() - 1, dl,
getShiftAmountTy(Q.getValueType(), DL)));
- Created->push_back(T.getNode());
+ Created.push_back(T.getNode());
return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
@@ -3529,9 +3530,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG, bool IsAfterLegalization,
- std::vector<SDNode *> *Created) const {
- assert(Created && "No vector to hold udiv ops.");
-
+ SmallVectorImpl<SDNode *> &Created) const {
EVT VT = N->getValueType(0);
SDLoc dl(N);
auto &DL = DAG.getDataLayout();
@@ -3554,7 +3553,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor,
Q = DAG.getNode(
ISD::SRL, dl, VT, Q,
DAG.getConstant(Shift, dl, getShiftAmountTy(Q.getValueType(), DL)));
- Created->push_back(Q.getNode());
+ Created.push_back(Q.getNode());
// Get magic number for the shifted divisor.
magics = Divisor.lshr(Shift).magicu(Shift);
@@ -3573,7 +3572,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor,
else
return SDValue(); // No mulhu or equivalent
- Created->push_back(Q.getNode());
+ Created.push_back(Q.getNode());
if (magics.a == 0) {
assert(magics.s < Divisor.getBitWidth() &&
@@ -3583,13 +3582,13 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor,
DAG.getConstant(magics.s, dl, getShiftAmountTy(Q.getValueType(), DL)));
} else {
SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
- Created->push_back(NPQ.getNode());
+ Created.push_back(NPQ.getNode());
NPQ = DAG.getNode(
ISD::SRL, dl, VT, NPQ,
DAG.getConstant(1, dl, getShiftAmountTy(NPQ.getValueType(), DL)));
- Created->push_back(NPQ.getNode());
+ Created.push_back(NPQ.getNode());
NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
- Created->push_back(NPQ.getNode());
+ Created.push_back(NPQ.getNode());
return DAG.getNode(
ISD::SRL, dl, VT, NPQ,
DAG.getConstant(magics.s - 1, dl,
@@ -3994,7 +3993,7 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
// Scalarize the load and let the individual components be handled.
SDValue Scalarized = scalarizeVectorLoad(LD, DAG);
if (Scalarized->getOpcode() == ISD::MERGE_VALUES)
- return std::make_pair(Scalarized.getOperand(0), Scalarized.getOperand(1));
+ return std::make_pair(Scalarized.getOperand(0), Scalarized.getOperand(1));
return std::make_pair(Scalarized.getValue(0), Scalarized.getValue(1));
}
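For readers of the BuildSDIV hunks above: the magic-number algorithm replaces
a signed division by a constant with a multiply-high plus the fixups the code
pushes onto Created. A worked 32-bit example for division by 7, using the
standard Hacker's Delight constants (M = 0x92492493, s = 2; illustrative, not
computed by this patch):

  #include <cstdint>
  int32_t sdiv7(int32_t n) {
    int64_t Prod = (int64_t)(int32_t)0x92492493 * n; // the MULHS step
    int32_t Q = (int32_t)(Prod >> 32);   // high half of the product
    Q += n;                   // d > 0 and M < 0: add the numerator
    Q >>= 2;                  // shift right algebraic by s
    Q += (uint32_t)Q >> 31;   // extract the sign bit and add it
    return Q;                 // truncating division: sdiv7(-100) == -14
  }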
diff --git a/lib/CodeGen/ShadowStackGCLowering.cpp b/lib/CodeGen/ShadowStackGCLowering.cpp
index 25d405bf63de..3e12b32b12d4 100644
--- a/lib/CodeGen/ShadowStackGCLowering.cpp
+++ b/lib/CodeGen/ShadowStackGCLowering.cpp
@@ -175,7 +175,7 @@ bool ShadowStackGCLowering::doInitialization(Module &M) {
}
if (!Active)
return false;
-
+
// struct FrameMap {
// int32_t NumRoots; // Number of roots in stack frame.
// int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots.
@@ -286,7 +286,7 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) {
if (!F.hasGC() ||
F.getGC() != std::string("shadow-stack"))
return false;
-
+
LLVMContext &Context = F.getContext();
// Find calls to llvm.gcroot.
diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h
index ed664e4f81a3..8fbe724045e6 100644
--- a/lib/CodeGen/SplitKit.h
+++ b/lib/CodeGen/SplitKit.h
@@ -233,7 +233,7 @@ public:
/// - Create a SplitEditor from a SplitAnalysis.
/// - Start a new live interval with openIntv.
/// - Mark the places where the new interval is entered using enterIntv*
-/// - Mark the ranges where the new interval is used with useIntv*
+/// - Mark the ranges where the new interval is used with useIntv*
/// - Mark the places where the interval is exited with exitIntv*.
/// - Finish the current interval with closeIntv and repeat from 2.
/// - Rewrite instructions with finish().
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 43f4bad595e3..7b1b76821daa 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -632,7 +632,7 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::BITREVERSE, VT, Expand);
-
+
// These library functions default to expand.
setOperationAction(ISD::FROUND, VT, Expand);
setOperationAction(ISD::FPOWI, VT, Expand);
@@ -924,7 +924,7 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
// STATEPOINT Deopt Spill - live-through, read only, indirect
// STATEPOINT Deopt Alloca - live-through, read only, direct
// (We're currently conservative and mark the deopt slots read/write in
- // practice.)
+ // practice.)
// STATEPOINT GC Spill - live-through, read/write, indirect
// STATEPOINT GC Alloca - live-through, read/write, direct
// The live-in vs live-through is handled already (the live through ones are
@@ -1337,7 +1337,8 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT
/// type of the given function. This does not require a DAG or a return value,
/// and is suitable for use before any DAGs for the function are constructed.
/// TODO: Move this out of TargetLowering.cpp.
-void llvm::GetReturnInfo(Type *ReturnType, AttributeList attr,
+void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType,
+ AttributeList attr,
SmallVectorImpl<ISD::OutputArg> &Outs,
const TargetLowering &TLI, const DataLayout &DL) {
SmallVector<EVT, 4> ValueVTs;
@@ -1365,9 +1366,9 @@ void llvm::GetReturnInfo(Type *ReturnType, AttributeList attr,
}
unsigned NumParts =
- TLI.getNumRegistersForCallingConv(ReturnType->getContext(), VT);
+ TLI.getNumRegistersForCallingConv(ReturnType->getContext(), CC, VT);
MVT PartVT =
- TLI.getRegisterTypeForCallingConv(ReturnType->getContext(), VT);
+ TLI.getRegisterTypeForCallingConv(ReturnType->getContext(), CC, VT);
// 'inreg' on function refers to return value
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
@@ -1410,7 +1411,7 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
*Fast = true;
return true;
}
-
+
// This is a misaligned access.
return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Fast);
}
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index b5dd2d4cca89..f6b91a2f0231 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -422,32 +422,34 @@ static StringRef getSectionPrefixForGlobal(SectionKind Kind) {
return ".data.rel.ro";
}
+static unsigned getEntrySizeForKind(SectionKind Kind) {
+ if (Kind.isMergeable1ByteCString())
+ return 1;
+ else if (Kind.isMergeable2ByteCString())
+ return 2;
+ else if (Kind.isMergeable4ByteCString())
+ return 4;
+ else if (Kind.isMergeableConst4())
+ return 4;
+ else if (Kind.isMergeableConst8())
+ return 8;
+ else if (Kind.isMergeableConst16())
+ return 16;
+ else if (Kind.isMergeableConst32())
+ return 32;
+ else {
+ // We shouldn't have mergeable C strings or mergeable constants that we
+ // didn't handle above.
+ assert(!Kind.isMergeableCString() && "unknown string width");
+ assert(!Kind.isMergeableConst() && "unknown data width");
+ return 0;
+ }
+}
+
static MCSectionELF *selectELFSectionForGlobal(
MCContext &Ctx, const GlobalObject *GO, SectionKind Kind, Mangler &Mang,
const TargetMachine &TM, bool EmitUniqueSection, unsigned Flags,
unsigned *NextUniqueID, const MCSymbolELF *AssociatedSymbol) {
- unsigned EntrySize = 0;
- if (Kind.isMergeableCString()) {
- if (Kind.isMergeable2ByteCString()) {
- EntrySize = 2;
- } else if (Kind.isMergeable4ByteCString()) {
- EntrySize = 4;
- } else {
- EntrySize = 1;
- assert(Kind.isMergeable1ByteCString() && "unknown string width");
- }
- } else if (Kind.isMergeableConst()) {
- if (Kind.isMergeableConst4()) {
- EntrySize = 4;
- } else if (Kind.isMergeableConst8()) {
- EntrySize = 8;
- } else if (Kind.isMergeableConst16()) {
- EntrySize = 16;
- } else {
- assert(Kind.isMergeableConst32() && "unknown data width");
- EntrySize = 32;
- }
- }
StringRef Group = "";
if (const Comdat *C = getELFComdat(GO)) {
@@ -455,7 +457,9 @@ static MCSectionELF *selectELFSectionForGlobal(
Group = C->getName();
}
- bool UniqueSectionNames = TM.getUniqueSectionNames();
+ // Get the section entry size based on the kind.
+ unsigned EntrySize = getEntrySizeForKind(Kind);
+
SmallString<128> Name;
if (Kind.isMergeableCString()) {
// We also need alignment here.
@@ -479,16 +483,17 @@ static MCSectionELF *selectELFSectionForGlobal(
Name += *OptionalPrefix;
}
- if (EmitUniqueSection && UniqueSectionNames) {
- Name.push_back('.');
- TM.getNameWithPrefix(Name, GO, Mang, true);
- }
unsigned UniqueID = MCContext::GenericSectionID;
- if (EmitUniqueSection && !UniqueSectionNames) {
- UniqueID = *NextUniqueID;
- (*NextUniqueID)++;
+ if (EmitUniqueSection) {
+ if (TM.getUniqueSectionNames()) {
+ Name.push_back('.');
+ TM.getNameWithPrefix(Name, GO, Mang, true /*MayAlwaysUsePrivate*/);
+ } else {
+ UniqueID = *NextUniqueID;
+ (*NextUniqueID)++;
+ }
}
- // Use 0 as the unique ID for execute-only text
+ // Use 0 as the unique ID for execute-only text.
if (Kind.isExecuteOnly())
UniqueID = 0;
return Ctx.getELFSection(Name, getELFSectionType(Name, Kind), Flags,
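getEntrySizeForKind now centralizes the SHF_MERGE entry-size computation that
selectELFSectionForGlobal used to do inline. The entry size ends up in the ELF
section header; for instance, a mergeable 1-byte C string typically lands in a
section like the following (standard ELF naming, not shown in this hunk):

  .section .rodata.str1.1,"aMS",@progbits,1

where "M" marks SHF_MERGE, "S" marks SHF_STRINGS, and the trailing ",1" is the
entry size returned by getEntrySizeForKind.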
diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp
index 3fca2f4ee4fe..2db03288f2ac 100644
--- a/lib/CodeGen/TargetPassConfig.cpp
+++ b/lib/CodeGen/TargetPassConfig.cpp
@@ -166,7 +166,7 @@ static cl::opt<CFLAAType> UseCFLAA(
"Enable unification-based CFL-AA"),
clEnumValN(CFLAAType::Andersen, "anders",
"Enable inclusion-based CFL-AA"),
- clEnumValN(CFLAAType::Both, "both",
+ clEnumValN(CFLAAType::Both, "both",
"Enable both variants of CFL-AA")));
/// Option names for limiting the codegen pipeline.
diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp
index e629c13f133f..65d0a7a774fe 100644
--- a/lib/CodeGen/WinEHPrepare.cpp
+++ b/lib/CodeGen/WinEHPrepare.cpp
@@ -54,7 +54,7 @@ static cl::opt<bool> DemoteCatchSwitchPHIOnlyOpt(
cl::desc("Demote catchswitch BBs only (for wasm EH)"), cl::init(false));
namespace {
-
+
class WinEHPrepare : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid.
diff --git a/lib/DebugInfo/CodeView/RecordName.cpp b/lib/DebugInfo/CodeView/RecordName.cpp
index e50c43a1d481..d868ae237a44 100644
--- a/lib/DebugInfo/CodeView/RecordName.cpp
+++ b/lib/DebugInfo/CodeView/RecordName.cpp
@@ -307,6 +307,9 @@ static int getSymbolNameOffset(CVSymbol Sym) {
// See BPRelativeSym
case SymbolKind::S_BPREL32:
return 8;
+ // See UsingNamespaceSym
+ case SymbolKind::S_UNAMESPACE:
+ return 0;
default:
return -1;
}
diff --git a/lib/DebugInfo/CodeView/SymbolDumper.cpp b/lib/DebugInfo/CodeView/SymbolDumper.cpp
index af249adc9774..f8bf961f22a1 100644
--- a/lib/DebugInfo/CodeView/SymbolDumper.cpp
+++ b/lib/DebugInfo/CodeView/SymbolDumper.cpp
@@ -611,6 +611,12 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, UDTSym &UDT) {
return Error::success();
}
+Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
+ UsingNamespaceSym &UN) {
+ W.printString("Namespace", UN.Name);
+ return Error::success();
+}
+
Error CVSymbolDumperImpl::visitUnknownSymbol(CVSymbol &CVR) {
W.printNumber("Length", CVR.length());
return Error::success();
diff --git a/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp b/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
index 923837a45d9f..e77c8e8f02f5 100644
--- a/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
+++ b/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
@@ -463,3 +463,11 @@ Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR, UDTSym &UDT) {
return Error::success();
}
+
+Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR,
+ UsingNamespaceSym &UN) {
+
+ error(IO.mapStringZ(UN.Name));
+
+ return Error::success();
+}
diff --git a/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp b/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
index 95082d4a8e03..839ab6f0a705 100644
--- a/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
+++ b/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
@@ -428,7 +428,7 @@ static bool discoverTypeIndices(ArrayRef<uint8_t> Content, SymbolKind Kind,
case SymbolKind::S_DEFRANGE_SUBFIELD:
break;
- // No type refernces.
+ // No type references.
case SymbolKind::S_LABEL32:
case SymbolKind::S_OBJNAME:
case SymbolKind::S_COMPILE:
@@ -439,6 +439,7 @@ static bool discoverTypeIndices(ArrayRef<uint8_t> Content, SymbolKind Kind,
case SymbolKind::S_FRAMEPROC:
case SymbolKind::S_THUNK32:
case SymbolKind::S_FRAMECOOKIE:
+ case SymbolKind::S_UNAMESPACE:
break;
// Scope ending symbols.
case SymbolKind::S_END:
diff --git a/lib/DebugInfo/CodeView/TypeStreamMerger.cpp b/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
index e4f39dd988e1..2e29c9d7dfa0 100644
--- a/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
+++ b/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
@@ -226,7 +226,10 @@ bool TypeStreamMerger::remapIndexFallback(TypeIndex &Idx,
if (IsSecondPass && MapPos >= Map.size()) {
// FIXME: Print a more useful error. We can give the current record and the
     // index that we think it's pointing to.
- LastError = joinErrors(std::move(*LastError), errorCorruptRecord());
+ if (LastError)
+ LastError = joinErrors(std::move(*LastError), errorCorruptRecord());
+ else
+ LastError = errorCorruptRecord();
}
++NumBadIndices;
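The added null check matters because LastError is an Optional<Error> here, and
joinErrors(std::move(*LastError), ...) would dereference an empty Optional on
the first failure. The accumulation pattern in general form (a sketch,
assuming Optional<Error> LastError as in this file):

  if (LastError)   // already have an error: chain the new one onto it
    LastError = joinErrors(std::move(*LastError), errorCorruptRecord());
  else             // first failure: nothing to join yet
    LastError = errorCorruptRecord();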
diff --git a/lib/DebugInfo/DWARF/CMakeLists.txt b/lib/DebugInfo/DWARF/CMakeLists.txt
index d88a02721700..b4770e561f71 100644
--- a/lib/DebugInfo/DWARF/CMakeLists.txt
+++ b/lib/DebugInfo/DWARF/CMakeLists.txt
@@ -6,6 +6,7 @@ add_llvm_library(LLVMDebugInfoDWARF
DWARFContext.cpp
DWARFDataExtractor.cpp
DWARFDebugAbbrev.cpp
+ DWARFDebugAddr.cpp
DWARFDebugArangeSet.cpp
DWARFDebugAranges.cpp
DWARFDebugFrame.cpp
diff --git a/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp b/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
index adada672af00..f49ab40fad9a 100644
--- a/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
+++ b/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
@@ -38,7 +38,7 @@ DWARFAbbreviationDeclaration::DWARFAbbreviationDeclaration() {
}
bool
-DWARFAbbreviationDeclaration::extract(DataExtractor Data,
+DWARFAbbreviationDeclaration::extract(DataExtractor Data,
uint32_t* OffsetPtr) {
clear();
const uint32_t Offset = *OffsetPtr;
diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp
index da13c5047f77..9d2554ff9e2e 100644
--- a/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -17,6 +17,7 @@
#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugAddr.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugAranges.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
@@ -249,6 +250,36 @@ static void dumpStringOffsetsSection(
}
}
+// Dump the .debug_addr section.
+static void dumpAddrSection(raw_ostream &OS, DWARFDataExtractor &AddrData,
+ DIDumpOptions DumpOpts, uint16_t Version,
+ uint8_t AddrSize) {
+ // TODO: Make this more general: add callback types to Error.h, create
+ // implementation and make all DWARF classes use them.
+ static auto WarnCallback = [](Error Warn) {
+ handleAllErrors(std::move(Warn), [](ErrorInfoBase &Info) {
+ WithColor::warning() << Info.message() << '\n';
+ });
+ };
+ uint32_t Offset = 0;
+ while (AddrData.isValidOffset(Offset)) {
+ DWARFDebugAddrTable AddrTable;
+ uint32_t TableOffset = Offset;
+ if (Error Err = AddrTable.extract(AddrData, &Offset, Version,
+ AddrSize, WarnCallback)) {
+ WithColor::error() << toString(std::move(Err)) << '\n';
+ // Keep going after an error, if we can, assuming that the length field
+ // could be read. If it couldn't, stop reading the section.
+ if (!AddrTable.hasValidLength())
+ break;
+ uint64_t Length = AddrTable.getLength();
+ Offset = TableOffset + Length;
+ } else {
+ AddrTable.dump(OS, DumpOpts);
+ }
+ }
+}
+
// Dump the .debug_rnglists or .debug_rnglists.dwo section (DWARF v5).
static void dumpRnglistsSection(raw_ostream &OS,
DWARFDataExtractor &rnglistData,
@@ -455,18 +486,16 @@ void DWARFContext::dump(
}
}
+ if (shouldDump(Explicit, ".debug_addr", DIDT_ID_DebugAddr,
+ DObj->getAddrSection().Data)) {
+ DWARFDataExtractor AddrData(*DObj, DObj->getAddrSection(),
+ isLittleEndian(), 0);
+ dumpAddrSection(OS, AddrData, DumpOpts, getMaxVersion(), getCUAddrSize());
+ }
+
if (shouldDump(Explicit, ".debug_ranges", DIDT_ID_DebugRanges,
DObj->getRangeSection().Data)) {
- // In fact, different compile units may have different address byte
- // sizes, but for simplicity we just use the address byte size of the
- // last compile unit (there is no easy and fast way to associate address
- // range list and the compile unit it describes).
- // FIXME: savedAddressByteSize seems sketchy.
- uint8_t savedAddressByteSize = 0;
- for (const auto &CU : compile_units()) {
- savedAddressByteSize = CU->getAddressByteSize();
- break;
- }
+ uint8_t savedAddressByteSize = getCUAddrSize();
DWARFDataExtractor rangesData(*DObj, DObj->getRangeSection(),
isLittleEndian(), savedAddressByteSize);
uint32_t offset = 0;
@@ -474,7 +503,7 @@ void DWARFContext::dump(
while (rangesData.isValidOffset(offset)) {
if (Error E = rangeList.extract(rangesData, &offset)) {
WithColor::error() << toString(std::move(E)) << '\n';
- break;
+ break;
}
rangeList.dump(OS);
}
@@ -1584,3 +1613,17 @@ Error DWARFContext::loadRegisterInfo(const object::ObjectFile &Obj) {
RegInfo.reset(TheTarget->createMCRegInfo(TT.str()));
return Error::success();
}
+
+uint8_t DWARFContext::getCUAddrSize() {
+ // In theory, different compile units may have different address byte
+ // sizes, but for simplicity we just use the address byte size of the
+  // first compile unit. In practice the address size field is repeated across
+ // various DWARF headers (at least in version 5) to make it easier to dump
+ // them independently, not to enable varying the address size.
+ uint8_t Addr = 0;
+ for (const auto &CU : compile_units()) {
+ Addr = CU->getAddressByteSize();
+ break;
+ }
+ return Addr;
+}
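Reviewer note: dumpAddrSection drains recoverable problems through a callback instead of returning them, so one bad table does not abort the whole dump. A minimal sketch of that callback shape, using only the llvm/Support APIs that appear above:

    #include "llvm/Support/Error.h"
    #include "llvm/Support/WithColor.h"

    // Print each contained error as a warning and consume it, mirroring the
    // WarnCallback lambda in dumpAddrSection.
    static void warnAndContinue(llvm::Error W) {
      llvm::handleAllErrors(std::move(W), [](llvm::ErrorInfoBase &Info) {
        llvm::WithColor::warning() << Info.message() << '\n';
      });
    }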
diff --git a/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp b/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp
new file mode 100644
index 000000000000..7085ca067ba6
--- /dev/null
+++ b/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp
@@ -0,0 +1,198 @@
+//===- DWARFDebugAddr.cpp -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/DWARF/DWARFDebugAddr.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
+
+using namespace llvm;
+
+void DWARFDebugAddrTable::clear() {
+ HeaderData = {};
+ Addrs.clear();
+ invalidateLength();
+}
+
+Error DWARFDebugAddrTable::extract(DWARFDataExtractor Data,
+ uint32_t *OffsetPtr,
+ uint16_t Version,
+ uint8_t AddrSize,
+ std::function<void(Error)> WarnCallback) {
+ clear();
+ HeaderOffset = *OffsetPtr;
+ // Read and verify the length field.
+ if (!Data.isValidOffsetForDataOfSize(*OffsetPtr, sizeof(uint32_t)))
+ return createStringError(errc::invalid_argument,
+ "section is not large enough to contain a "
+ ".debug_addr table length at offset 0x%"
+ PRIx32, *OffsetPtr);
+ uint16_t UnitVersion;
+ if (Version == 0) {
+ WarnCallback(createStringError(errc::invalid_argument,
+ "DWARF version is not defined in CU,"
+ " assuming version 5"));
+ UnitVersion = 5;
+ } else {
+ UnitVersion = Version;
+ }
+ // TODO: Add support for DWARF64.
+ Format = dwarf::DwarfFormat::DWARF32;
+ if (UnitVersion >= 5) {
+ HeaderData.Length = Data.getU32(OffsetPtr);
+ if (HeaderData.Length == 0xffffffffu) {
+ invalidateLength();
+ return createStringError(errc::not_supported,
+ "DWARF64 is not supported in .debug_addr at offset 0x%" PRIx32,
+ HeaderOffset);
+ }
+ if (HeaderData.Length + sizeof(uint32_t) < sizeof(Header)) {
+ uint32_t TmpLength = getLength();
+ invalidateLength();
+ return createStringError(errc::invalid_argument,
+ ".debug_addr table at offset 0x%" PRIx32
+ " has too small length (0x%" PRIx32
+ ") to contain a complete header",
+ HeaderOffset, TmpLength);
+ }
+ uint32_t End = HeaderOffset + getLength();
+ if (!Data.isValidOffsetForDataOfSize(HeaderOffset, End - HeaderOffset)) {
+ uint32_t TmpLength = getLength();
+ invalidateLength();
+ return createStringError(errc::invalid_argument,
+ "section is not large enough to contain a .debug_addr table "
+ "of length 0x%" PRIx32 " at offset 0x%" PRIx32,
+ TmpLength, HeaderOffset);
+ }
+
+ HeaderData.Version = Data.getU16(OffsetPtr);
+ HeaderData.AddrSize = Data.getU8(OffsetPtr);
+ HeaderData.SegSize = Data.getU8(OffsetPtr);
+ DataSize = getDataSize();
+ } else {
+ HeaderData.Version = UnitVersion;
+ HeaderData.AddrSize = AddrSize;
+ // TODO: Support for non-zero SegSize.
+ HeaderData.SegSize = 0;
+ DataSize = Data.size();
+ }
+
+ // Perform basic validation of the remaining header fields.
+
+  // We support DWARF version 5 for now, as well as pre-DWARF5
+  // implementations of the .debug_addr table, which don't contain a header
+  // and consist only of a series of addresses.
+ if (HeaderData.Version > 5) {
+ return createStringError(errc::not_supported, "version %" PRIu16
+ " of .debug_addr section at offset 0x%" PRIx32 " is not supported",
+ HeaderData.Version, HeaderOffset);
+ }
+ // FIXME: For now we just treat version mismatch as an error,
+ // however the correct way to associate a .debug_addr table
+ // with a .debug_info table is to look at the DW_AT_addr_base
+ // attribute in the info table.
+ if (HeaderData.Version != UnitVersion)
+ return createStringError(errc::invalid_argument,
+ ".debug_addr table at offset 0x%" PRIx32
+ " has version %" PRIu16
+ " which is different from the version suggested"
+ " by the DWARF unit header: %" PRIu16,
+ HeaderOffset, HeaderData.Version, UnitVersion);
+ if (HeaderData.AddrSize != 4 && HeaderData.AddrSize != 8)
+ return createStringError(errc::not_supported,
+ ".debug_addr table at offset 0x%" PRIx32
+ " has unsupported address size %" PRIu8,
+ HeaderOffset, HeaderData.AddrSize);
+ if (HeaderData.AddrSize != AddrSize && AddrSize != 0)
+ return createStringError(errc::invalid_argument,
+ ".debug_addr table at offset 0x%" PRIx32
+ " has address size %" PRIu8
+ " which is different from CU address size %" PRIu8,
+ HeaderOffset, HeaderData.AddrSize, AddrSize);
+
+ // TODO: add support for non-zero segment selector size.
+ if (HeaderData.SegSize != 0)
+ return createStringError(errc::not_supported,
+ ".debug_addr table at offset 0x%" PRIx32
+ " has unsupported segment selector size %" PRIu8,
+ HeaderOffset, HeaderData.SegSize);
+ if (DataSize % HeaderData.AddrSize != 0) {
+ invalidateLength();
+ return createStringError(errc::invalid_argument,
+ ".debug_addr table at offset 0x%" PRIx32
+ " contains data of size %" PRIu32
+ " which is not a multiple of addr size %" PRIu8,
+ HeaderOffset, DataSize, HeaderData.AddrSize);
+ }
+ Data.setAddressSize(HeaderData.AddrSize);
+ uint32_t AddrCount = DataSize / HeaderData.AddrSize;
+ for (uint32_t I = 0; I < AddrCount; ++I)
+ if (HeaderData.AddrSize == 4)
+ Addrs.push_back(Data.getU32(OffsetPtr));
+ else
+ Addrs.push_back(Data.getU64(OffsetPtr));
+ return Error::success();
+}
+
+void DWARFDebugAddrTable::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const {
+ if (DumpOpts.Verbose)
+ OS << format("0x%8.8" PRIx32 ": ", HeaderOffset);
+ OS << format("Addr Section: length = 0x%8.8" PRIx32
+ ", version = 0x%4.4" PRIx16 ", "
+ "addr_size = 0x%2.2" PRIx8 ", seg_size = 0x%2.2" PRIx8 "\n",
+ HeaderData.Length, HeaderData.Version, HeaderData.AddrSize,
+ HeaderData.SegSize);
+
+ static const char *Fmt32 = "0x%8.8" PRIx32;
+ static const char *Fmt64 = "0x%16.16" PRIx64;
+ std::string AddrFmt = "\n";
+ std::string AddrFmtVerbose = " => ";
+ if (HeaderData.AddrSize == 4) {
+ AddrFmt.append(Fmt32);
+ AddrFmtVerbose.append(Fmt32);
+  } else {
+ AddrFmt.append(Fmt64);
+ AddrFmtVerbose.append(Fmt64);
+ }
+
+ if (Addrs.size() > 0) {
+ OS << "Addrs: [";
+ for (uint64_t Addr : Addrs) {
+ OS << format(AddrFmt.c_str(), Addr);
+ if (DumpOpts.Verbose)
+ OS << format(AddrFmtVerbose.c_str(),
+ Addr + HeaderOffset + sizeof(HeaderData));
+ }
+ OS << "\n]\n";
+ }
+}
+
+Expected<uint64_t> DWARFDebugAddrTable::getAddrEntry(uint32_t Index) const {
+ if (Index < Addrs.size())
+ return Addrs[Index];
+ return createStringError(errc::invalid_argument,
+ "Index %" PRIu32 " is out of range of the "
+ ".debug_addr table at offset 0x%" PRIx32,
+ Index, HeaderOffset);
+}
+
+uint32_t DWARFDebugAddrTable::getLength() const {
+ if (HeaderData.Length == 0)
+ return 0;
+ // TODO: DWARF64 support.
+ return HeaderData.Length + sizeof(uint32_t);
+}
+
+uint32_t DWARFDebugAddrTable::getDataSize() const {
+ if (DataSize != 0)
+ return DataSize;
+ if (getLength() == 0)
+ return 0;
+ return getLength() - getHeaderSize();
+}
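Reviewer note: a worked byte layout may help when reading the validation logic above. The table below is illustrative, consistent with extract(): unit_length excludes its own four bytes (hence getLength() adding sizeof(uint32_t)), and the remaining header is version, address size, and segment selector size.

    #include <cstdint>

    // Illustrative DWARF v5 .debug_addr table: little-endian, two 4-byte
    // addresses, no segment selectors.
    static const uint8_t DebugAddrV5[] = {
        0x0c, 0x00, 0x00, 0x00, // unit_length = 12 (4 header bytes + 8 of data)
        0x05, 0x00,             // version = 5
        0x04,                   // address_size = 4
        0x00,                   // segment_selector_size = 0
        0x10, 0x20, 0x30, 0x40, // address[0] = 0x40302010
        0x50, 0x60, 0x70, 0x80, // address[1] = 0x80706050
    };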
diff --git a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
index 2a89faff9647..08be524ab464 100644
--- a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
+++ b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
@@ -155,7 +155,7 @@ std::error_code SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol,
// of the function's code, not the descriptor.
uint64_t OpdOffset = SymbolAddress - OpdAddress;
uint32_t OpdOffset32 = OpdOffset;
- if (OpdOffset == OpdOffset32 &&
+ if (OpdOffset == OpdOffset32 &&
OpdExtractor->isValidOffsetForAddress(OpdOffset32))
SymbolAddress = OpdExtractor->getAddress(&OpdOffset32);
}
diff --git a/lib/Demangle/ItaniumDemangle.cpp b/lib/Demangle/ItaniumDemangle.cpp
index 5bfd2e6ff87e..72e4b56c05e3 100644
--- a/lib/Demangle/ItaniumDemangle.cpp
+++ b/lib/Demangle/ItaniumDemangle.cpp
@@ -450,6 +450,8 @@ class ReferenceType : public Node {
const Node *Pointee;
ReferenceKind RK;
+ mutable bool Printing = false;
+
// Dig through any refs to refs, collapsing the ReferenceTypes as we go. The
// rule here is rvalue ref to rvalue ref collapses to a rvalue ref, and any
// other combination collapses to a lvalue ref.
@@ -476,6 +478,9 @@ public:
}
void printLeft(OutputStream &s) const override {
+ if (Printing)
+ return;
+ SwapAndRestore<bool> SavePrinting(Printing, true);
std::pair<ReferenceKind, const Node *> Collapsed = collapse(s);
Collapsed.second->printLeft(s);
if (Collapsed.second->hasArray(s))
@@ -486,6 +491,9 @@ public:
s += (Collapsed.first == ReferenceKind::LValue ? "&" : "&&");
}
void printRight(OutputStream &s) const override {
+ if (Printing)
+ return;
+ SwapAndRestore<bool> SavePrinting(Printing, true);
std::pair<ReferenceKind, const Node *> Collapsed = collapse(s);
if (Collapsed.second->hasArray(s) || Collapsed.second->hasFunction(s))
s += ")";
diff --git a/lib/Demangle/MicrosoftDemangle.cpp b/lib/Demangle/MicrosoftDemangle.cpp
index 596359b7d990..3eac87d61011 100644
--- a/lib/Demangle/MicrosoftDemangle.cpp
+++ b/lib/Demangle/MicrosoftDemangle.cpp
@@ -29,15 +29,27 @@
// the demangler is 3x faster with this allocator compared to one with
// STL containers.
namespace {
+ constexpr size_t AllocUnit = 4096;
+
class ArenaAllocator {
struct AllocatorNode {
uint8_t *Buf = nullptr;
size_t Used = 0;
+ size_t Capacity = 0;
AllocatorNode *Next = nullptr;
};
+ void addNode(size_t Capacity) {
+ AllocatorNode *NewHead = new AllocatorNode;
+ NewHead->Buf = new uint8_t[Capacity];
+ NewHead->Next = Head;
+ NewHead->Capacity = Capacity;
+ Head = NewHead;
+ NewHead->Used = 0;
+ }
+
public:
- ArenaAllocator() : Head(new AllocatorNode) { Head->Buf = new uint8_t[Unit]; }
+ ArenaAllocator() { addNode(AllocUnit); }
~ArenaAllocator() {
while (Head) {
@@ -49,10 +61,25 @@ public:
}
}
+ char *allocUnalignedBuffer(size_t Length) {
+ uint8_t *Buf = Head->Buf + Head->Used;
+
+ Head->Used += Length;
+ if (Head->Used > Head->Capacity) {
+ // It's possible we need a buffer which is larger than our default unit
+ // size, so we need to be careful to add a node with capacity that is at
+ // least as large as what we need.
+ addNode(std::max(AllocUnit, Length));
+ Head->Used = Length;
+ Buf = Head->Buf;
+ }
+
+ return reinterpret_cast<char *>(Buf);
+ }
+
template <typename T, typename... Args> T *alloc(Args &&... ConstructorArgs) {
size_t Size = sizeof(T);
- assert(Size < Unit);
assert(Head && Head->Buf);
size_t P = (size_t)Head->Buf + Head->Used;
@@ -62,20 +89,15 @@ public:
size_t Adjustment = AlignedP - P;
Head->Used += Size + Adjustment;
- if (Head->Used < Unit)
+ if (Head->Used < Head->Capacity)
return new (PP) T(std::forward<Args>(ConstructorArgs)...);
- AllocatorNode *NewHead = new AllocatorNode;
- NewHead->Buf = new uint8_t[ArenaAllocator::Unit];
- NewHead->Next = Head;
- Head = NewHead;
- NewHead->Used = Size;
- return new (NewHead->Buf) T(std::forward<Args>(ConstructorArgs)...);
+ addNode(AllocUnit);
+ Head->Used = Size;
+ return new (Head->Buf) T(std::forward<Args>(ConstructorArgs)...);
}
private:
- static constexpr size_t Unit = 4096;
-
AllocatorNode *Head = nullptr;
};
} // namespace
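Reviewer note: a short usage sketch of the two allocation paths added above (Payload is illustrative; the demangler allocates its Name/Type nodes the same way):

    struct Payload { int Value = 0; };

    void example(ArenaAllocator &Arena) {
      // Aligned, placement-new'd node allocation.
      Payload *P = Arena.alloc<Payload>();
      // Raw bytes for string data; a request larger than AllocUnit gets its
      // own node sized to fit, which is why addNode takes a capacity.
      char *Buf = Arena.allocUnalignedBuffer(8192);
      (void)P;
      (void)Buf;
    }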
@@ -117,7 +139,7 @@ enum class StorageClass : uint8_t {
enum class QualifierMangleMode { Drop, Mangle, Result };
-enum class PointerAffinity { Pointer, Reference };
+enum class PointerAffinity { Pointer, Reference, RValueReference };
// Calling conventions
enum class CallingConv : uint8_t {
@@ -141,7 +163,6 @@ enum class PrimTy : uint8_t {
None,
Function,
Ptr,
- Ref,
MemberPtr,
Array,
@@ -155,6 +176,8 @@ enum class PrimTy : uint8_t {
Char,
Schar,
Uchar,
+ Char16,
+ Char32,
Short,
Ushort,
Int,
@@ -167,6 +190,7 @@ enum class PrimTy : uint8_t {
Float,
Double,
Ldouble,
+ Nullptr
};
// Function classes
@@ -183,15 +207,30 @@ enum FuncClass : uint8_t {
namespace {
struct Type;
+struct Name;
-// Represents a list of parameters (template params or function arguments.
-// It's represented as a linked list.
-struct ParamList {
+struct FunctionParams {
bool IsVariadic = false;
Type *Current = nullptr;
- ParamList *Next = nullptr;
+ FunctionParams *Next = nullptr;
+};
+
+struct TemplateParams {
+ bool IsTemplateTemplate = false;
+ bool IsAliasTemplate = false;
+
+ // Type can be null if this is a template template parameter. In that case
+ // only Name will be valid.
+ Type *ParamType = nullptr;
+
+ // Name can be valid if this is a template template parameter (see above) or
+ // this is a function declaration (e.g. foo<&SomeFunc>). In the latter case
+ // Name contains the name of the function and Type contains the signature.
+ Name *ParamName = nullptr;
+
+ TemplateParams *Next = nullptr;
};
// The type class. Mangled symbols are first parsed and converted to
@@ -232,7 +271,7 @@ struct Name {
StringView Operator;
// Template parameters. Null if not a template.
- ParamList TemplateParams;
+ TemplateParams *TParams = nullptr;
// Nested BackReferences (e.g. "A::B::C") are represented as a linked list.
Name *Next = nullptr;
@@ -243,6 +282,8 @@ struct PointerType : public Type {
void outputPre(OutputStream &OS) override;
void outputPost(OutputStream &OS) override;
+ PointerAffinity Affinity;
+
// Represents a type X in "a pointer to X", "a reference to X",
// "an array of X", or "a function returning X".
Type *Pointee = nullptr;
@@ -276,7 +317,7 @@ struct FunctionType : public Type {
CallingConv CallConvention;
FuncClass FunctionClass;
- ParamList Params;
+ FunctionParams Params;
};
struct UdtType : public Type {
@@ -302,9 +343,13 @@ struct ArrayType : public Type {
static bool isMemberPointer(StringView MangledName) {
switch (MangledName.popFront()) {
+ case '$':
+ // This is probably an rvalue reference (e.g. $$Q), and you cannot have an
+ // rvalue reference to a member.
+ return false;
case 'A':
// 'A' indicates a reference, and you cannot have a reference to a member
- // function or member variable.
+ // function or member.
return false;
case 'P':
case 'Q':
@@ -386,14 +431,58 @@ static void outputCallingConvention(OutputStream &OS, CallingConv CC) {
}
}
+static bool startsWithLocalScopePattern(StringView S) {
+ if (!S.consumeFront('?'))
+ return false;
+ if (S.size() < 2)
+ return false;
+
+ size_t End = S.find('?');
+ if (End == StringView::npos)
+ return false;
+ StringView Candidate = S.substr(0, End);
+ if (Candidate.empty())
+ return false;
+
+ // \?[0-9]\?
+ // ?@? is the discriminator 0.
+ if (Candidate.size() == 1)
+ return Candidate[0] == '@' || (Candidate[0] >= '0' && Candidate[0] <= '9');
+
+ // If it's not 0-9, then it's an encoded number terminated with an @
+ if (Candidate.back() != '@')
+ return false;
+ Candidate = Candidate.dropBack();
+
+ // An encoded number starts with B-P and all subsequent digits are in A-P.
+  // Note that the reason the first digit cannot be A is twofold. First, it
+  // would create an ambiguity with ?A, which delimits the beginning of an
+  // anonymous namespace. Second, A represents 0, and you don't start a
+  // multi-digit number with a leading 0. Presumably the anonymous namespace
+  // ambiguity is also why single-digit encoded numbers use 0-9 rather than A-J.
+ if (Candidate[0] < 'B' || Candidate[0] > 'P')
+ return false;
+ Candidate = Candidate.dropFront();
+ while (!Candidate.empty()) {
+ if (Candidate[0] < 'A' || Candidate[0] > 'P')
+ return false;
+ Candidate = Candidate.dropFront();
+ }
+
+ return true;
+}
+
+static void outputName(OutputStream &OS, const Name *TheName);
+
// Write a function or template parameter list.
-static void outputParameterList(OutputStream &OS, const ParamList &Params) {
+static void outputParameterList(OutputStream &OS,
+ const FunctionParams &Params) {
if (!Params.Current) {
OS << "void";
return;
}
- const ParamList *Head = &Params;
+ const FunctionParams *Head = &Params;
while (Head) {
Type::outputPre(OS, *Head->Current);
Type::outputPost(OS, *Head->Current);
@@ -405,12 +494,39 @@ static void outputParameterList(OutputStream &OS, const ParamList &Params) {
}
}
-static void outputTemplateParams(OutputStream &OS, const Name &TheName) {
- if (!TheName.TemplateParams.Current)
+static void outputParameterList(OutputStream &OS,
+ const TemplateParams &Params) {
+ if (!Params.ParamType && !Params.ParamName) {
+ OS << "<>";
return;
+ }
OS << "<";
- outputParameterList(OS, TheName.TemplateParams);
+ const TemplateParams *Head = &Params;
+ while (Head) {
+ // Type can be null if this is a template template parameter,
+ // and Name can be null if this is a simple type.
+
+ if (Head->ParamType && Head->ParamName) {
+ // Function pointer.
+ OS << "&";
+ Type::outputPre(OS, *Head->ParamType);
+ outputName(OS, Head->ParamName);
+ Type::outputPost(OS, *Head->ParamType);
+ } else if (Head->ParamType) {
+ // simple type.
+ Type::outputPre(OS, *Head->ParamType);
+ Type::outputPost(OS, *Head->ParamType);
+ } else {
+ // Template alias.
+ outputName(OS, Head->ParamName);
+ }
+
+ Head = Head->Next;
+
+ if (Head)
+ OS << ", ";
+ }
OS << ">";
}
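Reviewer note: a worked example of the [A-P] encoding validated by startsWithLocalScopePattern, per the "A = 0, B = 1, ..." hex-digit scheme in demangleNumber's grammar comment: the digits "BA" decode to 0x10 == 16, so "?BA@?..." carries discriminator 16. A sketch of the decode step:

    #include <cstddef>

    // Decode a run of validated [A-P] digits as base-16 (A = 0 ... P = 15).
    static int decodeEncodedNumber(const char *Digits, size_t Len) {
      int Value = 0;
      for (size_t I = 0; I < Len; ++I)
        Value = (Value << 4) | (Digits[I] - 'A');
      return Value;
    }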
@@ -420,29 +536,32 @@ static void outputName(OutputStream &OS, const Name *TheName) {
outputSpaceIfNecessary(OS);
+ const Name *Previous = nullptr;
// Print out namespaces or outer class BackReferences.
for (; TheName->Next; TheName = TheName->Next) {
+ Previous = TheName;
OS << TheName->Str;
- outputTemplateParams(OS, *TheName);
+ if (TheName->TParams)
+ outputParameterList(OS, *TheName->TParams);
OS << "::";
}
// Print out a regular name.
if (TheName->Operator.empty()) {
OS << TheName->Str;
- outputTemplateParams(OS, *TheName);
+ if (TheName->TParams)
+ outputParameterList(OS, *TheName->TParams);
return;
}
// Print out ctor or dtor.
+ if (TheName->Operator == "dtor")
+ OS << "~";
+
if (TheName->Operator == "ctor" || TheName->Operator == "dtor") {
- OS << TheName->Str;
- outputTemplateParams(OS, *TheName);
- OS << "::";
- if (TheName->Operator == "dtor")
- OS << "~";
- OS << TheName->Str;
- outputTemplateParams(OS, *TheName);
+ OS << Previous->Str;
+ if (Previous->TParams)
+ outputParameterList(OS, *Previous->TParams);
return;
}
@@ -514,6 +633,12 @@ void Type::outputPre(OutputStream &OS) {
case PrimTy::Uchar:
OS << "unsigned char";
break;
+ case PrimTy::Char16:
+ OS << "char16_t";
+ break;
+ case PrimTy::Char32:
+ OS << "char32_t";
+ break;
case PrimTy::Short:
OS << "short";
break;
@@ -550,6 +675,9 @@ void Type::outputPre(OutputStream &OS) {
case PrimTy::Ldouble:
OS << "long double";
break;
+ case PrimTy::Nullptr:
+ OS << "std::nullptr_t";
+ break;
default:
assert(false && "Invalid primitive type!");
}
@@ -584,8 +712,10 @@ static void outputPointerIndicator(OutputStream &OS, PointerAffinity Affinity,
if (Affinity == PointerAffinity::Pointer)
OS << "*";
- else
+ else if (Affinity == PointerAffinity::Reference)
OS << "&";
+ else
+ OS << "&&";
}
void PointerType::outputPre(OutputStream &OS) {
@@ -596,9 +726,6 @@ void PointerType::outputPre(OutputStream &OS) {
if (Quals & Q_Unaligned)
OS << "__unaligned ";
- PointerAffinity Affinity = (Prim == PrimTy::Ptr) ? PointerAffinity::Pointer
- : PointerAffinity::Reference;
-
outputPointerIndicator(OS, Affinity, nullptr, Pointee);
// FIXME: We should output this, but it requires updating lots of tests.
@@ -668,6 +795,15 @@ void FunctionType::outputPost(OutputStream &OS) {
OS << " const";
if (Quals & Q_Volatile)
OS << " volatile";
+ if (Quals & Q_Restrict)
+ OS << " __restrict";
+ if (Quals & Q_Unaligned)
+ OS << " __unaligned";
+
+ if (RefKind == ReferenceKind::LValueRef)
+ OS << " &";
+ else if (RefKind == ReferenceKind::RValueRef)
+ OS << " &&";
if (ReturnType)
Type::outputPost(OS, *ReturnType);
@@ -716,6 +852,11 @@ void ArrayType::outputPost(OutputStream &OS) {
Type::outputPost(OS, *ElementType);
}
+struct Symbol {
+ Name *SymbolName = nullptr;
+ Type *SymbolType = nullptr;
+};
+
} // namespace
namespace {
@@ -725,63 +866,68 @@ namespace {
// It also has a set of functions to convert Type instances to strings.
class Demangler {
public:
- Demangler(OutputStream &OS, StringView s) : OS(OS), MangledName(s) {}
+ Demangler() = default;
// You are supposed to call parse() first and then check if Error is true. If
// it is false, call output() to write the formatted name to the given stream.
- void parse();
- void output();
+ Symbol *parse(StringView &MangledName);
+ void output(const Symbol *S, OutputStream &OS);
// True if an error occurred.
bool Error = false;
private:
- Type *demangleVariableEncoding();
- Type *demangleFunctionEncoding();
+ Type *demangleVariableEncoding(StringView &MangledName);
+ Type *demangleFunctionEncoding(StringView &MangledName);
- Qualifiers demanglePointerExtQualifiers();
+ Qualifiers demanglePointerExtQualifiers(StringView &MangledName);
// Parser functions. This is a recursive-descent parser.
- Type *demangleType(QualifierMangleMode QMM);
- Type *demangleBasicType();
- UdtType *demangleClassType();
- PointerType *demanglePointerType();
- MemberPointerType *demangleMemberPointerType();
- FunctionType *demangleFunctionType(bool HasThisQuals, bool IsFunctionPointer);
+ Type *demangleType(StringView &MangledName, QualifierMangleMode QMM);
+ Type *demangleBasicType(StringView &MangledName);
+ UdtType *demangleClassType(StringView &MangledName);
+ PointerType *demanglePointerType(StringView &MangledName);
+ MemberPointerType *demangleMemberPointerType(StringView &MangledName);
+ FunctionType *demangleFunctionType(StringView &MangledName, bool HasThisQuals,
+ bool IsFunctionPointer);
- ArrayType *demangleArrayType();
+ ArrayType *demangleArrayType(StringView &MangledName);
- ParamList demangleTemplateParameterList();
- ParamList demangleFunctionParameterList();
+ TemplateParams *demangleTemplateParameterList(StringView &MangledName);
+ FunctionParams demangleFunctionParameterList(StringView &MangledName);
- int demangleNumber();
- void demangleNamePiece(Name &Node, bool IsHead);
+ int demangleNumber(StringView &MangledName);
- StringView demangleString(bool memorize);
void memorizeString(StringView s);
- Name *demangleName();
- void demangleOperator(Name *);
- StringView demangleOperatorName();
- FuncClass demangleFunctionClass();
- CallingConv demangleCallingConvention();
- StorageClass demangleVariableStorageClass();
- ReferenceKind demangleReferenceKind();
- void demangleThrowSpecification();
- std::pair<Qualifiers, bool> demangleQualifiers();
+ /// Allocate a copy of \p Borrowed into memory that we own.
+ StringView copyString(StringView Borrowed);
- // The result is written to this stream.
- OutputStream OS;
+ Name *demangleFullyQualifiedTypeName(StringView &MangledName);
+ Name *demangleFullyQualifiedSymbolName(StringView &MangledName);
- // Mangled symbol. demangle* functions shorten this string
- // as they parse it.
- StringView MangledName;
+ Name *demangleUnqualifiedTypeName(StringView &MangledName);
+ Name *demangleUnqualifiedSymbolName(StringView &MangledName);
- // A parsed mangled symbol.
- Type *SymbolType = nullptr;
+ Name *demangleNameScopeChain(StringView &MangledName, Name *UnqualifiedName);
+ Name *demangleNameScopePiece(StringView &MangledName);
- // The main symbol name. (e.g. "ns::foo" in "int ns::foo()".)
- Name *SymbolName = nullptr;
+ Name *demangleBackRefName(StringView &MangledName);
+ Name *demangleClassTemplateName(StringView &MangledName);
+ Name *demangleOperatorName(StringView &MangledName);
+ Name *demangleSimpleName(StringView &MangledName, bool Memorize);
+ Name *demangleAnonymousNamespaceName(StringView &MangledName);
+ Name *demangleLocallyScopedNamePiece(StringView &MangledName);
+
+ StringView demangleSimpleString(StringView &MangledName, bool Memorize);
+
+ FuncClass demangleFunctionClass(StringView &MangledName);
+ CallingConv demangleCallingConvention(StringView &MangledName);
+ StorageClass demangleVariableStorageClass(StringView &MangledName);
+ ReferenceKind demangleReferenceKind(StringView &MangledName);
+ void demangleThrowSpecification(StringView &MangledName);
+
+ std::pair<Qualifiers, bool> demangleQualifiers(StringView &MangledName);
// Memory allocator.
ArenaAllocator Arena;
@@ -809,28 +955,36 @@ private:
};
} // namespace
+StringView Demangler::copyString(StringView Borrowed) {
+ char *Stable = Arena.allocUnalignedBuffer(Borrowed.size() + 1);
+  // Assumes Borrowed is NUL-terminated; the callers below always pass
+  // buffers that were terminated with OS << '\0'.
+  std::strcpy(Stable, Borrowed.begin());
+
+ return {Stable, Borrowed.size()};
+}
+
// Parser entry point.
-void Demangler::parse() {
+Symbol *Demangler::parse(StringView &MangledName) {
+ Symbol *S = Arena.alloc<Symbol>();
+
// MSVC-style mangled symbols must start with '?'.
if (!MangledName.consumeFront("?")) {
- SymbolName = Arena.alloc<Name>();
- SymbolName->Str = MangledName;
- SymbolType = Arena.alloc<Type>();
- SymbolType->Prim = PrimTy::Unknown;
+ S->SymbolName = Arena.alloc<Name>();
+ S->SymbolName->Str = MangledName;
+ S->SymbolType = Arena.alloc<Type>();
+ S->SymbolType->Prim = PrimTy::Unknown;
+ return S;
}
 // What follows is the main symbol name. This may include
// namespaces or class BackReferences.
- SymbolName = demangleName();
+ S->SymbolName = demangleFullyQualifiedSymbolName(MangledName);
// Read a variable.
- if (startsWithDigit(MangledName)) {
- SymbolType = demangleVariableEncoding();
- return;
- }
+ S->SymbolType = startsWithDigit(MangledName)
+ ? demangleVariableEncoding(MangledName)
+ : demangleFunctionEncoding(MangledName);
- // Read a function.
- SymbolType = demangleFunctionEncoding();
+ return S;
}
// <type-encoding> ::= <storage-class> <variable-type>
@@ -840,10 +994,10 @@ void Demangler::parse() {
// ::= 3 # global
// ::= 4 # static local
-Type *Demangler::demangleVariableEncoding() {
- StorageClass SC = demangleVariableStorageClass();
+Type *Demangler::demangleVariableEncoding(StringView &MangledName) {
+ StorageClass SC = demangleVariableStorageClass(MangledName);
- Type *Ty = demangleType(QualifierMangleMode::Drop);
+ Type *Ty = demangleType(MangledName, QualifierMangleMode::Drop);
Ty->Storage = SC;
@@ -851,17 +1005,17 @@ Type *Demangler::demangleVariableEncoding() {
// ::= <type> <pointee-cvr-qualifiers> # pointers, references
switch (Ty->Prim) {
case PrimTy::Ptr:
- case PrimTy::Ref:
case PrimTy::MemberPtr: {
Qualifiers ExtraChildQuals = Q_None;
- Ty->Quals = Qualifiers(Ty->Quals | demanglePointerExtQualifiers());
+ Ty->Quals =
+ Qualifiers(Ty->Quals | demanglePointerExtQualifiers(MangledName));
bool IsMember = false;
- std::tie(ExtraChildQuals, IsMember) = demangleQualifiers();
+ std::tie(ExtraChildQuals, IsMember) = demangleQualifiers(MangledName);
if (Ty->Prim == PrimTy::MemberPtr) {
assert(IsMember);
- Name *BackRefName = demangleName();
+ Name *BackRefName = demangleFullyQualifiedTypeName(MangledName);
(void)BackRefName;
MemberPointerType *MPTy = static_cast<MemberPointerType *>(Ty);
MPTy->Pointee->Quals = Qualifiers(MPTy->Pointee->Quals | ExtraChildQuals);
@@ -873,7 +1027,7 @@ Type *Demangler::demangleVariableEncoding() {
break;
}
default:
- Ty->Quals = demangleQualifiers().first;
+ Ty->Quals = demangleQualifiers(MangledName).first;
break;
}
@@ -891,7 +1045,7 @@ Type *Demangler::demangleVariableEncoding() {
//              ::= <hex digit>+ @  # when Number == 0 or >= 10
//
// <hex-digit> ::= [A-P] # A = 0, B = 1, ...
-int Demangler::demangleNumber() {
+int Demangler::demangleNumber(StringView &MangledName) {
bool neg = MangledName.consumeFront("?");
if (startsWithDigit(MangledName)) {
@@ -918,23 +1072,6 @@ int Demangler::demangleNumber() {
return 0;
}
-// Read until the next '@'.
-StringView Demangler::demangleString(bool Memorize) {
- for (size_t i = 0; i < MangledName.size(); ++i) {
- if (MangledName[i] != '@')
- continue;
- StringView ret = MangledName.substr(0, i);
- MangledName = MangledName.dropFront(i + 1);
-
- if (Memorize)
- memorizeString(ret);
- return ret;
- }
-
- Error = true;
- return "";
-}
-
// First 10 strings can be referenced by special BackReferences ?0, ?1, ..., ?9.
// Memorize it.
void Demangler::memorizeString(StringView S) {
@@ -946,179 +1083,322 @@ void Demangler::memorizeString(StringView S) {
BackReferences[BackRefCount++] = S;
}
-void Demangler::demangleNamePiece(Name &Node, bool IsHead) {
- if (startsWithDigit(MangledName)) {
- size_t I = MangledName[0] - '0';
- if (I >= BackRefCount) {
- Error = true;
- return;
- }
- MangledName = MangledName.dropFront();
- Node.Str = BackReferences[I];
- } else if (MangledName.consumeFront("?$")) {
- // Class template.
- Node.Str = demangleString(false);
- Node.TemplateParams = demangleTemplateParameterList();
- } else if (!IsHead && MangledName.consumeFront("?A")) {
- // Anonymous namespace starts with ?A. So does overloaded operator[],
- // but the distinguishing factor is that namespace themselves are not
- // mangled, only the variables and functions inside of them are. So
- // an anonymous namespace will never occur as the first item in the
- // name.
- Node.Str = "`anonymous namespace'";
- if (!MangledName.consumeFront('@')) {
- Error = true;
- return;
- }
- } else if (MangledName.consumeFront("?")) {
- // Overloaded operator.
- demangleOperator(&Node);
- } else {
- // Non-template functions or classes.
- Node.Str = demangleString(true);
+Name *Demangler::demangleBackRefName(StringView &MangledName) {
+ assert(startsWithDigit(MangledName));
+
+ size_t I = MangledName[0] - '0';
+ if (I >= BackRefCount) {
+ Error = true;
+ return nullptr;
}
-}
-// Parses a name in the form of A@B@C@@ which represents C::B::A.
-Name *Demangler::demangleName() {
- Name *Head = nullptr;
+ MangledName = MangledName.dropFront();
+ Name *Node = Arena.alloc<Name>();
+ Node->Str = BackReferences[I];
+ return Node;
+}
- while (!MangledName.consumeFront("@")) {
- Name *Elem = Arena.alloc<Name>();
+Name *Demangler::demangleClassTemplateName(StringView &MangledName) {
+ assert(MangledName.startsWith("?$"));
+ MangledName.consumeFront("?$");
- assert(!Error);
- demangleNamePiece(*Elem, Head == nullptr);
- if (Error)
- return nullptr;
+ Name *Node = demangleSimpleName(MangledName, false);
+ Node->TParams = demangleTemplateParameterList(MangledName);
- Elem->Next = Head;
- Head = Elem;
- if (MangledName.empty()) {
- Error = true;
- return nullptr;
- }
- }
+ // Render this class template name into a string buffer so that we can
+ // memorize it for the purpose of back-referencing.
+ OutputStream OS = OutputStream::create(nullptr, nullptr, 1024);
+ outputName(OS, Node);
+ OS << '\0';
+ char *Name = OS.getBuffer();
- return Head;
-}
+ StringView Owned = copyString(Name);
+ memorizeString(Owned);
+ std::free(Name);
-void Demangler::demangleOperator(Name *OpName) {
- OpName->Operator = demangleOperatorName();
- if (!Error && !MangledName.empty() && MangledName.front() != '@')
- demangleNamePiece(*OpName, false);
+ return Node;
}
-StringView Demangler::demangleOperatorName() {
- SwapAndRestore<StringView> RestoreOnError(MangledName, MangledName);
- RestoreOnError.shouldRestore(false);
-
- switch (MangledName.popFront()) {
- case '0':
- return "ctor";
- case '1':
- return "dtor";
- case '2':
- return " new";
- case '3':
- return " delete";
- case '4':
- return "=";
- case '5':
- return ">>";
- case '6':
- return "<<";
- case '7':
- return "!";
- case '8':
- return "==";
- case '9':
- return "!=";
- case 'A':
- return "[]";
- case 'C':
- return "->";
- case 'D':
- return "*";
- case 'E':
- return "++";
- case 'F':
- return "--";
- case 'G':
- return "-";
- case 'H':
- return "+";
- case 'I':
- return "&";
- case 'J':
- return "->*";
- case 'K':
- return "/";
- case 'L':
- return "%";
- case 'M':
- return "<";
- case 'N':
- return "<=";
- case 'O':
- return ">";
- case 'P':
- return ">=";
- case 'Q':
- return ",";
- case 'R':
- return "()";
- case 'S':
- return "~";
- case 'T':
- return "^";
- case 'U':
- return "|";
- case 'V':
- return "&&";
- case 'W':
- return "||";
- case 'X':
- return "*=";
- case 'Y':
- return "+=";
- case 'Z':
- return "-=";
- case '_': {
- if (MangledName.empty())
- break;
+Name *Demangler::demangleOperatorName(StringView &MangledName) {
+ assert(MangledName.startsWith('?'));
+ MangledName.consumeFront('?');
+ auto NameString = [this, &MangledName]() -> StringView {
switch (MangledName.popFront()) {
case '0':
- return "/=";
+ return "ctor";
case '1':
- return "%=";
+ return "dtor";
case '2':
- return ">>=";
+ return " new";
case '3':
- return "<<=";
+ return " delete";
case '4':
- return "&=";
+ return "=";
case '5':
- return "|=";
+ return ">>";
case '6':
- return "^=";
+ return "<<";
+ case '7':
+ return "!";
+ case '8':
+ return "==";
+ case '9':
+ return "!=";
+ case 'A':
+ return "[]";
+ case 'C':
+ return "->";
+ case 'D':
+ return "*";
+ case 'E':
+ return "++";
+ case 'F':
+ return "--";
+ case 'G':
+ return "-";
+ case 'H':
+ return "+";
+ case 'I':
+ return "&";
+ case 'J':
+ return "->*";
+ case 'K':
+ return "/";
+ case 'L':
+ return "%";
+ case 'M':
+ return "<";
+ case 'N':
+ return "<=";
+ case 'O':
+ return ">";
+ case 'P':
+ return ">=";
+ case 'Q':
+ return ",";
+ case 'R':
+ return "()";
+ case 'S':
+ return "~";
+ case 'T':
+ return "^";
case 'U':
- return " new[]";
+ return "|";
case 'V':
- return " delete[]";
- case '_':
- if (MangledName.consumeFront("L"))
- return " co_await";
+ return "&&";
+ case 'W':
+ return "||";
+ case 'X':
+ return "*=";
+ case 'Y':
+ return "+=";
+ case 'Z':
+ return "-=";
+ case '_': {
+ if (MangledName.empty())
+ break;
+
+ switch (MangledName.popFront()) {
+ case '0':
+ return "/=";
+ case '1':
+ return "%=";
+ case '2':
+ return ">>=";
+ case '3':
+ return "<<=";
+ case '4':
+ return "&=";
+ case '5':
+ return "|=";
+ case '6':
+ return "^=";
+ case 'U':
+ return " new[]";
+ case 'V':
+ return " delete[]";
+ case '_':
+ if (MangledName.consumeFront("L"))
+ return " co_await";
+ if (MangledName.consumeFront("K")) {
+ size_t EndPos = MangledName.find('@');
+ if (EndPos == StringView::npos)
+ break;
+ StringView OpName = demangleSimpleString(MangledName, false);
+ size_t FullSize = OpName.size() + 3; // <space>""OpName
+ char *Buffer = Arena.allocUnalignedBuffer(FullSize);
+ Buffer[0] = ' ';
+ Buffer[1] = '"';
+ Buffer[2] = '"';
+ std::memcpy(Buffer + 3, OpName.begin(), OpName.size());
+ return {Buffer, FullSize};
+ }
+ }
}
- }
+ }
+ Error = true;
+ return "";
+ };
+
+ Name *Node = Arena.alloc<Name>();
+ Node->Operator = NameString();
+ return Node;
+}
+
+Name *Demangler::demangleSimpleName(StringView &MangledName, bool Memorize) {
+ StringView S = demangleSimpleString(MangledName, Memorize);
+ if (Error)
+ return nullptr;
+
+ Name *Node = Arena.alloc<Name>();
+ Node->Str = S;
+ return Node;
+}
+
+StringView Demangler::demangleSimpleString(StringView &MangledName,
+ bool Memorize) {
+ StringView S;
+ for (size_t i = 0; i < MangledName.size(); ++i) {
+ if (MangledName[i] != '@')
+ continue;
+ S = MangledName.substr(0, i);
+ MangledName = MangledName.dropFront(i + 1);
+
+ if (Memorize)
+ memorizeString(S);
+ return S;
}
Error = true;
- RestoreOnError.shouldRestore(true);
- return "";
+ return {};
+}
+
+Name *Demangler::demangleAnonymousNamespaceName(StringView &MangledName) {
+ assert(MangledName.startsWith("?A"));
+ MangledName.consumeFront("?A");
+
+ Name *Node = Arena.alloc<Name>();
+ Node->Str = "`anonymous namespace'";
+ if (MangledName.consumeFront('@'))
+ return Node;
+
+ Error = true;
+ return nullptr;
+}
+
+Name *Demangler::demangleLocallyScopedNamePiece(StringView &MangledName) {
+ assert(startsWithLocalScopePattern(MangledName));
+
+ Name *Node = Arena.alloc<Name>();
+ MangledName.consumeFront('?');
+ int ScopeIdentifier = demangleNumber(MangledName);
+
+ // One ? to terminate the number
+ MangledName.consumeFront('?');
+
+ assert(!Error);
+ Symbol *Scope = parse(MangledName);
+ if (Error)
+ return nullptr;
+
+ // Render the parent symbol's name into a buffer.
+ OutputStream OS = OutputStream::create(nullptr, nullptr, 1024);
+ OS << '`';
+ output(Scope, OS);
+ OS << '\'';
+ OS << "::`" << ScopeIdentifier << "'";
+ OS << '\0';
+ char *Result = OS.getBuffer();
+ Node->Str = copyString(Result);
+ std::free(Result);
+ return Node;
+}
+
+// Parses a type name in the form of A@B@C@@ which represents C::B::A.
+Name *Demangler::demangleFullyQualifiedTypeName(StringView &MangledName) {
+ Name *TypeName = demangleUnqualifiedTypeName(MangledName);
+ assert(TypeName);
+
+ Name *QualName = demangleNameScopeChain(MangledName, TypeName);
+ assert(QualName);
+ return QualName;
+}
+
+// Parses a symbol name in the form of A@B@C@@ which represents C::B::A.
+// Symbol names have slightly different rules regarding what can appear
+// so we separate out the implementations for flexibility.
+Name *Demangler::demangleFullyQualifiedSymbolName(StringView &MangledName) {
+ Name *SymbolName = demangleUnqualifiedSymbolName(MangledName);
+ assert(SymbolName);
+
+ Name *QualName = demangleNameScopeChain(MangledName, SymbolName);
+ assert(QualName);
+ return QualName;
+}
+
+Name *Demangler::demangleUnqualifiedTypeName(StringView &MangledName) {
+ // An inner-most name can be a back-reference, because a fully-qualified name
+ // (e.g. Scope + Inner) can contain other fully qualified names inside of
+ // them (for example template parameters), and these nested parameters can
+ // refer to previously mangled types.
+ if (startsWithDigit(MangledName))
+ return demangleBackRefName(MangledName);
+
+ if (MangledName.startsWith("?$"))
+ return demangleClassTemplateName(MangledName);
+
+ return demangleSimpleName(MangledName, true);
+}
+
+Name *Demangler::demangleUnqualifiedSymbolName(StringView &MangledName) {
+ if (startsWithDigit(MangledName))
+ return demangleBackRefName(MangledName);
+ if (MangledName.startsWith("?$"))
+ return demangleClassTemplateName(MangledName);
+ if (MangledName.startsWith('?'))
+ return demangleOperatorName(MangledName);
+ return demangleSimpleName(MangledName, true);
+}
+
+Name *Demangler::demangleNameScopePiece(StringView &MangledName) {
+ if (startsWithDigit(MangledName))
+ return demangleBackRefName(MangledName);
+
+ if (MangledName.startsWith("?$"))
+ return demangleClassTemplateName(MangledName);
+
+ if (MangledName.startsWith("?A"))
+ return demangleAnonymousNamespaceName(MangledName);
+
+ if (startsWithLocalScopePattern(MangledName))
+ return demangleLocallyScopedNamePiece(MangledName);
+
+ return demangleSimpleName(MangledName, true);
+}
+
+Name *Demangler::demangleNameScopeChain(StringView &MangledName,
+ Name *UnqualifiedName) {
+ Name *Head = UnqualifiedName;
+
+ while (!MangledName.consumeFront("@")) {
+ if (MangledName.empty()) {
+ Error = true;
+ return nullptr;
+ }
+
+ assert(!Error);
+ Name *Elem = demangleNameScopePiece(MangledName);
+ if (Error)
+ return nullptr;
+
+ Elem->Next = Head;
+ Head = Elem;
+ }
+ return Head;
}
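Reviewer note: demangleNameScopeChain sees components innermost-first (A, then B, then C for "A@B@C@@"), and the Elem->Next = Head prepend reverses them into outermost-first order for printing. A minimal sketch of the same idiom:

    #include <cstdio>

    struct Piece { const char *Str; Piece *Next; };

    int main() {
      Piece A{"A", nullptr}, B{"B", nullptr}, C{"C", nullptr};
      Piece *Head = &A;         // the unqualified name comes first
      B.Next = Head; Head = &B; // each enclosing scope is prepended
      C.Next = Head; Head = &C;
      for (Piece *P = Head; P; P = P->Next)
        std::printf("%s%s", P->Str, P->Next ? "::" : "\n"); // C::B::A
    }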
-FuncClass Demangler::demangleFunctionClass() {
+FuncClass Demangler::demangleFunctionClass(StringView &MangledName) {
SwapAndRestore<StringView> RestoreOnError(MangledName, MangledName);
RestoreOnError.shouldRestore(false);
@@ -1170,7 +1450,7 @@ FuncClass Demangler::demangleFunctionClass() {
return Public;
}
-CallingConv Demangler::demangleCallingConvention() {
+CallingConv Demangler::demangleCallingConvention(StringView &MangledName) {
switch (MangledName.popFront()) {
case 'A':
case 'B':
@@ -1200,7 +1480,7 @@ CallingConv Demangler::demangleCallingConvention() {
return CallingConv::None;
}
-StorageClass Demangler::demangleVariableStorageClass() {
+StorageClass Demangler::demangleVariableStorageClass(StringView &MangledName) {
assert(std::isdigit(MangledName.front()));
switch (MangledName.popFront()) {
@@ -1219,7 +1499,8 @@ StorageClass Demangler::demangleVariableStorageClass() {
return StorageClass::None;
}
-std::pair<Qualifiers, bool> Demangler::demangleQualifiers() {
+std::pair<Qualifiers, bool>
+Demangler::demangleQualifiers(StringView &MangledName) {
switch (MangledName.popFront()) {
// Member qualifiers
@@ -1245,54 +1526,88 @@ std::pair<Qualifiers, bool> Demangler::demangleQualifiers() {
return std::make_pair(Q_None, false);
}
+static bool isTagType(StringView S) {
+ switch (S.front()) {
+ case 'T': // union
+ case 'U': // struct
+ case 'V': // class
+ case 'W': // enum
+ return true;
+ }
+ return false;
+}
+
+static bool isPointerType(StringView S) {
+ if (S.startsWith("$$Q")) // foo &&
+ return true;
+
+ switch (S.front()) {
+ case 'A': // foo &
+ case 'P': // foo *
+ case 'Q': // foo *const
+ case 'R': // foo *volatile
+ case 'S': // foo *const volatile
+ return true;
+ }
+ return false;
+}
+
+static bool isArrayType(StringView S) { return S[0] == 'Y'; }
+
+static bool isFunctionType(StringView S) {
+ return S.startsWith("$$A8@@") || S.startsWith("$$A6");
+}
+
// <variable-type> ::= <type> <cvr-qualifiers>
// ::= <type> <pointee-cvr-qualifiers> # pointers, references
-Type *Demangler::demangleType(QualifierMangleMode QMM) {
+Type *Demangler::demangleType(StringView &MangledName,
+ QualifierMangleMode QMM) {
Qualifiers Quals = Q_None;
bool IsMember = false;
bool IsMemberKnown = false;
if (QMM == QualifierMangleMode::Mangle) {
- std::tie(Quals, IsMember) = demangleQualifiers();
+ std::tie(Quals, IsMember) = demangleQualifiers(MangledName);
IsMemberKnown = true;
} else if (QMM == QualifierMangleMode::Result) {
if (MangledName.consumeFront('?')) {
- std::tie(Quals, IsMember) = demangleQualifiers();
+ std::tie(Quals, IsMember) = demangleQualifiers(MangledName);
IsMemberKnown = true;
}
}
Type *Ty = nullptr;
- switch (MangledName.front()) {
- case 'T': // union
- case 'U': // struct
- case 'V': // class
- case 'W': // enum
- Ty = demangleClassType();
- break;
- case 'A': // foo &
- case 'P': // foo *
- case 'Q': // foo *const
- case 'R': // foo *volatile
- case 'S': // foo *const volatile
+ if (isTagType(MangledName))
+ Ty = demangleClassType(MangledName);
+ else if (isPointerType(MangledName)) {
if (!IsMemberKnown)
IsMember = isMemberPointer(MangledName);
+
if (IsMember)
- Ty = demangleMemberPointerType();
+ Ty = demangleMemberPointerType(MangledName);
else
- Ty = demanglePointerType();
- break;
- case 'Y':
- Ty = demangleArrayType();
- break;
- default:
- Ty = demangleBasicType();
- break;
+ Ty = demanglePointerType(MangledName);
+ } else if (isArrayType(MangledName))
+ Ty = demangleArrayType(MangledName);
+ else if (isFunctionType(MangledName)) {
+ if (MangledName.consumeFront("$$A8@@"))
+ Ty = demangleFunctionType(MangledName, true, false);
+ else {
+ assert(MangledName.startsWith("$$A6"));
+ MangledName.consumeFront("$$A6");
+ Ty = demangleFunctionType(MangledName, false, false);
+ }
+ } else {
+ Ty = demangleBasicType(MangledName);
+ assert(Ty && !Error);
+ if (!Ty || Error)
+ return Ty;
}
+
Ty->Quals = Qualifiers(Ty->Quals | Quals);
return Ty;
}
-ReferenceKind Demangler::demangleReferenceKind() {
+ReferenceKind Demangler::demangleReferenceKind(StringView &MangledName) {
if (MangledName.consumeFront('G'))
return ReferenceKind::LValueRef;
else if (MangledName.consumeFront('H'))
@@ -1300,55 +1615,61 @@ ReferenceKind Demangler::demangleReferenceKind() {
return ReferenceKind::None;
}
-void Demangler::demangleThrowSpecification() {
+void Demangler::demangleThrowSpecification(StringView &MangledName) {
if (MangledName.consumeFront('Z'))
return;
Error = true;
}
-FunctionType *Demangler::demangleFunctionType(bool HasThisQuals,
+FunctionType *Demangler::demangleFunctionType(StringView &MangledName,
+ bool HasThisQuals,
bool IsFunctionPointer) {
FunctionType *FTy = Arena.alloc<FunctionType>();
FTy->Prim = PrimTy::Function;
FTy->IsFunctionPointer = IsFunctionPointer;
if (HasThisQuals) {
- FTy->Quals = demanglePointerExtQualifiers();
- FTy->RefKind = demangleReferenceKind();
- FTy->Quals = Qualifiers(FTy->Quals | demangleQualifiers().first);
+ FTy->Quals = demanglePointerExtQualifiers(MangledName);
+ FTy->RefKind = demangleReferenceKind(MangledName);
+ FTy->Quals = Qualifiers(FTy->Quals | demangleQualifiers(MangledName).first);
}
// Fields that appear on both member and non-member functions.
- FTy->CallConvention = demangleCallingConvention();
+ FTy->CallConvention = demangleCallingConvention(MangledName);
// <return-type> ::= <type>
// ::= @ # structors (they have no declared return type)
bool IsStructor = MangledName.consumeFront('@');
if (!IsStructor)
- FTy->ReturnType = demangleType(QualifierMangleMode::Result);
+ FTy->ReturnType = demangleType(MangledName, QualifierMangleMode::Result);
- FTy->Params = demangleFunctionParameterList();
+ FTy->Params = demangleFunctionParameterList(MangledName);
- demangleThrowSpecification();
+ demangleThrowSpecification(MangledName);
return FTy;
}
-Type *Demangler::demangleFunctionEncoding() {
- FuncClass FC = demangleFunctionClass();
+Type *Demangler::demangleFunctionEncoding(StringView &MangledName) {
+ FuncClass FC = demangleFunctionClass(MangledName);
bool HasThisQuals = !(FC & (Global | Static));
- FunctionType *FTy = demangleFunctionType(HasThisQuals, false);
+ FunctionType *FTy = demangleFunctionType(MangledName, HasThisQuals, false);
FTy->FunctionClass = FC;
return FTy;
}
// Reads a primitive type.
-Type *Demangler::demangleBasicType() {
+Type *Demangler::demangleBasicType(StringView &MangledName) {
Type *Ty = Arena.alloc<Type>();
+ if (MangledName.consumeFront("$$T")) {
+ Ty->Prim = PrimTy::Nullptr;
+ return Ty;
+ }
+
switch (MangledName.popFront()) {
case 'X':
Ty->Prim = PrimTy::Void;
@@ -1407,16 +1728,26 @@ Type *Demangler::demangleBasicType() {
case 'W':
Ty->Prim = PrimTy::Wchar;
break;
+ case 'S':
+ Ty->Prim = PrimTy::Char16;
+ break;
+ case 'U':
+ Ty->Prim = PrimTy::Char32;
+ break;
default:
- assert(false);
+ Error = true;
+ return nullptr;
}
break;
}
+ default:
+ Error = true;
+ return nullptr;
}
return Ty;
}
-UdtType *Demangler::demangleClassType() {
+UdtType *Demangler::demangleClassType(StringView &MangledName) {
UdtType *UTy = Arena.alloc<UdtType>();
switch (MangledName.popFront()) {
@@ -1440,12 +1771,15 @@ UdtType *Demangler::demangleClassType() {
assert(false);
}
- UTy->UdtName = demangleName();
+ UTy->UdtName = demangleFullyQualifiedTypeName(MangledName);
return UTy;
}
static std::pair<Qualifiers, PointerAffinity>
demanglePointerCVQualifiers(StringView &MangledName) {
+ if (MangledName.consumeFront("$$Q"))
+ return std::make_pair(Q_None, PointerAffinity::RValueReference);
+
switch (MangledName.popFront()) {
case 'A':
return std::make_pair(Q_None, PointerAffinity::Reference);
@@ -1466,27 +1800,27 @@ demanglePointerCVQualifiers(StringView &MangledName) {
// <pointer-type> ::= E? <pointer-cvr-qualifiers> <ext-qualifiers> <type>
// # the E is required for 64-bit non-static pointers
-PointerType *Demangler::demanglePointerType() {
+PointerType *Demangler::demanglePointerType(StringView &MangledName) {
PointerType *Pointer = Arena.alloc<PointerType>();
- PointerAffinity Affinity;
- std::tie(Pointer->Quals, Affinity) = demanglePointerCVQualifiers(MangledName);
+ std::tie(Pointer->Quals, Pointer->Affinity) =
+ demanglePointerCVQualifiers(MangledName);
- Pointer->Prim =
- (Affinity == PointerAffinity::Pointer) ? PrimTy::Ptr : PrimTy::Ref;
+ Pointer->Prim = PrimTy::Ptr;
if (MangledName.consumeFront("6")) {
- Pointer->Pointee = demangleFunctionType(false, true);
+ Pointer->Pointee = demangleFunctionType(MangledName, false, true);
return Pointer;
}
- Qualifiers ExtQuals = demanglePointerExtQualifiers();
+ Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName);
Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals);
- Pointer->Pointee = demangleType(QualifierMangleMode::Mangle);
+ Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Mangle);
return Pointer;
}
-MemberPointerType *Demangler::demangleMemberPointerType() {
+MemberPointerType *
+Demangler::demangleMemberPointerType(StringView &MangledName) {
MemberPointerType *Pointer = Arena.alloc<MemberPointerType>();
Pointer->Prim = PrimTy::MemberPtr;
@@ -1494,27 +1828,27 @@ MemberPointerType *Demangler::demangleMemberPointerType() {
std::tie(Pointer->Quals, Affinity) = demanglePointerCVQualifiers(MangledName);
assert(Affinity == PointerAffinity::Pointer);
- Qualifiers ExtQuals = demanglePointerExtQualifiers();
+ Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName);
Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals);
if (MangledName.consumeFront("8")) {
- Pointer->MemberName = demangleName();
- Pointer->Pointee = demangleFunctionType(true, true);
+ Pointer->MemberName = demangleFullyQualifiedSymbolName(MangledName);
+ Pointer->Pointee = demangleFunctionType(MangledName, true, true);
} else {
Qualifiers PointeeQuals = Q_None;
bool IsMember = false;
- std::tie(PointeeQuals, IsMember) = demangleQualifiers();
+ std::tie(PointeeQuals, IsMember) = demangleQualifiers(MangledName);
assert(IsMember);
- Pointer->MemberName = demangleName();
+ Pointer->MemberName = demangleFullyQualifiedSymbolName(MangledName);
- Pointer->Pointee = demangleType(QualifierMangleMode::Drop);
+ Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Drop);
Pointer->Pointee->Quals = PointeeQuals;
}
return Pointer;
}
-Qualifiers Demangler::demanglePointerExtQualifiers() {
+Qualifiers Demangler::demanglePointerExtQualifiers(StringView &MangledName) {
Qualifiers Quals = Q_None;
if (MangledName.consumeFront('E'))
Quals = Qualifiers(Quals | Q_Pointer64);
@@ -1526,11 +1860,11 @@ Qualifiers Demangler::demanglePointerExtQualifiers() {
return Quals;
}
-ArrayType *Demangler::demangleArrayType() {
+ArrayType *Demangler::demangleArrayType(StringView &MangledName) {
assert(MangledName.front() == 'Y');
MangledName.popFront();
- int Dimension = demangleNumber();
+ int Dimension = demangleNumber(MangledName);
if (Dimension <= 0) {
Error = true;
return nullptr;
@@ -1540,7 +1874,7 @@ ArrayType *Demangler::demangleArrayType() {
ArrayType *Dim = ATy;
for (int I = 0; I < Dimension; ++I) {
Dim->Prim = PrimTy::Array;
- Dim->ArrayDimension = demangleNumber();
+ Dim->ArrayDimension = demangleNumber(MangledName);
Dim->NextDimension = Arena.alloc<ArrayType>();
Dim = Dim->NextDimension;
}
@@ -1554,19 +1888,20 @@ ArrayType *Demangler::demangleArrayType() {
Error = true;
}
- ATy->ElementType = demangleType(QualifierMangleMode::Drop);
+ ATy->ElementType = demangleType(MangledName, QualifierMangleMode::Drop);
Dim->ElementType = ATy->ElementType;
return ATy;
}
// Reads a function or template parameter list.
-ParamList Demangler::demangleFunctionParameterList() {
+FunctionParams
+Demangler::demangleFunctionParameterList(StringView &MangledName) {
// Empty parameter list.
if (MangledName.consumeFront('X'))
return {};
- ParamList *Head;
- ParamList **Current = &Head;
+ FunctionParams *Head;
+ FunctionParams **Current = &Head;
while (!Error && !MangledName.startsWith('@') &&
!MangledName.startsWith('Z')) {
@@ -1578,7 +1913,7 @@ ParamList Demangler::demangleFunctionParameterList() {
}
MangledName = MangledName.dropFront();
- *Current = Arena.alloc<ParamList>();
+ *Current = Arena.alloc<FunctionParams>();
(*Current)->Current = FunctionParamBackRefs[N]->clone(Arena);
Current = &(*Current)->Next;
continue;
@@ -1586,8 +1921,8 @@ ParamList Demangler::demangleFunctionParameterList() {
size_t OldSize = MangledName.size();
- *Current = Arena.alloc<ParamList>();
- (*Current)->Current = demangleType(QualifierMangleMode::Drop);
+ *Current = Arena.alloc<FunctionParams>();
+ (*Current)->Current = demangleType(MangledName, QualifierMangleMode::Drop);
size_t CharsConsumed = OldSize - MangledName.size();
assert(CharsConsumed != 0);
@@ -1618,14 +1953,33 @@ ParamList Demangler::demangleFunctionParameterList() {
return {};
}
-ParamList Demangler::demangleTemplateParameterList() {
- ParamList *Head;
- ParamList **Current = &Head;
+TemplateParams *
+Demangler::demangleTemplateParameterList(StringView &MangledName) {
+ TemplateParams *Head;
+ TemplateParams **Current = &Head;
while (!Error && !MangledName.startsWith('@')) {
-
// Template parameter lists don't participate in back-referencing.
- *Current = Arena.alloc<ParamList>();
- (*Current)->Current = demangleType(QualifierMangleMode::Drop);
+ *Current = Arena.alloc<TemplateParams>();
+
+ // Empty parameter pack.
+ if (MangledName.consumeFront("$S") || MangledName.consumeFront("$$V") ||
+ MangledName.consumeFront("$$$V")) {
+ if (!MangledName.startsWith('@'))
+ Error = true;
+ continue;
+ }
+
+ if (MangledName.consumeFront("$$Y")) {
+ (*Current)->IsTemplateTemplate = true;
+ (*Current)->IsAliasTemplate = true;
+ (*Current)->ParamName = demangleFullyQualifiedTypeName(MangledName);
+ } else if (MangledName.consumeFront("$1?")) {
+ (*Current)->ParamName = demangleFullyQualifiedSymbolName(MangledName);
+ (*Current)->ParamType = demangleFunctionEncoding(MangledName);
+ } else {
+ (*Current)->ParamType =
+ demangleType(MangledName, QualifierMangleMode::Drop);
+ }
Current = &(*Current)->Next;
}
@@ -1636,12 +1990,12 @@ ParamList Demangler::demangleTemplateParameterList() {
  // Template parameter lists cannot be variadic, so they can only be
  // terminated by @.
if (MangledName.consumeFront('@'))
- return *Head;
+ return Head;
Error = true;
return {};
}
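Reviewer note: for reference, the special markers handled above (a summary of this patch's handling, not a complete grammar):

    // $S, $$V, $$$V -- empty parameter pack; emits nothing.
    // $$Y           -- alias template; parsed as a fully qualified type name.
    // $1?           -- function address (e.g. foo<&SomeFunc>); parsed as a
    //                  symbol name followed by its function encoding.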
-void Demangler::output() {
+void Demangler::output(const Symbol *S, OutputStream &OS) {
// Converts an AST to a string.
//
// Converting an AST representing a C++ type to a string is tricky due
@@ -1659,26 +2013,24 @@ void Demangler::output() {
// the "first half" of type declaration, and outputPost() writes the
// "second half". For example, outputPre() writes a return type for a
// function and outputPost() writes an parameter list.
- Type::outputPre(OS, *SymbolType);
- outputName(OS, SymbolName);
- Type::outputPost(OS, *SymbolType);
-
- // Null terminate the buffer.
- OS << '\0';
+ Type::outputPre(OS, *S->SymbolType);
+ outputName(OS, S->SymbolName);
+ Type::outputPost(OS, *S->SymbolType);
}
char *llvm::microsoftDemangle(const char *MangledName, char *Buf, size_t *N,
int *Status) {
- OutputStream OS = OutputStream::create(Buf, N, 1024);
-
- Demangler D(OS, StringView(MangledName));
- D.parse();
+ Demangler D;
+ StringView Name{MangledName};
+ Symbol *S = D.parse(Name);
if (D.Error)
*Status = llvm::demangle_invalid_mangled_name;
else
*Status = llvm::demangle_success;
- D.output();
+ OutputStream OS = OutputStream::create(Buf, N, 1024);
+ D.output(S, OS);
+ OS << '\0';
return OS.getBuffer();
}
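Reviewer note: a usage sketch of the restructured entry point. This assumes OutputStream::create with a null buffer allocates one for the caller to free, matching how the Itanium-side entry point behaves; the input is a simple illustrative mangling for "int x".

    #include "llvm/Demangle/Demangle.h"
    #include <cstdio>
    #include <cstdlib>

    int main() {
      int Status = 0;
      char *S = llvm::microsoftDemangle("?x@@3HA", nullptr, nullptr, &Status);
      if (S) {
        std::printf("%s (status %d)\n", S, Status); // expected: int x
        std::free(S);
      }
      return 0;
    }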
diff --git a/lib/Demangle/StringView.h b/lib/Demangle/StringView.h
index 3416db2c2867..a89deda694c2 100644
--- a/lib/Demangle/StringView.h
+++ b/lib/Demangle/StringView.h
@@ -22,6 +22,8 @@ class StringView {
const char *Last;
public:
+ static const size_t npos = ~size_t(0);
+
template <size_t N>
StringView(const char (&Str)[N]) : First(Str), Last(Str + N - 1) {}
StringView(const char *First_, const char *Last_)
@@ -35,6 +37,17 @@ public:
return StringView(begin() + From, size() - From);
}
+ size_t find(char C, size_t From = 0) const {
+ size_t FindBegin = std::min(From, size());
+ // Avoid calling memchr with nullptr.
+ if (FindBegin < size()) {
+ // Just forward to memchr, which is faster than a hand-rolled loop.
+ if (const void *P = ::memchr(First + FindBegin, C, size() - FindBegin))
+ return static_cast<const char *>(P) - First;
+ }
+ return npos;
+ }
+
StringView substr(size_t From, size_t To) const {
if (To >= size())
To = size() - 1;
@@ -49,11 +62,22 @@ public:
return StringView(First + N, Last);
}
+ StringView dropBack(size_t N = 1) const {
+ if (N >= size())
+ N = size();
+ return StringView(First, Last - N);
+ }
+
char front() const {
assert(!empty());
return *begin();
}
+ char back() const {
+ assert(!empty());
+ return *(end() - 1);
+ }
+
char popFront() {
assert(!empty());
return *First++;
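The new helpers follow std::string_view conventions: find() clamps its start offset and returns npos on failure, dropBack() clamps instead of asserting, and back() requires a non-empty view. A self-contained mirror of those semantics (MiniView is invented for illustration; the real class lives in this private header):

    #include <cassert>
    #include <cstring>

    // MiniView only mirrors the semantics of the helpers added above; it is
    // not the real StringView.
    struct MiniView {
      const char *First, *Last;
      static constexpr size_t npos = ~size_t(0);
      size_t size() const { return Last - First; }

      size_t find(char C, size_t From = 0) const {
        size_t Begin = From < size() ? From : size();
        if (Begin < size()) // never hand memchr an empty or null range
          if (const void *P = ::memchr(First + Begin, C, size() - Begin))
            return static_cast<const char *>(P) - First;
        return npos;
      }
      MiniView dropBack(size_t N = 1) const {
        return {First, Last - (N >= size() ? size() : N)}; // clamps, no assert
      }
      char back() const { return *(Last - 1); } // caller ensures non-empty
    };

    int main() {
      const char S[] = "a::b";
      MiniView V{S, S + 4};
      assert(V.find(':') == 1);                 // first match wins
      assert(V.find(':', 3) == MiniView::npos); // start past both colons
      assert(V.dropBack().back() == ':');       // "a::"
      return 0;
    }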
diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
index abcdaeba8eb0..3be4bec566a0 100644
--- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp
+++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -153,7 +153,7 @@ void LLVMInitializeMCJITCompilerOptions(LLVMMCJITCompilerOptions *PassedOptions,
LLVMMCJITCompilerOptions options;
memset(&options, 0, sizeof(options)); // Most fields are zero by default.
options.CodeModel = LLVMCodeModelJITDefault;
-
+
memcpy(PassedOptions, &options,
std::min(sizeof(options), SizeOfPassedOptions));
}
@@ -171,14 +171,14 @@ LLVMBool LLVMCreateMCJITCompilerForModule(
"LLVM library mismatch.");
return 1;
}
-
+
// Defend against the user having an old version of the API by ensuring that
// any fields they didn't see are cleared. We must defend against fields being
// set to the bitwise equivalent of zero, and assume that this means "do the
// default" as if that option hadn't been available.
LLVMInitializeMCJITCompilerOptions(&options, sizeof(options));
memcpy(&options, PassedOptions, SizeOfPassedOptions);
-
+
TargetOptions targetOptions;
targetOptions.EnableFastISel = options.EnableFastISel;
std::unique_ptr<Module> Mod(unwrap(M));
@@ -241,12 +241,12 @@ LLVMGenericValueRef LLVMRunFunction(LLVMExecutionEngineRef EE, LLVMValueRef F,
unsigned NumArgs,
LLVMGenericValueRef *Args) {
unwrap(EE)->finalizeObject();
-
+
std::vector<GenericValue> ArgVec;
ArgVec.reserve(NumArgs);
for (unsigned I = 0; I != NumArgs; ++I)
ArgVec.push_back(*unwrap(Args[I]));
-
+
GenericValue *Result = new GenericValue();
*Result = unwrap(EE)->runFunction(unwrap<Function>(F), ArgVec);
return wrap(Result);
@@ -297,7 +297,7 @@ void LLVMAddGlobalMapping(LLVMExecutionEngineRef EE, LLVMValueRef Global,
void *LLVMGetPointerToGlobal(LLVMExecutionEngineRef EE, LLVMValueRef Global) {
unwrap(EE)->finalizeObject();
-
+
return unwrap(EE)->getPointerToGlobal(unwrap<GlobalValue>(Global));
}
@@ -395,11 +395,11 @@ LLVMMCJITMemoryManagerRef LLVMCreateSimpleMCJITMemoryManager(
LLVMMemoryManagerAllocateDataSectionCallback AllocateDataSection,
LLVMMemoryManagerFinalizeMemoryCallback FinalizeMemory,
LLVMMemoryManagerDestroyCallback Destroy) {
-
+
if (!AllocateCodeSection || !AllocateDataSection || !FinalizeMemory ||
!Destroy)
return nullptr;
-
+
SimpleBindingMMFunctions functions;
functions.AllocateCodeSection = AllocateCodeSection;
functions.AllocateDataSection = AllocateDataSection;
diff --git a/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h b/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h
index 1f029fb1c45b..61d8cc75d9f2 100644
--- a/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h
+++ b/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h
@@ -7,7 +7,7 @@
*
*===----------------------------------------------------------------------===*
*
- * This file provides Intel(R) Performance Analyzer JIT (Just-In-Time)
+ * This file provides Intel(R) Performance Analyzer JIT (Just-In-Time)
* Profiling API internal config.
*
* NOTE: This file comes in a style different from the rest of LLVM
@@ -213,7 +213,7 @@ typedef pthread_mutex_t mutex_t;
#define __itt_thread_id() GetCurrentThreadId()
#define __itt_thread_yield() SwitchToThread()
#ifndef ITT_SIMPLE_INIT
-ITT_INLINE long
+ITT_INLINE long
__itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE;
ITT_INLINE long __itt_interlocked_increment(volatile long* ptr)
{
@@ -273,7 +273,7 @@ ITT_INLINE long __TBB_machine_fetchadd4(volatile void* ptr, long addend)
}
#endif /* ITT_ARCH==ITT_ARCH_IA64 */
#ifndef ITT_SIMPLE_INIT
-ITT_INLINE long
+ITT_INLINE long
__itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE;
ITT_INLINE long __itt_interlocked_increment(volatile long* ptr)
{
diff --git a/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h b/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h
index 8d16ee85d141..efd2b1a33f75 100644
--- a/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h
+++ b/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h
@@ -7,7 +7,7 @@
*
*===----------------------------------------------------------------------===*
*
- * This file provides Intel(R) Performance Analyzer JIT (Just-In-Time)
+ * This file provides Intel(R) Performance Analyzer JIT (Just-In-Time)
* Profiling API declaration.
*
* NOTE: This file comes in a style different from the rest of LLVM
@@ -28,54 +28,54 @@ typedef enum iJIT_jvm_event
{
/* shutdown */
-
- /*
+
+ /*
* Program exiting EventSpecificData NA
*/
- iJVM_EVENT_TYPE_SHUTDOWN = 2,
+ iJVM_EVENT_TYPE_SHUTDOWN = 2,
/* JIT profiling */
-
- /*
+
+ /*
 * issued after method code is jitted into memory but before the code is executed
* EventSpecificData is an iJIT_Method_Load
*/
- iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED=13,
+ iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED=13,
- /* issued before unload. Method code will no longer be executed, but code
- * and info are still in memory. The VTune profiler may capture method
+ /* issued before unload. Method code will no longer be executed, but code
+ * and info are still in memory. The VTune profiler may capture method
* code only at this point EventSpecificData is iJIT_Method_Id
*/
- iJVM_EVENT_TYPE_METHOD_UNLOAD_START,
+ iJVM_EVENT_TYPE_METHOD_UNLOAD_START,
/* Method Profiling */
- /* method name, Id and stack is supplied
- * issued when a method is about to be entered EventSpecificData is
+ /* method name, Id and stack is supplied
+ * issued when a method is about to be entered EventSpecificData is
* iJIT_Method_NIDS
*/
- iJVM_EVENT_TYPE_ENTER_NIDS = 19,
+ iJVM_EVENT_TYPE_ENTER_NIDS = 19,
- /* method name, Id and stack is supplied
- * issued when a method is about to be left EventSpecificData is
+ /* method name, Id and stack is supplied
+ * issued when a method is about to be left EventSpecificData is
* iJIT_Method_NIDS
*/
- iJVM_EVENT_TYPE_LEAVE_NIDS
+ iJVM_EVENT_TYPE_LEAVE_NIDS
} iJIT_JVM_EVENT;
typedef enum _iJIT_ModeFlags
{
/* No need to Notify VTune, since VTune is not running */
- iJIT_NO_NOTIFICATIONS = 0x0000,
+ iJIT_NO_NOTIFICATIONS = 0x0000,
- /* when turned on the jit must call
+ /* when turned on the jit must call
* iJIT_NotifyEvent
* (
* iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED,
* )
 * for all the methods already jitted
*/
- iJIT_BE_NOTIFY_ON_LOAD = 0x0001,
+ iJIT_BE_NOTIFY_ON_LOAD = 0x0001,
/* when turned on the jit must call
* iJIT_NotifyEvent
@@ -83,19 +83,19 @@ typedef enum _iJIT_ModeFlags
* iJVM_EVENT_TYPE_METHOD_UNLOAD_FINISHED,
 * ) for all the methods that are unloaded
*/
- iJIT_BE_NOTIFY_ON_UNLOAD = 0x0002,
+ iJIT_BE_NOTIFY_ON_UNLOAD = 0x0002,
/* when turned on the jit must instrument all
 * the currently jitted code with calls on
* method entries
*/
- iJIT_BE_NOTIFY_ON_METHOD_ENTRY = 0x0004,
+ iJIT_BE_NOTIFY_ON_METHOD_ENTRY = 0x0004,
/* when turned on the jit must instrument all
 * the currently jitted code with calls
* on method exit
*/
- iJIT_BE_NOTIFY_ON_METHOD_EXIT = 0x0008
+ iJIT_BE_NOTIFY_ON_METHOD_EXIT = 0x0008
} iJIT_ModeFlags;
@@ -104,13 +104,13 @@ typedef enum _iJIT_ModeFlags
typedef enum _iJIT_IsProfilingActiveFlags
{
/* No profiler is running. Currently not used */
- iJIT_NOTHING_RUNNING = 0x0000,
+ iJIT_NOTHING_RUNNING = 0x0000,
/* Sampling is running. This is the default value
* returned by iJIT_IsProfilingActive()
*/
- iJIT_SAMPLING_ON = 0x0001,
-
+ iJIT_SAMPLING_ON = 0x0001,
+
/* Call Graph is running */
iJIT_CALLGRAPH_ON = 0x0002
@@ -135,7 +135,7 @@ typedef struct _iJIT_Method_Id
/* Id of the method (same as the one passed in
* the iJIT_Method_Load struct
*/
- unsigned int method_id;
+ unsigned int method_id;
} *piJIT_Method_Id, iJIT_Method_Id;
@@ -149,13 +149,13 @@ typedef struct _iJIT_Method_Id
typedef struct _iJIT_Method_NIDS
{
/* unique method ID */
- unsigned int method_id;
+ unsigned int method_id;
/* NOTE: no need to fill this field, it's filled by VTune */
- unsigned int stack_id;
+ unsigned int stack_id;
/* method name (just the method, without the class) */
- char* method_name;
+ char* method_name;
} *piJIT_Method_NIDS, iJIT_Method_NIDS;
/* structures for the events:
@@ -168,51 +168,51 @@ typedef struct _LineNumberInfo
unsigned int Offset;
/* source line number from the beginning of the source file */
- unsigned int LineNumber;
+ unsigned int LineNumber;
} *pLineNumberInfo, LineNumberInfo;
typedef struct _iJIT_Method_Load
{
/* unique method ID - can be any unique value, (except 0 - 999) */
- unsigned int method_id;
+ unsigned int method_id;
/* method name (can be with or without the class and signature, in any case
* the class name will be added to it)
*/
- char* method_name;
+ char* method_name;
/* virtual address of that method - This determines the method range for the
* iJVM_EVENT_TYPE_ENTER/LEAVE_METHOD_ADDR events
*/
- void* method_load_address;
+ void* method_load_address;
/* Size in memory - Must be exact */
- unsigned int method_size;
+ unsigned int method_size;
/* Line Table size in number of entries - Zero if none */
unsigned int line_number_size;
/* Pointer to the beginning of the line numbers info array */
- pLineNumberInfo line_number_table;
+ pLineNumberInfo line_number_table;
/* unique class ID */
- unsigned int class_id;
-
+ unsigned int class_id;
+
/* class file name */
- char* class_file_name;
+ char* class_file_name;
/* source file name */
- char* source_file_name;
+ char* source_file_name;
/* bits supplied by the user for saving in the JIT file */
- void* user_data;
+ void* user_data;
/* the size of the user data buffer */
- unsigned int user_data_size;
+ unsigned int user_data_size;
/* NOTE: no need to fill this field, it's filled by VTune */
- iJDEnvironmentType env;
+ iJDEnvironmentType env;
} *piJIT_Method_Load, iJIT_Method_Load;
@@ -241,7 +241,7 @@ typedef void (*iJIT_ModeChangedEx)(void *UserData, iJIT_ModeFlags Flags);
int JITAPI iJIT_NotifyEvent(iJIT_JVM_EVENT event_type, void *EventSpecificData);
/* The new mode call back routine */
-void JITAPI iJIT_RegisterCallbackEx(void *userdata,
+void JITAPI iJIT_RegisterCallbackEx(void *userdata,
iJIT_ModeChangedEx NewModeCallBackFuncEx);
iJIT_IsProfilingActiveFlags JITAPI iJIT_IsProfilingActive(void);
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index 9e77d160c30b..39cf6d4a32a3 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -85,7 +85,7 @@ static void executeFMulInst(GenericValue &Dest, GenericValue Src1,
}
}
-static void executeFDivInst(GenericValue &Dest, GenericValue Src1,
+static void executeFDivInst(GenericValue &Dest, GenericValue Src1,
GenericValue Src2, Type *Ty) {
switch (Ty->getTypeID()) {
IMPLEMENT_BINARY_OPERATOR(/, Float);
@@ -96,7 +96,7 @@ static void executeFDivInst(GenericValue &Dest, GenericValue Src1,
}
}
-static void executeFRemInst(GenericValue &Dest, GenericValue Src1,
+static void executeFRemInst(GenericValue &Dest, GenericValue Src1,
GenericValue Src2, Type *Ty) {
switch (Ty->getTypeID()) {
case Type::FloatTyID:
@@ -281,7 +281,7 @@ void Interpreter::visitICmpInst(ICmpInst &I) {
GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
GenericValue R; // Result
-
+
switch (I.getPredicate()) {
case ICmpInst::ICMP_EQ: R = executeICMP_EQ(Src1, Src2, Ty); break;
case ICmpInst::ICMP_NE: R = executeICMP_NE(Src1, Src2, Ty); break;
@@ -297,7 +297,7 @@ void Interpreter::visitICmpInst(ICmpInst &I) {
dbgs() << "Don't know how to handle this ICmp predicate!\n-->" << I;
llvm_unreachable(nullptr);
}
-
+
SetValue(&I, R, SF);
}
@@ -552,10 +552,10 @@ static GenericValue executeFCMP_ORD(GenericValue Src1, GenericValue Src2,
Src2.AggregateVal[_i].DoubleVal)));
}
} else if (Ty->isFloatTy())
- Dest.IntVal = APInt(1,(Src1.FloatVal == Src1.FloatVal &&
+ Dest.IntVal = APInt(1,(Src1.FloatVal == Src1.FloatVal &&
Src2.FloatVal == Src2.FloatVal));
else {
- Dest.IntVal = APInt(1,(Src1.DoubleVal == Src1.DoubleVal &&
+ Dest.IntVal = APInt(1,(Src1.DoubleVal == Src1.DoubleVal &&
Src2.DoubleVal == Src2.DoubleVal));
}
return Dest;
@@ -583,10 +583,10 @@ static GenericValue executeFCMP_UNO(GenericValue Src1, GenericValue Src2,
Src2.AggregateVal[_i].DoubleVal)));
}
} else if (Ty->isFloatTy())
- Dest.IntVal = APInt(1,(Src1.FloatVal != Src1.FloatVal ||
+ Dest.IntVal = APInt(1,(Src1.FloatVal != Src1.FloatVal ||
Src2.FloatVal != Src2.FloatVal));
else {
- Dest.IntVal = APInt(1,(Src1.DoubleVal != Src1.DoubleVal ||
+ Dest.IntVal = APInt(1,(Src1.DoubleVal != Src1.DoubleVal ||
Src2.DoubleVal != Src2.DoubleVal));
}
return Dest;
@@ -613,15 +613,15 @@ void Interpreter::visitFCmpInst(FCmpInst &I) {
GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
GenericValue R; // Result
-
+
switch (I.getPredicate()) {
default:
dbgs() << "Don't know how to handle this FCmp predicate!\n-->" << I;
llvm_unreachable(nullptr);
break;
- case FCmpInst::FCMP_FALSE: R = executeFCMP_BOOL(Src1, Src2, Ty, false);
+ case FCmpInst::FCMP_FALSE: R = executeFCMP_BOOL(Src1, Src2, Ty, false);
break;
- case FCmpInst::FCMP_TRUE: R = executeFCMP_BOOL(Src1, Src2, Ty, true);
+ case FCmpInst::FCMP_TRUE: R = executeFCMP_BOOL(Src1, Src2, Ty, true);
break;
case FCmpInst::FCMP_ORD: R = executeFCMP_ORD(Src1, Src2, Ty); break;
case FCmpInst::FCMP_UNO: R = executeFCMP_UNO(Src1, Src2, Ty); break;
@@ -638,11 +638,11 @@ void Interpreter::visitFCmpInst(FCmpInst &I) {
case FCmpInst::FCMP_UGE: R = executeFCMP_UGE(Src1, Src2, Ty); break;
case FCmpInst::FCMP_OGE: R = executeFCMP_OGE(Src1, Src2, Ty); break;
}
-
+
SetValue(&I, R, SF);
}
-static GenericValue executeCmpInst(unsigned predicate, GenericValue Src1,
+static GenericValue executeCmpInst(unsigned predicate, GenericValue Src1,
GenericValue Src2, Type *Ty) {
GenericValue Result;
switch (predicate) {
@@ -747,12 +747,12 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) {
case Instruction::FRem:
if (cast<VectorType>(Ty)->getElementType()->isFloatTy())
for (unsigned i = 0; i < R.AggregateVal.size(); ++i)
- R.AggregateVal[i].FloatVal =
+ R.AggregateVal[i].FloatVal =
fmod(Src1.AggregateVal[i].FloatVal, Src2.AggregateVal[i].FloatVal);
else {
if (cast<VectorType>(Ty)->getElementType()->isDoubleTy())
for (unsigned i = 0; i < R.AggregateVal.size(); ++i)
- R.AggregateVal[i].DoubleVal =
+ R.AggregateVal[i].DoubleVal =
fmod(Src1.AggregateVal[i].DoubleVal, Src2.AggregateVal[i].DoubleVal);
else {
dbgs() << "Unhandled type for Rem instruction: " << *Ty << "\n";
@@ -965,7 +965,7 @@ void Interpreter::visitAllocaInst(AllocaInst &I) {
Type *Ty = I.getType()->getElementType(); // Type to be allocated
// Get the number of elements being allocated by the array...
- unsigned NumElements =
+ unsigned NumElements =
getOperandValue(I.getOperand(0), SF).IntVal.getZExtValue();
unsigned TypeSize = (size_t)getDataLayout().getTypeAllocSize(Ty);
@@ -1011,7 +1011,7 @@ GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I,
GenericValue IdxGV = getOperandValue(I.getOperand(), SF);
int64_t Idx;
- unsigned BitWidth =
+ unsigned BitWidth =
cast<IntegerType>(I.getOperand()->getType())->getBitWidth();
if (BitWidth == 32)
Idx = (int64_t)(int32_t)IdxGV.IntVal.getZExtValue();
@@ -2037,13 +2037,13 @@ GenericValue Interpreter::getConstantExprValue (ConstantExpr *CE,
case Instruction::And: Dest.IntVal = Op0.IntVal & Op1.IntVal; break;
case Instruction::Or: Dest.IntVal = Op0.IntVal | Op1.IntVal; break;
case Instruction::Xor: Dest.IntVal = Op0.IntVal ^ Op1.IntVal; break;
- case Instruction::Shl:
+ case Instruction::Shl:
Dest.IntVal = Op0.IntVal.shl(Op1.IntVal.getZExtValue());
break;
- case Instruction::LShr:
+ case Instruction::LShr:
Dest.IntVal = Op0.IntVal.lshr(Op1.IntVal.getZExtValue());
break;
- case Instruction::AShr:
+ case Instruction::AShr:
Dest.IntVal = Op0.IntVal.ashr(Op1.IntVal.getZExtValue());
break;
default:
@@ -2100,7 +2100,7 @@ void Interpreter::callFunction(Function *F, ArrayRef<GenericValue> ArgVals) {
// Handle non-varargs arguments...
unsigned i = 0;
- for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
AI != E; ++AI, ++i)
SetValue(&*AI, ArgVals[i], StackFrame);
diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h
index 5c16448404bb..33542e7e43ad 100644
--- a/lib/ExecutionEngine/Interpreter/Interpreter.h
+++ b/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -132,8 +132,8 @@ public:
void visitLoadInst(LoadInst &I);
void visitStoreInst(StoreInst &I);
void visitGetElementPtrInst(GetElementPtrInst &I);
- void visitPHINode(PHINode &PN) {
- llvm_unreachable("PHI nodes already handled!");
+ void visitPHINode(PHINode &PN) {
+ llvm_unreachable("PHI nodes already handled!");
}
void visitTruncInst(TruncInst &I);
void visitZExtInst(ZExtInst &I);
@@ -224,7 +224,7 @@ private: // Helper functions
ExecutionContext &SF);
GenericValue executeBitCastInst(Value *SrcVal, Type *DstTy,
ExecutionContext &SF);
- GenericValue executeCastOperation(Instruction::CastOps opcode, Value *SrcVal,
+ GenericValue executeCastOperation(Instruction::CastOps opcode, Value *SrcVal,
Type *Ty, ExecutionContext &SF);
void popStackAndReturnValueToCaller(Type *RetTy, GenericValue Result);
diff --git a/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp b/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp
index e774af05ebdd..75d4c2b5134e 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp
@@ -119,10 +119,10 @@ void RTDyldMemoryManager::deregisterEHFramesInProcess(uint8_t *Addr,
void RTDyldMemoryManager::registerEHFramesInProcess(uint8_t *Addr,
size_t Size) {
- // On Linux __register_frame takes a single argument:
+ // On Linux __register_frame takes a single argument:
// a pointer to the start of the .eh_frame section.
- // How can it find the end? Because crtendS.o is linked
+ // How can it find the end? Because crtendS.o is linked
// in and it has an .eh_frame section with four zero chars.
__register_frame(Addr);
}
@@ -255,7 +255,7 @@ RTDyldMemoryManager::getSymbolAddressInProcess(const std::string &Name) {
return (uint64_t)&__morestack;
#endif
#endif // __linux__ && __GLIBC__
-
+
// See ARM_MATH_IMPORTS definition for explanation
#if defined(__BIONIC__) && defined(__arm__)
if (Name.compare(0, 8, "__aeabi_") == 0) {
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index cc6729d21320..f9a81c7bd1b0 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -1430,7 +1430,7 @@ RuntimeDyldELF::processRelocationRef(
} else {
processSimpleRelocation(SectionID, Offset, RelType, Value);
}
-
+
} else if (Arch == Triple::ppc64 || Arch == Triple::ppc64le) {
if (RelType == ELF::R_PPC64_REL24) {
// Determine ABI variant in use for this object.
diff --git a/lib/FuzzMutate/FuzzerCLI.cpp b/lib/FuzzMutate/FuzzerCLI.cpp
index 6f5a5c067a97..a70dad37dfcf 100644
--- a/lib/FuzzMutate/FuzzerCLI.cpp
+++ b/lib/FuzzMutate/FuzzerCLI.cpp
@@ -93,7 +93,7 @@ void llvm::handleExecNameEncodedOptimizerOpts(StringRef ExecName) {
Args.push_back("-passes=gvn");
} else if (Opt == "sccp") {
Args.push_back("-passes=sccp");
-
+
} else if (Opt == "loop_predication") {
Args.push_back("-passes=loop-predication");
} else if (Opt == "guard_widening") {
@@ -114,7 +114,7 @@ void llvm::handleExecNameEncodedOptimizerOpts(StringRef ExecName) {
Args.push_back("-passes=strength-reduce");
} else if (Opt == "irce") {
Args.push_back("-passes=irce");
-
+
} else if (Triple(Opt).getArch()) {
Args.push_back("-mtriple=" + Opt.str());
} else {
@@ -204,6 +204,6 @@ std::unique_ptr<Module> llvm::parseAndVerify(const uint8_t *Data, size_t Size,
auto M = parseModule(Data, Size, Context);
if (!M || verifyModule(*M, &errs()))
return nullptr;
-
+
return M;
}
diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp
index 9e5f55d49756..d87187481be0 100644
--- a/lib/IR/Attributes.cpp
+++ b/lib/IR/Attributes.cpp
@@ -1709,6 +1709,15 @@ adjustMinLegalVectorWidth(Function &Caller, const Function &Callee) {
}
}
+/// If the inlined function has the "null-pointer-is-valid=true" attribute,
+/// set this attribute in the caller after inlining.
+static void
+adjustNullPointerValidAttr(Function &Caller, const Function &Callee) {
+ if (Callee.nullPointerIsDefined() && !Caller.nullPointerIsDefined()) {
+ Caller.addFnAttr(Callee.getFnAttribute("null-pointer-is-valid"));
+ }
+}
+
#define GET_ATTR_COMPAT_FUNC
#include "AttributesCompatFunc.inc"
diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp
index ef62a23b5358..f098ad9725b6 100644
--- a/lib/IR/AutoUpgrade.cpp
+++ b/lib/IR/AutoUpgrade.cpp
@@ -94,7 +94,7 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
- Name.startswith("avx512.kunpck") || //added in 6.0
+ Name.startswith("avx512.kunpck") || //added in 6.0
Name.startswith("avx2.pabs.") || // Added in 6.0
Name.startswith("avx512.mask.pabs.") || // Added in 6.0
Name.startswith("avx512.broadcastm") || // Added in 6.0
diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp
index aba329b80508..72090f5bac3e 100644
--- a/lib/IR/Function.cpp
+++ b/lib/IR/Function.cpp
@@ -586,7 +586,7 @@ static std::string getMangledTypeStr(Type* Ty) {
if (FT->isVarArg())
Result += "vararg";
// Ensure nested function types are distinguishable.
- Result += "f";
+ Result += "f";
} else if (isa<VectorType>(Ty)) {
Result += "v" + utostr(Ty->getVectorNumElements()) +
getMangledTypeStr(Ty->getVectorElementType());
diff --git a/lib/IR/InlineAsm.cpp b/lib/IR/InlineAsm.cpp
index 8667d7aab583..4623f69bd9a3 100644
--- a/lib/IR/InlineAsm.cpp
+++ b/lib/IR/InlineAsm.cpp
@@ -57,7 +57,7 @@ void InlineAsm::destroyConstant() {
FunctionType *InlineAsm::getFunctionType() const {
return FTy;
}
-
+
/// Parse - Analyze the specified string (e.g. "==&{eax}") and fill in the
/// fields in this structure. If the constraint string is not understood,
/// return true, otherwise return false.
@@ -80,7 +80,7 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str,
isCommutative = false;
isIndirect = false;
currentAlternativeIndex = 0;
-
+
// Parse prefixes.
if (*I == '~') {
Type = isClobber;
@@ -100,7 +100,7 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str,
}
if (I == E) return true; // Just a prefix, like "==" or "~".
-
+
// Parse the modifiers.
bool DoneWithModifiers = false;
while (!DoneWithModifiers) {
@@ -124,13 +124,13 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str,
case '*': // Register preferencing.
return true; // Not supported.
}
-
+
if (!DoneWithModifiers) {
++I;
if (I == E) return true; // Just prefixes and modifiers!
}
}
-
+
// Parse the various constraints.
while (I != E) {
if (*I == '{') { // Physical register reference.
@@ -150,7 +150,7 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str,
if (N >= ConstraintsSoFar.size() || ConstraintsSoFar[N].Type != isOutput||
Type != isInput)
return true; // Invalid constraint number.
-
+
// If Operand N already has a matching input, reject this. An output
// can't be constrained to the same value as multiple inputs.
if (isMultipleAlternative) {
@@ -207,7 +207,7 @@ void InlineAsm::ConstraintInfo::selectAlternative(unsigned index) {
InlineAsm::ConstraintInfoVector
InlineAsm::ParseConstraints(StringRef Constraints) {
ConstraintInfoVector Result;
-
+
// Scan the constraints string.
for (StringRef::iterator I = Constraints.begin(),
E = Constraints.end(); I != E; ) {
@@ -223,7 +223,7 @@ InlineAsm::ParseConstraints(StringRef Constraints) {
}
Result.push_back(Info);
-
+
// ConstraintEnd may be either the next comma or the end of the string. In
// the former case, we skip the comma.
I = ConstraintEnd;
@@ -235,7 +235,7 @@ InlineAsm::ParseConstraints(StringRef Constraints) {
} // don't allow "xyz,"
}
}
-
+
return Result;
}
@@ -243,15 +243,15 @@ InlineAsm::ParseConstraints(StringRef Constraints) {
/// specified function type, and otherwise validate the constraint string.
bool InlineAsm::Verify(FunctionType *Ty, StringRef ConstStr) {
if (Ty->isVarArg()) return false;
-
+
ConstraintInfoVector Constraints = ParseConstraints(ConstStr);
-
+
// Error parsing constraints.
if (Constraints.empty() && !ConstStr.empty()) return false;
-
+
unsigned NumOutputs = 0, NumInputs = 0, NumClobbers = 0;
unsigned NumIndirect = 0;
-
+
for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
switch (Constraints[i].Type) {
case InlineAsm::isOutput:
@@ -272,7 +272,7 @@ bool InlineAsm::Verify(FunctionType *Ty, StringRef ConstStr) {
break;
}
}
-
+
switch (NumOutputs) {
case 0:
if (!Ty->getReturnType()->isVoidTy()) return false;
@@ -285,8 +285,8 @@ bool InlineAsm::Verify(FunctionType *Ty, StringRef ConstStr) {
if (!STy || STy->getNumElements() != NumOutputs)
return false;
break;
- }
-
+ }
+
if (Ty->getNumParams() != NumInputs) return false;
return true;
}
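For context, the records that Verify() counts come from the same public ParseConstraints() entry point, which can be exercised directly; a short sketch (constraint string chosen for illustration):

    #include "llvm/IR/InlineAsm.h"
    #include <cstdio>

    using namespace llvm;

    int main() {
      // "=r,r,~{memory}": one register output, one register input, one clobber.
      InlineAsm::ConstraintInfoVector CIV =
          InlineAsm::ParseConstraints("=r,r,~{memory}");
      for (const InlineAsm::ConstraintInfo &CI : CIV)
        std::printf("kind=%d code=%s\n", (int)CI.Type,
                    CI.Codes.empty() ? "<none>" : CI.Codes.front().c_str());
      return CIV.empty() ? 1 : 0; // an empty vector signals a parse error
    }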
diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp
index e0ad0d1ea1f1..32db918dab97 100644
--- a/lib/IR/Instructions.cpp
+++ b/lib/IR/Instructions.cpp
@@ -310,7 +310,7 @@ void CallInst::init(FunctionType *FTy, Value *Func, ArrayRef<Value *> Args,
"Calling a function with bad signature!");
for (unsigned i = 0; i != Args.size(); ++i)
- assert((i >= FTy->getNumParams() ||
+ assert((i >= FTy->getNumParams() ||
FTy->getParamType(i) == Args[i]->getType()) &&
"Calling a function with a bad signature!");
#endif
@@ -409,7 +409,7 @@ static Instruction *createMalloc(Instruction *InsertBefore,
assert(((!InsertBefore && InsertAtEnd) || (InsertBefore && !InsertAtEnd)) &&
"createMalloc needs either InsertBefore or InsertAtEnd");
- // malloc(type) becomes:
+ // malloc(type) becomes:
// bitcast (i8* malloc(typeSize)) to type*
// malloc(type, arraySize) becomes:
// bitcast (i8* malloc(typeSize*arraySize)) to type*
@@ -516,7 +516,7 @@ Instruction *CallInst::CreateMalloc(Instruction *InsertBefore,
/// responsibility of the caller.
Instruction *CallInst::CreateMalloc(BasicBlock *InsertAtEnd,
Type *IntPtrTy, Type *AllocTy,
- Value *AllocSize, Value *ArraySize,
+ Value *AllocSize, Value *ArraySize,
Function *MallocF, const Twine &Name) {
return createMalloc(nullptr, InsertAtEnd, IntPtrTy, AllocTy, AllocSize,
ArraySize, None, MallocF, Name);
@@ -612,7 +612,7 @@ void InvokeInst::init(FunctionType *FTy, Value *Fn, BasicBlock *IfNormal,
"Invoking a function with bad signature");
for (unsigned i = 0, e = Args.size(); i != e; i++)
- assert((i >= FTy->getNumParams() ||
+ assert((i >= FTy->getNumParams() ||
FTy->getParamType(i) == Args[i]->getType()) &&
"Invoking a function with a bad signature!");
#endif
@@ -912,7 +912,7 @@ FuncletPadInst::FuncletPadInst(Instruction::FuncletPadOps Op, Value *ParentPad,
// UnreachableInst Implementation
//===----------------------------------------------------------------------===//
-UnreachableInst::UnreachableInst(LLVMContext &Context,
+UnreachableInst::UnreachableInst(LLVMContext &Context,
Instruction *InsertBefore)
: TerminatorInst(Type::getVoidTy(Context), Instruction::Unreachable,
nullptr, 0, InsertBefore) {
@@ -1072,7 +1072,7 @@ bool AllocaInst::isArrayAllocation() const {
bool AllocaInst::isStaticAlloca() const {
// Must be constant size.
if (!isa<ConstantInt>(getArraySize())) return false;
-
+
// Must be in the entry block.
const BasicBlock *Parent = getParent();
return Parent == &Parent->getParent()->front() && !isUsedWithInAlloca();
@@ -1125,7 +1125,7 @@ LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile,
setName(Name);
}
-LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
unsigned Align, AtomicOrdering Order,
SyncScope::ID SSID,
BasicBlock *InsertAE)
@@ -1380,7 +1380,7 @@ AtomicRMWInst::AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val,
// FenceInst Implementation
//===----------------------------------------------------------------------===//
-FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering,
+FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering,
SyncScope::ID SSID,
Instruction *InsertBefore)
: Instruction(Type::getVoidTy(C), Fence, nullptr, 0, InsertBefore) {
@@ -1388,7 +1388,7 @@ FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering,
setSyncScopeID(SSID);
}
-FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering,
+FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering,
SyncScope::ID SSID,
BasicBlock *InsertAtEnd)
: Instruction(Type::getVoidTy(C), Fence, nullptr, 0, InsertAtEnd) {
@@ -1575,14 +1575,14 @@ InsertElementInst::InsertElementInst(Value *Vec, Value *Elt, Value *Index,
setName(Name);
}
-bool InsertElementInst::isValidOperands(const Value *Vec, const Value *Elt,
+bool InsertElementInst::isValidOperands(const Value *Vec, const Value *Elt,
const Value *Index) {
if (!Vec->getType()->isVectorTy())
return false; // First operand of insertelement must be vector type.
-
+
if (Elt->getType() != cast<VectorType>(Vec->getType())->getElementType())
return false;// Second operand of insertelement must be vector element type.
-
+
if (!Index->getType()->isIntegerTy())
return false; // Third operand of insertelement must be i32.
return true;
@@ -1632,7 +1632,7 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
// V1 and V2 must be vectors of the same type.
if (!V1->getType()->isVectorTy() || V1->getType() != V2->getType())
return false;
-
+
// Mask must be vector of i32.
auto *MaskTy = dyn_cast<VectorType>(Mask->getType());
if (!MaskTy || !MaskTy->getElementType()->isIntegerTy(32))
@@ -1654,7 +1654,7 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
}
return true;
}
-
+
if (const auto *CDS = dyn_cast<ConstantDataSequential>(Mask)) {
unsigned V1Size = cast<VectorType>(V1->getType())->getNumElements();
for (unsigned i = 0, e = MaskTy->getNumElements(); i != e; ++i)
@@ -1662,7 +1662,7 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
return false;
return true;
}
-
+
// The bitcode reader can create a place holder for a forward reference
// used as the shuffle mask. When this occurs, the shuffle mask will
// fall into this case and fail. To avoid this error, do this bit of
@@ -1687,12 +1687,12 @@ int ShuffleVectorInst::getMaskValue(const Constant *Mask, unsigned i) {
void ShuffleVectorInst::getShuffleMask(const Constant *Mask,
SmallVectorImpl<int> &Result) {
unsigned NumElts = Mask->getType()->getVectorNumElements();
-
+
if (auto *CDS = dyn_cast<ConstantDataSequential>(Mask)) {
for (unsigned i = 0; i != NumElts; ++i)
Result.push_back(CDS->getElementAsInteger(i));
return;
- }
+ }
for (unsigned i = 0; i != NumElts; ++i) {
Constant *C = Mask->getAggregateElement(i);
Result.push_back(isa<UndefValue>(C) ? -1 :
@@ -1806,7 +1806,7 @@ bool ShuffleVectorInst::isTransposeMask(ArrayRef<int> Mask) {
// InsertValueInst Class
//===----------------------------------------------------------------------===//
-void InsertValueInst::init(Value *Agg, Value *Val, ArrayRef<unsigned> Idxs,
+void InsertValueInst::init(Value *Agg, Value *Val, ArrayRef<unsigned> Idxs,
const Twine &Name) {
assert(getNumOperands() == 2 && "NumOperands not initialized?");
@@ -1903,7 +1903,7 @@ BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2,
AssertOK();
}
-BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2,
+BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2,
Type *Ty, const Twine &Name,
BasicBlock *InsertAtEnd)
: Instruction(Ty, iType,
@@ -1938,8 +1938,8 @@ void BinaryOperator::AssertOK() {
"Tried to create a floating-point operation on a "
"non-floating-point type!");
break;
- case UDiv:
- case SDiv:
+ case UDiv:
+ case SDiv:
assert(getType() == LHS->getType() &&
"Arithmetic operation should return same type as operands!");
assert(getType()->isIntOrIntVectorTy() &&
@@ -1951,8 +1951,8 @@ void BinaryOperator::AssertOK() {
assert(getType()->isFPOrFPVectorTy() &&
"Incorrect operand type (not floating point) for FDIV");
break;
- case URem:
- case SRem:
+ case URem:
+ case SRem:
assert(getType() == LHS->getType() &&
"Arithmetic operation should return same type as operands!");
assert(getType()->isIntOrIntVectorTy() &&
@@ -2185,7 +2185,7 @@ bool CastInst::isLosslessCast() const {
Type *DstTy = getType();
if (SrcTy == DstTy)
return true;
-
+
// Pointer to pointer is always lossless.
if (SrcTy->isPointerTy())
return DstTy->isPointerTy();
@@ -2194,10 +2194,10 @@ bool CastInst::isLosslessCast() const {
/// This function determines if the CastInst does not require any bits to be
/// changed in order to effect the cast. Essentially, it identifies cases where
-/// no code gen is necessary for the cast, hence the name no-op cast. For
+/// no code gen is necessary for the cast, hence the name no-op cast. For
/// example, the following are all no-op casts:
/// # bitcast i32* %x to i8*
-/// # bitcast <2 x i32> %x to <4 x i16>
+/// # bitcast <2 x i32> %x to <4 x i16>
+///   # ptrtoint i32* %x to i32     ; on 32-bit platforms only
/// Determine if the described cast is a no-op.
bool CastInst::isNoopCast(Instruction::CastOps Opcode,
@@ -2208,7 +2208,7 @@ bool CastInst::isNoopCast(Instruction::CastOps Opcode,
default: llvm_unreachable("Invalid CastOp");
case Instruction::Trunc:
case Instruction::ZExt:
- case Instruction::SExt:
+ case Instruction::SExt:
case Instruction::FPTrunc:
case Instruction::FPExt:
case Instruction::UIToFP:
@@ -2247,7 +2247,7 @@ unsigned CastInst::isEliminableCastPair(
Type *DstIntPtrTy) {
// Define the 144 possibilities for these two cast instructions. The values
// in this matrix determine what to do in a given situation and select the
- // case in the switch below. The rows correspond to firstOp, the columns
+ // case in the switch below. The rows correspond to firstOp, the columns
// correspond to secondOp. In looking at the table below, keep in mind
// the following cast properties:
//
@@ -2315,16 +2315,16 @@ unsigned CastInst::isEliminableCastPair(
int ElimCase = CastResults[firstOp-Instruction::CastOpsBegin]
[secondOp-Instruction::CastOpsBegin];
switch (ElimCase) {
- case 0:
+ case 0:
// Categorically disallowed.
return 0;
- case 1:
+ case 1:
// Allowed, use first cast's opcode.
return firstOp;
- case 2:
+ case 2:
// Allowed, use second cast's opcode.
return secondOp;
- case 3:
+ case 3:
// No-op cast in second op implies firstOp as long as the DestTy
// is integer and we are not converting between a vector and a
// non-vector type.
@@ -2337,7 +2337,7 @@ unsigned CastInst::isEliminableCastPair(
if (DstTy->isFloatingPointTy())
return firstOp;
return 0;
- case 5:
+ case 5:
// No-op cast in first op implies secondOp as long as the SrcTy
// is an integer.
if (SrcTy->isIntegerTy())
@@ -2449,7 +2449,7 @@ unsigned CastInst::isEliminableCastPair(
case 17:
// (sitofp (zext x)) -> (uitofp x)
return Instruction::UIToFP;
- case 99:
+ case 99:
// Cast combination can't happen (error in input). This is for all cases
// where the MidTy is not the same for the two cast instructions.
llvm_unreachable("Invalid Cast Combination");
@@ -2458,7 +2458,7 @@ unsigned CastInst::isEliminableCastPair(
}
}
-CastInst *CastInst::Create(Instruction::CastOps op, Value *S, Type *Ty,
+CastInst *CastInst::Create(Instruction::CastOps op, Value *S, Type *Ty,
const Twine &Name, Instruction *InsertBefore) {
assert(castIsValid(op, S, Ty) && "Invalid cast!");
// Construct and return the appropriate CastInst subclass
@@ -2502,7 +2502,7 @@ CastInst *CastInst::Create(Instruction::CastOps op, Value *S, Type *Ty,
}
}
-CastInst *CastInst::CreateZExtOrBitCast(Value *S, Type *Ty,
+CastInst *CastInst::CreateZExtOrBitCast(Value *S, Type *Ty,
const Twine &Name,
Instruction *InsertBefore) {
if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
@@ -2510,7 +2510,7 @@ CastInst *CastInst::CreateZExtOrBitCast(Value *S, Type *Ty,
return Create(Instruction::ZExt, S, Ty, Name, InsertBefore);
}
-CastInst *CastInst::CreateZExtOrBitCast(Value *S, Type *Ty,
+CastInst *CastInst::CreateZExtOrBitCast(Value *S, Type *Ty,
const Twine &Name,
BasicBlock *InsertAtEnd) {
if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
@@ -2518,7 +2518,7 @@ CastInst *CastInst::CreateZExtOrBitCast(Value *S, Type *Ty,
return Create(Instruction::ZExt, S, Ty, Name, InsertAtEnd);
}
-CastInst *CastInst::CreateSExtOrBitCast(Value *S, Type *Ty,
+CastInst *CastInst::CreateSExtOrBitCast(Value *S, Type *Ty,
const Twine &Name,
Instruction *InsertBefore) {
if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
@@ -2526,7 +2526,7 @@ CastInst *CastInst::CreateSExtOrBitCast(Value *S, Type *Ty,
return Create(Instruction::SExt, S, Ty, Name, InsertBefore);
}
-CastInst *CastInst::CreateSExtOrBitCast(Value *S, Type *Ty,
+CastInst *CastInst::CreateSExtOrBitCast(Value *S, Type *Ty,
const Twine &Name,
BasicBlock *InsertAtEnd) {
if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
@@ -2543,7 +2543,7 @@ CastInst *CastInst::CreateTruncOrBitCast(Value *S, Type *Ty,
}
CastInst *CastInst::CreateTruncOrBitCast(Value *S, Type *Ty,
- const Twine &Name,
+ const Twine &Name,
BasicBlock *InsertAtEnd) {
if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
@@ -2636,7 +2636,7 @@ CastInst *CastInst::CreateIntegerCast(Value *C, Type *Ty,
return Create(opcode, C, Ty, Name, InsertBefore);
}
-CastInst *CastInst::CreateIntegerCast(Value *C, Type *Ty,
+CastInst *CastInst::CreateIntegerCast(Value *C, Type *Ty,
bool isSigned, const Twine &Name,
BasicBlock *InsertAtEnd) {
assert(C->getType()->isIntOrIntVectorTy() && Ty->isIntOrIntVectorTy() &&
@@ -2650,8 +2650,8 @@ CastInst *CastInst::CreateIntegerCast(Value *C, Type *Ty,
return Create(opcode, C, Ty, Name, InsertAtEnd);
}
-CastInst *CastInst::CreateFPCast(Value *C, Type *Ty,
- const Twine &Name,
+CastInst *CastInst::CreateFPCast(Value *C, Type *Ty,
+ const Twine &Name,
Instruction *InsertBefore) {
assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
"Invalid cast");
@@ -2663,8 +2663,8 @@ CastInst *CastInst::CreateFPCast(Value *C, Type *Ty,
return Create(opcode, C, Ty, Name, InsertBefore);
}
-CastInst *CastInst::CreateFPCast(Value *C, Type *Ty,
- const Twine &Name,
+CastInst *CastInst::CreateFPCast(Value *C, Type *Ty,
+ const Twine &Name,
BasicBlock *InsertAtEnd) {
assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
"Invalid cast");
@@ -2707,7 +2707,7 @@ bool CastInst::isCastable(Type *SrcTy, Type *DestTy) {
return DestBits == SrcBits;
// Casting from something else
return SrcTy->isPointerTy();
- }
+ }
if (DestTy->isFloatingPointTy()) { // Casting to floating pt
if (SrcTy->isIntegerTy()) // Casting from integral
return true;
@@ -2724,7 +2724,7 @@ bool CastInst::isCastable(Type *SrcTy, Type *DestTy) {
if (SrcTy->isPointerTy()) // Casting from pointer
return true;
return SrcTy->isIntegerTy(); // Casting from integral
- }
+ }
if (DestTy->isX86_MMXTy()) {
if (SrcTy->isVectorTy())
return DestBits == SrcBits; // 64-bit vector to MMX
@@ -2834,10 +2834,10 @@ CastInst::getCastOpcode(
return BitCast; // Same size, No-op cast
}
} else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt
- if (DestIsSigned)
+ if (DestIsSigned)
return FPToSI; // FP -> sint
else
- return FPToUI; // FP -> uint
+ return FPToUI; // FP -> uint
} else if (SrcTy->isVectorTy()) {
assert(DestBits == SrcBits &&
"Casting vector to integer of different width");
@@ -2898,7 +2898,7 @@ CastInst::getCastOpcode(
/// could be broken out into the separate constructors but it is useful to have
/// it in one place and to eliminate the redundant code for getting the sizes
/// of the types involved.
-bool
+bool
CastInst::castIsValid(Instruction::CastOps op, Value *S, Type *DstTy) {
// Check for type sanity on the arguments
Type *SrcTy = S->getType();
@@ -2928,7 +2928,7 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, Type *DstTy) {
case Instruction::ZExt:
return SrcTy->isIntOrIntVectorTy() && DstTy->isIntOrIntVectorTy() &&
SrcLength == DstLength && SrcBitSize < DstBitSize;
- case Instruction::SExt:
+ case Instruction::SExt:
return SrcTy->isIntOrIntVectorTy() && DstTy->isIntOrIntVectorTy() &&
SrcLength == DstLength && SrcBitSize < DstBitSize;
case Instruction::FPTrunc:
@@ -3019,138 +3019,138 @@ TruncInst::TruncInst(
TruncInst::TruncInst(
Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
-) : CastInst(Ty, Trunc, S, Name, InsertAtEnd) {
+) : CastInst(Ty, Trunc, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal Trunc");
}
ZExtInst::ZExtInst(
Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
-) : CastInst(Ty, ZExt, S, Name, InsertBefore) {
+) : CastInst(Ty, ZExt, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal ZExt");
}
ZExtInst::ZExtInst(
Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
-) : CastInst(Ty, ZExt, S, Name, InsertAtEnd) {
+) : CastInst(Ty, ZExt, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal ZExt");
}
SExtInst::SExtInst(
Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
-) : CastInst(Ty, SExt, S, Name, InsertBefore) {
+) : CastInst(Ty, SExt, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal SExt");
}
SExtInst::SExtInst(
Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
-) : CastInst(Ty, SExt, S, Name, InsertAtEnd) {
+) : CastInst(Ty, SExt, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal SExt");
}
FPTruncInst::FPTruncInst(
Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
-) : CastInst(Ty, FPTrunc, S, Name, InsertBefore) {
+) : CastInst(Ty, FPTrunc, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPTrunc");
}
FPTruncInst::FPTruncInst(
Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
-) : CastInst(Ty, FPTrunc, S, Name, InsertAtEnd) {
+) : CastInst(Ty, FPTrunc, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPTrunc");
}
FPExtInst::FPExtInst(
Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
-) : CastInst(Ty, FPExt, S, Name, InsertBefore) {
+) : CastInst(Ty, FPExt, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPExt");
}
FPExtInst::FPExtInst(
Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
-) : CastInst(Ty, FPExt, S, Name, InsertAtEnd) {
+) : CastInst(Ty, FPExt, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPExt");
}
UIToFPInst::UIToFPInst(
Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
-) : CastInst(Ty, UIToFP, S, Name, InsertBefore) {
+) : CastInst(Ty, UIToFP, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal UIToFP");
}
UIToFPInst::UIToFPInst(
Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
-) : CastInst(Ty, UIToFP, S, Name, InsertAtEnd) {
+) : CastInst(Ty, UIToFP, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal UIToFP");
}
SIToFPInst::SIToFPInst(
Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
-) : CastInst(Ty, SIToFP, S, Name, InsertBefore) {
+) : CastInst(Ty, SIToFP, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal SIToFP");
}
SIToFPInst::SIToFPInst(
Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
-) : CastInst(Ty, SIToFP, S, Name, InsertAtEnd) {
+) : CastInst(Ty, SIToFP, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal SIToFP");
}
FPToUIInst::FPToUIInst(
Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
-) : CastInst(Ty, FPToUI, S, Name, InsertBefore) {
+) : CastInst(Ty, FPToUI, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToUI");
}
FPToUIInst::FPToUIInst(
Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
-) : CastInst(Ty, FPToUI, S, Name, InsertAtEnd) {
+) : CastInst(Ty, FPToUI, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToUI");
}
FPToSIInst::FPToSIInst(
Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
-) : CastInst(Ty, FPToSI, S, Name, InsertBefore) {
+) : CastInst(Ty, FPToSI, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToSI");
}
FPToSIInst::FPToSIInst(
Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
-) : CastInst(Ty, FPToSI, S, Name, InsertAtEnd) {
+) : CastInst(Ty, FPToSI, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToSI");
}
PtrToIntInst::PtrToIntInst(
Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
-) : CastInst(Ty, PtrToInt, S, Name, InsertBefore) {
+) : CastInst(Ty, PtrToInt, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal PtrToInt");
}
PtrToIntInst::PtrToIntInst(
Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
-) : CastInst(Ty, PtrToInt, S, Name, InsertAtEnd) {
+) : CastInst(Ty, PtrToInt, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal PtrToInt");
}
IntToPtrInst::IntToPtrInst(
Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
-) : CastInst(Ty, IntToPtr, S, Name, InsertBefore) {
+) : CastInst(Ty, IntToPtr, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal IntToPtr");
}
IntToPtrInst::IntToPtrInst(
Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
-) : CastInst(Ty, IntToPtr, S, Name, InsertAtEnd) {
+) : CastInst(Ty, IntToPtr, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal IntToPtr");
}
BitCastInst::BitCastInst(
Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
-) : CastInst(Ty, BitCast, S, Name, InsertBefore) {
+) : CastInst(Ty, BitCast, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal BitCast");
}
BitCastInst::BitCastInst(
Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
-) : CastInst(Ty, BitCast, S, Name, InsertAtEnd) {
+) : CastInst(Ty, BitCast, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal BitCast");
}
@@ -3205,7 +3205,7 @@ CmpInst::Create(OtherOps Op, Predicate predicate, Value *S1, Value *S2,
return new ICmpInst(CmpInst::Predicate(predicate),
S1, S2, Name);
}
-
+
if (InsertBefore)
return new FCmpInst(InsertBefore, CmpInst::Predicate(predicate),
S1, S2, Name);
@@ -3312,8 +3312,8 @@ StringRef CmpInst::getPredicateName(Predicate Pred) {
ICmpInst::Predicate ICmpInst::getSignedPredicate(Predicate pred) {
switch (pred) {
default: llvm_unreachable("Unknown icmp predicate!");
- case ICMP_EQ: case ICMP_NE:
- case ICMP_SGT: case ICMP_SLT: case ICMP_SGE: case ICMP_SLE:
+ case ICMP_EQ: case ICMP_NE:
+ case ICMP_SGT: case ICMP_SLT: case ICMP_SGE: case ICMP_SLE:
return pred;
case ICMP_UGT: return ICMP_SGT;
case ICMP_ULT: return ICMP_SLT;
@@ -3325,8 +3325,8 @@ ICmpInst::Predicate ICmpInst::getSignedPredicate(Predicate pred) {
ICmpInst::Predicate ICmpInst::getUnsignedPredicate(Predicate pred) {
switch (pred) {
default: llvm_unreachable("Unknown icmp predicate!");
- case ICMP_EQ: case ICMP_NE:
- case ICMP_UGT: case ICMP_ULT: case ICMP_UGE: case ICMP_ULE:
+ case ICMP_EQ: case ICMP_NE:
+ case ICMP_UGT: case ICMP_ULT: case ICMP_UGE: case ICMP_ULE:
return pred;
case ICMP_SGT: return ICMP_UGT;
case ICMP_SLT: return ICMP_ULT;
@@ -3371,7 +3371,7 @@ CmpInst::Predicate CmpInst::getSwappedPredicate(Predicate pred) {
case ICMP_ULT: return ICMP_UGT;
case ICMP_UGE: return ICMP_ULE;
case ICMP_ULE: return ICMP_UGE;
-
+
case FCMP_FALSE: case FCMP_TRUE:
case FCMP_OEQ: case FCMP_ONE:
case FCMP_UEQ: case FCMP_UNE:
@@ -3422,7 +3422,7 @@ CmpInst::Predicate CmpInst::getSignedPredicate(Predicate pred) {
bool CmpInst::isUnsigned(Predicate predicate) {
switch (predicate) {
default: return false;
- case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE: return true;
}
}
@@ -3430,7 +3430,7 @@ bool CmpInst::isUnsigned(Predicate predicate) {
bool CmpInst::isSigned(Predicate predicate) {
switch (predicate) {
default: return false;
- case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE: return true;
}
}
@@ -3438,17 +3438,17 @@ bool CmpInst::isSigned(Predicate predicate) {
bool CmpInst::isOrdered(Predicate predicate) {
switch (predicate) {
default: return false;
- case FCmpInst::FCMP_OEQ: case FCmpInst::FCMP_ONE: case FCmpInst::FCMP_OGT:
- case FCmpInst::FCMP_OLT: case FCmpInst::FCMP_OGE: case FCmpInst::FCMP_OLE:
+ case FCmpInst::FCMP_OEQ: case FCmpInst::FCMP_ONE: case FCmpInst::FCMP_OGT:
+ case FCmpInst::FCMP_OLT: case FCmpInst::FCMP_OGE: case FCmpInst::FCMP_OLE:
case FCmpInst::FCMP_ORD: return true;
}
}
-
+
bool CmpInst::isUnordered(Predicate predicate) {
switch (predicate) {
default: return false;
- case FCmpInst::FCMP_UEQ: case FCmpInst::FCMP_UNE: case FCmpInst::FCMP_UGT:
- case FCmpInst::FCMP_ULT: case FCmpInst::FCMP_UGE: case FCmpInst::FCMP_ULE:
+ case FCmpInst::FCMP_UEQ: case FCmpInst::FCMP_UNE: case FCmpInst::FCMP_UGT:
+ case FCmpInst::FCMP_ULT: case FCmpInst::FCMP_UGE: case FCmpInst::FCMP_ULE:
case FCmpInst::FCMP_UNO: return true;
}
}
@@ -3619,7 +3619,7 @@ void IndirectBrInst::init(Value *Address, unsigned NumDests) {
void IndirectBrInst::growOperands() {
unsigned e = getNumOperands();
unsigned NumOps = e*2;
-
+
ReservedSpace = NumOps;
growHungoffUses(ReservedSpace);
}
@@ -3665,13 +3665,13 @@ void IndirectBrInst::addDestination(BasicBlock *DestBB) {
/// indirectbr instruction.
void IndirectBrInst::removeDestination(unsigned idx) {
assert(idx < getNumOperands()-1 && "Successor index out of range!");
-
+
unsigned NumOps = getNumOperands();
Use *OL = getOperandList();
// Replace this value with the last one.
OL[idx+1] = OL[NumOps-1];
-
+
// Nuke the last value.
OL[NumOps-1].set(nullptr);
setNumHungOffUseOperands(NumOps-1);
@@ -3725,7 +3725,7 @@ LoadInst *LoadInst::cloneImpl() const {
StoreInst *StoreInst::cloneImpl() const {
return new StoreInst(getOperand(0), getOperand(1), isVolatile(),
getAlignment(), getOrdering(), getSyncScopeID());
-
+
}
AtomicCmpXchgInst *AtomicCmpXchgInst::cloneImpl() const {
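The predicate helpers reflowed above are pure lookup tables, so their contracts are easy to check in isolation; a small sketch assuming the LLVM headers:

    #include "llvm/IR/Instructions.h"
    #include <cassert>

    using namespace llvm;

    int main() {
      // Swapping reverses operand order: "a <u b" becomes "b >u a".
      assert(CmpInst::getSwappedPredicate(CmpInst::ICMP_ULT) ==
             CmpInst::ICMP_UGT);
      // The signedness converters keep the relation and flip the domain.
      assert(ICmpInst::getSignedPredicate(CmpInst::ICMP_ULT) ==
             CmpInst::ICMP_SLT);
      assert(CmpInst::isUnsigned(CmpInst::ICMP_ULT));
      assert(CmpInst::isSigned(CmpInst::ICMP_SLT));
      return 0;
    }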
diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h
index d5046d644187..3b2e1e81b1c1 100644
--- a/lib/IR/LLVMContextImpl.h
+++ b/lib/IR/LLVMContextImpl.h
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file declares LLVMContextImpl, the opaque implementation
+// This file declares LLVMContextImpl, the opaque implementation
// of LLVMContext.
//
//===----------------------------------------------------------------------===//
@@ -1217,7 +1217,7 @@ public:
/// OwnedModules - The set of modules instantiated in this context, and which
/// will be automatically deleted if this context is deleted.
SmallPtrSet<Module*, 4> OwnedModules;
-
+
LLVMContext::InlineAsmDiagHandlerTy InlineAsmDiagHandler = nullptr;
void *InlineAsmDiagContext = nullptr;
@@ -1265,10 +1265,10 @@ public:
using ArrayConstantsTy = ConstantUniqueMap<ConstantArray>;
ArrayConstantsTy ArrayConstants;
-
+
using StructConstantsTy = ConstantUniqueMap<ConstantStruct>;
StructConstantsTy StructConstants;
-
+
using VectorConstantsTy = ConstantUniqueMap<ConstantVector>;
VectorConstantsTy VectorConstants;
@@ -1293,11 +1293,11 @@ public:
Type VoidTy, LabelTy, HalfTy, FloatTy, DoubleTy, MetadataTy, TokenTy;
Type X86_FP80Ty, FP128Ty, PPC_FP128Ty, X86_MMXTy;
IntegerType Int1Ty, Int8Ty, Int16Ty, Int32Ty, Int64Ty, Int128Ty;
-
+
/// TypeAllocator - All dynamically allocated types are allocated from this.
/// They live forever until the context is torn down.
BumpPtrAllocator TypeAllocator;
-
+
DenseMap<unsigned, IntegerType*> IntegerTypes;
using FunctionTypeSet = DenseSet<FunctionType *, FunctionTypeKeyInfo>;
@@ -1306,7 +1306,7 @@ public:
StructTypeSet AnonStructTypes;
StringMap<StructType*> NamedStructTypes;
unsigned NamedStructTypesUniqueID = 0;
-
+
DenseMap<std::pair<Type *, uint64_t>, ArrayType*> ArrayTypes;
DenseMap<std::pair<Type *, unsigned>, VectorType*> VectorTypes;
DenseMap<Type*, PointerType*> PointerTypes; // Pointers in AddrSpace = 0
@@ -1317,7 +1317,7 @@ public:
/// whether or not a value has an entry in this map.
using ValueHandlesTy = DenseMap<Value *, ValueHandleBase *>;
ValueHandlesTy ValueHandles;
-
+
/// CustomMDKindNames - Map to hold the metadata string to ID mapping.
StringMap<unsigned> CustomMDKindNames;
diff --git a/lib/IR/SymbolTableListTraitsImpl.h b/lib/IR/SymbolTableListTraitsImpl.h
index 6ddab6b4c69d..d4ad1eba33c6 100644
--- a/lib/IR/SymbolTableListTraitsImpl.h
+++ b/lib/IR/SymbolTableListTraitsImpl.h
@@ -33,17 +33,17 @@ void SymbolTableListTraits<ValueSubClass>::setSymTabObject(TPtr *Dest,
// Do it.
*Dest = Src;
-
+
// Get the new SymTab object.
ValueSymbolTable *NewST = getSymTab(getListOwner());
-
+
// If there is nothing to do, quick exit.
if (OldST == NewST) return;
-
+
// Move all the elements from the old symtab to the new one.
ListTy &ItemList = getList(getListOwner());
if (ItemList.empty()) return;
-
+
if (OldST) {
// Remove all entries from the previous symtab.
for (auto I = ItemList.begin(); I != ItemList.end(); ++I)
@@ -57,7 +57,7 @@ void SymbolTableListTraits<ValueSubClass>::setSymTabObject(TPtr *Dest,
if (I->hasName())
NewST->reinsertValue(&*I);
}
-
+
}
template <typename ValueSubClass>
diff --git a/lib/IR/ValueSymbolTable.cpp b/lib/IR/ValueSymbolTable.cpp
index 0a7f2803cd4c..f4bea5604043 100644
--- a/lib/IR/ValueSymbolTable.cpp
+++ b/lib/IR/ValueSymbolTable.cpp
@@ -79,7 +79,7 @@ void ValueSymbolTable::reinsertValue(Value* V) {
// *V << "\n");
return;
}
-
+
// Otherwise, there is a naming conflict. Rename this value.
SmallString<256> UniqueName(V->getName().begin(), V->getName().end());
@@ -107,7 +107,7 @@ ValueName *ValueSymbolTable::createValueName(StringRef Name, Value *V) {
// << *V << "\n");
return &*IterBool.first;
}
-
+
// Otherwise, there is a naming conflict. Rename this value.
SmallString<256> UniqueName(Name.begin(), Name.end());
return makeUniqueName(V, UniqueName);
diff --git a/lib/LTO/ThinLTOCodeGenerator.cpp b/lib/LTO/ThinLTOCodeGenerator.cpp
index 90d0f9bdb885..642e538ecf92 100644
--- a/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -422,7 +422,7 @@ public:
int TempFD;
llvm::sys::path::remove_filename(CachePath);
sys::path::append(TempFilename, CachePath, "Thin-%%%%%%.tmp.o");
- std::error_code EC =
+ std::error_code EC =
sys::fs::createUniqueFile(TempFilename, TempFD, TempFilename);
if (EC) {
errs() << "Error: " << EC.message() << "\n";
@@ -432,7 +432,7 @@ public:
raw_fd_ostream OS(TempFD, /* ShouldClose */ true);
OS << OutputBuffer.getBuffer();
}
- // Rename temp file to final destination; rename is atomic
+ // Rename temp file to final destination; rename is atomic
EC = sys::fs::rename(TempFilename, EntryPath);
if (EC)
sys::fs::remove(TempFilename);
@@ -1048,10 +1048,10 @@ void ThinLTOCodeGenerator::run() {
if (SavedObjectsDirectoryPath.empty()) {
      // We need to generate a memory buffer for the linker.
if (!CacheEntryPath.empty()) {
- // When cache is enabled, reload from the cache if possible.
+ // When cache is enabled, reload from the cache if possible.
// Releasing the buffer from the heap and reloading it from the
- // cache file with mmap helps us to lower memory pressure.
- // The freed memory can be used for the next input file.
+ // cache file with mmap helps us to lower memory pressure.
+ // The freed memory can be used for the next input file.
// The final binary link will read from the VFS cache (hopefully!)
// or from disk (if the memory pressure was too high).
auto ReloadedBufferOrErr = CacheEntry.tryLoadingBuffer();
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index 92f615180561..ae02f50bf8bd 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -337,7 +337,7 @@ void MCAsmStreamer::AddComment(const Twine &T, bool EOL) {
if (!IsVerboseAsm) return;
T.toVector(CommentToEmit);
-
+
if (EOL)
CommentToEmit.push_back('\n'); // Place comment in a new line.
}
@@ -655,7 +655,7 @@ void MCAsmStreamer::EmitSyntaxDirective() {
EmitEOL();
}
// FIXME: Currently emit unprefix'ed registers.
- // The intel_syntax directive has one optional argument
+ // The intel_syntax directive has one optional argument
  // which may have a value of prefix or noprefix.
}
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 1470e026d985..1e23b6d816e8 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -550,7 +550,7 @@ static void writeFragment(raw_ostream &OS, const MCAssembler &Asm,
break;
}
- case MCFragment::FT_Data:
+ case MCFragment::FT_Data:
++stats::EmittedDataFragments;
OS << cast<MCDataFragment>(F).getContents();
break;
@@ -822,6 +822,9 @@ void MCAssembler::layout(MCAsmLayout &Layout) {
} else if (auto *FragWithFixups = dyn_cast<MCCVDefRangeFragment>(&Frag)) {
Fixups = FragWithFixups->getFixups();
Contents = FragWithFixups->getContents();
+ } else if (auto *FragWithFixups = dyn_cast<MCDwarfLineAddrFragment>(&Frag)) {
+ Fixups = FragWithFixups->getFixups();
+ Contents = FragWithFixups->getContents();
} else
llvm_unreachable("Unknown fragment with fixups!");
for (const MCFixup &Fixup : Fixups) {
@@ -951,16 +954,43 @@ bool MCAssembler::relaxDwarfLineAddr(MCAsmLayout &Layout,
MCContext &Context = Layout.getAssembler().getContext();
uint64_t OldSize = DF.getContents().size();
int64_t AddrDelta;
- bool Abs = DF.getAddrDelta().evaluateKnownAbsolute(AddrDelta, Layout);
- assert(Abs && "We created a line delta with an invalid expression");
- (void) Abs;
+ bool Abs;
+ if (getBackend().requiresDiffExpressionRelocations())
+ Abs = DF.getAddrDelta().evaluateAsAbsolute(AddrDelta, Layout);
+ else {
+ Abs = DF.getAddrDelta().evaluateKnownAbsolute(AddrDelta, Layout);
+ assert(Abs && "We created a line delta with an invalid expression");
+ }
int64_t LineDelta;
LineDelta = DF.getLineDelta();
- SmallString<8> &Data = DF.getContents();
+ SmallVectorImpl<char> &Data = DF.getContents();
Data.clear();
raw_svector_ostream OSE(Data);
- MCDwarfLineAddr::Encode(Context, getDWARFLinetableParams(), LineDelta,
- AddrDelta, OSE);
+ DF.getFixups().clear();
+
+ if (Abs) {
+ MCDwarfLineAddr::Encode(Context, getDWARFLinetableParams(), LineDelta,
+ AddrDelta, OSE);
+ } else {
+ uint32_t Offset;
+ uint32_t Size;
+ bool SetDelta = MCDwarfLineAddr::FixedEncode(Context,
+ getDWARFLinetableParams(),
+ LineDelta, AddrDelta,
+ OSE, &Offset, &Size);
+ // Add Fixups for address delta or new address.
+ const MCExpr *FixupExpr;
+ if (SetDelta) {
+ FixupExpr = &DF.getAddrDelta();
+ } else {
+ const MCBinaryExpr *ABE = cast<MCBinaryExpr>(&DF.getAddrDelta());
+ FixupExpr = ABE->getLHS();
+ }
+ DF.getFixups().push_back(
+ MCFixup::create(Offset, FixupExpr,
+ MCFixup::getKindForSize(Size, false /*isPCRel*/)));
+ }
+
return OldSize != Data.size();
}
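To unpack the control flow added to relaxDwarfLineAddr: when the backend requires relocations for label differences, the address delta may not fold to a constant at assembly time, so the fragment is re-encoded with a fixed-size slot plus a fixup instead of the compact variable-length form. A condensed toy model of that decision, with the LLVM details elided:

    #include <cstdint>
    #include <optional>
    #include <vector>

    // Toy model of the relaxation choice above: if the address delta is a
    // known constant, emit the compact encoding; otherwise reserve a
    // fixed-width field and record a fixup for the linker to fill in.
    struct Fixup { uint32_t Offset, Size; };

    void relaxLineDelta(std::optional<int64_t> KnownAddrDelta,
                        std::vector<uint8_t> &Out, std::vector<Fixup> &Fixups) {
      Out.clear();
      Fixups.clear();
      if (KnownAddrDelta) {
        // Compact path: delta is absolute, encode it directly (stand-in byte).
        Out.push_back(uint8_t(*KnownAddrDelta & 0xff));
      } else {
        // Relocatable path: fixed 2-byte slot, patched later via the fixup.
        Fixups.push_back({uint32_t(Out.size()), 2});
        Out.push_back(0);
        Out.push_back(0);
      }
    }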
diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp
index 30e0bb562644..ad0a39991c53 100644
--- a/lib/MC/MCDisassembler/Disassembler.cpp
+++ b/lib/MC/MCDisassembler/Disassembler.cpp
@@ -38,7 +38,7 @@ using namespace llvm;
// LLVMCreateDisasm() creates a disassembler for the TripleName. Symbolic
// disassembly is supported by passing a block of information in the DisInfo
// parameter and specifying the TagType and callback functions as described in
-// the header llvm-c/Disassembler.h . The pointer to the block and the
+// the header llvm-c/Disassembler.h . The pointer to the block and the
// functions can all be passed as NULL. If successful, this returns a
// disassembler context. If not, it returns NULL.
//
diff --git a/lib/MC/MCDisassembler/Disassembler.h b/lib/MC/MCDisassembler/Disassembler.h
index 25d17dafb576..f638fdc781d7 100644
--- a/lib/MC/MCDisassembler/Disassembler.h
+++ b/lib/MC/MCDisassembler/Disassembler.h
@@ -4,10 +4,10 @@
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
-// This file defines the interface for the Disassembly library's disassembler
+// This file defines the interface for the Disassembly library's disassembler
// context. The disassembler is responsible for producing strings for
// individual instructions according to a given architecture and disassembly
// syntax.
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index 6131fcd658b2..0461c2564ccf 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -492,7 +492,7 @@ MCDwarfLineTableHeader::Emit(MCStreamer *MCOS, MCDwarfLineTableParams Params,
// Parameters of the state machine are next.
MCOS->EmitIntValue(context.getAsmInfo()->getMinInstAlignment(), 1);
- // maximum_operations_per_instruction
+ // maximum_operations_per_instruction
// For non-VLIW architectures this field is always 1.
// FIXME: VLIW architectures need to update this field accordingly.
if (LineTableVersion >= 4)
@@ -731,6 +731,57 @@ void MCDwarfLineAddr::Encode(MCContext &Context, MCDwarfLineTableParams Params,
}
}
+bool MCDwarfLineAddr::FixedEncode(MCContext &Context,
+ MCDwarfLineTableParams Params,
+ int64_t LineDelta, uint64_t AddrDelta,
+ raw_ostream &OS,
+ uint32_t *Offset, uint32_t *Size) {
+ if (LineDelta != INT64_MAX) {
+ OS << char(dwarf::DW_LNS_advance_line);
+ encodeSLEB128(LineDelta, OS);
+ }
+
+ // Decide whether to advance the address with a delta or to set an
+ // absolute address.
+ bool SetDelta;
+ // According to the DWARF spec, the DW_LNS_fixed_advance_pc opcode takes a
+ // single uhalf (unencoded) operand, so the maximum value of AddrDelta is
+ // 65535. We use a lower, conservative bound to leave room for relaxation.
+ if (AddrDelta > 60000) {
+ const MCAsmInfo *asmInfo = Context.getAsmInfo();
+ unsigned AddrSize = asmInfo->getCodePointerSize();
+
+ OS << char(dwarf::DW_LNS_extended_op);
+ encodeULEB128(1 + AddrSize, OS);
+ OS << char(dwarf::DW_LNE_set_address);
+ // Generate fixup for the address.
+ *Offset = OS.tell();
+ *Size = AddrSize;
+ SetDelta = false;
+ std::vector<uint8_t> FillData;
+ FillData.insert(FillData.begin(), AddrSize, 0);
+ OS.write(reinterpret_cast<char *>(FillData.data()), AddrSize);
+ } else {
+ OS << char(dwarf::DW_LNS_fixed_advance_pc);
+ // Generate fixup for 2-bytes address delta.
+ *Offset = OS.tell();
+ *Size = 2;
+ SetDelta = true;
+ OS << char(0);
+ OS << char(0);
+ }
+
+ if (LineDelta == INT64_MAX) {
+ OS << char(dwarf::DW_LNS_extended_op);
+ OS << char(1);
+ OS << char(dwarf::DW_LNE_end_sequence);
+ } else {
+ OS << char(dwarf::DW_LNS_copy);
+ }
+
+ return SetDelta;
+}
+
// Utility function to write a tuple for .debug_abbrev.
static void EmitAbbrev(MCStreamer *MCOS, uint64_t Name, uint64_t Form) {
MCOS->EmitULEB128IntValue(Name);
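It may help to see the two byte sequences FixedEncode can produce. For a small delta it emits DW_LNS_fixed_advance_pc with a 2-byte slot; past the conservative 60000 bound it falls back to DW_LNE_set_address with an address-sized slot. A worked layout, assuming 4-byte code pointers and omitting the optional DW_LNS_advance_line prefix (opcode values per DWARF v4):

    #include <cstdint>
    #include <cstdio>

    // DWARF v4 opcode values used by FixedEncode above.
    enum : uint8_t {
      DW_LNS_fixed_advance_pc = 0x09,
      DW_LNS_extended_op      = 0x00,
      DW_LNE_set_address      = 0x02,
      DW_LNS_copy             = 0x01,
    };

    int main() {
      // Small delta: 09 <lo> <hi> 01 (fixed_advance_pc + uhalf slot + copy).
      uint8_t Small[] = {DW_LNS_fixed_advance_pc, 0x00, 0x00, DW_LNS_copy};
      // Large delta, 4-byte pointers: 00 05 02 <4 zero bytes> 01
      // (extended_op, length = 1 + AddrSize, set_address, address slot, copy).
      uint8_t Large[] = {DW_LNS_extended_op, 0x05, DW_LNE_set_address,
                         0x00, 0x00, 0x00, 0x00, DW_LNS_copy};
      printf("small: %zu bytes, large: %zu bytes\n",
             sizeof(Small), sizeof(Large));
    }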
diff --git a/lib/MC/MCInstrAnalysis.cpp b/lib/MC/MCInstrAnalysis.cpp
index 8223f3a5c66f..4d7c89116893 100644
--- a/lib/MC/MCInstrAnalysis.cpp
+++ b/lib/MC/MCInstrAnalysis.cpp
@@ -24,6 +24,11 @@ bool MCInstrAnalysis::clearsSuperRegisters(const MCRegisterInfo &MRI,
return false;
}
+bool MCInstrAnalysis::isDependencyBreaking(const MCSubtargetInfo &STI,
+ const MCInst &Inst) const {
+ return false;
+}
+
bool MCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr,
uint64_t Size, uint64_t &Target) const {
if (Inst.getNumOperands() == 0 ||
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
index 29d34a8c1e3e..b88d2d801822 100644
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -950,8 +950,19 @@ void MCObjectFileInfo::InitMCObjectFileInfo(const Triple &TheTriple, bool PIC,
}
MCSection *MCObjectFileInfo::getDwarfTypesSection(uint64_t Hash) const {
- return Ctx->getELFSection(".debug_types", ELF::SHT_PROGBITS, ELF::SHF_GROUP,
- 0, utostr(Hash));
+ switch (TT.getObjectFormat()) {
+ case Triple::ELF:
+ return Ctx->getELFSection(".debug_types", ELF::SHT_PROGBITS, ELF::SHF_GROUP,
+ 0, utostr(Hash));
+ case Triple::MachO:
+ case Triple::COFF:
+ case Triple::Wasm:
+ case Triple::UnknownObjectFormat:
+ report_fatal_error("Cannot get DWARF types section for this object file "
+ "format: not implemented.");
+ break;
+ }
+ llvm_unreachable("Unknown ObjectFormatType");
}
MCSection *
diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp
index 67e3512cc5bd..7bf14968c973 100644
--- a/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/lib/MC/MCParser/ELFAsmParser.cpp
@@ -254,7 +254,7 @@ bool ELFAsmParser::ParseSectionName(StringRef &SectionName) {
if (getLexer().is(AsmToken::Comma) ||
getLexer().is(AsmToken::EndOfStatement))
break;
-
+
unsigned CurSize;
if (getLexer().is(AsmToken::String)) {
CurSize = getTok().getIdentifier().size() + 2;
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index 8dd4b61be68f..21a9c3604cfc 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -514,7 +514,7 @@ void MCStreamer::EmitCFIEscape(StringRef Values) {
void MCStreamer::EmitCFIGnuArgsSize(int64_t Size) {
MCSymbol *Label = EmitCFILabel();
- MCCFIInstruction Instruction =
+ MCCFIInstruction Instruction =
MCCFIInstruction::createGnuArgsSize(Label, Size);
MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
if (!CurFrame)
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
index a464af1d42a7..2664528909af 100644
--- a/lib/MC/MachObjectWriter.cpp
+++ b/lib/MC/MachObjectWriter.cpp
@@ -952,7 +952,7 @@ uint64_t MachObjectWriter::writeObject(MCAssembler &Asm,
const DataRegionData *Data = &(*it);
uint64_t Start = getSymbolAddress(*Data->Start, Layout);
uint64_t End;
- if (Data->End)
+ if (Data->End)
End = getSymbolAddress(*Data->End, Layout);
else
report_fatal_error("Data region not terminated");
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index d72da3187e07..85b1913cb23b 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -339,7 +339,7 @@ unsigned COFFObjectFile::getSectionID(SectionRef Sec) const {
bool COFFObjectFile::isSectionVirtual(DataRefImpl Ref) const {
const coff_section *Sec = toSec(Ref);
- // In COFF, a virtual section won't have any in-file
+ // In COFF, a virtual section won't have any in-file
// content, so the file pointer to the content will be zero.
return Sec->PointerToRawData == 0;
}
diff --git a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
index f67a0db690eb..745f79cd77f3 100644
--- a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
+++ b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
@@ -550,6 +550,10 @@ template <> void SymbolRecordImpl<ThreadLocalDataSym>::map(IO &IO) {
IO.mapRequired("DisplayName", Symbol.Name);
}
+template <> void SymbolRecordImpl<UsingNamespaceSym>::map(IO &IO) {
+ IO.mapRequired("Namespace", Symbol.Name);
+}
+
} // end namespace detail
} // end namespace CodeViewYAML
} // end namespace llvm
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index 24005c1890c9..e9e429c8031b 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -1752,7 +1752,7 @@ IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
if (compareAbsoluteValue(V) == cmpLessThan)
V = scalbn(V, -1, rmNearestTiesToEven);
V.sign = sign;
-
+
fs = subtract(V, rmNearestTiesToEven);
assert(fs==opOK);
}
diff --git a/lib/Support/ConvertUTF.cpp b/lib/Support/ConvertUTF.cpp
index e56854a3ae42..8f02fae4f558 100644
--- a/lib/Support/ConvertUTF.cpp
+++ b/lib/Support/ConvertUTF.cpp
@@ -8,9 +8,9 @@
*===------------------------------------------------------------------------=*/
/*
* Copyright 2001-2004 Unicode, Inc.
- *
+ *
* Disclaimer
- *
+ *
* This source code is provided as is by Unicode, Inc. No claims are
* made as to fitness for any particular purpose. No warranties of any
* kind are expressed or implied. The recipient agrees to determine
@@ -18,9 +18,9 @@
* purchased on magnetic or optical media from Unicode, Inc., the
* sole remedy for any claim will be exchange of defective media
* within 90 days of receipt.
- *
+ *
* Limitations on Rights to Redistribute This Code
- *
+ *
* Unicode, Inc. hereby grants the right to freely use the information
* supplied in this file in the creation of products supporting the
* Unicode Standard, and to make copies of this file in any form
@@ -117,7 +117,7 @@ static const char trailingBytesForUTF8[256] = {
* This table contains as many values as there might be trailing bytes
* in a UTF-8 sequence.
*/
-static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
+static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
0x03C82080UL, 0xFA082080UL, 0x82082080UL };
/*
@@ -143,7 +143,7 @@ static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC
/* --------------------------------------------------------------------- */
ConversionResult ConvertUTF32toUTF16 (
- const UTF32** sourceStart, const UTF32* sourceEnd,
+ const UTF32** sourceStart, const UTF32* sourceEnd,
UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
ConversionResult result = conversionOK;
const UTF32* source = *sourceStart;
@@ -192,7 +192,7 @@ ConversionResult ConvertUTF32toUTF16 (
/* --------------------------------------------------------------------- */
ConversionResult ConvertUTF16toUTF32 (
- const UTF16** sourceStart, const UTF16* sourceEnd,
+ const UTF16** sourceStart, const UTF16* sourceEnd,
UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
ConversionResult result = conversionOK;
const UTF16* source = *sourceStart;
@@ -246,7 +246,7 @@ if (result == sourceIllegal) {
return result;
}
ConversionResult ConvertUTF16toUTF8 (
- const UTF16** sourceStart, const UTF16* sourceEnd,
+ const UTF16** sourceStart, const UTF16* sourceEnd,
UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
ConversionResult result = conversionOK;
const UTF16* source = *sourceStart;
@@ -255,7 +255,7 @@ ConversionResult ConvertUTF16toUTF8 (
UTF32 ch;
unsigned short bytesToWrite = 0;
const UTF32 byteMask = 0xBF;
- const UTF32 byteMark = 0x80;
+ const UTF32 byteMark = 0x80;
const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */
ch = *source++;
/* If we have a surrogate pair, convert to UTF32 first. */
@@ -316,7 +316,7 @@ ConversionResult ConvertUTF16toUTF8 (
/* --------------------------------------------------------------------- */
ConversionResult ConvertUTF32toUTF8 (
- const UTF32** sourceStart, const UTF32* sourceEnd,
+ const UTF32** sourceStart, const UTF32* sourceEnd,
UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
ConversionResult result = conversionOK;
const UTF32* source = *sourceStart;
@@ -325,7 +325,7 @@ ConversionResult ConvertUTF32toUTF8 (
UTF32 ch;
unsigned short bytesToWrite = 0;
const UTF32 byteMask = 0xBF;
- const UTF32 byteMark = 0x80;
+ const UTF32 byteMark = 0x80;
ch = *source++;
if (flags == strictConversion ) {
/* UTF-16 surrogate values are illegal in UTF-32 */
@@ -347,7 +347,7 @@ ConversionResult ConvertUTF32toUTF8 (
ch = UNI_REPLACEMENT_CHAR;
result = sourceIllegal;
}
-
+
target += bytesToWrite;
if (target > targetEnd) {
--source; /* Back up source pointer! */
@@ -540,7 +540,7 @@ Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd) {
/* --------------------------------------------------------------------- */
ConversionResult ConvertUTF8toUTF16 (
- const UTF8** sourceStart, const UTF8* sourceEnd,
+ const UTF8** sourceStart, const UTF8* sourceEnd,
UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
ConversionResult result = conversionOK;
const UTF8* source = *sourceStart;
@@ -613,7 +613,7 @@ ConversionResult ConvertUTF8toUTF16 (
/* --------------------------------------------------------------------- */
static ConversionResult ConvertUTF8toUTF32Impl(
- const UTF8** sourceStart, const UTF8* sourceEnd,
+ const UTF8** sourceStart, const UTF8* sourceEnd,
UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags,
Boolean InputIsPartial) {
ConversionResult result = conversionOK;
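The offsetsFromUTF8 constants a few hunks up look like magic but are derivable: UTF-8 decoding accumulates each byte shifted left by 6 bits, so the leading-byte marker (0xC0, 0xE0, ...) and the 0x80 continuation markers pile up into a single constant that is subtracted once at the end. A quick check of the 2- and 3-byte cases under that decoding scheme:

    #include <cassert>
    #include <cstdint>

    int main() {
      // 2-byte sequence: marker 0xC0 shifted once, one continuation 0x80.
      uint32_t Off2 = (0xC0u << 6) + 0x80u;
      assert(Off2 == 0x00003080u);
      // 3-byte sequence: 0xE0 shifted twice, two continuation markers.
      uint32_t Off3 = (0xE0u << 12) + (0x80u << 6) + 0x80u;
      assert(Off3 == 0x000E2080u);
      return 0;
    }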
diff --git a/lib/Support/CrashRecoveryContext.cpp b/lib/Support/CrashRecoveryContext.cpp
index fd5d097d2b7e..be4b5c3e01c3 100644
--- a/lib/Support/CrashRecoveryContext.cpp
+++ b/lib/Support/CrashRecoveryContext.cpp
@@ -49,7 +49,7 @@ public:
/// Called when the separate crash-recovery thread was finished, to
/// indicate that we don't need to clear the thread-local CurrentContext.
- void setSwitchedThread() {
+ void setSwitchedThread() {
#if defined(LLVM_ENABLE_THREADS) && LLVM_ENABLE_THREADS != 0
SwitchedThread = true;
#endif
@@ -96,7 +96,7 @@ CrashRecoveryContext::~CrashRecoveryContext() {
delete tmp;
}
tlIsRecoveringFromCrash->set(PC);
-
+
CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl;
delete CRCI;
}
diff --git a/lib/Support/DAGDeltaAlgorithm.cpp b/lib/Support/DAGDeltaAlgorithm.cpp
index b82aec1423f5..bd9f98b0b82d 100644
--- a/lib/Support/DAGDeltaAlgorithm.cpp
+++ b/lib/Support/DAGDeltaAlgorithm.cpp
@@ -96,7 +96,7 @@ private:
assert(PredClosure.count(Node) && "Invalid node!");
return PredClosure[Node].end();
}
-
+
succ_iterator_ty succ_begin(change_ty Node) {
assert(Successors.count(Node) && "Invalid node!");
return Successors[Node].begin();
@@ -205,7 +205,7 @@ DAGDeltaAlgorithmImpl::DAGDeltaAlgorithmImpl(
Worklist.pop_back();
std::set<change_ty> &ChangeSuccs = SuccClosure[Change];
- for (pred_iterator_ty it = pred_begin(Change),
+ for (pred_iterator_ty it = pred_begin(Change),
ie = pred_end(Change); it != ie; ++it) {
SuccClosure[*it].insert(Change);
SuccClosure[*it].insert(ChangeSuccs.begin(), ChangeSuccs.end());
@@ -222,7 +222,7 @@ DAGDeltaAlgorithmImpl::DAGDeltaAlgorithmImpl(
for (succ_closure_iterator_ty it2 = succ_closure_begin(*it),
ie2 = succ_closure_end(*it); it2 != ie2; ++it2)
PredClosure[*it2].insert(*it);
-
+
// Dump useful debug info.
LLVM_DEBUG({
llvm::errs() << "-- DAGDeltaAlgorithmImpl --\n";
diff --git a/lib/Support/Errno.cpp b/lib/Support/Errno.cpp
index 10be9b391b49..2149f21281d3 100644
--- a/lib/Support/Errno.cpp
+++ b/lib/Support/Errno.cpp
@@ -42,7 +42,7 @@ std::string StrError(int errnum) {
const int MaxErrStrLen = 2000;
char buffer[MaxErrStrLen];
buffer[0] = '\0';
-#endif
+#endif
#ifdef HAVE_STRERROR_R
// strerror_r is thread-safe.
diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp
index ec7d57586e8b..cf9847faccd1 100644
--- a/lib/Support/FoldingSet.cpp
+++ b/lib/Support/FoldingSet.cpp
@@ -92,7 +92,7 @@ void FoldingSetNodeID::AddString(StringRef String) {
unsigned Units = Size / 4;
unsigned Pos = 0;
const unsigned *Base = (const unsigned*) String.data();
-
+
// If the string is aligned do a bulk transfer.
if (!((intptr_t)Base & 3)) {
Bits.append(Base, Base + Units);
@@ -121,7 +121,7 @@ void FoldingSetNodeID::AddString(StringRef String) {
}
}
}
-
+
// With the leftover bits.
unsigned V = 0;
// Pos will have overshot size by 4 - #bytes left over.
@@ -141,7 +141,7 @@ void FoldingSetNodeID::AddNodeID(const FoldingSetNodeID &ID) {
Bits.append(ID.Bits.begin(), ID.Bits.end());
}
-/// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to
+/// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to
/// lookup the node in the FoldingSetBase.
unsigned FoldingSetNodeID::ComputeHash() const {
return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash();
@@ -192,7 +192,7 @@ static FoldingSetBase::Node *GetNextPtr(void *NextInBucketPtr) {
// The low bit is set if this is the pointer back to the bucket.
if (reinterpret_cast<intptr_t>(NextInBucketPtr) & 1)
return nullptr;
-
+
return static_cast<FoldingSetBase::Node*>(NextInBucketPtr);
}
@@ -272,11 +272,11 @@ void FoldingSetBase::GrowBucketCount(unsigned NewBucketCount) {
assert(isPowerOf2_32(NewBucketCount) && "Bad bucket count!");
void **OldBuckets = Buckets;
unsigned OldNumBuckets = NumBuckets;
-
+
// Clear out new buckets.
Buckets = AllocateBuckets(NewBucketCount);
// Set NumBuckets only if allocation of new buckets was successful.
- NumBuckets = NewBucketCount;
+ NumBuckets = NewBucketCount;
NumNodes = 0;
// Walk the old buckets, rehashing nodes into their new place.
@@ -296,7 +296,7 @@ void FoldingSetBase::GrowBucketCount(unsigned NewBucketCount) {
TempID.clear();
}
}
-
+
free(OldBuckets);
}
@@ -324,9 +324,9 @@ FoldingSetBase::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
unsigned IDHash = ID.ComputeHash();
void **Bucket = GetBucketFor(IDHash, Buckets, NumBuckets);
void *Probe = *Bucket;
-
+
InsertPos = nullptr;
-
+
FoldingSetNodeID TempID;
while (Node *NodeInBucket = GetNextPtr(Probe)) {
if (NodeEquals(NodeInBucket, ID, IDHash, TempID))
@@ -335,14 +335,14 @@ FoldingSetBase::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
Probe = NodeInBucket->getNextInBucket();
}
-
+
// Didn't find the node, return null with the bucket as the InsertPos.
InsertPos = Bucket;
return nullptr;
}
/// InsertNode - Insert the specified node into the folding set, knowing that it
-/// is not already in the map. InsertPos must be obtained from
+/// is not already in the map. InsertPos must be obtained from
/// FindNodeOrInsertPos.
void FoldingSetBase::InsertNode(Node *N, void *InsertPos) {
assert(!N->getNextInBucket());
@@ -354,12 +354,12 @@ void FoldingSetBase::InsertNode(Node *N, void *InsertPos) {
}
++NumNodes;
-
+
/// The insert position is actually a bucket pointer.
void **Bucket = static_cast<void**>(InsertPos);
-
+
void *Next = *Bucket;
-
+
// If this is the first insertion into this bucket, its next pointer will be
// null. Pretend as if it pointed to itself, setting the low bit to indicate
// that it is a pointer to the bucket.
@@ -384,13 +384,13 @@ bool FoldingSetBase::RemoveNode(Node *N) {
// Remember what N originally pointed to, either a bucket or another node.
void *NodeNextPtr = Ptr;
-
+
// Chase around the list until we find the node (or bucket) which points to N.
while (true) {
if (Node *NodeInBucket = GetNextPtr(Ptr)) {
// Advance pointer.
Ptr = NodeInBucket->getNextInBucket();
-
+
// We found a node that points to N, change it to point to N's next node,
// removing N from the list.
if (Ptr == N) {
@@ -400,7 +400,7 @@ bool FoldingSetBase::RemoveNode(Node *N) {
} else {
void **Bucket = GetBucketPtr(Ptr);
Ptr = *Bucket;
-
+
// If we found that the bucket points to N, update the bucket to point to
// whatever is next.
if (Ptr == N) {
@@ -432,7 +432,7 @@ FoldingSetIteratorImpl::FoldingSetIteratorImpl(void **Bucket) {
while (*Bucket != reinterpret_cast<void*>(-1) &&
(!*Bucket || !GetNextPtr(*Bucket)))
++Bucket;
-
+
NodePtr = static_cast<FoldingSetNode*>(*Bucket);
}
@@ -443,7 +443,7 @@ void FoldingSetIteratorImpl::advance() {
if (FoldingSetNode *NextNodeInBucket = GetNextPtr(Probe))
NodePtr = NextNodeInBucket;
else {
- // Otherwise, this is the last link in this bucket.
+ // Otherwise, this is the last link in this bucket.
void **Bucket = GetBucketPtr(Probe);
// Skip to the next non-null non-self-cycle bucket.
@@ -451,7 +451,7 @@ void FoldingSetIteratorImpl::advance() {
++Bucket;
} while (*Bucket != reinterpret_cast<void*>(-1) &&
(!*Bucket || !GetNextPtr(*Bucket)));
-
+
NodePtr = static_cast<FoldingSetNode*>(*Bucket);
}
}
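The GetNextPtr trick above relies on pointer tagging: bucket back-pointers are stored with their low bit set, which is safe because nodes are at least 2-byte aligned, so a tagged value can never be mistaken for a node pointer. A minimal sketch of the same scheme:

    #include <cassert>
    #include <cstdint>

    // Tag a bucket back-pointer by setting the low bit; real node pointers
    // are aligned, so their low bit is always clear.
    void *tagBucket(void **Bucket) {
      return reinterpret_cast<void *>(reinterpret_cast<intptr_t>(Bucket) | 1);
    }

    // Returns the node if Ptr is an untagged node pointer, null for a bucket.
    void *getNextNode(void *Ptr) {
      if (reinterpret_cast<intptr_t>(Ptr) & 1)
        return nullptr; // Low bit set: this points back to the bucket.
      return Ptr;
    }

    int main() {
      alignas(8) static void *Bucket = nullptr;
      assert(getNextNode(tagBucket(&Bucket)) == nullptr);
    }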
diff --git a/lib/Support/FormattedStream.cpp b/lib/Support/FormattedStream.cpp
index a9f4409f5dde..b0cb06c1daa2 100644
--- a/lib/Support/FormattedStream.cpp
+++ b/lib/Support/FormattedStream.cpp
@@ -65,7 +65,7 @@ void formatted_raw_ostream::ComputePosition(const char *Ptr, size_t Size) {
///
/// \param NewCol - The column to move to.
///
-formatted_raw_ostream &formatted_raw_ostream::PadToColumn(unsigned NewCol) {
+formatted_raw_ostream &formatted_raw_ostream::PadToColumn(unsigned NewCol) {
// Figure out what's in the buffer and add it to the column count.
ComputePosition(getBufferStart(), GetNumBytesInBuffer());
diff --git a/lib/Support/ManagedStatic.cpp b/lib/Support/ManagedStatic.cpp
index 1c884dc70fc9..74f71a385027 100644
--- a/lib/Support/ManagedStatic.cpp
+++ b/lib/Support/ManagedStatic.cpp
@@ -43,7 +43,7 @@ void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(),
Ptr.store(Tmp, std::memory_order_release);
DeleterFn = Deleter;
-
+
// Add to list of managed statics.
Next = StaticList;
StaticList = this;
@@ -53,7 +53,7 @@ void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(),
"Partially initialized ManagedStatic!?");
Ptr = Creator();
DeleterFn = Deleter;
-
+
// Add to list of managed statics.
Next = StaticList;
StaticList = this;
@@ -70,7 +70,7 @@ void ManagedStaticBase::destroy() const {
// Destroy memory.
DeleterFn(Ptr);
-
+
// Cleanup.
Ptr = nullptr;
DeleterFn = nullptr;
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index 4428c2f24e32..ef9159bac284 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -152,7 +152,7 @@ MemoryBuffer::getFileOrSTDIN(const Twine &Filename, int64_t FileSize,
}
ErrorOr<std::unique_ptr<MemoryBuffer>>
-MemoryBuffer::getFileSlice(const Twine &FilePath, uint64_t MapSize,
+MemoryBuffer::getFileSlice(const Twine &FilePath, uint64_t MapSize,
uint64_t Offset, bool IsVolatile) {
return getFileAux<MemoryBuffer>(FilePath, -1, MapSize, Offset, false,
IsVolatile);
@@ -533,5 +533,4 @@ MemoryBufferRef MemoryBuffer::getMemBufferRef() const {
return MemoryBufferRef(Data, Identifier);
}
-void MemoryBuffer::anchor() {}
-void SmallVectorMemoryBuffer::anchor() {}
+SmallVectorMemoryBuffer::~SmallVectorMemoryBuffer() {}
diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp
index a806da23ec50..098230290ed2 100644
--- a/lib/Support/Path.cpp
+++ b/lib/Support/Path.cpp
@@ -1157,9 +1157,13 @@ Error TempFile::keep(const Twine &Name) {
setDeleteDisposition(H, true);
#else
std::error_code RenameEC = fs::rename(TmpName, Name);
- // If we can't rename, discard the temporary file.
- if (RenameEC)
- remove(TmpName);
+ if (RenameEC) {
+ // If we can't rename, try to copy to work around cross-device link issues.
+ RenameEC = sys::fs::copy_file(TmpName, Name);
+ // If we can't rename or copy, discard the temporary file.
+ if (RenameEC)
+ remove(TmpName);
+ }
sys::DontRemoveFileOnSignal(TmpName);
#endif
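The fallback added here addresses EXDEV-style failures: rename cannot cross filesystems, so when it fails the file is copied instead, and only if both attempts fail is the temporary discarded. A POSIX-flavoured sketch of the same ladder (copyFile below is a stand-in, not an LLVM API):

    #include <cstdio>
    #include <fstream>

    // Stand-in byte copy; llvm::sys::fs::copy_file plays this role above.
    static bool copyFile(const char *From, const char *To) {
      std::ifstream In(From, std::ios::binary);
      std::ofstream Out(To, std::ios::binary | std::ios::trunc);
      Out << In.rdbuf();
      return In.good() && Out.good();
    }

    bool keepTempFile(const char *Tmp, const char *Final) {
      if (std::rename(Tmp, Final) == 0)
        return true; // Fast path: same-filesystem atomic rename.
      if (copyFile(Tmp, Final))
        return true; // Cross-device: fall back to a byte copy.
      std::remove(Tmp); // Neither worked; discard the temporary.
      return false;
    }

The Windows hunk further down (MOVEFILE_REPLACE_EXISTING | MOVEFILE_COPY_ALLOWED) obtains the same copy fallback from the OS in a single call.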
diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp
index f5b6e6f3652d..206de91ae239 100644
--- a/lib/Support/PrettyStackTrace.cpp
+++ b/lib/Support/PrettyStackTrace.cpp
@@ -1,10 +1,10 @@
//===- PrettyStackTrace.cpp - Pretty Crash Handling -----------------------===//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file defines some helpful functions for dealing with the possibility of
@@ -72,10 +72,10 @@ static void PrintStack(raw_ostream &OS) {
static void PrintCurStackTrace(raw_ostream &OS) {
// Don't print an empty trace.
if (!PrettyStackTraceHead) return;
-
+
// If there are pretty stack frames registered, walk and emit them.
OS << "Stack dump:\n";
-
+
PrintStack(OS);
OS.flush();
}
@@ -85,9 +85,9 @@ static void PrintCurStackTrace(raw_ostream &OS) {
// If any clients of llvm try to link to libCrashReporterClient.a themselves,
// only one crash info struct will be used.
extern "C" {
-CRASH_REPORTER_CLIENT_HIDDEN
-struct crashreporter_annotations_t gCRAnnotations
- __attribute__((section("__DATA," CRASHREPORTER_ANNOTATIONS_SECTION)))
+CRASH_REPORTER_CLIENT_HIDDEN
+struct crashreporter_annotations_t gCRAnnotations
+ __attribute__((section("__DATA," CRASHREPORTER_ANNOTATIONS_SECTION)))
#if CRASHREPORTER_ANNOTATIONS_VERSION < 5
= { CRASHREPORTER_ANNOTATIONS_VERSION, 0, 0, 0, 0, 0, 0 };
#else
@@ -114,17 +114,17 @@ static void CrashHandler(void *) {
raw_svector_ostream Stream(TmpStr);
PrintCurStackTrace(Stream);
}
-
+
if (!TmpStr.empty()) {
#ifdef HAVE_CRASHREPORTERCLIENT_H
// Cast to void to avoid warning.
(void)CRSetCrashLogMessage(TmpStr.c_str());
-#elif HAVE_CRASHREPORTER_INFO
+#elif HAVE_CRASHREPORTER_INFO
__crashreporter_info__ = strdup(TmpStr.c_str());
#endif
errs() << TmpStr.str();
}
-
+
#endif
}
diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp
index bc15fd4e4014..d8fde7fa8990 100644
--- a/lib/Support/SourceMgr.cpp
+++ b/lib/Support/SourceMgr.cpp
@@ -175,14 +175,14 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
std::pair<unsigned, unsigned> LineAndCol;
StringRef BufferID = "<unknown>";
std::string LineStr;
-
+
if (Loc.isValid()) {
unsigned CurBuf = FindBufferContainingLoc(Loc);
assert(CurBuf && "Invalid or unspecified location!");
const MemoryBuffer *CurMB = getMemoryBuffer(CurBuf);
BufferID = CurMB->getBufferIdentifier();
-
+
// Scan backward to find the start of the line.
const char *LineStart = Loc.getPointer();
const char *BufStart = CurMB->getBufferStart();
@@ -202,17 +202,17 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
SMRange R = Ranges[i];
if (!R.isValid()) continue;
-
+
// If the line doesn't contain any part of the range, then ignore it.
if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart)
continue;
-
+
// Ignore pieces of the range that go onto other lines.
if (R.Start.getPointer() < LineStart)
R.Start = SMLoc::getFromPointer(LineStart);
if (R.End.getPointer() > LineEnd)
R.End = SMLoc::getFromPointer(LineEnd);
-
+
// Translate from SMLoc ranges to column ranges.
// FIXME: Handle multibyte characters.
ColRanges.push_back(std::make_pair(R.Start.getPointer()-LineStart,
@@ -221,7 +221,7 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
LineAndCol = getLineAndColumn(Loc, CurBuf);
}
-
+
return SMDiagnostic(*this, Loc, BufferID, LineAndCol.first,
LineAndCol.second-1, Kind, Msg.str(),
LineStr, ColRanges, FixIts);
@@ -440,7 +440,7 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S, bool ShowColors,
// Build the line with the caret and ranges.
std::string CaretLine(NumColumns+1, ' ');
-
+
// Expand any ranges.
for (unsigned r = 0, e = Ranges.size(); r != e; ++r) {
std::pair<unsigned, unsigned> R = Ranges[r];
@@ -459,14 +459,14 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S, bool ShowColors,
// Finally, plop on the caret.
if (unsigned(ColumnNo) <= NumColumns)
CaretLine[ColumnNo] = '^';
- else
+ else
CaretLine[NumColumns] = '^';
-
+
// ... and remove trailing whitespace so the output doesn't wrap for it. We
// know that the line isn't completely empty because it has the caret in it at
// least.
CaretLine.erase(CaretLine.find_last_not_of(' ')+1);
-
+
printSourceLine(S, LineContents);
if (ShowColors)
@@ -479,7 +479,7 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S, bool ShowColors,
++OutCol;
continue;
}
-
+
// Okay, we have a tab. Insert the appropriate number of characters.
do {
S << CaretLine[i];
@@ -494,7 +494,7 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S, bool ShowColors,
// Print out the replacement line, matching tabs in the source line.
if (FixItInsertionLine.empty())
return;
-
+
for (size_t i = 0, e = FixItInsertionLine.size(), OutCol = 0; i < e; ++i) {
if (i >= LineContents.size() || LineContents[i] != '\t') {
S << FixItInsertionLine[i];
diff --git a/lib/Support/StringPool.cpp b/lib/Support/StringPool.cpp
index 76faabc92bb5..c591857c415d 100644
--- a/lib/Support/StringPool.cpp
+++ b/lib/Support/StringPool.cpp
@@ -26,10 +26,10 @@ PooledStringPtr StringPool::intern(StringRef Key) {
table_t::iterator I = InternTable.find(Key);
if (I != InternTable.end())
return PooledStringPtr(&*I);
-
+
entry_t *S = entry_t::Create(Key);
S->getValue().Pool = this;
InternTable.insert(S);
-
+
return PooledStringPtr(S);
}
diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp
index 9ba7a09f9962..f0349260e22f 100644
--- a/lib/Support/StringRef.cpp
+++ b/lib/Support/StringRef.cpp
@@ -389,7 +389,7 @@ static unsigned GetAutoSenseRadix(StringRef &Str) {
Str = Str.substr(2);
return 16;
}
-
+
if (Str.startswith("0b") || Str.startswith("0B")) {
Str = Str.substr(2);
return 2;
diff --git a/lib/Support/TargetRegistry.cpp b/lib/Support/TargetRegistry.cpp
index ed999fce5dad..c5eba5714766 100644
--- a/lib/Support/TargetRegistry.cpp
+++ b/lib/Support/TargetRegistry.cpp
@@ -98,7 +98,7 @@ void TargetRegistry::RegisterTarget(Target &T, const char *Name,
// convenience to some clients.
if (T.Name)
return;
-
+
// Add to the list of targets.
T.Next = FirstTarget;
FirstTarget = &T;
diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc
index f425d607af47..b64b013d7407 100644
--- a/lib/Support/Windows/Path.inc
+++ b/lib/Support/Windows/Path.inc
@@ -450,7 +450,7 @@ static std::error_code rename_handle(HANDLE FromHandle, const Twine &To) {
if (std::error_code EC2 = realPathFromHandle(FromHandle, WideFrom))
return EC2;
if (::MoveFileExW(WideFrom.begin(), WideTo.begin(),
- MOVEFILE_REPLACE_EXISTING))
+ MOVEFILE_REPLACE_EXISTING | MOVEFILE_COPY_ALLOWED))
return std::error_code();
return mapWindowsError(GetLastError());
}
diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp
index 354b7d0740de..9ef1410b99a5 100644
--- a/lib/Support/YAMLParser.cpp
+++ b/lib/Support/YAMLParser.cpp
@@ -1113,7 +1113,7 @@ bool Scanner::scanDirective() {
Current = skip_while(&Scanner::skip_ns_char, Current);
StringRef Name(NameStart, Current - NameStart);
Current = skip_while(&Scanner::skip_s_white, Current);
-
+
Token T;
if (Name == "YAML") {
Current = skip_while(&Scanner::skip_ns_char, Current);
diff --git a/lib/Support/regex_impl.h b/lib/Support/regex_impl.h
index f8296c9ff75e..8ddac7dcf998 100644
--- a/lib/Support/regex_impl.h
+++ b/lib/Support/regex_impl.h
@@ -96,7 +96,7 @@ extern "C" {
int llvm_regcomp(llvm_regex_t *, const char *, int);
size_t llvm_regerror(int, const llvm_regex_t *, char *, size_t);
-int llvm_regexec(const llvm_regex_t *, const char *, size_t,
+int llvm_regexec(const llvm_regex_t *, const char *, size_t,
llvm_regmatch_t [], int);
void llvm_regfree(llvm_regex_t *);
size_t llvm_strlcpy(char *dst, const char *src, size_t siz);
diff --git a/lib/Support/xxhash.cpp b/lib/Support/xxhash.cpp
index df643f9bd639..e9dceed2c4ae 100644
--- a/lib/Support/xxhash.cpp
+++ b/lib/Support/xxhash.cpp
@@ -132,3 +132,7 @@ uint64_t llvm::xxHash64(StringRef Data) {
return H64;
}
+
+uint64_t llvm::xxHash64(ArrayRef<uint8_t> Data) {
+ return xxHash64({(const char *)Data.data(), Data.size()});
+}
diff --git a/lib/TableGen/StringMatcher.cpp b/lib/TableGen/StringMatcher.cpp
index 32599104f6a2..2c4d1f33997d 100644
--- a/lib/TableGen/StringMatcher.cpp
+++ b/lib/TableGen/StringMatcher.cpp
@@ -25,19 +25,19 @@ using namespace llvm;
/// FindFirstNonCommonLetter - Find the first character in the keys of the
/// string pairs that is not shared across the whole set of strings. All
/// strings are assumed to have the same length.
-static unsigned
+static unsigned
FindFirstNonCommonLetter(const std::vector<const
StringMatcher::StringPair*> &Matches) {
assert(!Matches.empty());
for (unsigned i = 0, e = Matches[0]->first.size(); i != e; ++i) {
// Check to see if letter i is the same across the set.
char Letter = Matches[0]->first[i];
-
+
for (unsigned str = 0, e = Matches.size(); str != e; ++str)
if (Matches[str]->first[i] != Letter)
return i;
}
-
+
return Matches[0]->first.size();
}
@@ -51,7 +51,7 @@ bool StringMatcher::EmitStringMatcherForChar(
unsigned IndentCount, bool IgnoreDuplicates) const {
assert(!Matches.empty() && "Must have at least one string to match!");
std::string Indent(IndentCount * 2 + 4, ' ');
-
+
// If we have verified that the entire string matches, we're done: output the
// matching code.
if (CharNo == Matches[0]->first.size()) {
@@ -60,7 +60,7 @@ bool StringMatcher::EmitStringMatcherForChar(
// If the to-execute code has \n's in it, indent each subsequent line.
StringRef Code = Matches[0]->second;
-
+
std::pair<StringRef, StringRef> Split = Code.split('\n');
OS << Indent << Split.first << "\t // \"" << Matches[0]->first << "\"\n";
@@ -72,20 +72,20 @@ bool StringMatcher::EmitStringMatcherForChar(
}
return false;
}
-
+
// Bucket the matches by the character we are comparing.
std::map<char, std::vector<const StringPair*>> MatchesByLetter;
-
+
for (unsigned i = 0, e = Matches.size(); i != e; ++i)
MatchesByLetter[Matches[i]->first[CharNo]].push_back(Matches[i]);
-
-
+
+
// If we have exactly one bucket to match, see how many characters are common
// across the whole set and match all of them at once.
if (MatchesByLetter.size() == 1) {
unsigned FirstNonCommonLetter = FindFirstNonCommonLetter(Matches);
unsigned NumChars = FirstNonCommonLetter-CharNo;
-
+
// Emit code to break out if the prefix doesn't match.
if (NumChars == 1) {
// Do the comparison with if (Str[1] != 'f')
@@ -105,13 +105,13 @@ bool StringMatcher::EmitStringMatcherForChar(
return EmitStringMatcherForChar(Matches, FirstNonCommonLetter, IndentCount,
IgnoreDuplicates);
}
-
+
// Otherwise, we have multiple possible things, emit a switch on the
// character.
OS << Indent << "switch (" << StrVariableName << "[" << CharNo << "]) {\n";
OS << Indent << "default: break;\n";
-
- for (std::map<char, std::vector<const StringPair*>>::iterator LI =
+
+ for (std::map<char, std::vector<const StringPair*>>::iterator LI =
MatchesByLetter.begin(), E = MatchesByLetter.end(); LI != E; ++LI) {
// TODO: escape hard stuff (like \n) if we ever care about it.
OS << Indent << "case '" << LI->first << "':\t // "
@@ -122,7 +122,7 @@ bool StringMatcher::EmitStringMatcherForChar(
IgnoreDuplicates))
OS << Indent << " break;\n";
}
-
+
OS << Indent << "}\n";
return true;
}
@@ -132,18 +132,18 @@ bool StringMatcher::EmitStringMatcherForChar(
void StringMatcher::Emit(unsigned Indent, bool IgnoreDuplicates) const {
// If nothing to match, just fall through.
if (Matches.empty()) return;
-
+
// First level categorization: group strings by length.
std::map<unsigned, std::vector<const StringPair*>> MatchesByLength;
-
+
for (unsigned i = 0, e = Matches.size(); i != e; ++i)
MatchesByLength[Matches[i].first.size()].push_back(&Matches[i]);
-
+
// Output a switch statement on length and categorize the elements within each
// bin.
OS.indent(Indent*2+2) << "switch (" << StrVariableName << ".size()) {\n";
OS.indent(Indent*2+2) << "default: break;\n";
-
+
for (std::map<unsigned, std::vector<const StringPair*>>::iterator LI =
MatchesByLength.begin(), E = MatchesByLength.end(); LI != E; ++LI) {
OS.indent(Indent*2+2) << "case " << LI->first << ":\t // "
@@ -152,6 +152,6 @@ void StringMatcher::Emit(unsigned Indent, bool IgnoreDuplicates) const {
if (EmitStringMatcherForChar(LI->second, 0, Indent, IgnoreDuplicates))
OS.indent(Indent*2+4) << "break;\n";
}
-
+
OS.indent(Indent*2+2) << "}\n";
}
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index 43a3ae77a170..572d1c22feea 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -3774,7 +3774,7 @@ bool AArch64FastISel::selectRet(const Instruction *I) {
if (Ret->getNumOperands() > 0) {
CallingConv::ID CC = F.getCallingConv();
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
+ GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 0c72f2ebee18..de762a7bb1d4 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8580,7 +8580,7 @@ static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG,
SDValue
AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG,
- std::vector<SDNode *> *Created) const {
+ SmallVectorImpl<SDNode *> &Created) const {
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
if (isIntDivCheap(N->getValueType(0), Attr))
return SDValue(N,0); // Lower SDIV as SDIV
@@ -8603,11 +8603,9 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
SDValue CSel = DAG.getNode(AArch64ISD::CSEL, DL, VT, Add, N0, CCVal, Cmp);
- if (Created) {
- Created->push_back(Cmp.getNode());
- Created->push_back(Add.getNode());
- Created->push_back(CSel.getNode());
- }
+ Created.push_back(Cmp.getNode());
+ Created.push_back(Add.getNode());
+ Created.push_back(CSel.getNode());
// Divide by pow2.
SDValue SRA =
@@ -8618,8 +8616,7 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
if (Divisor.isNonNegative())
return SRA;
- if (Created)
- Created->push_back(SRA.getNode());
+ Created.push_back(SRA.getNode());
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
}
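The cmp/add/csel/asr sequence built above is the standard branch-free signed division by a power of two: an arithmetic right shift rounds toward negative infinity, so 2^k - 1 is conditionally added for negative dividends to recover C's round-toward-zero semantics, and the quotient is negated for a negative divisor. The scalar logic, as a sketch (assumes arithmetic right shift on signed types, which every AArch64 compiler provides):

    #include <cassert>
    #include <cstdint>

    // Signed division by +/-2^K, rounding toward zero, without branches on N.
    int64_t sdivPow2(int64_t N, unsigned K, bool NegativeDivisor) {
      int64_t Biased = N < 0 ? N + ((int64_t(1) << K) - 1) : N; // the csel
      int64_t Q = Biased >> K;                                  // the asr
      return NegativeDivisor ? -Q : Q;                          // sub xzr, Q
    }

    int main() {
      assert(sdivPow2(-7, 1, false) == -3); // -7 / 2 == -3, not -4
      assert(sdivPow2(7, 2, true) == -1);   //  7 / -4 == -1
    }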
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index 592845640a44..d783c8a6048c 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -644,7 +644,7 @@ private:
SelectionDAG &DAG) const;
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
- std::vector<SDNode *> *Created) const override;
+ SmallVectorImpl<SDNode *> &Created) const override;
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
int &ExtraSteps, bool &UseOneConst,
bool Reciprocal) const override;
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index 1060c64f7b5d..15d61cd1ad26 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -57,6 +57,14 @@ class EncodedI<string cstr, list<dag> pattern> : AArch64Inst<NormalFrm, cstr> {
let Size = 4;
}
+// Enum describing whether an instruction is
+// destructive in its first source operand.
+class DestructiveInstTypeEnum<bits<1> val> {
+ bits<1> Value = val;
+}
+def NotDestructive : DestructiveInstTypeEnum<0>;
+def Destructive : DestructiveInstTypeEnum<1>;
+
// Normal instructions
class I<dag oops, dag iops, string asm, string operands, string cstr,
list<dag> pattern>
@@ -64,6 +72,13 @@ class I<dag oops, dag iops, string asm, string operands, string cstr,
dag OutOperandList = oops;
dag InOperandList = iops;
let AsmString = !strconcat(asm, operands);
+
+ // Destructive operations (SVE)
+ DestructiveInstTypeEnum DestructiveInstType = NotDestructive;
+ ElementSizeEnum ElementSize = ElementSizeB;
+
+ let TSFlags{3} = DestructiveInstType.Value;
+ let TSFlags{2-0} = ElementSize.Value;
}
class TriOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$MHS, node:$RHS), res>;
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index 230480cf1cea..032d53d19620 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -4851,75 +4851,92 @@ AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
return makeArrayRef(TargetFlags);
}
- /// Constants defining how certain sequences should be outlined.
- /// This encompasses how an outlined function should be called, and what kind of
- /// frame should be emitted for that outlined function.
- ///
- /// \p MachineOutlinerDefault implies that the function should be called with
- /// a save and restore of LR to the stack.
- ///
- /// That is,
- ///
- /// I1 Save LR OUTLINED_FUNCTION:
- /// I2 --> BL OUTLINED_FUNCTION I1
- /// I3 Restore LR I2
- /// I3
- /// RET
- ///
- /// * Call construction overhead: 3 (save + BL + restore)
- /// * Frame construction overhead: 1 (ret)
- /// * Requires stack fixups? Yes
- ///
- /// \p MachineOutlinerTailCall implies that the function is being created from
- /// a sequence of instructions ending in a return.
- ///
- /// That is,
- ///
- /// I1 OUTLINED_FUNCTION:
- /// I2 --> B OUTLINED_FUNCTION I1
- /// RET I2
- /// RET
- ///
- /// * Call construction overhead: 1 (B)
- /// * Frame construction overhead: 0 (Return included in sequence)
- /// * Requires stack fixups? No
- ///
- /// \p MachineOutlinerNoLRSave implies that the function should be called using
- /// a BL instruction, but doesn't require LR to be saved and restored. This
- /// happens when LR is known to be dead.
- ///
- /// That is,
- ///
- /// I1 OUTLINED_FUNCTION:
- /// I2 --> BL OUTLINED_FUNCTION I1
- /// I3 I2
- /// I3
- /// RET
- ///
- /// * Call construction overhead: 1 (BL)
- /// * Frame construction overhead: 1 (RET)
- /// * Requires stack fixups? No
- ///
- /// \p MachineOutlinerThunk implies that the function is being created from
- /// a sequence of instructions ending in a call. The outlined function is
- /// called with a BL instruction, and the outlined function tail-calls the
- /// original call destination.
- ///
- /// That is,
- ///
- /// I1 OUTLINED_FUNCTION:
- /// I2 --> BL OUTLINED_FUNCTION I1
- /// BL f I2
- /// B f
- /// * Call construction overhead: 1 (BL)
- /// * Frame construction overhead: 0
- /// * Requires stack fixups? No
- ///
+/// Constants defining how certain sequences should be outlined.
+/// This encompasses how an outlined function should be called, and what kind of
+/// frame should be emitted for that outlined function.
+///
+/// \p MachineOutlinerDefault implies that the function should be called with
+/// a save and restore of LR to the stack.
+///
+/// That is,
+///
+/// I1 Save LR OUTLINED_FUNCTION:
+/// I2 --> BL OUTLINED_FUNCTION I1
+/// I3 Restore LR I2
+/// I3
+/// RET
+///
+/// * Call construction overhead: 3 (save + BL + restore)
+/// * Frame construction overhead: 1 (ret)
+/// * Requires stack fixups? Yes
+///
+/// \p MachineOutlinerTailCall implies that the function is being created from
+/// a sequence of instructions ending in a return.
+///
+/// That is,
+///
+/// I1 OUTLINED_FUNCTION:
+/// I2 --> B OUTLINED_FUNCTION I1
+/// RET I2
+/// RET
+///
+/// * Call construction overhead: 1 (B)
+/// * Frame construction overhead: 0 (Return included in sequence)
+/// * Requires stack fixups? No
+///
+/// \p MachineOutlinerNoLRSave implies that the function should be called using
+/// a BL instruction, but doesn't require LR to be saved and restored. This
+/// happens when LR is known to be dead.
+///
+/// That is,
+///
+/// I1 OUTLINED_FUNCTION:
+/// I2 --> BL OUTLINED_FUNCTION I1
+/// I3 I2
+/// I3
+/// RET
+///
+/// * Call construction overhead: 1 (BL)
+/// * Frame construction overhead: 1 (RET)
+/// * Requires stack fixups? No
+///
+/// \p MachineOutlinerThunk implies that the function is being created from
+/// a sequence of instructions ending in a call. The outlined function is
+/// called with a BL instruction, and the outlined function tail-calls the
+/// original call destination.
+///
+/// That is,
+///
+/// I1 OUTLINED_FUNCTION:
+/// I2 --> BL OUTLINED_FUNCTION I1
+/// BL f I2
+/// B f
+/// * Call construction overhead: 1 (BL)
+/// * Frame construction overhead: 0
+/// * Requires stack fixups? No
+///
+/// \p MachineOutlinerRegSave implies that the function should be called with a
+/// save and restore of LR to an available register. This allows us to avoid
+/// stack fixups. Note that this outlining variant is compatible with the
+/// NoLRSave case.
+///
+/// That is,
+///
+/// I1 Save LR OUTLINED_FUNCTION:
+/// I2 --> BL OUTLINED_FUNCTION I1
+/// I3 Restore LR I2
+/// I3
+/// RET
+///
+/// * Call construction overhead: 3 (save + BL + restore)
+/// * Frame construction overhead: 1 (ret)
+/// * Requires stack fixups? No
enum MachineOutlinerClass {
MachineOutlinerDefault, /// Emit a save, restore, call, and return.
MachineOutlinerTailCall, /// Only emit a branch.
MachineOutlinerNoLRSave, /// Emit a call and return.
MachineOutlinerThunk, /// Emit a call and tail-call.
+ MachineOutlinerRegSave /// Same as default, but save to a register.
};
enum MachineOutlinerMBBFlags {
@@ -4927,6 +4944,27 @@ enum MachineOutlinerMBBFlags {
HasCalls = 0x4
};
+unsigned
+AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
+ MachineFunction *MF = C.getMF();
+ const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
+ MF->getSubtarget().getRegisterInfo());
+
+ // Check if there is an available register across the sequence that we can
+ // use.
+ for (unsigned Reg : AArch64::GPR64RegClass) {
+ if (!ARI->isReservedReg(*MF, Reg) &&
+ Reg != AArch64::LR && // LR is not reserved, but don't use it.
+ Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
+ Reg != AArch64::X17 && // Ditto for X17.
+ C.LRU.available(Reg) && C.UsedInSequence.available(Reg))
+ return Reg;
+ }
+
+ // No suitable register. Return 0.
+ return 0u;
+}
+
outliner::OutlinedFunction
AArch64InstrInfo::getOutliningCandidateInfo(
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
@@ -5015,11 +5053,27 @@ AArch64InstrInfo::getOutliningCandidateInfo(
SetCandidateCallInfo(MachineOutlinerNoLRSave, 4);
}
- // LR is live, so we need to save it to the stack.
+ // LR is live, so we need to save it. Decide whether it should be saved to
+ // the stack, or if it can be saved to a register.
else {
- FrameID = MachineOutlinerDefault;
- NumBytesToCreateFrame = 4;
- SetCandidateCallInfo(MachineOutlinerDefault, 12);
+ if (std::all_of(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
+ [this](outliner::Candidate &C) {
+ return findRegisterToSaveLRTo(C);
+ })) {
+ // Every candidate has an available callee-saved register for the save.
+ // We can save LR to a register.
+ FrameID = MachineOutlinerRegSave;
+ NumBytesToCreateFrame = 4;
+ SetCandidateCallInfo(MachineOutlinerRegSave, 12);
+ }
+
+ else {
+ // At least one candidate does not have an available callee-saved
+ // register. We must save LR to the stack.
+ FrameID = MachineOutlinerDefault;
+ NumBytesToCreateFrame = 4;
+ SetCandidateCallInfo(MachineOutlinerDefault, 12);
+ }
}
// Check if the range contains a call. These require a save + restore of the
@@ -5088,7 +5142,7 @@ AArch64InstrInfo::getMachineOutlinerMBBFlags(MachineBasicBlock &MBB) const {
MBB.rend(),
[&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
- if (!LRU.available(AArch64::LR))
+ if (!LRU.available(AArch64::LR))
Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
return Flags;
@@ -5114,14 +5168,14 @@ AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
// ahead and skip over them.
if (MI.isKill())
return outliner::InstrType::Invisible;
-
+
// Is this a terminator for a basic block?
if (MI.isTerminator()) {
// Is this the end of a function?
if (MI.getParent()->succ_empty())
return outliner::InstrType::Legal;
-
+
// It's not, so don't outline it.
return outliner::InstrType::Illegal;
}
@@ -5424,7 +5478,7 @@ void AArch64InstrInfo::buildOutlinedFrame(
MBB.insert(MBB.end(), ret);
// Did we have to modify the stack by saving the link register?
- if (OF.FrameConstructionID == MachineOutlinerNoLRSave)
+ if (OF.FrameConstructionID != MachineOutlinerDefault)
return;
// We modified the stack.
@@ -5457,13 +5511,41 @@ MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
// We want to return the spot where we inserted the call.
MachineBasicBlock::iterator CallPt;
- // We have a default call. Save the link register.
- MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
- .addReg(AArch64::SP, RegState::Define)
- .addReg(AArch64::LR)
- .addReg(AArch64::SP)
- .addImm(-16);
- It = MBB.insert(It, STRXpre);
+ // Instructions for saving and restoring LR around the call instruction we're
+ // going to insert.
+ MachineInstr *Save;
+ MachineInstr *Restore;
+ // Can we save to a register?
+ if (C.CallConstructionID == MachineOutlinerRegSave) {
+ // FIXME: This logic should be sunk into a target-specific interface so that
+ // we don't have to recompute the register.
+ unsigned Reg = findRegisterToSaveLRTo(C);
+ assert(Reg != 0 && "No callee-saved register available?");
+
+ // Save and restore LR from that register.
+ Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::LR)
+ .addImm(0);
+ Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
+ .addReg(AArch64::XZR)
+ .addReg(Reg)
+ .addImm(0);
+ } else {
+ // We have the default case. Save and restore from SP.
+ Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
+ .addReg(AArch64::SP, RegState::Define)
+ .addReg(AArch64::LR)
+ .addReg(AArch64::SP)
+ .addImm(-16);
+ Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
+ .addReg(AArch64::SP, RegState::Define)
+ .addReg(AArch64::LR, RegState::Define)
+ .addReg(AArch64::SP)
+ .addImm(16);
+ }
+
+ It = MBB.insert(It, Save);
It++;
// Insert the call.
@@ -5472,13 +5554,11 @@ MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
CallPt = It;
It++;
- // Restore the link register.
- MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
- .addReg(AArch64::SP, RegState::Define)
- .addReg(AArch64::LR, RegState::Define)
- .addReg(AArch64::SP)
- .addImm(16);
- It = MBB.insert(It, LDRXpost);
-
+ It = MBB.insert(It, Restore);
return CallPt;
}
+
+bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
+ MachineFunction &MF) const {
+ return MF.getFunction().optForMinSize();
+}
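In assembly terms, the new MachineOutlinerRegSave variant trades the SP adjustment of the default case for a pair of register moves (ORRXrs with XZR is the canonical AArch64 mov), e.g. mov x20, lr / bl OUTLINED / mov lr, x20 if x20 happens to be free. A toy version of the register scan that decides whether this variant applies (register numbers here are arbitrary stand-ins):

    #include <algorithm>
    #include <vector>

    // Toy version of findRegisterToSaveLRTo: pick the first general-purpose
    // register that is not live or used across the candidate and is not
    // LR/X16/X17, which must stay untouched.
    struct Candidate {
      std::vector<unsigned> LiveOrUsed; // stand-in for LRU + UsedInSequence
      bool available(unsigned Reg) const {
        return std::find(LiveOrUsed.begin(), LiveOrUsed.end(), Reg) ==
               LiveOrUsed.end();
      }
    };

    unsigned findSaveReg(const Candidate &C, const std::vector<unsigned> &GPRs,
                         unsigned LR, unsigned X16, unsigned X17) {
      for (unsigned Reg : GPRs)
        if (Reg != LR && Reg != X16 && Reg != X17 && C.available(Reg))
          return Reg;
      return 0; // No suitable register; caller falls back to a stack save.
    }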
diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h
index 0e5953f6216d..11882e238b70 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/lib/Target/AArch64/AArch64InstrInfo.h
@@ -249,6 +249,7 @@ public:
insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
MachineBasicBlock::iterator &It, MachineFunction &MF,
const outliner::Candidate &C) const override;
+ bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;
/// Returns true if the instruction sets a register to an immediate value
/// that can be executed more efficiently.
bool isExynosResetFast(const MachineInstr &MI) const;
@@ -271,6 +272,10 @@ private:
ArrayRef<MachineOperand> Cond) const;
bool substituteCmpToZero(MachineInstr &CmpInstr, unsigned SrcReg,
const MachineRegisterInfo *MRI) const;
+
+ /// Returns an unused general-purpose register which can be used for
+ /// constructing an outlined call if one exists. Returns 0 otherwise.
+ unsigned findRegisterToSaveLRTo(const outliner::Candidate &C) const;
};
/// emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg
@@ -339,6 +344,32 @@ static inline bool isIndirectBranchOpcode(int Opc) {
return Opc == AArch64::BR;
}
+// struct TSFlags {
+#define TSFLAG_ELEMENT_SIZE_TYPE(X) (X) // 3-bits
+#define TSFLAG_DESTRUCTIVE_INST_TYPE(X) ((X) << 3) // 1-bit
+// }
+
+namespace AArch64 {
+
+enum ElementSizeType {
+ ElementSizeMask = TSFLAG_ELEMENT_SIZE_TYPE(0x7),
+ ElementSizeNone = TSFLAG_ELEMENT_SIZE_TYPE(0x0),
+ ElementSizeB = TSFLAG_ELEMENT_SIZE_TYPE(0x1),
+ ElementSizeH = TSFLAG_ELEMENT_SIZE_TYPE(0x2),
+ ElementSizeS = TSFLAG_ELEMENT_SIZE_TYPE(0x3),
+ ElementSizeD = TSFLAG_ELEMENT_SIZE_TYPE(0x4),
+};
+
+enum DestructiveInstType {
+ DestructiveInstTypeMask = TSFLAG_DESTRUCTIVE_INST_TYPE(0x1),
+ NotDestructive = TSFLAG_DESTRUCTIVE_INST_TYPE(0x0),
+ Destructive = TSFLAG_DESTRUCTIVE_INST_TYPE(0x1),
+};
+
+#undef TSFLAG_ELEMENT_SIZE_TYPE
+#undef TSFLAG_DESTRUCTIVE_INST_TYPE
+}
+
} // end namespace llvm
#endif
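The TSFlags packing introduced in this header is a plain bitfield: bits 2..0 carry the element size and bit 3 the destructive flag, so both properties decode with a mask. A quick check of the decode using the same shift layout:

    #include <cassert>
    #include <cstdint>

    enum : uint64_t {
      ElementSizeMask     = 0x7,      // bits 2..0
      DestructiveInstMask = 0x1 << 3, // bit 3
      ElementSizeB        = 0x1,
      Destructive         = 0x1 << 3,
    };

    int main() {
      uint64_t TSFlags = ElementSizeB | Destructive; // as set from the .td defs
      assert((TSFlags & ElementSizeMask) == ElementSizeB);
      assert((TSFlags & DestructiveInstMask) == Destructive);
    }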
diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 4d7ca2349ed1..b2b500320b5c 100644
--- a/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -21,6 +21,7 @@
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -94,6 +95,10 @@ private:
void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const;
+ // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
+ void materializeLargeCMVal(MachineInstr &I, const Value *V,
+ unsigned char OpFlags) const;
+
const AArch64TargetMachine &TM;
const AArch64Subtarget &STI;
const AArch64InstrInfo &TII;
@@ -655,6 +660,45 @@ bool AArch64InstructionSelector::selectVaStartDarwin(
return true;
}
+void AArch64InstructionSelector::materializeLargeCMVal(
+ MachineInstr &I, const Value *V, unsigned char OpFlags) const {
+ MachineBasicBlock &MBB = *I.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MachineIRBuilder MIB(I);
+
+ auto MovZ = MIB.buildInstr(AArch64::MOVZXi, &AArch64::GPR64RegClass);
+ MovZ->addOperand(MF, I.getOperand(1));
+ MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
+ AArch64II::MO_NC);
+ MovZ->addOperand(MF, MachineOperand::CreateImm(0));
+ constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
+
+ auto BuildMovK = [&](unsigned SrcReg, unsigned char Flags, unsigned Offset,
+ unsigned ForceDstReg) {
+ unsigned DstReg = ForceDstReg
+ ? ForceDstReg
+ : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
+ auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
+ if (auto *GV = dyn_cast<GlobalValue>(V)) {
+ MovI->addOperand(MF, MachineOperand::CreateGA(
+ GV, MovZ->getOperand(1).getOffset(), Flags));
+ } else {
+ MovI->addOperand(
+ MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
+ MovZ->getOperand(1).getOffset(), Flags));
+ }
+ MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
+ constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
+ return DstReg;
+ };
+ unsigned DstReg = BuildMovK(MovZ->getOperand(0).getReg(),
+ AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
+ DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
+ BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
+ return;
+}
+
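The helper assembles a 64-bit absolute address 16 bits at a time: MOVZ seeds
bits [15:0] via the :abs_g0_nc: fragment, and the three MOVKs insert the
g1/g2/g3 fragments at bit offsets 16, 32 and 48. A minimal arithmetic sketch of
the value being composed (illustrative only; in the generated sequence the
chunks arrive through relocations on the GlobalValue/BlockAddress operands):

    #include <cstdint>

    // What the movz/movk chain computes, one 16-bit chunk per instruction.
    uint64_t composeLargeCMAddress(uint64_t Addr) {
      uint64_t V = Addr & 0xFFFFULL;         // MOVZ Xd, :abs_g0_nc:sym
      V |= Addr & 0xFFFF0000ULL;             // MOVK Xd, :abs_g1_nc:sym, lsl #16
      V |= Addr & 0xFFFF00000000ULL;         // MOVK Xd, :abs_g2_nc:sym, lsl #32
      V |= Addr & 0xFFFF000000000000ULL;     // MOVK Xd, :abs_g3:sym, lsl #48
      return V;
    }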
bool AArch64InstructionSelector::select(MachineInstr &I,
CodeGenCoverage &CoverageInfo) const {
assert(I.getParent() && "Instruction should be in a basic block!");
@@ -936,36 +980,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
I.getOperand(1).setTargetFlags(OpFlags);
} else if (TM.getCodeModel() == CodeModel::Large) {
// Materialize the global using movz/movk instructions.
- unsigned MovZDstReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
- auto InsertPt = std::next(I.getIterator());
- auto MovZ =
- BuildMI(MBB, InsertPt, I.getDebugLoc(), TII.get(AArch64::MOVZXi))
- .addDef(MovZDstReg);
- MovZ->addOperand(MF, I.getOperand(1));
- MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
- AArch64II::MO_NC);
- MovZ->addOperand(MF, MachineOperand::CreateImm(0));
- constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
-
- auto BuildMovK = [&](unsigned SrcReg, unsigned char Flags,
- unsigned Offset, unsigned ForceDstReg) {
- unsigned DstReg =
- ForceDstReg ? ForceDstReg
- : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
- auto MovI = BuildMI(MBB, InsertPt, MovZ->getDebugLoc(),
- TII.get(AArch64::MOVKXi))
- .addDef(DstReg)
- .addReg(SrcReg);
- MovI->addOperand(MF, MachineOperand::CreateGA(
- GV, MovZ->getOperand(1).getOffset(), Flags));
- MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
- constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
- return DstReg;
- };
- unsigned DstReg = BuildMovK(MovZ->getOperand(0).getReg(),
- AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
- DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
- BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
+ materializeLargeCMVal(I, GV, OpFlags);
I.eraseFromParent();
return true;
} else {
@@ -1482,7 +1497,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
.addImm(1);
I.eraseFromParent();
return true;
- case TargetOpcode::G_IMPLICIT_DEF:
+ case TargetOpcode::G_IMPLICIT_DEF: {
I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
const unsigned DstReg = I.getOperand(0).getReg();
@@ -1492,6 +1507,25 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
return true;
}
+ case TargetOpcode::G_BLOCK_ADDR: {
+ if (TM.getCodeModel() == CodeModel::Large) {
+ materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
+ I.eraseFromParent();
+ return true;
+ } else {
+ I.setDesc(TII.get(AArch64::MOVaddrBA));
+ auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
+ I.getOperand(0).getReg())
+ .addBlockAddress(I.getOperand(1).getBlockAddress(),
+ /* Offset */ 0, AArch64II::MO_PAGE)
+ .addBlockAddress(
+ I.getOperand(1).getBlockAddress(), /* Offset */ 0,
+ AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
+ }
+ }
+ }
return false;
}
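
For reference, in the non-large code models the block address above goes
through the MOVaddrBA pseudo, whose paired MO_PAGE/MO_PAGEOFF operands are
expanded after selection into an adrp/add pair; the large-code-model path
instead reuses the movz/movk helper introduced earlier in this patch.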
diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
index 9b8c0a34efba..327c758a7f8e 100644
--- a/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -293,6 +293,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));
}
+ getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
+
// Merge/Unmerge
for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index 798340f8fed8..e42214d15699 100644
--- a/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -146,7 +146,7 @@ public:
Optional<bool> hasRedZone() const { return HasRedZone; }
void setHasRedZone(bool s) { HasRedZone = s; }
-
+
int getVarArgsStackIndex() const { return VarArgsStackIndex; }
void setVarArgsStackIndex(int Index) { VarArgsStackIndex = Index; }
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td
index 7a653e117fd1..bbf401b474ca 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -764,18 +764,35 @@ def Z30 : AArch64Reg<30, "z30", [Q30, Z30_HI]>, DwarfRegNum<[126]>;
def Z31 : AArch64Reg<31, "z31", [Q31, Z31_HI]>, DwarfRegNum<[127]>;
}
+// Enum describing the element size for destructive
+// operations.
+class ElementSizeEnum<bits<3> val> {
+ bits<3> Value = val;
+}
+
+def ElementSizeNone : ElementSizeEnum<0>;
+def ElementSizeB : ElementSizeEnum<1>;
+def ElementSizeH : ElementSizeEnum<2>;
+def ElementSizeS : ElementSizeEnum<3>;
+def ElementSizeD : ElementSizeEnum<4>;
+def ElementSizeQ : ElementSizeEnum<5>; // Unused
+
class SVERegOp <string Suffix, AsmOperandClass C,
+ ElementSizeEnum Size,
RegisterClass RC> : RegisterOperand<RC> {
+ ElementSizeEnum ElementSize;
+
+ let ElementSize = Size;
let PrintMethod = !if(!eq(Suffix, ""),
"printSVERegOp<>",
"printSVERegOp<'" # Suffix # "'>");
let ParserMatchClass = C;
}
-class PPRRegOp <string Suffix, AsmOperandClass C,
- RegisterClass RC> : SVERegOp<Suffix, C, RC> {}
-class ZPRRegOp <string Suffix, AsmOperandClass C,
- RegisterClass RC> : SVERegOp<Suffix, C, RC> {}
+class PPRRegOp <string Suffix, AsmOperandClass C, ElementSizeEnum Size,
+ RegisterClass RC> : SVERegOp<Suffix, C, Size, RC> {}
+class ZPRRegOp <string Suffix, AsmOperandClass C, ElementSizeEnum Size,
+ RegisterClass RC> : SVERegOp<Suffix, C, Size, RC> {}
//******************************************************************************
@@ -805,11 +822,11 @@ def PPRAsmOp16 : PPRAsmOperand<"PredicateH", "PPR", 16>;
def PPRAsmOp32 : PPRAsmOperand<"PredicateS", "PPR", 32>;
def PPRAsmOp64 : PPRAsmOperand<"PredicateD", "PPR", 64>;
-def PPRAny : PPRRegOp<"", PPRAsmOpAny, PPR>;
-def PPR8 : PPRRegOp<"b", PPRAsmOp8, PPR>;
-def PPR16 : PPRRegOp<"h", PPRAsmOp16, PPR>;
-def PPR32 : PPRRegOp<"s", PPRAsmOp32, PPR>;
-def PPR64 : PPRRegOp<"d", PPRAsmOp64, PPR>;
+def PPRAny : PPRRegOp<"", PPRAsmOpAny, ElementSizeNone, PPR>;
+def PPR8 : PPRRegOp<"b", PPRAsmOp8, ElementSizeB, PPR>;
+def PPR16 : PPRRegOp<"h", PPRAsmOp16, ElementSizeH, PPR>;
+def PPR32 : PPRRegOp<"s", PPRAsmOp32, ElementSizeS, PPR>;
+def PPR64 : PPRRegOp<"d", PPRAsmOp64, ElementSizeD, PPR>;
def PPRAsmOp3bAny : PPRAsmOperand<"Predicate3bAny", "PPR_3b", 0>;
def PPRAsmOp3b8 : PPRAsmOperand<"Predicate3bB", "PPR_3b", 8>;
@@ -817,11 +834,11 @@ def PPRAsmOp3b16 : PPRAsmOperand<"Predicate3bH", "PPR_3b", 16>;
def PPRAsmOp3b32 : PPRAsmOperand<"Predicate3bS", "PPR_3b", 32>;
def PPRAsmOp3b64 : PPRAsmOperand<"Predicate3bD", "PPR_3b", 64>;
-def PPR3bAny : PPRRegOp<"", PPRAsmOp3bAny, PPR_3b>;
-def PPR3b8 : PPRRegOp<"b", PPRAsmOp3b8, PPR_3b>;
-def PPR3b16 : PPRRegOp<"h", PPRAsmOp3b16, PPR_3b>;
-def PPR3b32 : PPRRegOp<"s", PPRAsmOp3b32, PPR_3b>;
-def PPR3b64 : PPRRegOp<"d", PPRAsmOp3b64, PPR_3b>;
+def PPR3bAny : PPRRegOp<"", PPRAsmOp3bAny, ElementSizeNone, PPR_3b>;
+def PPR3b8 : PPRRegOp<"b", PPRAsmOp3b8, ElementSizeB, PPR_3b>;
+def PPR3b16 : PPRRegOp<"h", PPRAsmOp3b16, ElementSizeH, PPR_3b>;
+def PPR3b32 : PPRRegOp<"s", PPRAsmOp3b32, ElementSizeS, PPR_3b>;
+def PPR3b64 : PPRRegOp<"d", PPRAsmOp3b64, ElementSizeD, PPR_3b>;
//******************************************************************************
@@ -874,28 +891,28 @@ def ZPRAsmOp32 : ZPRAsmOperand<"VectorS", 32>;
def ZPRAsmOp64 : ZPRAsmOperand<"VectorD", 64>;
def ZPRAsmOp128 : ZPRAsmOperand<"VectorQ", 128>;
-def ZPRAny : ZPRRegOp<"", ZPRAsmOpAny, ZPR>;
-def ZPR8 : ZPRRegOp<"b", ZPRAsmOp8, ZPR>;
-def ZPR16 : ZPRRegOp<"h", ZPRAsmOp16, ZPR>;
-def ZPR32 : ZPRRegOp<"s", ZPRAsmOp32, ZPR>;
-def ZPR64 : ZPRRegOp<"d", ZPRAsmOp64, ZPR>;
-def ZPR128 : ZPRRegOp<"q", ZPRAsmOp128, ZPR>;
+def ZPRAny : ZPRRegOp<"", ZPRAsmOpAny, ElementSizeNone, ZPR>;
+def ZPR8 : ZPRRegOp<"b", ZPRAsmOp8, ElementSizeB, ZPR>;
+def ZPR16 : ZPRRegOp<"h", ZPRAsmOp16, ElementSizeH, ZPR>;
+def ZPR32 : ZPRRegOp<"s", ZPRAsmOp32, ElementSizeS, ZPR>;
+def ZPR64 : ZPRRegOp<"d", ZPRAsmOp64, ElementSizeD, ZPR>;
+def ZPR128 : ZPRRegOp<"q", ZPRAsmOp128, ElementSizeQ, ZPR>;
def ZPRAsmOp3b8 : ZPRAsmOperand<"Vector3bB", 8, "_3b">;
def ZPRAsmOp3b16 : ZPRAsmOperand<"Vector3bH", 16, "_3b">;
def ZPRAsmOp3b32 : ZPRAsmOperand<"Vector3bS", 32, "_3b">;
-def ZPR3b8 : ZPRRegOp<"b", ZPRAsmOp3b8, ZPR_3b>;
-def ZPR3b16 : ZPRRegOp<"h", ZPRAsmOp3b16, ZPR_3b>;
-def ZPR3b32 : ZPRRegOp<"s", ZPRAsmOp3b32, ZPR_3b>;
+def ZPR3b8 : ZPRRegOp<"b", ZPRAsmOp3b8, ElementSizeB, ZPR_3b>;
+def ZPR3b16 : ZPRRegOp<"h", ZPRAsmOp3b16, ElementSizeH, ZPR_3b>;
+def ZPR3b32 : ZPRRegOp<"s", ZPRAsmOp3b32, ElementSizeS, ZPR_3b>;
def ZPRAsmOp4b16 : ZPRAsmOperand<"Vector4bH", 16, "_4b">;
def ZPRAsmOp4b32 : ZPRAsmOperand<"Vector4bS", 32, "_4b">;
def ZPRAsmOp4b64 : ZPRAsmOperand<"Vector4bD", 64, "_4b">;
-def ZPR4b16 : ZPRRegOp<"h", ZPRAsmOp4b16, ZPR_4b>;
-def ZPR4b32 : ZPRRegOp<"s", ZPRAsmOp4b32, ZPR_4b>;
-def ZPR4b64 : ZPRRegOp<"d", ZPRAsmOp4b64, ZPR_4b>;
+def ZPR4b16 : ZPRRegOp<"h", ZPRAsmOp4b16, ElementSizeH, ZPR_4b>;
+def ZPR4b32 : ZPRRegOp<"s", ZPRAsmOp4b32, ElementSizeS, ZPR_4b>;
+def ZPR4b64 : ZPRRegOp<"d", ZPRAsmOp4b64, ElementSizeD, ZPR_4b>;
class FPRasZPR<int Width> : AsmOperandClass{
let Name = "FPR" # Width # "asZPR";
diff --git a/lib/Target/AArch64/AArch64SVEInstrInfo.td b/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 16e6ddda6398..0fde68011e86 100644
--- a/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -220,10 +220,33 @@ let Predicates = [HasSVE] in {
def PUNPKLO_PP : sve_int_perm_punpk<0b0, "punpklo">;
def PUNPKHI_PP : sve_int_perm_punpk<0b1, "punpkhi">;
+ defm MOVPRFX_ZPzZ : sve_int_movprfx_pred_zero<0b000, "movprfx">;
+ defm MOVPRFX_ZPmZ : sve_int_movprfx_pred_merge<0b001, "movprfx">;
+ def MOVPRFX_ZZ : sve_int_bin_cons_misc_0_c<0b00000001, "movprfx", ZPRAny>;
def FEXPA_ZZ_H : sve_int_bin_cons_misc_0_c<0b01000000, "fexpa", ZPR16>;
def FEXPA_ZZ_S : sve_int_bin_cons_misc_0_c<0b10000000, "fexpa", ZPR32>;
def FEXPA_ZZ_D : sve_int_bin_cons_misc_0_c<0b11000000, "fexpa", ZPR64>;
+ def BRKPA_PPzPP : sve_int_brkp<0b00, "brkpa">;
+ def BRKPAS_PPzPP : sve_int_brkp<0b10, "brkpas">;
+ def BRKPB_PPzPP : sve_int_brkp<0b01, "brkpb">;
+ def BRKPBS_PPzPP : sve_int_brkp<0b11, "brkpbs">;
+
+ def BRKN_PPzP : sve_int_brkn<0b0, "brkn">;
+ def BRKNS_PPzP : sve_int_brkn<0b1, "brkns">;
+
+ defm BRKA_PPzP : sve_int_break_z<0b000, "brka">;
+ defm BRKA_PPmP : sve_int_break_m<0b001, "brka">;
+ defm BRKAS_PPzP : sve_int_break_z<0b010, "brkas">;
+ defm BRKB_PPzP : sve_int_break_z<0b100, "brkb">;
+ defm BRKB_PPmP : sve_int_break_m<0b101, "brkb">;
+ defm BRKBS_PPzP : sve_int_break_z<0b110, "brkbs">;
+
+ def PTEST_PP : sve_int_ptest<0b010000, "ptest">;
+ def PFALSE : sve_int_pfalse<0b000000, "pfalse">;
+ defm PFIRST : sve_int_pfirst<0b00000, "pfirst">;
+ defm PNEXT : sve_int_pnext<0b00110, "pnext">;
+
def AND_PPzPP : sve_int_pred_log<0b0000, "and">;
def BIC_PPzPP : sve_int_pred_log<0b0001, "bic">;
def EOR_PPzPP : sve_int_pred_log<0b0010, "eor">;
@@ -731,6 +754,21 @@ let Predicates = [HasSVE] in {
defm FCMEQ_PPzZ0 : sve_fp_2op_p_pd<0b100, "fcmeq">;
defm FCMNE_PPzZ0 : sve_fp_2op_p_pd<0b110, "fcmne">;
+ defm WHILELT_PWW : sve_int_while4_rr<0b010, "whilelt">;
+ defm WHILELE_PWW : sve_int_while4_rr<0b011, "whilele">;
+ defm WHILELO_PWW : sve_int_while4_rr<0b110, "whilelo">;
+ defm WHILELS_PWW : sve_int_while4_rr<0b111, "whilels">;
+
+ defm WHILELT_PXX : sve_int_while8_rr<0b010, "whilelt">;
+ defm WHILELE_PXX : sve_int_while8_rr<0b011, "whilele">;
+ defm WHILELO_PXX : sve_int_while8_rr<0b110, "whilelo">;
+ defm WHILELS_PXX : sve_int_while8_rr<0b111, "whilels">;
+
+ def CTERMEQ_WW : sve_int_cterm<0b0, 0b0, "ctermeq", GPR32>;
+ def CTERMNE_WW : sve_int_cterm<0b0, 0b1, "ctermne", GPR32>;
+ def CTERMEQ_XX : sve_int_cterm<0b1, 0b0, "ctermeq", GPR64>;
+ def CTERMNE_XX : sve_int_cterm<0b1, 0b1, "ctermne", GPR64>;
+
def RDVLI_XI : sve_int_read_vl_a<0b0, 0b11111, "rdvl">;
def ADDVL_XXI : sve_int_arith_vl<0b0, "addvl">;
def ADDPL_XXI : sve_int_arith_vl<0b1, "addpl">;
@@ -854,40 +892,40 @@ let Predicates = [HasSVE] in {
defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr">;
defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl">;
- def FCVT_ZPmZ_StoH : sve_fp_2op_p_zd<0b1001000, "fcvt", ZPR32, ZPR16>;
- def FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd<0b1001001, "fcvt", ZPR16, ZPR32>;
- def SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110010, "scvtf", ZPR16, ZPR16>;
- def SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010100, "scvtf", ZPR32, ZPR32>;
- def UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010101, "ucvtf", ZPR32, ZPR32>;
- def UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110011, "ucvtf", ZPR16, ZPR16>;
- def FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111010, "fcvtzs", ZPR16, ZPR16>;
- def FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011100, "fcvtzs", ZPR32, ZPR32>;
- def FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111011, "fcvtzu", ZPR16, ZPR16>;
- def FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011101, "fcvtzu", ZPR32, ZPR32>;
- def FCVT_ZPmZ_DtoH : sve_fp_2op_p_zd<0b1101000, "fcvt", ZPR64, ZPR16>;
- def FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd<0b1101001, "fcvt", ZPR16, ZPR64>;
- def FCVT_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1101010, "fcvt", ZPR64, ZPR32>;
- def FCVT_ZPmZ_StoD : sve_fp_2op_p_zd<0b1101011, "fcvt", ZPR32, ZPR64>;
- def SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110000, "scvtf", ZPR32, ZPR64>;
- def UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110001, "ucvtf", ZPR32, ZPR64>;
- def UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110101, "ucvtf", ZPR32, ZPR16>;
- def SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110100, "scvtf", ZPR64, ZPR32>;
- def SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110100, "scvtf", ZPR32, ZPR16>;
- def SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110110, "scvtf", ZPR64, ZPR16>;
- def UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110101, "ucvtf", ZPR64, ZPR32>;
- def UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110111, "ucvtf", ZPR64, ZPR16>;
- def SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110110, "scvtf", ZPR64, ZPR64>;
- def UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110111, "ucvtf", ZPR64, ZPR64>;
- def FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111000, "fcvtzs", ZPR64, ZPR32>;
- def FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111001, "fcvtzu", ZPR64, ZPR32>;
- def FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111100, "fcvtzs", ZPR32, ZPR64>;
- def FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111100, "fcvtzs", ZPR16, ZPR32>;
- def FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64>;
- def FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32>;
- def FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64>;
- def FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64>;
- def FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64>;
- def FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64>;
+ def FCVT_ZPmZ_StoH : sve_fp_2op_p_zd<0b1001000, "fcvt", ZPR32, ZPR16, ElementSizeS>;
+ def FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd<0b1001001, "fcvt", ZPR16, ZPR32, ElementSizeS>;
+ def SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110010, "scvtf", ZPR16, ZPR16, ElementSizeH>;
+ def SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010100, "scvtf", ZPR32, ZPR32, ElementSizeS>;
+ def UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010101, "ucvtf", ZPR32, ZPR32, ElementSizeS>;
+ def UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110011, "ucvtf", ZPR16, ZPR16, ElementSizeH>;
+ def FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111010, "fcvtzs", ZPR16, ZPR16, ElementSizeH>;
+ def FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011100, "fcvtzs", ZPR32, ZPR32, ElementSizeS>;
+ def FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111011, "fcvtzu", ZPR16, ZPR16, ElementSizeH>;
+ def FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011101, "fcvtzu", ZPR32, ZPR32, ElementSizeS>;
+ def FCVT_ZPmZ_DtoH : sve_fp_2op_p_zd<0b1101000, "fcvt", ZPR64, ZPR16, ElementSizeD>;
+ def FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd<0b1101001, "fcvt", ZPR16, ZPR64, ElementSizeD>;
+ def FCVT_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1101010, "fcvt", ZPR64, ZPR32, ElementSizeD>;
+ def FCVT_ZPmZ_StoD : sve_fp_2op_p_zd<0b1101011, "fcvt", ZPR32, ZPR64, ElementSizeD>;
+ def SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110000, "scvtf", ZPR32, ZPR64, ElementSizeD>;
+ def UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110001, "ucvtf", ZPR32, ZPR64, ElementSizeD>;
+ def UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110101, "ucvtf", ZPR32, ZPR16, ElementSizeS>;
+ def SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110100, "scvtf", ZPR64, ZPR32, ElementSizeD>;
+ def SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110100, "scvtf", ZPR32, ZPR16, ElementSizeS>;
+ def SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110110, "scvtf", ZPR64, ZPR16, ElementSizeD>;
+ def UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110101, "ucvtf", ZPR64, ZPR32, ElementSizeD>;
+ def UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110111, "ucvtf", ZPR64, ZPR16, ElementSizeD>;
+ def SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110110, "scvtf", ZPR64, ZPR64, ElementSizeD>;
+ def UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110111, "ucvtf", ZPR64, ZPR64, ElementSizeD>;
+ def FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111000, "fcvtzs", ZPR64, ZPR32, ElementSizeD>;
+ def FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111001, "fcvtzu", ZPR64, ZPR32, ElementSizeD>;
+ def FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111100, "fcvtzs", ZPR32, ZPR64, ElementSizeD>;
+ def FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111100, "fcvtzs", ZPR16, ZPR32, ElementSizeS>;
+ def FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64, ElementSizeD>;
+ def FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32, ElementSizeS>;
+ def FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64, ElementSizeD>;
+ def FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64, ElementSizeD>;
+ def FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, ElementSizeD>;
+ def FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, ElementSizeD>;
defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn">;
defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp">;
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index 01a997e5aed7..120d71381c67 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -255,6 +255,9 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT,
// AArch64 supports the MachineOutliner.
setMachineOutliner(true);
+
+ // AArch64 supports default outlining behaviour.
+ setSupportsDefaultOutlining(true);
}
AArch64TargetMachine::~AArch64TargetMachine() = default;
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index d75fef7b0171..96e751e86971 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -577,7 +577,7 @@ int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
unsigned NumVectorInstToHideOverhead = 10;
int MaxMergeDistance = 64;
- if (Ty->isVectorTy() && SE &&
+ if (Ty->isVectorTy() && SE &&
!BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
return NumVectorInstToHideOverhead;
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index a51c41d70915..30a9a08f2346 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -11,6 +11,7 @@
#include "MCTargetDesc/AArch64MCExpr.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "MCTargetDesc/AArch64TargetStreamer.h"
+#include "AArch64InstrInfo.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
@@ -79,6 +80,67 @@ private:
// Map of register aliases registers via the .req directive.
StringMap<std::pair<RegKind, unsigned>> RegisterReqs;
+ class PrefixInfo {
+ public:
+ static PrefixInfo CreateFromInst(const MCInst &Inst, uint64_t TSFlags) {
+ PrefixInfo Prefix;
+ switch (Inst.getOpcode()) {
+ case AArch64::MOVPRFX_ZZ:
+ Prefix.Active = true;
+ Prefix.Dst = Inst.getOperand(0).getReg();
+ break;
+ case AArch64::MOVPRFX_ZPmZ_B:
+ case AArch64::MOVPRFX_ZPmZ_H:
+ case AArch64::MOVPRFX_ZPmZ_S:
+ case AArch64::MOVPRFX_ZPmZ_D:
+ Prefix.Active = true;
+ Prefix.Predicated = true;
+ Prefix.ElementSize = TSFlags & AArch64::ElementSizeMask;
+ assert(Prefix.ElementSize != AArch64::ElementSizeNone &&
+ "No destructive element size set for movprfx");
+ Prefix.Dst = Inst.getOperand(0).getReg();
+ Prefix.Pg = Inst.getOperand(2).getReg();
+ break;
+ case AArch64::MOVPRFX_ZPzZ_B:
+ case AArch64::MOVPRFX_ZPzZ_H:
+ case AArch64::MOVPRFX_ZPzZ_S:
+ case AArch64::MOVPRFX_ZPzZ_D:
+ Prefix.Active = true;
+ Prefix.Predicated = true;
+ Prefix.ElementSize = TSFlags & AArch64::ElementSizeMask;
+ assert(Prefix.ElementSize != AArch64::ElementSizeNone &&
+ "No destructive element size set for movprfx");
+ Prefix.Dst = Inst.getOperand(0).getReg();
+ Prefix.Pg = Inst.getOperand(1).getReg();
+ break;
+ default:
+ break;
+ }
+
+ return Prefix;
+ }
+
+ PrefixInfo() : Active(false), Predicated(false) {}
+ bool isActive() const { return Active; }
+ bool isPredicated() const { return Predicated; }
+ unsigned getElementSize() const {
+ assert(Predicated);
+ return ElementSize;
+ }
+ unsigned getDstReg() const { return Dst; }
+ unsigned getPgReg() const {
+ assert(Predicated);
+ return Pg;
+ }
+
+ private:
+ bool Active;
+ bool Predicated;
+ unsigned ElementSize;
+ unsigned Dst;
+ unsigned Pg;
+ } NextPrefix;
+
AArch64TargetStreamer &getTargetStreamer() {
MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
return static_cast<AArch64TargetStreamer &>(TS);
@@ -113,7 +175,8 @@ private:
bool parseDirectiveReq(StringRef Name, SMLoc L);
bool parseDirectiveUnreq(SMLoc L);
- bool validateInstruction(MCInst &Inst, SmallVectorImpl<SMLoc> &Loc);
+ bool validateInstruction(MCInst &Inst, SMLoc &IDLoc,
+ SmallVectorImpl<SMLoc> &Loc);
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
@@ -3665,12 +3728,89 @@ bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
return false;
}
+static inline bool isMatchingOrAlias(unsigned ZReg, unsigned Reg) {
+ assert((ZReg >= AArch64::Z0) && (ZReg <= AArch64::Z31));
+ return (ZReg == ((Reg - AArch64::B0) + AArch64::Z0)) ||
+ (ZReg == ((Reg - AArch64::H0) + AArch64::Z0)) ||
+ (ZReg == ((Reg - AArch64::S0) + AArch64::Z0)) ||
+ (ZReg == ((Reg - AArch64::D0) + AArch64::Z0)) ||
+ (ZReg == ((Reg - AArch64::Q0) + AArch64::Z0)) ||
+ (ZReg == ((Reg - AArch64::Z0) + AArch64::Z0));
+}
+
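As a worked example of the normalization above: for Reg = AArch64::S3, the term
(Reg - AArch64::S0) + AArch64::Z0 lands on AArch64::Z3, so
isMatchingOrAlias(AArch64::Z3, AArch64::S3) is true, while the remaining terms
simply miss. The check therefore treats a write to any of b3/h3/s3/d3/q3 as
touching z3, which is exactly the overlap the movprfx rules below need to
detect. (This relies on the assumption that the B/H/S/D/Q/Z register ranges are
each contiguous in the generated register enum.)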
// FIXME: This entire function is a giant hack to provide us with decent
// operand range validation/diagnostics until TableGen/MC can be extended
// to support autogeneration of this kind of validation.
-bool AArch64AsmParser::validateInstruction(MCInst &Inst,
- SmallVectorImpl<SMLoc> &Loc) {
+bool AArch64AsmParser::validateInstruction(MCInst &Inst, SMLoc &IDLoc,
+ SmallVectorImpl<SMLoc> &Loc) {
const MCRegisterInfo *RI = getContext().getRegisterInfo();
+ const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
+
+ // A prefix only applies to the instruction following it. Here we extract
+ // prefix information for the next instruction before validating the current
+// one so that in the case of failure we don't erroneously continue using the
+ // current prefix.
+ PrefixInfo Prefix = NextPrefix;
+ NextPrefix = PrefixInfo::CreateFromInst(Inst, MCID.TSFlags);
+
+ // Before validating the instruction in isolation we run through the rules
+ // applicable when it follows a prefix instruction.
+ // NOTE: brk & hlt can be prefixed but require no additional validation.
+ if (Prefix.isActive() &&
+ (Inst.getOpcode() != AArch64::BRK) &&
+ (Inst.getOpcode() != AArch64::HLT)) {
+
+ // Prefixed instructions must have a destructive operand.
+ if ((MCID.TSFlags & AArch64::DestructiveInstTypeMask) ==
+ AArch64::NotDestructive)
+ return Error(IDLoc, "instruction is unpredictable when following a"
+ " movprfx, suggest replacing movprfx with mov");
+
+ // Destination operands must match.
+ if (Inst.getOperand(0).getReg() != Prefix.getDstReg())
+ return Error(Loc[0], "instruction is unpredictable when following a"
+ " movprfx writing to a different destination");
+
+ // Destination operand must not be used in any other location.
+ for (unsigned i = 1; i < Inst.getNumOperands(); ++i) {
+ if (Inst.getOperand(i).isReg() &&
+ (MCID.getOperandConstraint(i, MCOI::TIED_TO) == -1) &&
+ isMatchingOrAlias(Prefix.getDstReg(), Inst.getOperand(i).getReg()))
+ return Error(Loc[0], "instruction is unpredictable when following a"
+ " movprfx and destination also used as non-destructive"
+ " source");
+ }
+
+ auto PPRRegClass = AArch64MCRegisterClasses[AArch64::PPRRegClassID];
+ if (Prefix.isPredicated()) {
+ int PgIdx = -1;
+
+ // Find the instruction's general predicate.
+ for (unsigned i = 1; i < Inst.getNumOperands(); ++i)
+ if (Inst.getOperand(i).isReg() &&
+ PPRRegClass.contains(Inst.getOperand(i).getReg())) {
+ PgIdx = i;
+ break;
+ }
+
+ // Instruction must be predicated if the movprfx is predicated.
+ if (PgIdx == -1 ||
+ (MCID.TSFlags & AArch64::ElementSizeMask) == AArch64::ElementSizeNone)
+ return Error(IDLoc, "instruction is unpredictable when following a"
+ " predicated movprfx, suggest using unpredicated movprfx");
+
+ // Instruction must use the same general predicate as the movprfx.
+ if (Inst.getOperand(PgIdx).getReg() != Prefix.getPgReg())
+ return Error(IDLoc, "instruction is unpredictable when following a"
+ " predicated movprfx using a different general predicate");
+
+ // Instruction element type must match the movprfx.
+ if ((MCID.TSFlags & AArch64::ElementSizeMask) != Prefix.getElementSize())
+ return Error(IDLoc, "instruction is unpredictable when following a"
+ " predicated movprfx with a different element size");
+ }
+ }
+
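To make the rules above concrete (illustrative assembly): a sequence like
`movprfx z0.s, p0/m, z5.s` followed by `add z0.s, p0/m, z0.s, z1.s` is accepted
(destructive consumer, same destination z0, same governing predicate p0,
matching .s element size). Following the same prefix with
`add z0.d, p0/m, z0.d, z1.d` would be diagnosed for the mismatched element
size, and `add z1.s, p0/m, z1.s, z2.s` for writing a different destination.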
// Check for indexed addressing modes w/ the base register being the
// same as a destination/source register or pair load where
// the Rt == Rt2. All of those are undefined behaviour.
@@ -4516,7 +4656,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
NumOperands = Operands.size();
for (unsigned i = 1; i < NumOperands; ++i)
OperandLocs.push_back(Operands[i]->getStartLoc());
- if (validateInstruction(Inst, OperandLocs))
+ if (validateInstruction(Inst, IDLoc, OperandLocs))
return true;
Inst.setLoc(IDLoc);
@@ -4719,7 +4859,6 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
const MCObjectFileInfo::Environment Format =
getContext().getObjectFileInfo()->getObjectFileType();
bool IsMachO = Format == MCObjectFileInfo::IsMachO;
- bool IsCOFF = Format == MCObjectFileInfo::IsCOFF;
StringRef IDVal = DirectiveID.getIdentifier();
SMLoc Loc = DirectiveID.getLoc();
@@ -4733,14 +4872,14 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
parseDirectiveLtorg(Loc);
else if (IDVal == ".unreq")
parseDirectiveUnreq(Loc);
- else if (!IsMachO && !IsCOFF) {
- if (IDVal == ".inst")
- parseDirectiveInst(Loc);
+ else if (IDVal == ".inst")
+ parseDirectiveInst(Loc);
+ else if (IsMachO) {
+ if (IDVal == MCLOHDirectiveName())
+ parseDirectiveLOH(IDVal, Loc);
else
return true;
- } else if (IDVal == MCLOHDirectiveName())
- parseDirectiveLOH(IDVal, Loc);
- else
+ } else
return true;
return false;
}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
index 1b949b54590c..dee964df2635 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
@@ -39,4 +39,16 @@ void AArch64TargetStreamer::emitCurrentConstantPool() {
// finish() - write out any non-empty assembler constant pools.
void AArch64TargetStreamer::finish() { ConstantPools->emitAll(Streamer); }
-void AArch64TargetStreamer::emitInst(uint32_t Inst) {}
+void AArch64TargetStreamer::emitInst(uint32_t Inst) {
+ char Buffer[4];
+
+ // We can't just use EmitIntValue here, as that will swap the
+ // endianness on big-endian systems (instructions are always
+ // little-endian).
+ for (unsigned I = 0; I < 4; ++I) {
+ Buffer[I] = uint8_t(Inst);
+ Inst >>= 8;
+ }
+
+ getStreamer().EmitBytes(StringRef(Buffer, 4));
+}
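The byte loop keeps the emitted encoding little-endian regardless of host
endianness, as the comment notes. An equivalent formulation using LLVM's endian
helpers, shown purely as a sketch (the patch itself keeps the explicit loop):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/MC/MCStreamer.h"
    #include "llvm/Support/Endian.h"

    void emitInstLE(llvm::MCStreamer &S, uint32_t Inst) {
      char Buffer[4];
      llvm::support::endian::write32le(Buffer, Inst); // force LE byte order
      S.EmitBytes(llvm::StringRef(Buffer, 4));
    }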
diff --git a/lib/Target/AArch64/SVEInstrFormats.td b/lib/Target/AArch64/SVEInstrFormats.td
index 17b3f6041279..7a8dd8bc5aee 100644
--- a/lib/Target/AArch64/SVEInstrFormats.td
+++ b/lib/Target/AArch64/SVEInstrFormats.td
@@ -282,6 +282,79 @@ let Predicates = [HasSVE] in {
//===----------------------------------------------------------------------===//
+// SVE Predicate Misc Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_pfalse<bits<6> opc, string asm>
+: I<(outs PPR8:$Pd), (ins),
+ asm, "\t$Pd",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pd;
+ let Inst{31-24} = 0b00100101;
+ let Inst{23-22} = opc{5-4};
+ let Inst{21-19} = 0b011;
+ let Inst{18-16} = opc{3-1};
+ let Inst{15-10} = 0b111001;
+ let Inst{9} = opc{0};
+ let Inst{8-4} = 0b00000;
+ let Inst{3-0} = Pd;
+}
+
+class sve_int_ptest<bits<6> opc, string asm>
+: I<(outs), (ins PPRAny:$Pg, PPR8:$Pn),
+ asm, "\t$Pg, $Pn",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pg;
+ bits<4> Pn;
+ let Inst{31-24} = 0b00100101;
+ let Inst{23-22} = opc{5-4};
+ let Inst{21-19} = 0b010;
+ let Inst{18-16} = opc{3-1};
+ let Inst{15-14} = 0b11;
+ let Inst{13-10} = Pg;
+ let Inst{9} = opc{0};
+ let Inst{8-5} = Pn;
+ let Inst{4-0} = 0b00000;
+
+ let Defs = [NZCV];
+}
+
+class sve_int_pfirst_next<bits<2> sz8_64, bits<5> opc, string asm,
+ PPRRegOp pprty>
+: I<(outs pprty:$Pdn), (ins PPRAny:$Pg, pprty:$_Pdn),
+ asm, "\t$Pdn, $Pg, $_Pdn",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pdn;
+ bits<4> Pg;
+ let Inst{31-24} = 0b00100101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21-19} = 0b011;
+ let Inst{18-16} = opc{4-2};
+ let Inst{15-11} = 0b11000;
+ let Inst{10-9} = opc{1-0};
+ let Inst{8-5} = Pg;
+ let Inst{4} = 0;
+ let Inst{3-0} = Pdn;
+
+ let Constraints = "$Pdn = $_Pdn";
+ let Defs = [NZCV];
+}
+
+multiclass sve_int_pfirst<bits<5> opc, string asm> {
+ def : sve_int_pfirst_next<0b01, opc, asm, PPR8>;
+}
+
+multiclass sve_int_pnext<bits<5> opc, string asm> {
+ def _B : sve_int_pfirst_next<0b00, opc, asm, PPR8>;
+ def _H : sve_int_pfirst_next<0b01, opc, asm, PPR16>;
+ def _S : sve_int_pfirst_next<0b10, opc, asm, PPR32>;
+ def _D : sve_int_pfirst_next<0b11, opc, asm, PPR64>;
+}
+
+//===----------------------------------------------------------------------===//
// SVE Predicate Count Group
//===----------------------------------------------------------------------===//
@@ -348,6 +421,8 @@ class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_int_count_v<bits<5> opc, string asm> {
@@ -433,6 +508,8 @@ class sve_int_countvlv<bits<5> opc, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_int_countvlv<bits<5> opc, string asm, ZPRRegOp zprty> {
@@ -738,6 +815,8 @@ class sve_int_perm_insrs<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_int_perm_insrs<string asm> {
@@ -762,6 +841,8 @@ class sve_int_perm_insrv<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_int_perm_insrv<string asm> {
@@ -790,6 +871,8 @@ class sve_int_perm_extract_i<string asm>
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
//===----------------------------------------------------------------------===//
@@ -883,6 +966,8 @@ class sve_int_log_imm<bits<2> opc, string asm>
let Constraints = "$Zdn = $_Zdn";
let DecoderMethod = "DecodeSVELogicalImmInstruction";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_int_log_imm<bits<2> opc, string asm, string alias> {
@@ -993,6 +1078,8 @@ class sve_fp_2op_i_p_zds<bits<2> sz, bits<3> opc, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_fp_2op_i_p_zds<bits<3> opc, string asm, Operand imm_ty> {
@@ -1020,6 +1107,8 @@ class sve_fp_2op_p_zds<bits<2> sz, bits<4> opc, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_fp_2op_p_zds<bits<4> opc, string asm> {
@@ -1045,6 +1134,8 @@ class sve_fp_ftmad<bits<2> sz, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_fp_ftmad<string asm> {
@@ -1106,6 +1197,8 @@ class sve_fp_3op_p_zds_a<bits<2> sz, bits<2> opc, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_fp_3op_p_zds_a<bits<2> opc, string asm> {
@@ -1135,6 +1228,8 @@ class sve_fp_3op_p_zds_b<bits<2> sz, bits<2> opc, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_fp_3op_p_zds_b<bits<2> opc, string asm> {
@@ -1163,6 +1258,8 @@ class sve_fp_fma_by_indexed_elem<bits<2> sz, bit opc, string asm,
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_fp_fma_by_indexed_elem<bit opc, string asm> {
@@ -1253,6 +1350,8 @@ class sve_fp_fcmla<bits<2> sz, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_fp_fcmla<string asm> {
@@ -1284,6 +1383,8 @@ class sve_fp_fcmla_by_indexed_elem<bits<2> sz, string asm,
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_fp_fcmla_by_indexed_elem<string asm> {
@@ -1325,6 +1426,8 @@ class sve_fp_fcadd<bits<2> sz, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_fp_fcadd<string asm> {
@@ -1405,7 +1508,7 @@ multiclass sve_int_perm_bin_perm_zz<bits<3> opc, string asm> {
//===----------------------------------------------------------------------===//
class sve_fp_2op_p_zd<bits<7> opc, string asm, RegisterOperand i_zprtype,
- RegisterOperand o_zprtype>
+ RegisterOperand o_zprtype, ElementSizeEnum size>
: I<(outs o_zprtype:$Zd), (ins i_zprtype:$_Zd, PPR3bAny:$Pg, i_zprtype:$Zn),
asm, "\t$Zd, $Pg/m, $Zn",
"",
@@ -1423,12 +1526,14 @@ class sve_fp_2op_p_zd<bits<7> opc, string asm, RegisterOperand i_zprtype,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
+ let DestructiveInstType = Destructive;
+ let ElementSize = size;
}
multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm> {
- def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16>;
- def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32>;
- def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64>;
+ def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16, ElementSizeH>;
+ def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>;
+ def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64, ElementSizeD>;
}
//===----------------------------------------------------------------------===//
@@ -1480,6 +1585,8 @@ class sve_int_bin_pred_arit_log<bits<2> sz8_64, bits<2> fmt, bits<3> opc,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_int_bin_pred_log<bits<3> opc, string asm> {
@@ -1541,6 +1648,8 @@ class sve_int_mladdsub_vvv_pred<bits<2> sz8_64, bits<1> opc, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_int_mladdsub_vvv_pred<bits<1> opc, string asm> {
@@ -1571,6 +1680,8 @@ class sve_int_mlas_vvv_pred<bits<2> sz8_64, bits<1> opc, string asm,
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_int_mlas_vvv_pred<bits<1> opc, string asm> {
@@ -1601,6 +1712,8 @@ class sve_intx_dot<bit sz, bit U, string asm, ZPRRegOp zprty1,
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty1.ElementSize;
}
multiclass sve_intx_dot<bit opc, string asm> {
@@ -1629,6 +1742,8 @@ class sve_intx_dot_by_indexed_elem<bit sz, bit U, string asm,
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_intx_dot_by_indexed_elem<bit opc, string asm> {
@@ -1670,6 +1785,8 @@ class sve_int_un_pred_arit<bits<2> sz8_64, bits<4> opc,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_int_un_pred_arit_0<bits<3> opc, string asm> {
@@ -1800,6 +1917,8 @@ class sve_int_arith_imm0<bits<2> sz8_64, bits<3> opc, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_int_arith_imm0<bits<3> opc, string asm> {
@@ -1825,6 +1944,8 @@ class sve_int_arith_imm<bits<2> sz8_64, bits<6> opc, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_int_arith_imm1<bits<2> opc, string asm, Operand immtype> {
@@ -1885,6 +2006,8 @@ class sve_int_dup_fpimm_pred<bits<2> sz, Operand fpimmtype,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_int_dup_fpimm_pred<string asm> {
@@ -1917,6 +2040,9 @@ class sve_int_dup_imm_pred<bits<2> sz8_64, bit m, string asm,
let Inst{13} = imm{8}; // sh
let Inst{12-5} = imm{7-0}; // imm8
let Inst{4-0} = Zd;
+
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_int_dup_imm_pred_merge<string asm> {
@@ -2083,6 +2209,65 @@ multiclass sve_int_ucmp_vi<bits<2> opc, string asm> {
//===----------------------------------------------------------------------===//
+// SVE Integer Compare - Scalars Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_cterm<bit sz, bit opc, string asm, RegisterClass rt>
+: I<(outs), (ins rt:$Rn, rt:$Rm),
+ asm, "\t$Rn, $Rm",
+ "",
+ []>, Sched<[]> {
+ bits<5> Rm;
+ bits<5> Rn;
+ let Inst{31-23} = 0b001001011;
+ let Inst{22} = sz;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rm;
+ let Inst{15-10} = 0b001000;
+ let Inst{9-5} = Rn;
+ let Inst{4} = opc;
+ let Inst{3-0} = 0b0000;
+
+ let Defs = [NZCV];
+}
+
+class sve_int_while_rr<bits<2> sz8_64, bits<4> opc, string asm,
+ RegisterClass gprty, PPRRegOp pprty>
+: I<(outs pprty:$Pd), (ins gprty:$Rn, gprty:$Rm),
+ asm, "\t$Pd, $Rn, $Rm",
+ "", []>, Sched<[]> {
+ bits<4> Pd;
+ bits<5> Rm;
+ bits<5> Rn;
+ let Inst{31-24} = 0b00100101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rm;
+ let Inst{15-13} = 0b000;
+ let Inst{12-10} = opc{3-1};
+ let Inst{9-5} = Rn;
+ let Inst{4} = opc{0};
+ let Inst{3-0} = Pd;
+
+ let Defs = [NZCV];
+}
+
+multiclass sve_int_while4_rr<bits<3> opc, string asm> {
+ def _B : sve_int_while_rr<0b00, { 0, opc }, asm, GPR32, PPR8>;
+ def _H : sve_int_while_rr<0b01, { 0, opc }, asm, GPR32, PPR16>;
+ def _S : sve_int_while_rr<0b10, { 0, opc }, asm, GPR32, PPR32>;
+ def _D : sve_int_while_rr<0b11, { 0, opc }, asm, GPR32, PPR64>;
+}
+
+multiclass sve_int_while8_rr<bits<3> opc, string asm> {
+ def _B : sve_int_while_rr<0b00, { 1, opc }, asm, GPR64, PPR8>;
+ def _H : sve_int_while_rr<0b01, { 1, opc }, asm, GPR64, PPR16>;
+ def _S : sve_int_while_rr<0b10, { 1, opc }, asm, GPR64, PPR32>;
+ def _D : sve_int_while_rr<0b11, { 1, opc }, asm, GPR64, PPR64>;
+}
+
+
+//===----------------------------------------------------------------------===//
// SVE Floating Point Fast Reduction Group
//===----------------------------------------------------------------------===//
@@ -2312,9 +2497,9 @@ multiclass sve_int_index_rr<string asm> {
//===----------------------------------------------------------------------===//
// SVE Bitwise Shift - Predicated Group
//===----------------------------------------------------------------------===//
-
class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<3> opc, string asm,
- ZPRRegOp zprty, Operand immtype>
+ ZPRRegOp zprty, Operand immtype,
+ ElementSizeEnum size>
: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, immtype:$imm),
asm, "\t$Zdn, $Pg/m, $_Zdn, $imm",
"",
@@ -2333,31 +2518,41 @@ class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<3> opc, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = size;
}
multiclass sve_int_bin_pred_shift_imm_left<bits<3> opc, string asm> {
- def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>;
- def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> {
+ def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8,
+ ElementSizeB>;
+ def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16,
+ ElementSizeH> {
let Inst{8} = imm{3};
}
- def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> {
+ def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32,
+ ElementSizeS> {
let Inst{9-8} = imm{4-3};
}
- def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> {
+ def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64,
+ ElementSizeD> {
let Inst{22} = imm{5};
let Inst{9-8} = imm{4-3};
}
}
multiclass sve_int_bin_pred_shift_imm_right<bits<3> opc, string asm> {
- def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
- def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
+ def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8,
+ ElementSizeB>;
+ def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16,
+ ElementSizeH> {
let Inst{8} = imm{3};
}
- def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> {
+ def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32,
+ ElementSizeS> {
let Inst{9-8} = imm{4-3};
}
- def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> {
+ def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64,
+ ElementSizeD> {
let Inst{22} = imm{5};
let Inst{9-8} = imm{4-3};
}
@@ -2383,6 +2578,8 @@ class sve_int_bin_pred_shift<bits<2> sz8_64, bit wide, bits<3> opc,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_int_bin_pred_shift<bits<3> opc, string asm> {
@@ -3017,6 +3214,8 @@ class sve_int_perm_clast_zz<bits<2> sz8_64, bit ab, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_int_perm_clast_zz<bit ab, string asm> {
@@ -3094,6 +3293,8 @@ class sve_int_perm_splice<bits<2> sz8_64, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_int_perm_splice<string asm> {
@@ -3122,6 +3323,8 @@ class sve_int_perm_rev<bits<2> sz8_64, bits<2> opc, string asm,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_int_perm_rev_rbit<string asm> {
@@ -3163,6 +3366,8 @@ class sve_int_perm_cpy_r<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_int_perm_cpy_r<string asm> {
@@ -3198,6 +3403,8 @@ class sve_int_perm_cpy_v<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_int_perm_cpy_v<string asm> {
@@ -4117,3 +4324,133 @@ multiclass sve_int_reduce_2<bits<3> opc, string asm> {
def _S : sve_int_reduce<0b10, 0b11, opc, asm, ZPR32, FPR32>;
def _D : sve_int_reduce<0b11, 0b11, opc, asm, ZPR64, FPR64>;
}
+
+class sve_int_movprfx_pred<bits<2> sz8_32, bits<3> opc, string asm,
+ ZPRRegOp zprty, string pg_suffix, dag iops>
+: I<(outs zprty:$Zd), iops,
+ asm, "\t$Zd, $Pg"#pg_suffix#", $Zn",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Zd;
+ bits<5> Zn;
+ let Inst{31-24} = 0b00000100;
+ let Inst{23-22} = sz8_32;
+ let Inst{21-19} = 0b010;
+ let Inst{18-16} = opc;
+ let Inst{15-13} = 0b001;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+
+ let ElementSize = zprty.ElementSize;
+}
+
+multiclass sve_int_movprfx_pred_merge<bits<3> opc, string asm> {
+let Constraints = "$Zd = $_Zd" in {
+ def _B : sve_int_movprfx_pred<0b00, opc, asm, ZPR8, "/m",
+ (ins ZPR8:$_Zd, PPR3bAny:$Pg, ZPR8:$Zn)>;
+ def _H : sve_int_movprfx_pred<0b01, opc, asm, ZPR16, "/m",
+ (ins ZPR16:$_Zd, PPR3bAny:$Pg, ZPR16:$Zn)>;
+ def _S : sve_int_movprfx_pred<0b10, opc, asm, ZPR32, "/m",
+ (ins ZPR32:$_Zd, PPR3bAny:$Pg, ZPR32:$Zn)>;
+ def _D : sve_int_movprfx_pred<0b11, opc, asm, ZPR64, "/m",
+ (ins ZPR64:$_Zd, PPR3bAny:$Pg, ZPR64:$Zn)>;
+}
+}
+
+multiclass sve_int_movprfx_pred_zero<bits<3> opc, string asm> {
+ def _B : sve_int_movprfx_pred<0b00, opc, asm, ZPR8, "/z",
+ (ins PPR3bAny:$Pg, ZPR8:$Zn)>;
+ def _H : sve_int_movprfx_pred<0b01, opc, asm, ZPR16, "/z",
+ (ins PPR3bAny:$Pg, ZPR16:$Zn)>;
+ def _S : sve_int_movprfx_pred<0b10, opc, asm, ZPR32, "/z",
+ (ins PPR3bAny:$Pg, ZPR32:$Zn)>;
+ def _D : sve_int_movprfx_pred<0b11, opc, asm, ZPR64, "/z",
+ (ins PPR3bAny:$Pg, ZPR64:$Zn)>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Propagate Break Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_brkp<bits<2> opc, string asm>
+: I<(outs PPR8:$Pd), (ins PPRAny:$Pg, PPR8:$Pn, PPR8:$Pm),
+ asm, "\t$Pd, $Pg/z, $Pn, $Pm",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pd;
+ bits<4> Pg;
+ bits<4> Pm;
+ bits<4> Pn;
+ let Inst{31-24} = 0b00100101;
+ let Inst{23} = 0b0;
+ let Inst{22} = opc{1};
+ let Inst{21-20} = 0b00;
+ let Inst{19-16} = Pm;
+ let Inst{15-14} = 0b11;
+ let Inst{13-10} = Pg;
+ let Inst{9} = 0b0;
+ let Inst{8-5} = Pn;
+ let Inst{4} = opc{0};
+ let Inst{3-0} = Pd;
+
+ let Defs = !if(!eq (opc{1}, 1), [NZCV], []);
+}
+
+
+//===----------------------------------------------------------------------===//
+// SVE Partition Break Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_brkn<bit S, string asm>
+: I<(outs PPR8:$Pdm), (ins PPRAny:$Pg, PPR8:$Pn, PPR8:$_Pdm),
+ asm, "\t$Pdm, $Pg/z, $Pn, $_Pdm",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pdm;
+ bits<4> Pg;
+ bits<4> Pn;
+ let Inst{31-23} = 0b001001010;
+ let Inst{22} = S;
+ let Inst{21-14} = 0b01100001;
+ let Inst{13-10} = Pg;
+ let Inst{9} = 0b0;
+ let Inst{8-5} = Pn;
+ let Inst{4} = 0b0;
+ let Inst{3-0} = Pdm;
+
+ let Constraints = "$Pdm = $_Pdm";
+ let Defs = !if(!eq (S, 0b1), [NZCV], []);
+}
+
+class sve_int_break<bits<3> opc, string asm, string suffix, dag iops>
+: I<(outs PPR8:$Pd), iops,
+ asm, "\t$Pd, $Pg"#suffix#", $Pn",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pd;
+ bits<4> Pg;
+ bits<4> Pn;
+ let Inst{31-24} = 0b00100101;
+ let Inst{23-22} = opc{2-1};
+ let Inst{21-14} = 0b01000001;
+ let Inst{13-10} = Pg;
+ let Inst{9} = 0b0;
+ let Inst{8-5} = Pn;
+ let Inst{4} = opc{0};
+ let Inst{3-0} = Pd;
+
+ let Constraints = !if(!eq (opc{0}, 1), "$Pd = $_Pd", "");
+ let Defs = !if(!eq (opc{1}, 1), [NZCV], []);
+
+}
+
+multiclass sve_int_break_m<bits<3> opc, string asm> {
+ def NAME : sve_int_break<opc, asm, "/m", (ins PPR8:$_Pd, PPRAny:$Pg, PPR8:$Pn)>;
+}
+
+multiclass sve_int_break_z<bits<3> opc, string asm> {
+ def NAME : sve_int_break<opc, asm, "/z", (ins PPRAny:$Pg, PPR8:$Pn)>;
+}
+
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index b201126c593b..21e44e9589d3 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -554,6 +554,7 @@ static bool fnegFoldsIntoOp(unsigned Opc) {
case ISD::FTRUNC:
case ISD::FRINT:
case ISD::FNEARBYINT:
+ case ISD::FCANONICALIZE:
case AMDGPUISD::RCP:
case AMDGPUISD::RCP_LEGACY:
case AMDGPUISD::RCP_IFLAG:
@@ -907,6 +908,7 @@ void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(
LLVMContext &Ctx = Fn.getParent()->getContext();
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(MF);
const unsigned ExplicitOffset = ST.getExplicitKernelArgOffset(Fn);
+ CallingConv::ID CC = Fn.getCallingConv();
unsigned MaxAlign = 1;
uint64_t ExplicitArgOffset = 0;
@@ -940,16 +942,10 @@ void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(
EVT ArgVT = ValueVTs[Value];
EVT MemVT = ArgVT;
- MVT RegisterVT =
- getRegisterTypeForCallingConv(Ctx, ArgVT);
- unsigned NumRegs =
- getNumRegistersForCallingConv(Ctx, ArgVT);
-
- if (!Subtarget->isAmdHsaOS() &&
- (ArgVT == MVT::i16 || ArgVT == MVT::i8 || ArgVT == MVT::f16)) {
- // The ABI says the caller will extend these values to 32-bits.
- MemVT = ArgVT.isInteger() ? MVT::i32 : MVT::f32;
- } else if (NumRegs == 1) {
+ MVT RegisterVT = getRegisterTypeForCallingConv(Ctx, CC, ArgVT);
+ unsigned NumRegs = getNumRegistersForCallingConv(Ctx, CC, ArgVT);
+
+ if (NumRegs == 1) {
// This argument is not split, so the IR type is the memory type.
if (ArgVT.isExtended()) {
// We have an extended type, like i24, so we should just use the
@@ -3600,6 +3596,7 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
case ISD::FRINT:
case ISD::FNEARBYINT: // XXX - Should fround be handled?
case ISD::FSIN:
+ case ISD::FCANONICALIZE:
case AMDGPUISD::RCP:
case AMDGPUISD::RCP_LEGACY:
case AMDGPUISD::RCP_IFLAG:
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index 96b7568eec1f..7442a59e594f 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -342,8 +342,9 @@ def AMDGPUumed3 : SDNode<"AMDGPUISD::UMED3", AMDGPUDTIntTernaryOp,
def AMDGPUfmed3 : SDNode<"AMDGPUISD::FMED3", SDTFPTernaryOp, []>;
def AMDGPUfdot2 : SDNode<"AMDGPUISD::FDOT2",
- SDTypeProfile<1, 3, [SDTCisSameAs<0, 3>, SDTCisSameAs<1, 2>,
- SDTCisFP<0>, SDTCisVec<1>]>,
+ SDTypeProfile<1, 4, [SDTCisSameAs<0, 3>, SDTCisSameAs<1, 2>,
+ SDTCisFP<0>, SDTCisVec<1>,
+ SDTCisInt<4>]>,
[]>;
def AMDGPUperm : SDNode<"AMDGPUISD::PERM", AMDGPUDTIntTernaryOp, []>;
diff --git a/lib/Target/AMDGPU/AMDGPUInstructions.td b/lib/Target/AMDGPU/AMDGPUInstructions.td
index 9426df399597..c9c932ef2f5f 100644
--- a/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -567,6 +567,7 @@ int PI = 0x40490fdb;
int TWO_PI_INV = 0x3e22f983;
int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding
int FP16_ONE = 0x3C00;
+int FP16_NEG_ONE = 0xBC00;
int V2FP16_ONE = 0x3C003C00;
int FP32_ONE = 0x3f800000;
int FP32_NEG_ONE = 0xbf800000;
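(0xBC00 is the IEEE-754 half-precision encoding of -1.0, complementing the
existing FP16_ONE = 0x3C00 for +1.0.)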
diff --git a/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
index 8cc7e38f7b29..c147830e12ed 100644
--- a/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ b/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -100,16 +100,6 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
unsigned Size = DL.getTypeSizeInBits(ArgTy);
unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
-
- // Clover seems to always pad i8/i16 to i32, but doesn't properly align
- // them?
- // Make sure the struct elements have correct size and alignment for ext
- // args. These seem to be padded up to 4-bytes but not correctly aligned.
- bool IsExtArg = AllocSize < 32 && (Arg.hasZExtAttr() || Arg.hasSExtAttr()) &&
- !ST.isAmdHsaOS();
- if (IsExtArg)
- AllocSize = 4;
-
uint64_t EltOffset = alignTo(ExplicitArgOffset, Align) + BaseOffset;
ExplicitArgOffset = alignTo(ExplicitArgOffset, Align) + AllocSize;
@@ -164,8 +154,6 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
ArgPtr->getName() + ".cast");
}
- assert((!IsExtArg || !IsV3) && "incompatible situation");
-
if (IsV3 && Size >= 32) {
V4Ty = VectorType::get(VT->getVectorElementType(), 4);
// Use the hack that clang uses to avoid SelectionDAG ruining v3 loads
@@ -212,20 +200,6 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
// TODO: Convert noalias arg to !noalias
if (Size < 32 && !ArgTy->isAggregateType()) {
- if (IsExtArg && OffsetDiff == 0) {
- Type *I32Ty = Builder.getInt32Ty();
- bool IsSext = Arg.hasSExtAttr();
- Metadata *LowAndHigh[] = {
- ConstantAsMetadata::get(
- ConstantInt::get(I32Ty, IsSext ? minIntN(Size) : 0)),
- ConstantAsMetadata::get(
- ConstantInt::get(I32Ty,
- IsSext ? maxIntN(Size) + 1 : maxUIntN(Size) + 1))
- };
-
- Load->setMetadata(LLVMContext::MD_range, MDNode::get(Ctx, LowAndHigh));
- }
-
Value *ExtractBits = OffsetDiff == 0 ?
Load : Builder.CreateLShr(Load, OffsetDiff * 8);
diff --git a/lib/Target/AMDGPU/MIMGInstructions.td b/lib/Target/AMDGPU/MIMGInstructions.td
index 1e0bc62c45a6..44c2d366e461 100644
--- a/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/lib/Target/AMDGPU/MIMGInstructions.td
@@ -66,6 +66,22 @@ def MIMGDimInfoTable : GenericTable {
let PrimaryKeyName = "getMIMGDimInfo";
}
+class MIMGLZMapping<MIMGBaseOpcode l, MIMGBaseOpcode lz> {
+ MIMGBaseOpcode L = l;
+ MIMGBaseOpcode LZ = lz;
+}
+
+def MIMGLZMappingTable : GenericTable {
+ let FilterClass = "MIMGLZMapping";
+ let CppTypeName = "MIMGLZMappingInfo";
+ let Fields = ["L", "LZ"];
+ GenericEnum TypeOf_L = MIMGBaseOpcode;
+ GenericEnum TypeOf_LZ = MIMGBaseOpcode;
+
+ let PrimaryKey = ["L"];
+ let PrimaryKeyName = "getMIMGLZMappingInfo";
+}
+
class mimg <bits<7> si, bits<7> vi = si> {
field bits<7> SI = si;
field bits<7> VI = vi;
@@ -547,3 +563,13 @@ foreach intr = !listconcat(AMDGPUImageDimIntrinsics,
AMDGPUImageDimAtomicIntrinsics) in {
def : ImageDimIntrinsicInfo<intr>;
}
+
+// L to LZ Optimization Mapping
+def : MIMGLZMapping<IMAGE_SAMPLE_L, IMAGE_SAMPLE_LZ>;
+def : MIMGLZMapping<IMAGE_SAMPLE_C_L, IMAGE_SAMPLE_C_LZ>;
+def : MIMGLZMapping<IMAGE_SAMPLE_L_O, IMAGE_SAMPLE_LZ_O>;
+def : MIMGLZMapping<IMAGE_SAMPLE_C_L_O, IMAGE_SAMPLE_C_LZ_O>;
+def : MIMGLZMapping<IMAGE_GATHER4_L, IMAGE_GATHER4_LZ>;
+def : MIMGLZMapping<IMAGE_GATHER4_C_L, IMAGE_GATHER4_C_LZ>;
+def : MIMGLZMapping<IMAGE_GATHER4_L_O, IMAGE_GATHER4_LZ_O>;
+def : MIMGLZMapping<IMAGE_GATHER4_C_L_O, IMAGE_GATHER4_C_LZ_O>;
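These mappings let instruction selection substitute the cheaper _LZ
(level-zero) sample/gather opcode when the level-of-detail argument is known to
be zero. A hedged sketch of the intended lookup (the actual consumer is
lowerImage in SIISelLowering.cpp, whose start is shown below; the operand
handling here is simplified and LodIdx is a hypothetical name):

    // If this is an _L opcode and the LOD operand folds to 0.0, use _LZ.
    if (const AMDGPU::MIMGLZMappingInfo *LZInfo =
            AMDGPU::getMIMGLZMappingInfo(Intr->BaseOpcode)) {
      // LodIdx: index of the LOD operand (assumed for this sketch).
      if (const auto *Lod =
              dyn_cast<ConstantFPSDNode>(Op.getOperand(LodIdx)))
        if (Lod->isZero())
          IntrOpcode = LZInfo->LZ; // switch to the _lz variant
    }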
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 5b7fc2656a20..25007861fd15 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -694,6 +694,87 @@ bool SITargetLowering::isShuffleMaskLegal(ArrayRef<int>, EVT) const {
return false;
}
+MVT SITargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
+ EVT VT) const {
+ // TODO: Consider splitting all arguments into 32-bit pieces.
+ if (CC != CallingConv::AMDGPU_KERNEL && VT.isVector()) {
+ EVT ScalarVT = VT.getScalarType();
+ unsigned Size = ScalarVT.getSizeInBits();
+ if (Size == 32)
+ return ScalarVT.getSimpleVT();
+
+ if (Size == 64)
+ return MVT::i32;
+
+ if (Size == 16 &&
+ Subtarget->has16BitInsts() &&
+ isPowerOf2_32(VT.getVectorNumElements()))
+ return VT.isInteger() ? MVT::v2i16 : MVT::v2f16;
+ }
+
+ return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
+}
+
+unsigned SITargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
+ EVT VT) const {
+ if (CC != CallingConv::AMDGPU_KERNEL && VT.isVector()) {
+ unsigned NumElts = VT.getVectorNumElements();
+ EVT ScalarVT = VT.getScalarType();
+ unsigned Size = ScalarVT.getSizeInBits();
+
+ if (Size == 32)
+ return NumElts;
+
+ if (Size == 64)
+ return 2 * NumElts;
+
+ // FIXME: Fails to break down as we want with v3.
+ if (Size == 16 && Subtarget->has16BitInsts() && isPowerOf2_32(NumElts))
+ return VT.getVectorNumElements() / 2;
+ }
+
+ return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
+}
+
+unsigned SITargetLowering::getVectorTypeBreakdownForCallingConv(
+ LLVMContext &Context, CallingConv::ID CC,
+ EVT VT, EVT &IntermediateVT,
+ unsigned &NumIntermediates, MVT &RegisterVT) const {
+ if (CC != CallingConv::AMDGPU_KERNEL && VT.isVector()) {
+ unsigned NumElts = VT.getVectorNumElements();
+ EVT ScalarVT = VT.getScalarType();
+ unsigned Size = ScalarVT.getSizeInBits();
+ if (Size == 32) {
+ RegisterVT = ScalarVT.getSimpleVT();
+ IntermediateVT = RegisterVT;
+ NumIntermediates = NumElts;
+ return NumIntermediates;
+ }
+
+ if (Size == 64) {
+ RegisterVT = MVT::i32;
+ IntermediateVT = RegisterVT;
+ NumIntermediates = 2 * NumElts;
+ return NumIntermediates;
+ }
+
+ // FIXME: We should fix the ABI to be the same on targets without 16-bit
+ // support, but unless we can properly handle 3-vectors, it will still be
+ // inconsistent.
+ if (Size == 16 && Subtarget->has16BitInsts() && isPowerOf2_32(NumElts)) {
+ RegisterVT = VT.isInteger() ? MVT::v2i16 : MVT::v2f16;
+ IntermediateVT = RegisterVT;
+ NumIntermediates = NumElts / 2;
+ return NumIntermediates;
+ }
+ }
+
+ return TargetLowering::getVectorTypeBreakdownForCallingConv(
+ Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
+}
+
bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &CI,
MachineFunction &MF,
@@ -1268,6 +1349,8 @@ static void processShaderInputArgs(SmallVectorImpl<ISD::InputArg> &Splits,
for (unsigned I = 0, E = Ins.size(), PSInputNum = 0; I != E; ++I) {
const ISD::InputArg *Arg = &Ins[I];
+ assert(!Arg->VT.isVector() && "vector type argument should have been split");
+
// First check if it's a PS input addr.
if (CallConv == CallingConv::AMDGPU_PS &&
!Arg->Flags.isInReg() && !Arg->Flags.isByVal() && PSInputNum <= 15) {
@@ -1301,25 +1384,7 @@ static void processShaderInputArgs(SmallVectorImpl<ISD::InputArg> &Splits,
++PSInputNum;
}
- // Second split vertices into their elements.
- if (Arg->VT.isVector()) {
- ISD::InputArg NewArg = *Arg;
- NewArg.Flags.setSplit();
- NewArg.VT = Arg->VT.getVectorElementType();
-
- // We REALLY want the ORIGINAL number of vertex elements here, e.g. a
- // three or five element vertex only needs three or five registers,
- // NOT four or eight.
- Type *ParamType = FType->getParamType(Arg->getOrigArgIndex());
- unsigned NumElements = ParamType->getVectorNumElements();
-
- for (unsigned J = 0; J != NumElements; ++J) {
- Splits.push_back(NewArg);
- NewArg.PartOffset += NewArg.VT.getStoreSize();
- }
- } else {
- Splits.push_back(*Arg);
- }
+ Splits.push_back(*Arg);
}
}
@@ -4490,6 +4555,9 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
+ const AMDGPU::MIMGLZMappingInfo *LZMappingInfo =
+ AMDGPU::getMIMGLZMappingInfo(Intr->BaseOpcode);
+ unsigned IntrOpcode = Intr->BaseOpcode;
SmallVector<EVT, 2> ResultTypes(Op->value_begin(), Op->value_end());
bool IsD16 = false;
@@ -4575,6 +4643,18 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
SmallVector<SDValue, 4> VAddrs;
for (unsigned i = 0; i < NumVAddrs; ++i)
VAddrs.push_back(Op.getOperand(AddrIdx + i));
+
+ // Optimize _L to _LZ when _L is zero
+ if (LZMappingInfo) {
+ if (auto ConstantLod =
+ dyn_cast<ConstantFPSDNode>(VAddrs[NumVAddrs-1].getNode())) {
+ if (ConstantLod->isZero() || ConstantLod->isNegative()) {
+ IntrOpcode = LZMappingInfo->LZ; // set new opcode to _lz variant of _l
+ VAddrs.pop_back(); // remove 'lod'
+ }
+ }
+ }
+
SDValue VAddr = getBuildDwordsVector(DAG, DL, VAddrs);
SDValue True = DAG.getTargetConstant(1, DL, MVT::i1);
@@ -4634,10 +4714,10 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
int Opcode = -1;
if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
- Opcode = AMDGPU::getMIMGOpcode(Intr->BaseOpcode, AMDGPU::MIMGEncGfx8,
+ Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx8,
NumVDataDwords, NumVAddrDwords);
if (Opcode == -1)
- Opcode = AMDGPU::getMIMGOpcode(Intr->BaseOpcode, AMDGPU::MIMGEncGfx6,
+ Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx6,
NumVDataDwords, NumVAddrDwords);
assert(Opcode != -1);
@@ -4945,7 +5025,8 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::amdgcn_fdot2:
return DAG.getNode(AMDGPUISD::FDOT2, DL, VT,
- Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
+ Op.getOperand(4));
case Intrinsic::amdgcn_fmul_legacy:
return DAG.getNode(AMDGPUISD::FMUL_LEGACY, DL, VT,
Op.getOperand(1), Op.getOperand(2));
@@ -6754,10 +6835,6 @@ static bool isCanonicalized(SelectionDAG &DAG, SDValue Op,
return Op.getOperand(0).getValueType().getScalarType() != MVT::f16 ||
ST->hasFP16Denormals();
- case ISD::FP16_TO_FP:
- case ISD::FP_TO_FP16:
- return ST->hasFP16Denormals();
-
// It can/will be lowered or combined as a bit operation.
// Need to check their input recursively to handle.
case ISD::FNEG:
@@ -6799,8 +6876,16 @@ SDValue SITargetLowering::performFCanonicalizeCombine(
SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
- ConstantFPSDNode *CFP = isConstOrConstSplatFP(N->getOperand(0));
+ SDValue N0 = N->getOperand(0);
+ // fcanonicalize undef -> qnan
+ if (N0.isUndef()) {
+ EVT VT = N->getValueType(0);
+ APFloat QNaN = APFloat::getQNaN(SelectionDAG::EVTToAPFloatSemantics(VT));
+ return DAG.getConstantFP(QNaN, SDLoc(N), VT);
+ }
+
+ ConstantFPSDNode *CFP = isConstOrConstSplatFP(N0);
if (!CFP) {
SDValue N0 = N->getOperand(0);
EVT VT = N0.getValueType().getScalarType();
@@ -6853,7 +6938,7 @@ SDValue SITargetLowering::performFCanonicalizeCombine(
return DAG.getConstantFP(CanonicalQNaN, SDLoc(N), VT);
}
- return N->getOperand(0);
+ return N0;
}
static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) {
@@ -7544,8 +7629,10 @@ SDValue SITargetLowering::performFMACombine(SDNode *N,
return SDValue();
if ((Vec1 == Vec3 && Vec2 == Vec4) ||
- (Vec1 == Vec4 && Vec2 == Vec3))
- return DAG.getNode(AMDGPUISD::FDOT2, SL, MVT::f32, Vec1, Vec2, FMAAcc);
+ (Vec1 == Vec4 && Vec2 == Vec3)) {
+ return DAG.getNode(AMDGPUISD::FDOT2, SL, MVT::f32, Vec1, Vec2, FMAAcc,
+ DAG.getTargetConstant(0, SL, MVT::i1));
+ }
}
return SDValue();
}
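Editor's note: to make the register-count arithmetic in the three new overrides concrete, here is a hedged usage sketch for a non-kernel calling convention; Ctx and TLI stand in for an already-constructed LLVMContext and SITargetLowering, and the expected results follow directly from the Size == 16/32/64 branches above:

// v4f16 with 16-bit insts: scalar size 16, 4 elements (a power of two)
//   -> register type v2f16, 4 / 2 = 2 registers.
// v2i64: scalar size 64 -> register type i32, 2 * 2 = 4 registers.
MVT R = TLI.getRegisterTypeForCallingConv(Ctx, CallingConv::AMDGPU_VS,
                                          MVT::v4f16);      // v2f16
unsigned N = TLI.getNumRegistersForCallingConv(Ctx, CallingConv::AMDGPU_VS,
                                               MVT::v2i64); // 4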
diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h
index ad049f2a71c3..5b3d49b3d8e3 100644
--- a/lib/Target/AMDGPU/SIISelLowering.h
+++ b/lib/Target/AMDGPU/SIISelLowering.h
@@ -25,6 +25,19 @@ class SITargetLowering final : public AMDGPUTargetLowering {
private:
const GCNSubtarget *Subtarget;
+public:
+ MVT getRegisterTypeForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
+ EVT VT) const override;
+ unsigned getNumRegistersForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
+ EVT VT) const override;
+
+ unsigned getVectorTypeBreakdownForCallingConv(
+ LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
+ unsigned &NumIntermediates, MVT &RegisterVT) const override;
+
+private:
SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL,
SDValue Chain, uint64_t Offset) const;
SDValue getImplicitArgPtr(SelectionDAG &DAG, const SDLoc &SL) const;
diff --git a/lib/Target/AMDGPU/SIInsertSkips.cpp b/lib/Target/AMDGPU/SIInsertSkips.cpp
index 61c8f359e168..dc9397cf7b85 100644
--- a/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ b/lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -133,28 +133,10 @@ bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From,
I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
return true;
- // V_READFIRSTLANE/V_READLANE destination register may be used as operand
- // by some SALU instruction. If exec mask is zero vector instruction
- // defining the register that is used by the scalar one is not executed
- // and scalar instruction will operate on undefined data. For
- // V_READFIRSTLANE/V_READLANE we should avoid predicated execution.
- if ((I->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) ||
- (I->getOpcode() == AMDGPU::V_READLANE_B32)) {
+ if (TII->hasUnwantedEffectsWhenEXECEmpty(*I))
return true;
- }
-
- if (I->isInlineAsm()) {
- const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
- const char *AsmStr = I->getOperand(0).getSymbolName();
-
- // inlineasm length estimate is number of bytes assuming the longest
- // instruction.
- uint64_t MaxAsmSize = TII->getInlineAsmLength(AsmStr, *MAI);
- NumInstr += MaxAsmSize / MAI->getMaxInstLength();
- } else {
- ++NumInstr;
- }
+ ++NumInstr;
if (NumInstr >= SkipThreshold)
return true;
}
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index 6c85c92454c3..f3745382a6f4 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2332,6 +2332,36 @@ bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
changesVGPRIndexingMode(MI);
}
+bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const {
+ unsigned Opcode = MI.getOpcode();
+
+ if (MI.mayStore() && isSMRD(MI))
+ return true; // scalar store or atomic
+
+ // These instructions cause shader I/O that may cause hardware lockups
+ // when executed with an empty EXEC mask.
+ //
+ // Note: exp with VM = DONE = 0 is automatically skipped by hardware when
+ // EXEC = 0, but checking for that case here seems not worth it
+ // given the typical code patterns.
+ if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
+ Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE)
+ return true;
+
+ if (MI.isInlineAsm())
+ return true; // conservative assumption
+
+ // These are like SALU instructions in terms of effects, so it's questionable
+ // whether we should return true for those.
+ //
+ // However, executing them with EXEC = 0 causes them to operate on undefined
+ // data, which we avoid by returning true here.
+ if (Opcode == AMDGPU::V_READFIRSTLANE_B32 || Opcode == AMDGPU::V_READLANE_B32)
+ return true;
+
+ return false;
+}
+
bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
switch (Imm.getBitWidth()) {
case 32:
diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h
index 0a735257d34e..d681b926504e 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/lib/Target/AMDGPU/SIInstrInfo.h
@@ -597,6 +597,9 @@ public:
return !RI.isSGPRReg(MRI, Dest);
}
+ /// Whether we must prevent this instruction from executing with EXEC = 0.
+ bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;
+
bool isInlineConstant(const APInt &Imm) const;
bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const;
diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
index c3f8bfb53ef4..5c10646161b3 100644
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -1387,6 +1387,11 @@ def : GCNPat<
>;
def : GCNPat<
+ (fcanonicalize (f16 (fneg (VOP3Mods f16:$src, i32:$src_mods)))),
+ (V_MUL_F16_e64 0, (i32 CONST.FP16_NEG_ONE), $src_mods, $src, 0, 0)
+>;
+
+def : GCNPat<
(fcanonicalize (v2f16 (VOP3PMods v2f16:$src, i32:$src_mods))),
(V_PK_MUL_F16 0, (i32 CONST.V2FP16_ONE), $src_mods, $src, DSTCLAMP.NONE)
>;
@@ -1411,6 +1416,11 @@ def : GCNPat<
(fcanonicalize (f32 (VOP3Mods f32:$src, i32:$src_mods))),
(V_MUL_F32_e64 0, (i32 CONST.FP32_ONE), $src_mods, $src, 0, 0)
>;
+
+def : GCNPat<
+ (fcanonicalize (f32 (fneg (VOP3Mods f32:$src, i32:$src_mods)))),
+ (V_MUL_F32_e64 0, (i32 CONST.FP32_NEG_ONE), $src_mods, $src, 0, 0)
+>;
}
let OtherPredicates = [FP32Denormals] in {
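Editor's note: the two added patterns extend the existing canonicalize-by-multiply idiom to negated inputs. A brief restatement of the identity they rely on (an inference from the surrounding patterns, not text from the patch):

// canonicalize(x) is already lowered as 1.0 * x (patterns above), so
// canonicalize(-x) can be lowered as -1.0 * x: a single V_MUL quiets NaNs
// and respects the denormal mode while folding the negation into the
// constant operand.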
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 3fd3c75874a3..4eba19382315 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -110,6 +110,7 @@ struct MIMGInfo {
#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
+#define GET_MIMGLZMappingTable_IMPL
#include "AMDGPUGenSearchableTables.inc"
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 70681c271697..5b7af8268cda 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -42,6 +42,7 @@ namespace AMDGPU {
#define GET_MIMGBaseOpcode_DECL
#define GET_MIMGDim_DECL
#define GET_MIMGEncoding_DECL
+#define GET_MIMGLZMapping_DECL
#include "AMDGPUGenSearchableTables.inc"
namespace IsaInfo {
@@ -211,6 +212,14 @@ struct MIMGDimInfo {
LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfo(unsigned Dim);
+struct MIMGLZMappingInfo {
+ MIMGBaseOpcode L;
+ MIMGBaseOpcode LZ;
+};
+
+LLVM_READONLY
+const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
+
LLVM_READONLY
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
unsigned VDataDwords, unsigned VAddrDwords);
diff --git a/lib/Target/AMDGPU/VOP3PInstructions.td b/lib/Target/AMDGPU/VOP3PInstructions.td
index 5c78ada3211e..b51828b54679 100644
--- a/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -167,13 +167,30 @@ defm : MadFmaMixPats<fma, V_FMA_MIX_F32, V_FMA_MIXLO_F16, V_FMA_MIXHI_F16>;
let SubtargetPredicate = HasDLInsts in {
-def V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16", VOP3_Profile<VOP_F32_V2F16_V2F16_F32>, AMDGPUfdot2>;
-def V_DOT2_I32_I16 : VOP3PInst<"v_dot2_i32_i16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>, int_amdgcn_sdot2>;
-def V_DOT2_U32_U16 : VOP3PInst<"v_dot2_u32_u16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>, int_amdgcn_udot2>;
-def V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot4>;
-def V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot4>;
-def V_DOT8_I32_I4 : VOP3PInst<"v_dot8_i32_i4", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot8>;
-def V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot8>;
+def V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16", VOP3_Profile<VOP_F32_V2F16_V2F16_F32>>;
+def V_DOT2_I32_I16 : VOP3PInst<"v_dot2_i32_i16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>>;
+def V_DOT2_U32_U16 : VOP3PInst<"v_dot2_u32_u16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>>;
+def V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
+def V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
+def V_DOT8_I32_I4 : VOP3PInst<"v_dot8_i32_i4", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
+def V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
+
+multiclass DotPats<SDPatternOperator dot_op,
+ VOP3PInst dot_inst> {
+ def : GCNPat <
+ (dot_op (dot_inst.Pfl.Src0VT (VOP3PMods0 dot_inst.Pfl.Src0VT:$src0, i32:$src0_modifiers)),
+ (dot_inst.Pfl.Src1VT (VOP3PMods dot_inst.Pfl.Src1VT:$src1, i32:$src1_modifiers)),
+ (dot_inst.Pfl.Src2VT (VOP3PMods dot_inst.Pfl.Src2VT:$src2, i32:$src2_modifiers)), i1:$clamp),
+ (dot_inst $src0_modifiers, $src0, $src1_modifiers, $src1, $src2_modifiers, $src2, (as_i1imm $clamp))>;
+}
+
+defm : DotPats<AMDGPUfdot2, V_DOT2_F32_F16>;
+defm : DotPats<int_amdgcn_sdot2, V_DOT2_I32_I16>;
+defm : DotPats<int_amdgcn_udot2, V_DOT2_U32_U16>;
+defm : DotPats<int_amdgcn_sdot4, V_DOT4_I32_I8>;
+defm : DotPats<int_amdgcn_udot4, V_DOT4_U32_U8>;
+defm : DotPats<int_amdgcn_sdot8, V_DOT8_I32_I4>;
+defm : DotPats<int_amdgcn_udot8, V_DOT8_U32_U4>;
} // End SubtargetPredicate = HasDLInsts
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 2196f9b47f3b..b227eaed8d61 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -117,7 +117,7 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// globals from all functions in PromotedGlobals.
for (auto *GV : AFI->getGlobalsPromotedToConstantPool())
PromotedGlobals.insert(GV);
-
+
// Calculate this function's optimization goal.
unsigned OptimizationGoal;
if (F.hasFnAttribute(Attribute::OptimizeNone))
@@ -367,8 +367,9 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
unsigned RC;
- InlineAsm::hasRegClassConstraint(Flags, RC);
- if (RC == ARM::GPRPairRegClassID) {
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ if (InlineAsm::hasRegClassConstraint(Flags, RC) &&
+ ARM::GPRPairRegClass.hasSubClassEq(TRI->getRegClass(RC))) {
if (NumVals != 1)
return true;
const MachineOperand &MO = MI->getOperand(OpNum);
@@ -990,7 +991,7 @@ void ARMAsmPrinter::EmitJumpTableTBInst(const MachineInstr *MI,
if (Subtarget->isThumb1Only())
EmitAlignment(2);
-
+
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI);
OutStreamer->EmitLabel(JTISymbol);
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 43e8b7d66c62..5342e6e2cd13 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -584,7 +584,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
// don't know for sure yet whether we'll need that, so we guess based
// on whether there are any local variables that would trigger it.
unsigned StackAlign = TFI->getStackAlignment();
- if (TFI->hasFP(MF) &&
+ if (TFI->hasFP(MF) &&
!((MFI.getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) {
if (isFrameOffsetLegal(MI, getFrameRegister(MF), FPOffset))
return false;
diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h
index 63bf48abb7ac..543165de38d0 100644
--- a/lib/Target/ARM/ARMCallingConv.h
+++ b/lib/Target/ARM/ARMCallingConv.h
@@ -269,14 +269,15 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT,
for (auto Reg : RegList)
State.AllocateReg(Reg);
+ // After the first item has been allocated, the rest are packed as tightly as
+ // possible. (E.g. an incoming i64 would have starting Align of 8, but we'll
+ // be allocating a bunch of i32 slots).
+ unsigned RestAlign = std::min(Align, Size);
+
for (auto &It : PendingMembers) {
It.convertToMem(State.AllocateStack(Size, Align));
State.addLoc(It);
-
- // After the first item has been allocated, the rest are packed as tightly
- // as possible. (E.g. an incoming i64 would have starting Align of 8, but
- // we'll be allocating a bunch of i32 slots).
- Align = Size;
+ Align = RestAlign;
}
// All pending members have now been allocated
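Editor's note: a worked example of the corrected packing, with illustrative numbers rather than values taken from the patch:

// An i64-aligned aggregate split into i32 stack slots: Align = 8, Size = 4,
// so RestAlign = min(8, 4) = 4.
//   slot 0: AllocateStack(4, 8) -> offset 0 (first item keeps full alignment)
//   slot 1: AllocateStack(4, 4) -> offset 4 (rest packed tightly)
// The old code assigned Align = Size, which could *raise* the alignment
// whenever Size exceeded the original Align; std::min cannot.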
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index de08eb8c6985..2c4738d3cb74 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -2128,7 +2128,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
unsigned DeadSize = 0;
bool CanDeleteLEA = false;
bool BaseRegKill = false;
-
+
unsigned IdxReg = ~0U;
bool IdxRegKill = true;
if (isThumb2) {
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index 5139a18f9263..55194ed94532 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -113,7 +113,7 @@ public:
bool isLSDA() const { return Kind == ARMCP::CPLSDA; }
bool isMachineBasicBlock() const{ return Kind == ARMCP::CPMachineBasicBlock; }
bool isPromotedGlobal() const{ return Kind == ARMCP::CPPromotedGlobal; }
-
+
int getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment) override;
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 26d4aaa12acf..a66cd7053c0a 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -2116,7 +2116,7 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
CallingConv::ID CC = F.getCallingConv();
if (Ret->getNumOperands() > 0) {
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
+ GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index af983ce2606a..a8c75702d7b5 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -372,7 +372,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
DebugLoc dl;
-
+
unsigned FramePtr = RegInfo->getFrameRegister(MF);
// Determine the sizes of each callee-save spill areas and record which frame
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 081d4ff033bd..9592dd53c347 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -2539,7 +2539,7 @@ void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
}
};
-
+
if (Range->second == 0) {
// 1. Mask includes the LSB -> Simply shift the top N bits off
NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
@@ -2633,7 +2633,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
MachineMemOperand::MOLoad, 4, 4);
cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp+1);
-
+
ReplaceNode(N, ResNode);
return;
}
@@ -2920,7 +2920,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
assert(N3.getOpcode() == ISD::Register);
unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
-
+
if (InFlag.getOpcode() == ARMISD::CMPZ) {
bool SwitchEQNEToPLMI;
SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
@@ -3023,7 +3023,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
// Other cases are autogenerated.
break;
}
-
+
case ARMISD::VZIP: {
unsigned Opc = 0;
EVT VT = N->getValueType(0);
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 47222a66f798..ede276dd91bb 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -3096,7 +3096,7 @@ static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
// need to be duplicated) or duplicating the constant wouldn't increase code
// size (implying the constant is no larger than 4 bytes).
const Function &F = DAG.getMachineFunction().getFunction();
-
+
// We rely on this decision to inline being idempotent and unrelated to the
// use-site. We know that if we inline a variable at one use site, we'll
// inline it elsewhere too (and reuse the constant pool entry). Fast-isel
@@ -5162,7 +5162,7 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
return SDValue();
// SoftFP: read half-precision arguments:
//
- // t2: i32,ch = ...
+ // t2: i32,ch = ...
// t7: i16 = truncate t2 <~~~~ Op
// t8: f16 = bitcast t7 <~~~~ N
//
@@ -5173,7 +5173,7 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
- // Half-precision return values
+ // Half-precision return values
if (SrcVT == MVT::f16 && DstVT == MVT::i16) {
if (!HasFullFP16)
return SDValue();
@@ -13461,13 +13461,13 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
auto *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1));
if (!RHS || RHS->getZExtValue() != 4)
return false;
-
+
Offset = Op->getOperand(1);
Base = Op->getOperand(0);
AM = ISD::POST_INC;
return true;
}
-
+
bool isInc;
bool isLegal = false;
if (Subtarget->isThumb2())
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 901138dbdfd5..db5f28480e90 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -1275,7 +1275,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
// we're minimizing code size.
if (!MBB.getParent()->getFunction().optForMinSize() || !BaseKill)
return false;
-
+
bool HighRegsUsed = false;
for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
if (MI->getOperand(i).getReg() >= ARM::R8) {
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index 816116772995..91310e81e398 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -126,7 +126,7 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// The amount the literal pool has been increased by due to promoted globals.
int PromotedGlobalsIncrease = 0;
-
+
public:
ARMFunctionInfo() = default;
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index d4fbf76f299f..4d685158e258 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -49,7 +49,7 @@ SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
case RTLIB::MEMMOVE:
AEABILibcall = AEABI_MEMMOVE;
break;
- case RTLIB::MEMSET:
+ case RTLIB::MEMSET:
AEABILibcall = AEABI_MEMSET;
if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src))
if (ConstantSrc->getZExtValue() == 0)
@@ -93,14 +93,14 @@ SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
else if (Src.getValueType().bitsLT(MVT::i32))
Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
- Entry.Node = Src;
+ Entry.Node = Src;
Entry.Ty = Type::getInt32Ty(*DAG.getContext());
Entry.IsSExt = false;
Args.push_back(Entry);
} else {
Entry.Node = Src;
Args.push_back(Entry);
-
+
Entry.Node = Size;
Args.push_back(Entry);
}
@@ -121,7 +121,7 @@ SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
std::move(Args))
.setDiscardResult();
std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
-
+
return CallResult.second;
}
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index f8cae31641ff..94f9cefe429c 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -389,7 +389,7 @@ int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
unsigned NumVectorInstToHideOverhead = 10;
int MaxMergeDistance = 64;
- if (Ty->isVectorTy() && SE &&
+ if (Ty->isVectorTy() && SE &&
!BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
return NumVectorInstToHideOverhead;
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h
index cd9fa0709020..e0cd2d8e26a6 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -153,7 +153,7 @@ public:
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
- int getAddressComputationCost(Type *Val, ScalarEvolution *SE,
+ int getAddressComputationCost(Type *Val, ScalarEvolution *SE,
const SCEV *Ptr);
int getArithmeticInstrCost(
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 807d62547337..a5fbbbf26be9 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -969,7 +969,7 @@ public:
// checks whether this operand is a memory operand computed as an offset
// applied to PC. the offset may have 8 bits of magnitude and is represented
- // with two bits of shift. textually it may be either [pc, #imm], #imm or
+ // with two bits of shift. textually it may be either [pc, #imm], #imm or
// relocatable expression...
bool isThumbMemPC() const {
int64_t Val = 0;
@@ -2284,7 +2284,7 @@ public:
}
const MCSymbolRefExpr *SR = dyn_cast<MCSymbolRefExpr>(Imm.Val);
-
+
assert(SR && "Unknown value type!");
Inst.addOperand(MCOperand::createExpr(SR));
return;
@@ -2326,7 +2326,7 @@ public:
assert(isImm() && "Not an immediate!");
// If we have an immediate that's not a constant, treat it as a label
- // reference needing a fixup.
+ // reference needing a fixup.
if (!isa<MCConstantExpr>(getImm())) {
Inst.addOperand(MCOperand::createExpr(getImm()));
return;
@@ -3419,7 +3419,7 @@ int ARMAsmParser::tryParseShiftRegister(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier))
- return -1;
+ return -1;
std::string lowerCase = Tok.getString().lower();
ARM_AM::ShiftOpc ShiftTy = StringSwitch<ARM_AM::ShiftOpc>(lowerCase)
@@ -4311,7 +4311,7 @@ ARMAsmParser::parseProcIFlagsOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
- if (!Tok.is(AsmToken::Identifier))
+ if (!Tok.is(AsmToken::Identifier))
return MatchOperand_NoMatch;
StringRef IFlagsStr = Tok.getString();
@@ -4353,7 +4353,7 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) {
return MatchOperand_NoMatch;
}
unsigned SYSmvalue = Val & 0xFF;
- Parser.Lex();
+ Parser.Lex();
Operands.push_back(ARMOperand::CreateMSRMask(SYSmvalue, S));
return MatchOperand_Success;
}
@@ -4996,7 +4996,7 @@ void ARMAsmParser::cvtThumbBranches(MCInst &Inst,
// first decide whether or not the branch should be conditional
// by looking at its location relative to an IT block
if(inITBlock()) {
- // inside an IT block we cannot have any conditional branches. any
+ // inside an IT block we cannot have any conditional branches. any
// such instructions need to be converted to unconditional form
switch(Inst.getOpcode()) {
case ARM::tBcc: Inst.setOpcode(ARM::tB); break;
@@ -5008,11 +5008,11 @@ void ARMAsmParser::cvtThumbBranches(MCInst &Inst,
unsigned Cond = static_cast<ARMOperand &>(*Operands[CondOp]).getCondCode();
switch(Inst.getOpcode()) {
case ARM::tB:
- case ARM::tBcc:
- Inst.setOpcode(Cond == ARMCC::AL ? ARM::tB : ARM::tBcc);
+ case ARM::tBcc:
+ Inst.setOpcode(Cond == ARMCC::AL ? ARM::tB : ARM::tBcc);
break;
case ARM::t2B:
- case ARM::t2Bcc:
+ case ARM::t2Bcc:
Inst.setOpcode(Cond == ARMCC::AL ? ARM::t2B : ARM::t2Bcc);
break;
}
@@ -8882,7 +8882,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
case ARM::MOVsi: {
ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(Inst.getOperand(2).getImm());
// rrx shifts and asr/lsr of #32 is encoded as 0
- if (SOpc == ARM_AM::rrx || SOpc == ARM_AM::asr || SOpc == ARM_AM::lsr)
+ if (SOpc == ARM_AM::rrx || SOpc == ARM_AM::asr || SOpc == ARM_AM::lsr)
return false;
if (ARM_AM::getSORegOffset(Inst.getOperand(2).getImm()) == 0) {
// Shifting by zero is accepted as a vanilla 'MOVr'
@@ -9371,6 +9371,12 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
return parseDirectiveAlign(DirectiveID.getLoc()); // Use Generic on failure.
else if (IDVal == ".thumb_set")
parseDirectiveThumbSet(DirectiveID.getLoc());
+ else if (IDVal == ".inst")
+ parseDirectiveInst(DirectiveID.getLoc());
+ else if (IDVal == ".inst.n")
+ parseDirectiveInst(DirectiveID.getLoc(), 'n');
+ else if (IDVal == ".inst.w")
+ parseDirectiveInst(DirectiveID.getLoc(), 'w');
else if (!IsMachO && !IsCOFF) {
if (IDVal == ".arch")
parseDirectiveArch(DirectiveID.getLoc());
@@ -9382,12 +9388,6 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
parseDirectiveFPU(DirectiveID.getLoc());
else if (IDVal == ".fnstart")
parseDirectiveFnStart(DirectiveID.getLoc());
- else if (IDVal == ".inst")
- parseDirectiveInst(DirectiveID.getLoc());
- else if (IDVal == ".inst.n")
- parseDirectiveInst(DirectiveID.getLoc(), 'n');
- else if (IDVal == ".inst.w")
- parseDirectiveInst(DirectiveID.getLoc(), 'w');
else if (IDVal == ".object_arch")
parseDirectiveObjectArch(DirectiveID.getLoc());
else if (IDVal == ".tlsdescseq")
@@ -10012,8 +10012,8 @@ bool ARMAsmParser::parseDirectiveInst(SMLoc Loc, char Suffix) {
case 'w':
break;
default:
- return Error(Loc, "cannot determine Thumb instruction size, "
- "use inst.n/inst.w instead");
+ Width = 0;
+ break;
}
} else {
if (Suffix)
@@ -10029,6 +10029,7 @@ bool ARMAsmParser::parseDirectiveInst(SMLoc Loc, char Suffix) {
return Error(Loc, "expected constant expression");
}
+ char CurSuffix = Suffix;
switch (Width) {
case 2:
if (Value->getValue() > 0xffff)
@@ -10039,11 +10040,21 @@ bool ARMAsmParser::parseDirectiveInst(SMLoc Loc, char Suffix) {
return Error(Loc, StringRef(Suffix ? "inst.w" : "inst") +
" operand is too big");
break;
+ case 0:
+ // Thumb mode, no width indicated. Guess from the opcode, if possible.
+ if (Value->getValue() < 0xe800)
+ CurSuffix = 'n';
+ else if (Value->getValue() >= 0xe8000000)
+ CurSuffix = 'w';
+ else
+ return Error(Loc, "cannot determine Thumb instruction size, "
+ "use inst.n/inst.w instead");
+ break;
default:
llvm_unreachable("only supported widths are 2 and 4");
}
- getTargetStreamer().emitInst(Value->getValue(), Suffix);
+ getTargetStreamer().emitInst(Value->getValue(), CurSuffix);
return false;
};
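Editor's note: the new case 0 path guesses the Thumb encoding width from the value itself. A standalone restatement of that rule as a sketch (guessThumbInstSuffix is a hypothetical helper; the thresholds are the ones used above):

static char guessThumbInstSuffix(uint64_t Value) {
  if (Value < 0xe800)
    return 'n'; // fits a 16-bit Thumb encoding
  if (Value >= 0xe8000000)
    return 'w'; // a first halfword of 0xe800 or above marks a 32-bit encoding
  return '\0';  // ambiguous; the directive must spell .inst.n or .inst.w
}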
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 4733cf49827e..61bec04678dd 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -620,7 +620,7 @@ ThumbDisassembler::AddThumbPredicate(MCInst &MI) const {
// assume a predicate of AL.
unsigned CC;
CC = ITBlock.getITCC();
- if (CC == 0xF)
+ if (CC == 0xF)
CC = ARMCC::AL;
if (ITBlock.instrInITBlock())
ITBlock.advanceITState();
@@ -888,7 +888,7 @@ DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- if (RegNo == 15)
+ if (RegNo == 15)
S = MCDisassembler::SoftFail;
Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder));
@@ -2171,7 +2171,7 @@ static DecodeStatus DecodeSETPANInstruction(MCInst &Inst, unsigned Insn,
const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
const FeatureBitset &FeatureBits = Dis->getSubtargetInfo().getFeatureBits();
- if (!FeatureBits[ARM::HasV8_1aOps] ||
+ if (!FeatureBits[ARM::HasV8_1aOps] ||
!FeatureBits[ARM::HasV8Ops])
return MCDisassembler::Fail;
@@ -4467,7 +4467,7 @@ static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn,
index = fieldFromInstruction(Insn, 7, 1);
switch (fieldFromInstruction(Insn, 4, 2)) {
- case 0:
+ case 0:
align = 0; break;
case 3:
align = 4; break;
@@ -5279,7 +5279,7 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
return MCDisassembler::Fail;
if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
- if (!Check(S, DecodeAddrMode7Operand(Inst, Rn, Address, Decoder)))
+ if (!Check(S, DecodeAddrMode7Operand(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
if (!Check(S, DecodePostIdxReg(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 75ed40c18fa2..bfc32073ba18 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -834,7 +834,7 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
return;
}
- O << SYSm;
+ O << SYSm;
return;
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index dfa339091a7b..7d04c73fb3f2 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -64,7 +64,7 @@ bool ARMELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
}
}
-// Need to examine the Fixup when determining whether to
+// Need to examine the Fixup when determining whether to
// emit the relocation as an explicit symbol or as a section relative
// offset
unsigned ARMELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 0dab789505d5..b37b8073548f 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -740,7 +740,7 @@ getARMBLTargetOpValue(const MCInst &MI, unsigned OpIdx,
const MCOperand MO = MI.getOperand(OpIdx);
if (MO.isExpr()) {
if (HasConditionalBranch(MI))
- return ::getBranchTargetOpValue(MI, OpIdx,
+ return ::getBranchTargetOpValue(MI, OpIdx,
ARM::fixup_arm_condbl, Fixups, STI);
return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_uncondbl, Fixups, STI);
}
@@ -766,10 +766,10 @@ uint32_t ARMMCCodeEmitter::getThumbBranchTargetOpValue(
const MCSubtargetInfo &STI) const {
unsigned Val = 0;
const MCOperand MO = MI.getOperand(OpIdx);
-
+
if(MO.isExpr())
return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_uncondbranch, Fixups, STI);
- else
+ else
Val = MO.getImm() >> 1;
bool I = (Val & 0x800000);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index 42371736fef4..63aa9735e8a4 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -13,6 +13,8 @@
#include "ARMTargetMachine.h"
#include "llvm/MC/ConstantPools.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -47,6 +49,41 @@ void ARMTargetStreamer::finish() { ConstantPools->emitAll(Streamer); }
// reset() - Reset any state
void ARMTargetStreamer::reset() {}
+void ARMTargetStreamer::emitInst(uint32_t Inst, char Suffix) {
+ unsigned Size;
+ char Buffer[4];
+ const bool LittleEndian = getStreamer().getContext().getAsmInfo()->isLittleEndian();
+
+ switch (Suffix) {
+ case '\0':
+ Size = 4;
+
+ for (unsigned II = 0, IE = Size; II != IE; II++) {
+ const unsigned I = LittleEndian ? (Size - II - 1) : II;
+ Buffer[Size - II - 1] = uint8_t(Inst >> I * CHAR_BIT);
+ }
+
+ break;
+ case 'n':
+ case 'w':
+ Size = (Suffix == 'n' ? 2 : 4);
+
+ // Thumb wide instructions are emitted as a pair of 16-bit words of the
+ // appropriate endianness.
+ for (unsigned II = 0, IE = Size; II != IE; II = II + 2) {
+ const unsigned I0 = LittleEndian ? II + 0 : II + 1;
+ const unsigned I1 = LittleEndian ? II + 1 : II + 0;
+ Buffer[Size - II - 2] = uint8_t(Inst >> I0 * CHAR_BIT);
+ Buffer[Size - II - 1] = uint8_t(Inst >> I1 * CHAR_BIT);
+ }
+
+ break;
+ default:
+ llvm_unreachable("Invalid Suffix");
+ }
+ getStreamer().EmitBytes(StringRef(Buffer, Size));
+}
+
// The remaining callbacks should be handled separately by each
// streamer.
void ARMTargetStreamer::emitFnStart() {}
@@ -76,7 +113,6 @@ void ARMTargetStreamer::emitArchExtension(unsigned ArchExt) {}
void ARMTargetStreamer::emitObjectArch(ARM::ArchKind Arch) {}
void ARMTargetStreamer::emitFPU(unsigned FPU) {}
void ARMTargetStreamer::finishAttributeSection() {}
-void ARMTargetStreamer::emitInst(uint32_t Inst, char Suffix) {}
void
ARMTargetStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) {}
void ARMTargetStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {}
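Editor's note: tracing the emitInst() buffers above for one illustrative value (0xF000D000 on a little-endian target) shows why the Thumb path differs from the plain word path; the bytes are derived by hand from the loops, not quoted from the patch:

// Suffix '\0' (ARM word): one 32-bit little-endian value.
//   bytes emitted: 00 D0 00 F0
// Suffix 'w' (wide Thumb): two 16-bit little-endian words, high half first.
//   bytes emitted: 00 F0 00 D0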
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index 637e4a44c428..7f03e1463c1d 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -233,7 +233,7 @@ bool MLxExpansion::FindMLxHazard(MachineInstr *MI) {
// On Swift, we mostly care about hazards from multiplication instructions
// writing the accumulator and the pipelining of loop iterations by out-of-
- // order execution.
+ // order execution.
if (isSwift)
return isFpMulInstruction(DefMI->getOpcode()) || hasLoopHazard(MI);
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index a65e22fd86e8..5c745e112b2e 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -127,7 +127,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
DebugLoc dl;
-
+
unsigned FramePtr = RegInfo->getFrameRegister(MF);
unsigned BasePtr = RegInfo->getBaseRegister();
int CFAOffset = 0;
diff --git a/lib/Target/AVR/AVRISelLowering.cpp b/lib/Target/AVR/AVRISelLowering.cpp
index c1515571aae5..1b412a9c6813 100644
--- a/lib/Target/AVR/AVRISelLowering.cpp
+++ b/lib/Target/AVR/AVRISelLowering.cpp
@@ -63,6 +63,13 @@ AVRTargetLowering::AVRTargetLowering(AVRTargetMachine &tm)
setTruncStoreAction(MVT::i16, MVT::i8, Expand);
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setOperationAction(ISD::ADDC, VT, Legal);
+ setOperationAction(ISD::SUBC, VT, Legal);
+ setOperationAction(ISD::ADDE, VT, Legal);
+ setOperationAction(ISD::SUBE, VT, Legal);
+ }
+
// sub (x, imm) gets canonicalized to add (x, -imm), so for illegal types
// revert into a sub since we don't have an add with immediate instruction.
setOperationAction(ISD::ADD, MVT::i32, Custom);
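Editor's note: marking the carry nodes Legal keeps the type legalizer from expanding them further. A plausible effect on a wide addition (e.g. i32) on AVR, sketched as the DAG one would expect rather than output verified against the patch:

// t1: i16,glue = ADDC lo(a), lo(b)        ; low halves, produces carry
// t2: i16      = ADDE hi(a), hi(b), glue  ; high halves consume carry
// These map naturally onto AVR's add / add-with-carry instruction pairs.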
diff --git a/lib/Target/Hexagon/HexagonBitSimplify.cpp b/lib/Target/Hexagon/HexagonBitSimplify.cpp
index 4791b067aa8d..ba255d30fede 100644
--- a/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -1777,6 +1777,7 @@ namespace {
const BitTracker::RegisterCell &RC);
bool simplifyExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD,
const BitTracker::RegisterCell &RC, const RegisterSet &AVs);
+ bool simplifyRCmp0(MachineInstr *MI, BitTracker::RegisterRef RD);
// Cache of created instructions to avoid creating duplicates.
// XXX Currently only used by genBitSplit.
@@ -2567,6 +2568,127 @@ bool BitSimplification::simplifyExtractLow(MachineInstr *MI,
return Changed;
}
+bool BitSimplification::simplifyRCmp0(MachineInstr *MI,
+ BitTracker::RegisterRef RD) {
+ unsigned Opc = MI->getOpcode();
+ if (Opc != Hexagon::A4_rcmpeqi && Opc != Hexagon::A4_rcmpneqi)
+ return false;
+ MachineOperand &CmpOp = MI->getOperand(2);
+ if (!CmpOp.isImm() || CmpOp.getImm() != 0)
+ return false;
+
+ const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
+ if (FRC != &Hexagon::IntRegsRegClass && FRC != &Hexagon::DoubleRegsRegClass)
+ return false;
+ assert(RD.Sub == 0);
+
+ MachineBasicBlock &B = *MI->getParent();
+ const DebugLoc &DL = MI->getDebugLoc();
+ auto At = MI->isPHI() ? B.getFirstNonPHI()
+ : MachineBasicBlock::iterator(MI);
+ bool KnownZ = true;
+ bool KnownNZ = false;
+
+ BitTracker::RegisterRef SR = MI->getOperand(1);
+ if (!BT.has(SR.Reg))
+ return false;
+ const BitTracker::RegisterCell &SC = BT.lookup(SR.Reg);
+ unsigned F, W;
+ if (!HBS::getSubregMask(SR, F, W, MRI))
+ return false;
+
+ for (uint16_t I = F; I != F+W; ++I) {
+ const BitTracker::BitValue &V = SC[I];
+ if (!V.is(0))
+ KnownZ = false;
+ if (V.is(1))
+ KnownNZ = true;
+ }
+
+ auto ReplaceWithConst = [&] (int C) {
+ unsigned NewR = MRI.createVirtualRegister(FRC);
+ BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), NewR)
+ .addImm(C);
+ HBS::replaceReg(RD.Reg, NewR, MRI);
+ BitTracker::RegisterCell NewRC(W);
+ for (uint16_t I = 0; I != W; ++I) {
+ NewRC[I] = BitTracker::BitValue(C & 1);
+ C = unsigned(C) >> 1;
+ }
+ BT.put(BitTracker::RegisterRef(NewR), NewRC);
+ return true;
+ };
+
+ auto IsNonZero = [] (const MachineOperand &Op) {
+ if (Op.isGlobal() || Op.isBlockAddress())
+ return true;
+ if (Op.isImm())
+ return Op.getImm() != 0;
+ if (Op.isCImm())
+ return !Op.getCImm()->isZero();
+ if (Op.isFPImm())
+ return !Op.getFPImm()->isZero();
+ return false;
+ };
+
+ auto IsZero = [] (const MachineOperand &Op) {
+ if (Op.isGlobal() || Op.isBlockAddress())
+ return false;
+ if (Op.isImm())
+ return Op.getImm() == 0;
+ if (Op.isCImm())
+ return Op.getCImm()->isZero();
+ if (Op.isFPImm())
+ return Op.getFPImm()->isZero();
+ return false;
+ };
+
+ // If the source register is known to be 0 or non-0, the comparison can
+ // be folded to a load of a constant.
+ if (KnownZ || KnownNZ) {
+ assert(KnownZ != KnownNZ && "Register cannot be both 0 and non-0");
+ return ReplaceWithConst(KnownZ == (Opc == Hexagon::A4_rcmpeqi));
+ }
+
+ // Special case: if the compare comes from a C2_muxii, then we know the
+ // two possible constants that can be the source value.
+ MachineInstr *InpDef = MRI.getVRegDef(SR.Reg);
+ if (!InpDef)
+ return false;
+ if (SR.Sub == 0 && InpDef->getOpcode() == Hexagon::C2_muxii) {
+ MachineOperand &Src1 = InpDef->getOperand(2);
+ MachineOperand &Src2 = InpDef->getOperand(3);
+ // Check if both are non-zero.
+ bool KnownNZ1 = IsNonZero(Src1), KnownNZ2 = IsNonZero(Src2);
+ if (KnownNZ1 && KnownNZ2)
+ return ReplaceWithConst(Opc == Hexagon::A4_rcmpneqi);
+ // Check if both are zero.
+ bool KnownZ1 = IsZero(Src1), KnownZ2 = IsZero(Src2);
+ if (KnownZ1 && KnownZ2)
+ return ReplaceWithConst(Opc == Hexagon::A4_rcmpeqi);
+
+ // If for both operands we know that they are either 0 or non-0,
+ // replace the comparison with a C2_muxii, using the same predicate
+ // register, but with operands substituted with 0/1 accordingly.
+ if ((KnownZ1 || KnownNZ1) && (KnownZ2 || KnownNZ2)) {
+ unsigned NewR = MRI.createVirtualRegister(FRC);
+ BuildMI(B, At, DL, HII.get(Hexagon::C2_muxii), NewR)
+ .addReg(InpDef->getOperand(1).getReg())
+ .addImm(KnownZ1 == (Opc == Hexagon::A4_rcmpeqi))
+ .addImm(KnownZ2 == (Opc == Hexagon::A4_rcmpeqi));
+ HBS::replaceReg(RD.Reg, NewR, MRI);
+ // Create a new cell with only the least significant bit unknown.
+ BitTracker::RegisterCell NewRC(W);
+ NewRC[0] = BitTracker::BitValue::self();
+ NewRC.fill(1, W, BitTracker::BitValue::Zero);
+ BT.put(BitTracker::RegisterRef(NewR), NewRC);
+ return true;
+ }
+ }
+
+ return false;
+}
+
bool BitSimplification::processBlock(MachineBasicBlock &B,
const RegisterSet &AVs) {
if (!BT.reached(&B))
@@ -2615,6 +2737,7 @@ bool BitSimplification::processBlock(MachineBasicBlock &B,
T = T || genExtractHalf(MI, RD, RC);
T = T || genCombineHalf(MI, RD, RC);
T = T || genExtractLow(MI, RD, RC);
+ T = T || simplifyRCmp0(MI, RD);
Changed |= T;
continue;
}
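Editor's note: an illustrative before/after for the C2_muxii special case in simplifyRCmp0 (hypothetical MIR with invented registers):

// %v = C2_muxii %p, 3, 5     ; both arms known non-zero
// %r = A4_rcmpneqi %v, 0     ; 0/1 register compare against zero
// becomes, since %v can never be zero:
// %r = A2_tfrsi 1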
diff --git a/lib/Target/Hexagon/HexagonBitTracker.cpp b/lib/Target/Hexagon/HexagonBitTracker.cpp
index e13cfd3f655a..94aacbed6af6 100644
--- a/lib/Target/Hexagon/HexagonBitTracker.cpp
+++ b/lib/Target/Hexagon/HexagonBitTracker.cpp
@@ -347,9 +347,11 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
return rr0(RC, Outputs);
}
case C2_tfrrp: {
- RegisterCell RC = RegisterCell::self(Reg[0].Reg, W0);
- W0 = 8; // XXX Pred size
- return rr0(eINS(RC, eXTR(rc(1), 0, W0), 0), Outputs);
+ uint16_t RW = W0;
+ uint16_t PW = 8; // XXX Pred size: getRegBitWidth(Reg[1]);
+ RegisterCell RC = RegisterCell::self(Reg[0].Reg, RW);
+ RC.fill(PW, RW, BT::BitValue::Zero);
+ return rr0(eINS(RC, eXTR(rc(1), 0, PW), 0), Outputs);
}
// Arithmetic:
@@ -950,6 +952,19 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
}
default:
+ // For instructions that define a single predicate register, store
+ // the low 8 bits of the register only.
+ if (unsigned DefR = getUniqueDefVReg(MI)) {
+ if (MRI.getRegClass(DefR) == &Hexagon::PredRegsRegClass) {
+ BT::RegisterRef PD(DefR, 0);
+ uint16_t RW = getRegBitWidth(PD);
+ uint16_t PW = 8; // XXX Pred size: getRegBitWidth(Reg[1]);
+ RegisterCell RC = RegisterCell::self(DefR, RW);
+ RC.fill(PW, RW, BT::BitValue::Zero);
+ putCell(PD, RC, Outputs);
+ return true;
+ }
+ }
return MachineEvaluator::evaluate(MI, Inputs, Outputs);
}
#undef im
@@ -1016,6 +1031,21 @@ bool HexagonEvaluator::evaluate(const MachineInstr &BI,
return true;
}
+unsigned HexagonEvaluator::getUniqueDefVReg(const MachineInstr &MI) const {
+ unsigned DefReg = 0;
+ for (const MachineOperand &Op : MI.operands()) {
+ if (!Op.isReg() || !Op.isDef())
+ continue;
+ unsigned R = Op.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ continue;
+ if (DefReg != 0)
+ return 0;
+ DefReg = R;
+ }
+ return DefReg;
+}
+
bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI,
const CellMapType &Inputs,
CellMapType &Outputs) const {
diff --git a/lib/Target/Hexagon/HexagonBitTracker.h b/lib/Target/Hexagon/HexagonBitTracker.h
index d9dd04e1b088..f0b7c9d91950 100644
--- a/lib/Target/Hexagon/HexagonBitTracker.h
+++ b/lib/Target/Hexagon/HexagonBitTracker.h
@@ -49,6 +49,7 @@ struct HexagonEvaluator : public BitTracker::MachineEvaluator {
const HexagonInstrInfo &TII;
private:
+ unsigned getUniqueDefVReg(const MachineInstr &MI) const;
bool evaluateLoad(const MachineInstr &MI, const CellMapType &Inputs,
CellMapType &Outputs) const;
bool evaluateFormalCopy(const MachineInstr &MI, const CellMapType &Inputs,
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
index 183dee36a047..de486ec4b7bd 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
@@ -2,7 +2,7 @@
//
// The LLVM Compiler Infrastructure
//
-// This file is distributed under the University of Illinois Open Source
+// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 2acf701b43cb..ce7db657f5e9 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -7371,7 +7371,7 @@ bool MipsAsmParser::parseDirectiveGpWord() {
getParser().getStreamer().EmitGPRel32Value(Value);
if (getLexer().isNot(AsmToken::EndOfStatement))
- return Error(getLexer().getLoc(),
+ return Error(getLexer().getLoc(),
"unexpected token, expected end of statement");
Parser.Lex(); // Eat EndOfStatement token.
return false;
@@ -7506,7 +7506,7 @@ bool MipsAsmParser::parseDirectiveOption() {
}
// Unknown option.
- Warning(Parser.getTok().getLoc(),
+ Warning(Parser.getTok().getLoc(),
"unknown option, expected 'pic0' or 'pic2'");
Parser.eatToEndOfStatement();
return false;
@@ -8193,7 +8193,7 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".abicalls") {
getTargetStreamer().emitDirectiveAbiCalls();
if (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
- Error(Parser.getTok().getLoc(),
+ Error(Parser.getTok().getLoc(),
"unexpected token, expected end of statement");
}
return false;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
index fdb560f3c72f..d7f6cf91db73 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -114,7 +114,7 @@ namespace Mips {
// resulting in - R_MIPS_GOT_DISP
fixup_Mips_GOT_DISP,
- // resulting in - R_MIPS_HIGHER/R_MICROMIPS_HIGHER
+ // resulting in - R_MIPS_HIGHER/R_MICROMIPS_HIGHER
fixup_Mips_HIGHER,
fixup_MICROMIPS_HIGHER,
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 8ffc0731abcb..2e0c25de2bc8 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -1094,7 +1094,7 @@ void MipsAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) {
// ALIGN
// B .tmpN
// 11 NOP instructions (44 bytes)
- // ADDIU T9, T9, 52
+ // ADDIU T9, T9, 52
// .tmpN
//
// We need the 44 bytes (11 instructions) because at runtime, we'd
diff --git a/lib/Target/Mips/MipsCallLowering.cpp b/lib/Target/Mips/MipsCallLowering.cpp
index e82f62260b3f..a705ebb6b193 100644
--- a/lib/Target/Mips/MipsCallLowering.cpp
+++ b/lib/Target/Mips/MipsCallLowering.cpp
@@ -418,7 +418,8 @@ void MipsCallLowering::subTargetRegTypeForCallingConv(
for (auto &Arg : Args) {
EVT VT = TLI.getValueType(DL, Arg.Ty);
- MVT RegisterVT = TLI.getRegisterTypeForCallingConv(F.getContext(), VT);
+ MVT RegisterVT = TLI.getRegisterTypeForCallingConv(F.getContext(),
+ F.getCallingConv(), VT);
ISD::ArgFlagsTy Flags = Arg.Flags;
Flags.setOrigAlign(TLI.getABIAlignmentForCallingConv(Arg.Ty, DL));
diff --git a/lib/Target/Mips/MipsConstantIslandPass.cpp b/lib/Target/Mips/MipsConstantIslandPass.cpp
index 9eb13a68e561..744523cc6cb9 100644
--- a/lib/Target/Mips/MipsConstantIslandPass.cpp
+++ b/lib/Target/Mips/MipsConstantIslandPass.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
//
// This pass is used to make PC-relative loads of constants.
-// For now, only Mips16 will use this.
+// For now, only Mips16 will use this.
//
// Loading constants inline is expensive on Mips16 and it's in general better
// to place the constant nearby in code space and then it can be loaded with a
@@ -1171,7 +1171,7 @@ static inline unsigned getUnconditionalBrDisp(int Opc) {
/// findAvailableWater - Look for an existing entry in the WaterList in which
/// we can place the CPE referenced from U so it's within range of U's MI.
/// Returns true if found, false if not. If it returns true, WaterIter
-/// is set to the WaterList entry.
+/// is set to the WaterList entry.
/// To ensure that this pass
/// terminates, the CPE location for a particular CPUser is only allowed to
/// move to a lower address, so search backward from the end of the list and
@@ -1231,7 +1231,7 @@ void MipsConstantIslands::createNewWater(unsigned CPUserIndex,
const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()];
// If the block does not end in an unconditional branch already, and if the
- // end of the block is within range, make new water there.
+ // end of the block is within range, make new water there.
if (BBHasFallthrough(UserMBB)) {
// Size of branch to insert.
unsigned Delta = 2;
@@ -1258,7 +1258,7 @@ void MipsConstantIslands::createNewWater(unsigned CPUserIndex,
}
}
- // What a big block. Find a place within the block to split it.
+ // What a big block. Find a place within the block to split it.
// Try to split the block so it's fully aligned. Compute the latest split
// point where we can add a 4-byte branch instruction, and then align to
@@ -1582,7 +1582,7 @@ MipsConstantIslands::fixupConditionalBr(ImmBranch &Br) {
MachineInstr *BMI = &MBB->back();
bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
unsigned OppositeBranchOpcode = TII->getOppositeBranchOpc(Opcode);
-
+
++NumCBrFixed;
if (BMI != MI) {
if (std::next(MachineBasicBlock::iterator(MI)) == std::prev(MBB->end()) &&
@@ -1595,7 +1595,7 @@ MipsConstantIslands::fixupConditionalBr(ImmBranch &Br) {
// bnez L2
// b L1
unsigned BMITargetOperand = branchTargetOperand(BMI);
- MachineBasicBlock *NewDest =
+ MachineBasicBlock *NewDest =
BMI->getOperand(BMITargetOperand).getMBB();
if (isBBInRange(MI, NewDest, Br.MaxDisp)) {
LLVM_DEBUG(
diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp
index 7b39507812ed..19b30a44e86a 100644
--- a/lib/Target/Mips/MipsFastISel.cpp
+++ b/lib/Target/Mips/MipsFastISel.cpp
@@ -1662,7 +1662,7 @@ bool MipsFastISel::selectRet(const Instruction *I) {
return false;
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
+ GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 9ffc38356b76..0677d378a115 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -111,6 +111,7 @@ static bool isShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
// The MIPS MSA ABI passes vector arguments in the integer register set.
// The number of integer registers used is dependent on the ABI used.
MVT MipsTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
EVT VT) const {
if (VT.isVector()) {
if (Subtarget.isABI_O32()) {
@@ -123,6 +124,7 @@ MVT MipsTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
}
unsigned MipsTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
EVT VT) const {
if (VT.isVector())
return std::max((VT.getSizeInBits() / (Subtarget.isABI_O32() ? 32 : 64)),
@@ -131,10 +133,10 @@ unsigned MipsTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
}
unsigned MipsTargetLowering::getVectorTypeBreakdownForCallingConv(
- LLVMContext &Context, EVT VT, EVT &IntermediateVT,
+ LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
unsigned &NumIntermediates, MVT &RegisterVT) const {
// Break down vector types to either 2 i64s or 4 i32s.
- RegisterVT = getRegisterTypeForCallingConv(Context, VT) ;
+ RegisterVT = getRegisterTypeForCallingConv(Context, CC, VT);
IntermediateVT = RegisterVT;
NumIntermediates = VT.getSizeInBits() < RegisterVT.getSizeInBits()
? VT.getVectorNumElements()
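Editor's note: the arithmetic above implies the following breakdowns for a 128-bit MSA vector argument; the concrete numbers are inferred from the code, not stated in the patch:

// v4i32 (128 bits) under O32: RegisterVT = i32, 128 / 32 = 4 registers.
// v4i32 (128 bits) under N64: RegisterVT = i64, 128 / 64 = 2 registers.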
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index b58d92c370d8..5a0de45c44f3 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -288,17 +288,18 @@ class TargetRegisterClass;
/// Return the register type for a given MVT, ensuring vectors are treated
/// as a series of gpr sized integers.
- MVT getRegisterTypeForCallingConv(LLVMContext &Context,
+ MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
EVT VT) const override;
/// Return the number of registers for a given MVT, ensuring vectors are
/// treated as a series of gpr sized integers.
unsigned getNumRegistersForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
EVT VT) const override;
/// Break down vectors to the correct number of gpr sized integers.
unsigned getVectorTypeBreakdownForCallingConv(
- LLVMContext &Context, EVT VT, EVT &IntermediateVT,
+ LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
unsigned &NumIntermediates, MVT &RegisterVT) const override;
/// Return the correct alignment for the current calling convention.
diff --git a/lib/Target/Mips/MipsInstructionSelector.cpp b/lib/Target/Mips/MipsInstructionSelector.cpp
index af0ac006bc9e..6c5b83021f74 100644
--- a/lib/Target/Mips/MipsInstructionSelector.cpp
+++ b/lib/Target/Mips/MipsInstructionSelector.cpp
@@ -166,6 +166,33 @@ bool MipsInstructionSelector::select(MachineInstr &I,
I.eraseFromParent();
return true;
}
+ case G_GLOBAL_VALUE: {
+ if (MF.getTarget().isPositionIndependent())
+ return false;
+
+ const llvm::GlobalValue *GVal = I.getOperand(1).getGlobal();
+ unsigned LUiReg = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ MachineInstr *LUi, *ADDiu;
+
+ LUi = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::LUi))
+ .addDef(LUiReg)
+ .addGlobalAddress(GVal);
+ LUi->getOperand(1).setTargetFlags(MipsII::MO_ABS_HI);
+
+ ADDiu = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::ADDiu))
+ .addDef(I.getOperand(0).getReg())
+ .addUse(LUiReg)
+ .addGlobalAddress(GVal);
+ ADDiu->getOperand(2).setTargetFlags(MipsII::MO_ABS_LO);
+
+ if (!constrainSelectedInstRegOperands(*LUi, TII, TRI, RBI))
+ return false;
+ if (!constrainSelectedInstRegOperands(*ADDiu, TII, TRI, RBI))
+ return false;
+
+ I.eraseFromParent();
+ return true;
+ }
default:
return false;
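The selected sequence materializes the absolute address in two halves; in assembly it comes out roughly as (a sketch, with sym standing for the global):
  lui   $tmp, %hi(sym)        # MO_ABS_HI on the LUi operand
  addiu $dst, $tmp, %lo(sym)  # MO_ABS_LO on the ADDiu operand
The early bail-out keeps this selection to the static (non-PIC) case.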
diff --git a/lib/Target/Mips/MipsLegalizerInfo.cpp b/lib/Target/Mips/MipsLegalizerInfo.cpp
index da6f9dabdaaf..fb259516be09 100644
--- a/lib/Target/Mips/MipsLegalizerInfo.cpp
+++ b/lib/Target/Mips/MipsLegalizerInfo.cpp
@@ -36,6 +36,9 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
getActionDefinitionsBuilder(G_FRAME_INDEX)
.legalFor({p0});
+ getActionDefinitionsBuilder(G_GLOBAL_VALUE)
+ .legalFor({p0});
+
computeTables();
verify(*ST.getInstrInfo());
}
diff --git a/lib/Target/Mips/MipsRegisterBankInfo.cpp b/lib/Target/Mips/MipsRegisterBankInfo.cpp
index cef21f447205..351135079217 100644
--- a/lib/Target/Mips/MipsRegisterBankInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterBankInfo.cpp
@@ -88,6 +88,7 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
case G_CONSTANT:
case G_FRAME_INDEX:
+ case G_GLOBAL_VALUE:
OperandsMapping =
getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx], nullptr});
break;
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index 676d702ba63e..896dd0eb0a5e 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -163,7 +163,7 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
// HasEVA -- supports EVA ASE.
bool HasEVA;
-
+
// nomadd4 - disables generation of 4-operand madd.s, madd.d and
// related instructions.
bool DisableMadd4;
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 3b042c74b26c..efe98003b1c8 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -248,7 +248,7 @@ protected:
private:
bool GlobalsEmitted;
-
+
// This is specific per MachineFunction.
const MachineRegisterInfo *MRI;
// The contents are specific for each
diff --git a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
index f12ed81b6d9f..ad1d7cbb52fc 100644
--- a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
+++ b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
@@ -2,7 +2,7 @@
//
// The LLVM Compiler Infrastructure
//
-// This file is distributed under the University of Illinois Open Source
+// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h b/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
index 10f1135ad841..5a9115f6f7f1 100644
--- a/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
+++ b/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
@@ -2,7 +2,7 @@
//
// The LLVM Compiler Infrastructure
//
-// This file is distributed under the University of Illinois Open Source
+// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index ea709a73ebf2..fd7f81591426 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -175,7 +175,7 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O,
+ raw_ostream &O,
const char *Modifier) {
unsigned Code = MI->getOperand(OpNo).getImm();
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index f000fbb98110..351ccefa2da2 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -35,11 +35,11 @@ public:
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
const MCSubtargetInfo &STI) override;
-
+
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
-
+
bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
unsigned PrintMethodIdx,
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index 8ac461b96b88..fb7bf23509c7 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -61,7 +61,7 @@ PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) {
CommentString = "#";
// Uses '.section' before '.bss' directive
- UsesELFSectionDirectiveForBSS = true;
+ UsesELFSectionDirectiveForBSS = true;
// Debug Information
SupportsDebugInformation = true;
@@ -73,7 +73,7 @@ PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) {
// Exceptions handling
ExceptionsType = ExceptionHandling::DwarfCFI;
-
+
ZeroDirective = "\t.space\t";
Data64bitsDirective = is64Bit ? "\t.quad\t" : nullptr;
AssemblerDialect = 1; // New-Style mnemonics.
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 2b948ca60028..57bda1403c62 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -102,7 +102,7 @@ public:
unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
-
+
// getBinaryCodeForInstr - TableGen'erated function for getting the
// binary encoding for an instruction.
uint64_t getBinaryCodeForInstr(const MCInst &MI,
@@ -138,7 +138,7 @@ public:
default:
llvm_unreachable("Invalid instruction size");
}
-
+
++MCNumEmitted; // Keep track of the # of mi's emitted.
}
@@ -147,7 +147,7 @@ private:
void verifyInstructionPredicates(const MCInst &MI,
uint64_t AvailableFeatures) const;
};
-
+
} // end anonymous namespace
MCCodeEmitter *llvm::createPPCMCCodeEmitter(const MCInstrInfo &MCII,
@@ -162,7 +162,7 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
-
+
// Add a fixup for the branch target.
Fixups.push_back(MCFixup::create(0, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_br24));
@@ -212,7 +212,7 @@ unsigned PPCMCCodeEmitter::getImm16Encoding(const MCInst &MI, unsigned OpNo,
const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
-
+
// Add a fixup for the immediate field.
Fixups.push_back(MCFixup::create(IsLittleEndian? 0 : 2, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_half16));
@@ -226,11 +226,11 @@ unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo,
// displacement and the next 5 bits as the register #.
assert(MI.getOperand(OpNo+1).isReg());
unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 16;
-
+
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isImm())
return (getMachineOpValue(MI, MO, Fixups, STI) & 0xFFFF) | RegBits;
-
+
// Add a fixup for the displacement field.
Fixups.push_back(MCFixup::create(IsLittleEndian? 0 : 2, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_half16));
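As a worked instance of the memri encoding above (a sketch with hypothetical values): a base register whose encoding is 5 and an immediate displacement of 8 give RegBits = 5 << 16, so the function returns (8 & 0xFFFF) | 0x50000 = 0x50008.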
@@ -244,11 +244,11 @@ unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
// displacement and the next 5 bits as the register #.
assert(MI.getOperand(OpNo+1).isReg());
unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 14;
-
+
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isImm())
return ((getMachineOpValue(MI, MO, Fixups, STI) >> 2) & 0x3FFF) | RegBits;
-
+
// Add a fixup for the displacement field.
Fixups.push_back(MCFixup::create(IsLittleEndian? 0 : 2, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_half16ds));
@@ -320,7 +320,7 @@ unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isReg()) return getMachineOpValue(MI, MO, Fixups, STI);
-
+
// Add a fixup for the TLS register, which simply provides a relocation
// hint to the linker that this statement is part of a relocation sequence.
// Return the thread-pointer register's encoding.
@@ -373,7 +373,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
return Encode;
}
-
+
assert(MO.isImm() &&
"Relocation required in an instruction that we cannot encode!");
return MO.getImm();
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
index fe7e7aeeb182..481ba3f09cc7 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -58,7 +58,7 @@ namespace PPC {
PRED_BIT_SET = 1024,
PRED_BIT_UNSET = 1025
};
-
+
// Bit for branch taken (plus) or not-taken (minus) hint
enum BranchHintBit {
BR_NO_HINT = 0x0,
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index dfdec246e868..bfc613af3dc0 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -66,7 +66,7 @@ namespace llvm {
extern char &PPCVSXFMAMutateID;
namespace PPCII {
-
+
/// Target Operand Flag enum.
enum TOF {
//===------------------------------------------------------------------===//
@@ -111,7 +111,7 @@ namespace llvm {
MO_TLS = 8 << 4
};
} // end namespace PPCII
-
+
} // end namespace llvm;
#endif
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index 64b8f1168beb..0d1bb9297bcb 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -130,7 +130,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
BlockSizes[MBB->getNumber()].first = BlockSize;
FuncSize += BlockSize;
}
-
+
// If the entire function is smaller than the displacement of a branch field,
// we know we don't need to shrink any branches in this function. This is a
// common case.
@@ -138,7 +138,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
BlockSizes.clear();
return false;
}
-
+
// For each conditional branch, if the offset to its destination is larger
// than the offset field allows, transform it into a long branch sequence
// like this:
@@ -153,7 +153,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
while (MadeChange) {
// Iteratively expand branches until we reach a fixed point.
MadeChange = false;
-
+
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
++MFI) {
MachineBasicBlock &MBB = *MFI;
@@ -175,7 +175,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
MBBStartOffset += TII->getInstSizeInBytes(*I);
continue;
}
-
+
// Determine the offset from the current branch to the destination
// block.
int BranchSize;
@@ -184,7 +184,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
// start of this block to this branch, plus the sizes of all blocks
// from this block to the dest.
BranchSize = MBBStartOffset;
-
+
for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i)
BranchSize += BlockSizes[i].first;
} else {
@@ -213,7 +213,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
// 2. Target MBB
PPC::Predicate Pred = (PPC::Predicate)I->getOperand(0).getImm();
unsigned CRReg = I->getOperand(1).getReg();
-
+
// Jump over the uncond branch inst (i.e. $PC+8) on opposite condition.
BuildMI(MBB, I, dl, TII->get(PPC::BCC))
.addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2);
@@ -234,7 +234,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
} else {
llvm_unreachable("Unhandled branch type!");
}
-
+
// Uncond branch to the real destination.
I = BuildMI(MBB, I, dl, TII->get(PPC::B)).addMBB(Dest);
@@ -277,7 +277,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
EverMadeChange |= MadeChange;
}
-
+
BlockSizes.clear();
return true;
}
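Concretely, the expansion built in the loop above rewrites an out-of-range conditional branch as (a sketch, using the pseudo-instruction names from the code):
  BCC cond, CRReg, dest                  ; displacement field too small
becomes
  BCC InvertPredicate(cond), CRReg, 2    ; skip the next instruction ($PC+8)
  B   dest                               ; unconditional branch, much larger range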
diff --git a/lib/Target/PowerPC/PPCEarlyReturn.cpp b/lib/Target/PowerPC/PPCEarlyReturn.cpp
index ed5e496b32fd..ac931f7d0ec0 100644
--- a/lib/Target/PowerPC/PPCEarlyReturn.cpp
+++ b/lib/Target/PowerPC/PPCEarlyReturn.cpp
@@ -73,7 +73,7 @@ protected:
if ((*PI)->empty())
continue;
-
+
for (MachineBasicBlock::iterator J = (*PI)->getLastNonDebugInstr();;) {
if (J == (*PI)->end())
break;
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index b00655b50229..f212894035db 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -1697,7 +1697,7 @@ bool PPCFastISel::SelectRet(const Instruction *I) {
if (Ret->getNumOperands() > 0) {
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
+ GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index f0000c5bafd7..84dacf396462 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -174,7 +174,7 @@ const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
{PPC::V22, -160},
{PPC::V21, -176},
{PPC::V20, -192},
-
+
// SPE register save area (overlaps Vector save area).
{PPC::S31, -8},
{PPC::S30, -16},
@@ -1229,7 +1229,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
if (MBBI != MBB.end())
dl = MBBI->getDebugLoc();
-
+
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
@@ -1315,7 +1315,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
}
bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
-
+
if (IsReturnBlock) {
unsigned RetOpcode = MBBI->getOpcode();
bool UsesTCRet = RetOpcode == PPC::TCRETURNri ||
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 551220466901..793a4dd7f624 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -50,7 +50,7 @@ bool PPCDispatchGroupSBHazardRecognizer::isLoadAfterStore(SUnit *SU) {
return true;
}
- return false;
+ return false;
}
bool PPCDispatchGroupSBHazardRecognizer::isBCTRAfterSet(SUnit *SU) {
@@ -76,7 +76,7 @@ bool PPCDispatchGroupSBHazardRecognizer::isBCTRAfterSet(SUnit *SU) {
return true;
}
- return false;
+ return false;
}
// FIXME: Remove this when we don't need this:
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 1e3e14c71144..51ff8a5cf77e 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1224,6 +1224,7 @@ unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
}
unsigned PPCTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
EVT VT) const {
if (Subtarget.hasSPE() && VT == MVT::f64)
return 2;
@@ -1231,6 +1232,7 @@ unsigned PPCTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
}
MVT PPCTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
EVT VT) const {
if (Subtarget.hasSPE() && VT == MVT::f64)
return MVT::i32;
@@ -13102,8 +13104,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
SDValue
PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
- SelectionDAG &DAG,
- std::vector<SDNode *> *Created) const {
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const {
// fold (sdiv X, pow2)
EVT VT = N->getValueType(0);
if (VT == MVT::i64 && !Subtarget.isPPC64())
@@ -13120,13 +13122,11 @@ PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
- if (Created)
- Created->push_back(Op.getNode());
+ Created.push_back(Op.getNode());
if (IsNegPow2) {
Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
- if (Created)
- Created->push_back(Op.getNode());
+ Created.push_back(Op.getNode());
}
return Op;
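A worked instance of the fold (a sketch): for X sdiv 8, Lg2 = 3, so the combine emits SRA_ADDZE(X, 3); the intent is an arithmetic shift right that sets the carry when one-bits are shifted out of a negative value, followed by an add of the carry so the quotient rounds toward zero. For X sdiv -8, the IsNegPow2 path wraps that in SUB(0, Op) to negate the result.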
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 9b8d6435515b..f174943a8004 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -665,7 +665,7 @@ namespace llvm {
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
- std::vector<SDNode *> *Created) const override;
+ SmallVectorImpl<SDNode *> &Created) const override;
unsigned getRegisterByName(const char* RegName, EVT VT,
SelectionDAG &DAG) const override;
@@ -872,9 +872,11 @@ namespace llvm {
MCContext &Ctx) const override;
unsigned getNumRegistersForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
EVT VT) const override;
MVT getRegisterTypeForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
EVT VT) const override;
private:
@@ -1141,7 +1143,7 @@ namespace llvm {
ISD::ArgFlagsTy &ArgFlags,
CCState &State);
- bool
+ bool
CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT,
MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 4669719744bc..0930f7d3b8d7 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -316,11 +316,11 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
}
// For opcodes with the ReMaterializable flag set, this function is called to
-// verify the instruction is really rematerializable.
+// verify the instruction is really rematerializable.
bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
AliasAnalysis *AA) const {
switch (MI.getOpcode()) {
- default:
+ default:
// This function should only be called for opcodes with the ReMaterializable
// flag set.
llvm_unreachable("Unknown rematerializable operation!");
diff --git a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
index 2217fa4693ce..0b57dd9b618d 100644
--- a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
+++ b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
@@ -360,7 +360,7 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
// generate direct offsets from both the pre-incremented and
// post-incremented pointer values. Thus, we'll pick the first non-prefetch
// instruction in each bucket, and adjust the recurrence and other offsets
- // accordingly.
+ // accordingly.
for (int j = 0, je = Buckets[i].Elements.size(); j != je; ++j) {
if (auto *II = dyn_cast<IntrinsicInst>(Buckets[i].Elements[j].Instr))
if (II->getIntrinsicID() == Intrinsic::prefetch)
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index 62a612feb55c..e731c0bc0c23 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -75,7 +75,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO,
}
return Sym;
}
-
+
return Sym;
}
@@ -130,7 +130,7 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
// Subtract off the PIC base if required.
if (MO.getTargetFlags() & PPCII::MO_PIC_FLAG) {
const MachineFunction *MF = MO.getParent()->getParent()->getParent();
-
+
const MCExpr *PB = MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
Expr = MCBinaryExpr::createSub(Expr, PB, Ctx);
}
@@ -151,7 +151,7 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
AsmPrinter &AP, bool isDarwin) {
OutMI.setOpcode(MI->getOpcode());
-
+
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MCOperand MCOp;
if (LowerPPCMachineOperandToMCOperand(MI->getOperand(i), MCOp, AP,
diff --git a/lib/Target/PowerPC/PPCMIPeephole.cpp b/lib/Target/PowerPC/PPCMIPeephole.cpp
index dbe1fe37ddf8..0068df19f0c8 100644
--- a/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -891,7 +891,7 @@ static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
auto BII = BB.getFirstInstrTerminator();
// We optimize BBs ending with a conditional branch.
// We check only for BCC here, not BCCLR, because BCCLR
- // will be formed only later in the pipeline.
+ // will be formed only later in the pipeline.
if (BB.succ_size() == 2 &&
BII != BB.instr_end() &&
(*BII).getOpcode() == PPC::BCC &&
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index b14bbad2039a..8a3f50aa9565 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -29,7 +29,7 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// stored. Also used as an anchor for instructions that need to be altered
/// when using frame pointers (dyna_add, dyna_sub.)
int FramePointerSaveIndex = 0;
-
+
/// ReturnAddrSaveIndex - Frame index of where the return address is stored.
///
int ReturnAddrSaveIndex = 0;
@@ -128,7 +128,7 @@ public:
int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
-
+
int getReturnAddrSaveIndex() const { return ReturnAddrSaveIndex; }
void setReturnAddrSaveIndex(int idx) { ReturnAddrSaveIndex = idx; }
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 6647ceace5eb..96923a97a82c 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -979,7 +979,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
SReg = MF.getRegInfo().createVirtualRegister(RC);
// Insert a set of rA with the full offset value before the ld, st, or add
- if (isInt<16>(Offset))
+ if (isInt<16>(Offset))
BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LI8 : PPC::LI), SReg)
.addImm(Offset);
else {
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 226c75f704f4..b0da9b5a6d70 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -201,7 +201,7 @@ unsigned PPCTTIImpl::getUserCost(const User *U,
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, U->getType());
return LT.first * BaseT::getUserCost(U, Operands);
}
-
+
return BaseT::getUserCost(U, Operands);
}
diff --git a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
index 1e8a1750ec3b..1be193e08c01 100644
--- a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -443,7 +443,7 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
// We can handle STXSDX and STXSSPX similarly to LXSDX and LXSSPX,
// by adding special handling for narrowing copies as well as
// widening ones. However, I've experimented with this, and in
- // practice we currently do not appear to use STXSDX fed by
+ // practice we currently do not appear to use STXSDX fed by
// a narrowing copy from a full vector register. Since I can't
// generate any useful test cases, I've left this alone for now.
case PPC::STXSDX:
diff --git a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index c7a5a1e8e6ee..35f52f7d279b 100644
--- a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -190,7 +190,7 @@ public:
Sparc::C8_C9, Sparc::C10_C11, Sparc::C12_C13, Sparc::C14_C15,
Sparc::C16_C17, Sparc::C18_C19, Sparc::C20_C21, Sparc::C22_C23,
Sparc::C24_C25, Sparc::C26_C27, Sparc::C28_C29, Sparc::C30_C31};
-
+
namespace {
/// SparcOperand - Instances of this class represent a parsed Sparc machine
@@ -459,7 +459,7 @@ public:
Op.Reg.Kind = rk_CoprocPairReg;
return true;
}
-
+
static std::unique_ptr<SparcOperand>
MorphToMEMrr(unsigned Base, std::unique_ptr<SparcOperand> Op) {
unsigned offsetReg = Op->getReg();
@@ -1000,7 +1000,7 @@ bool SparcAsmParser::matchRegisterName(const AsmToken &Tok, unsigned &RegNo,
RegKind = SparcOperand::rk_Special;
return true;
}
-
+
if (name.equals("wim")) {
RegNo = Sparc::WIM;
RegKind = SparcOperand::rk_Special;
@@ -1093,7 +1093,7 @@ bool SparcAsmParser::matchRegisterName(const AsmToken &Tok, unsigned &RegNo,
RegKind = SparcOperand::rk_CoprocReg;
return true;
}
-
+
if (name.equals("tpc")) {
RegNo = Sparc::TPC;
RegKind = SparcOperand::rk_Special;
diff --git a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
index 8e298e8316da..3e30dae1537f 100644
--- a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
+++ b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
@@ -350,18 +350,18 @@ DecodeStatus SparcDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
return MCDisassembler::Fail;
// Calling the auto-generated decoder function.
-
+
if (STI.getFeatureBits()[Sparc::FeatureV9])
{
Result = decodeInstruction(DecoderTableSparcV932, Instr, Insn, Address, this, STI);
}
else
{
- Result = decodeInstruction(DecoderTableSparcV832, Instr, Insn, Address, this, STI);
+ Result = decodeInstruction(DecoderTableSparcV832, Instr, Insn, Address, this, STI);
}
if (Result != MCDisassembler::Fail)
return Result;
-
+
Result =
decodeInstruction(DecoderTableSparc32, Instr, Insn, Address, this, STI);
@@ -662,7 +662,7 @@ static DecodeStatus DecodeTRAP(MCInst &MI, unsigned insn, uint64_t Address,
if (status != MCDisassembler::Success)
return status;
}
-
+
// Decode CC
MI.addOperand(MCOperand::createImm(cc));
diff --git a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
index 4981deae6af6..c1512cbdc44f 100644
--- a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
+++ b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
@@ -118,9 +118,9 @@ void SparcInstPrinter::printOperand(const MCInst *MI, int opNum,
if (MO.isImm()) {
switch (MI->getOpcode()) {
default:
- O << (int)MO.getImm();
+ O << (int)MO.getImm();
return;
-
+
case SP::TICCri: // Fall through
case SP::TICCrr: // Fall through
case SP::TRAPri: // Fall through
@@ -128,7 +128,7 @@ void SparcInstPrinter::printOperand(const MCInst *MI, int opNum,
case SP::TXCCri: // Fall through
case SP::TXCCrr: // Fall through
// Only seven-bit values up to 127.
- O << ((int) MO.getImm() & 0x7f);
+ O << ((int) MO.getImm() & 0x7f);
return;
}
}
diff --git a/lib/Target/Sparc/Sparc.h b/lib/Target/Sparc/Sparc.h
index 4135e4e1b61d..0cea53b359eb 100644
--- a/lib/Target/Sparc/Sparc.h
+++ b/lib/Target/Sparc/Sparc.h
@@ -73,7 +73,7 @@ namespace llvm {
FCC_LE = 13+16, // Less or Equal
FCC_ULE = 14+16, // Unordered or Less or Equal
FCC_O = 15+16, // Ordered
-
+
CPCC_A = 8+32, // Always
CPCC_N = 0+32, // Never
CPCC_3 = 7+32,
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index bf700d6a99d8..0cbbda787881 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -59,9 +59,9 @@ namespace llvm {
public:
SparcTargetLowering(const TargetMachine &TM, const SparcSubtarget &STI);
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
-
+
bool useSoftFloat() const override;
-
+
/// computeKnownBitsForTargetNode - Determine which of the bits specified
/// in Mask are known to be either zero or one and return them in the
/// KnownZero/KnownOne bitsets.
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
index 6750763d8ee5..47b42444b94d 100644
--- a/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -115,7 +115,7 @@ static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC)
case SPCC::FCC_UE: return SPCC::FCC_LG;
case SPCC::FCC_NE: return SPCC::FCC_E;
case SPCC::FCC_E: return SPCC::FCC_NE;
-
+
case SPCC::CPCC_A: return SPCC::CPCC_N;
case SPCC::CPCC_N: return SPCC::CPCC_A;
case SPCC::CPCC_3: LLVM_FALLTHROUGH;
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index a0d40653fd9b..07f9e7250bd9 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -100,7 +100,7 @@ SparcTargetMachine::SparcTargetMachine(
SparcTargetMachine::~SparcTargetMachine() {}
-const SparcSubtarget *
+const SparcSubtarget *
SparcTargetMachine::getSubtargetImpl(const Function &F) const {
Attribute CPUAttr = F.getFnAttribute("target-cpu");
Attribute FSAttr = F.getFnAttribute("target-features");
@@ -119,7 +119,7 @@ SparcTargetMachine::getSubtargetImpl(const Function &F) const {
F.hasFnAttribute("use-soft-float") &&
F.getFnAttribute("use-soft-float").getValueAsString() == "true";
- if (softFloat)
+ if (softFloat)
FS += FS.empty() ? "+soft-float" : ",+soft-float";
auto &I = SubtargetMap[CPU + FS];
diff --git a/lib/Target/SystemZ/SystemZHazardRecognizer.cpp b/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
index d300d1d88abc..b9e5788cf018 100644
--- a/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
+++ b/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
@@ -55,7 +55,7 @@ getNumDecoderSlots(SUnit *SU) const {
else
return 3; // Expanded/group-alone instruction
}
-
+
return 1; // Normal instruction
}
@@ -81,6 +81,7 @@ getHazardType(SUnit *m, int Stalls) {
void SystemZHazardRecognizer::Reset() {
CurrGroupSize = 0;
+ CurrGroupHas4RegOps = false;
clearProcResCounters();
GrpCount = 0;
LastFPdOpCycleIdx = UINT_MAX;
@@ -99,6 +100,12 @@ SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const {
if (SC->BeginGroup)
return (CurrGroupSize == 0);
+ // An instruction with 4 register operands will not fit in the last slot.
+ assert ((CurrGroupSize < 2 || !CurrGroupHas4RegOps) &&
+ "Current decoder group is already full!");
+ if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
+ return false;
+
// Since a full group is handled immediately in EmitInstruction(),
// SU should fit into current group. NumSlots should be 1 or 0,
// since it is not a cracked or expanded instruction.
@@ -108,6 +115,23 @@ SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const {
return true;
}
+bool SystemZHazardRecognizer::has4RegOps(const MachineInstr *MI) const {
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
+ const MCInstrDesc &MID = MI->getDesc();
+ unsigned Count = 0;
+ for (unsigned OpIdx = 0; OpIdx < MID.getNumOperands(); OpIdx++) {
+ const TargetRegisterClass *RC = TII->getRegClass(MID, OpIdx, TRI, MF);
+ if (RC == nullptr)
+ continue;
+ if (OpIdx >= MID.getNumDefs() &&
+ MID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
+ continue;
+ Count++;
+ }
+ return Count >= 4;
+}
+
void SystemZHazardRecognizer::nextGroup() {
if (CurrGroupSize == 0)
return;
@@ -119,6 +143,7 @@ void SystemZHazardRecognizer::nextGroup() {
// Reset counter for next group.
CurrGroupSize = 0;
+ CurrGroupHas4RegOps = false;
// Decrease counters for execution units by one.
for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
@@ -142,7 +167,7 @@ void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const {
const MCSchedClassDesc *SC = getSchedClass(SU);
if (!SC->isValid())
return;
-
+
for (TargetSchedModel::ProcResIter
PI = SchedModel->getWriteProcResBegin(SC),
PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
@@ -172,6 +197,8 @@ void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const {
OS << "/EndsGroup";
if (SU->isUnbuffered)
OS << "/Unbuffered";
+ if (has4RegOps(SU->getInstr()))
+ OS << "/4RegOps";
}
void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const {
@@ -184,6 +211,7 @@ void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const {
dbgs() << "{ " << CurGroupDbg << " }";
dbgs() << " (" << CurrGroupSize << " decoder slot"
<< (CurrGroupSize > 1 ? "s":"")
+ << (CurrGroupHas4RegOps ? ", 4RegOps" : "")
<< ")\n";
}
}
@@ -294,11 +322,14 @@ EmitInstruction(SUnit *SU) {
// Insert SU into current group by increasing number of slots used
// in current group.
CurrGroupSize += getNumDecoderSlots(SU);
- assert (CurrGroupSize <= 3);
+ CurrGroupHas4RegOps |= has4RegOps(SU->getInstr());
+ unsigned GroupLim =
+ ((CurrGroupHas4RegOps && getNumDecoderSlots(SU) < 3) ? 2 : 3);
+ assert (CurrGroupSize <= GroupLim && "SU does not fit into decoder group!");
// Check if current group is now full/ended. If so, move on to next
// group to be ready to evaluate more candidates.
- if (CurrGroupSize == 3 || SC->EndGroup)
+ if (CurrGroupSize == GroupLim || SC->EndGroup)
nextGroup();
}
@@ -306,7 +337,7 @@ int SystemZHazardRecognizer::groupingCost(SUnit *SU) const {
const MCSchedClassDesc *SC = getSchedClass(SU);
if (!SC->isValid())
return 0;
-
+
// If SU begins new group, it can either break a current group early
// or fit naturally if current group is empty (negative cost).
if (SC->BeginGroup) {
@@ -325,6 +356,10 @@ int SystemZHazardRecognizer::groupingCost(SUnit *SU) const {
return -1;
}
+ // An instruction with 4 register operands will not fit in the last slot.
+ if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
+ return 1;
+
// Most instructions can be placed in any decoder slot.
return 0;
}
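A short trace of the new constraint (a sketch): suppose CurrGroupSize == 2 and the next candidate SU has has4RegOps(SU->getInstr()) == true. Then fitsIntoCurrentGroup() returns false and groupingCost() returns +1, so the strategy prefers to close the current group; once such an instruction does start a group, CurrGroupHas4RegOps makes GroupLim drop from 3 to 2, and nextGroup() is taken after only two decoder slots.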
diff --git a/lib/Target/SystemZ/SystemZHazardRecognizer.h b/lib/Target/SystemZ/SystemZHazardRecognizer.h
index 40cb3acc7009..6292feefbfea 100644
--- a/lib/Target/SystemZ/SystemZHazardRecognizer.h
+++ b/lib/Target/SystemZ/SystemZHazardRecognizer.h
@@ -45,15 +45,17 @@ namespace llvm {
/// SystemZHazardRecognizer maintains the state for one MBB during scheduling.
class SystemZHazardRecognizer : public ScheduleHazardRecognizer {
-#ifndef NDEBUG
const SystemZInstrInfo *TII;
-#endif
const TargetSchedModel *SchedModel;
/// Keep track of the number of decoder slots used in the current
/// decoder group.
unsigned CurrGroupSize;
+ /// True if an instruction with four reg operands has been scheduled into
+ /// the current decoder group.
+ bool CurrGroupHas4RegOps;
+
/// The tracking of resources here are quite similar to the common
/// code use of a critical resource. However, z13 differs in the way
/// that it has two processor sides which may be interesting to
@@ -73,6 +75,9 @@ class SystemZHazardRecognizer : public ScheduleHazardRecognizer {
/// Return true if MI fits into current decoder group.
bool fitsIntoCurrentGroup(SUnit *SU) const;
+ /// Return true if this instruction has four register operands.
+ bool has4RegOps(const MachineInstr *MI) const;
+
/// Two decoder groups per cycle are formed (for z13), meaning 2x3
/// instructions. This function returns a number between 0 and 5,
/// representing the current decoder slot of the current cycle. If an SU
@@ -105,11 +110,7 @@ class SystemZHazardRecognizer : public ScheduleHazardRecognizer {
public:
SystemZHazardRecognizer(const SystemZInstrInfo *tii,
const TargetSchedModel *SM)
- :
-#ifndef NDEBUG
- TII(tii),
-#endif
- SchedModel(SM) {
+ : TII(tii), SchedModel(SM) {
Reset();
}
@@ -134,7 +135,7 @@ public:
/// new decoder group, this is negative if this fits the schedule or
/// positive if it would mean ending a group prematurely. For normal
/// instructions this returns 0.
- int groupingCost(SUnit *SU) const;
+ int groupingCost(SUnit *SU) const;
/// Return the cost of SU in regards to processor resources usage.
/// A positive value means it would be better to wait with SU, while
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 302c7883f97b..e76fa71dacd7 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -527,10 +527,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::FP_ROUND);
setTargetDAGCombine(ISD::BSWAP);
- setTargetDAGCombine(ISD::SHL);
- setTargetDAGCombine(ISD::SRA);
- setTargetDAGCombine(ISD::SRL);
- setTargetDAGCombine(ISD::ROTL);
// Handle intrinsics.
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
@@ -609,7 +605,7 @@ struct AddressingMode {
// True if use of index register is supported.
bool IndexReg;
-
+
AddressingMode(bool LongDispl, bool IdxReg) :
LongDisplacement(LongDispl), IndexReg(IdxReg) {}
};
@@ -5524,76 +5520,6 @@ SDValue SystemZTargetLowering::combineBSWAP(
return SDValue();
}
-SDValue SystemZTargetLowering::combineSHIFTROT(
- SDNode *N, DAGCombinerInfo &DCI) const {
-
- SelectionDAG &DAG = DCI.DAG;
-
- // Shift/rotate instructions only use the last 6 bits of the second operand
- // register. If the second operand is the result of an AND with an immediate
- // value that has its last 6 bits set, we can safely remove the AND operation.
- //
- // If the AND operation doesn't have the last 6 bits set, we can't remove it
- // entirely, but we can still truncate it to a 16-bit value. This prevents
- // us from ending up with a NILL with a signed operand, which will cause the
- // instruction printer to abort.
- SDValue N1 = N->getOperand(1);
- if (N1.getOpcode() == ISD::AND) {
- SDValue AndMaskOp = N1->getOperand(1);
- auto *AndMask = dyn_cast<ConstantSDNode>(AndMaskOp);
-
- // The AND mask is constant
- if (AndMask) {
- auto AmtVal = AndMask->getZExtValue();
-
- // Bottom 6 bits are set
- if ((AmtVal & 0x3f) == 0x3f) {
- SDValue AndOp = N1->getOperand(0);
-
- // This is the only use, so remove the node
- if (N1.hasOneUse()) {
- // Combine the AND away
- DCI.CombineTo(N1.getNode(), AndOp);
-
- // Return N so it isn't rechecked
- return SDValue(N, 0);
-
- // The node will be reused, so create a new node for this one use
- } else {
- SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N),
- N->getValueType(0), N->getOperand(0),
- AndOp);
- DCI.AddToWorklist(Replace.getNode());
-
- return Replace;
- }
-
- // We can't remove the AND, but we can use NILL here (normally we would
- // use NILF). Only keep the last 16 bits of the mask. The actual
- // transformation will be handled by .td definitions.
- } else if (AmtVal >> 16 != 0) {
- SDValue AndOp = N1->getOperand(0);
-
- auto NewMask = DAG.getConstant(AndMask->getZExtValue() & 0x0000ffff,
- SDLoc(AndMaskOp),
- AndMaskOp.getValueType());
-
- auto NewAnd = DAG.getNode(N1.getOpcode(), SDLoc(N1), N1.getValueType(),
- AndOp, NewMask);
-
- SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N),
- N->getValueType(0), N->getOperand(0),
- NewAnd);
- DCI.AddToWorklist(Replace.getNode());
-
- return Replace;
- }
- }
- }
-
- return SDValue();
-}
-
static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
// We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
// set by the CCReg instruction using the CCValid / CCMask masks,
@@ -5752,10 +5678,6 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
case ISD::BSWAP: return combineBSWAP(N, DCI);
- case ISD::SHL:
- case ISD::SRA:
- case ISD::SRL:
- case ISD::ROTL: return combineSHIFTROT(N, DCI);
case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index 0ca93a38a016..267e31a85216 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -602,7 +602,6 @@ private:
SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const;
- SDValue combineSHIFTROT(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineBR_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSELECT_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineGET_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index 9d7312269957..bb5b7aae883b 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -1352,8 +1352,8 @@ def : Pat<(z_udivrem GR64:$src1, (i64 (load bdxaddr20only:$src2))),
//===----------------------------------------------------------------------===//
// Logical shift left.
-defm SLL : BinaryRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>;
-def SLLG : BinaryRSY<"sllg", 0xEB0D, shl, GR64>;
+defm SLL : BinaryRSAndK<"sll", 0x89, 0xEBDF, shiftop<shl>, GR32>;
+def SLLG : BinaryRSY<"sllg", 0xEB0D, shiftop<shl>, GR64>;
def SLDL : BinaryRS<"sldl", 0x8D, null_frag, GR128>;
// Arithmetic shift left.
@@ -1364,20 +1364,20 @@ let Defs = [CC] in {
}
// Logical shift right.
-defm SRL : BinaryRSAndK<"srl", 0x88, 0xEBDE, srl, GR32>;
-def SRLG : BinaryRSY<"srlg", 0xEB0C, srl, GR64>;
+defm SRL : BinaryRSAndK<"srl", 0x88, 0xEBDE, shiftop<srl>, GR32>;
+def SRLG : BinaryRSY<"srlg", 0xEB0C, shiftop<srl>, GR64>;
def SRDL : BinaryRS<"srdl", 0x8C, null_frag, GR128>;
// Arithmetic shift right.
let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in {
- defm SRA : BinaryRSAndK<"sra", 0x8A, 0xEBDC, sra, GR32>;
- def SRAG : BinaryRSY<"srag", 0xEB0A, sra, GR64>;
+ defm SRA : BinaryRSAndK<"sra", 0x8A, 0xEBDC, shiftop<sra>, GR32>;
+ def SRAG : BinaryRSY<"srag", 0xEB0A, shiftop<sra>, GR64>;
def SRDA : BinaryRS<"srda", 0x8E, null_frag, GR128>;
}
// Rotate left.
-def RLL : BinaryRSY<"rll", 0xEB1D, rotl, GR32>;
-def RLLG : BinaryRSY<"rllg", 0xEB1C, rotl, GR64>;
+def RLL : BinaryRSY<"rll", 0xEB1D, shiftop<rotl>, GR32>;
+def RLLG : BinaryRSY<"rllg", 0xEB1C, shiftop<rotl>, GR64>;
// Rotate second operand left and inserted selected bits into first operand.
// These can act like 32-bit operands provided that the constant start and
@@ -2162,29 +2162,29 @@ def : Pat<(and (xor GR64:$x, (i64 -1)), GR64:$y),
// Complexity is added so that we match this before we match NILF on the AND
// operation alone.
let AddedComplexity = 4 in {
- def : Pat<(shl GR32:$val, (and GR32:$shift, uimm32:$imm)),
- (SLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+ def : Pat<(shl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+ (SLL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
- def : Pat<(sra GR32:$val, (and GR32:$shift, uimm32:$imm)),
- (SRA GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+ def : Pat<(sra GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+ (SRA GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
- def : Pat<(srl GR32:$val, (and GR32:$shift, uimm32:$imm)),
- (SRL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+ def : Pat<(srl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+ (SRL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
- def : Pat<(shl GR64:$val, (and GR32:$shift, uimm32:$imm)),
- (SLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+ def : Pat<(shl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+ (SLLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
- def : Pat<(sra GR64:$val, (and GR32:$shift, uimm32:$imm)),
- (SRAG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+ def : Pat<(sra GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+ (SRAG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
- def : Pat<(srl GR64:$val, (and GR32:$shift, uimm32:$imm)),
- (SRLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+ def : Pat<(srl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+ (SRLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
- def : Pat<(rotl GR32:$val, (and GR32:$shift, uimm32:$imm)),
- (RLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+ def : Pat<(rotl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+ (RLL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
- def : Pat<(rotl GR64:$val, (and GR32:$shift, uimm32:$imm)),
- (RLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+ def : Pat<(rotl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+ (RLLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
}
// Peepholes for turning scalar operations into block operations.
diff --git a/lib/Target/SystemZ/SystemZMachineScheduler.cpp b/lib/Target/SystemZ/SystemZMachineScheduler.cpp
index fcbf4c4b5fe4..98e761ef87fe 100644
--- a/lib/Target/SystemZ/SystemZMachineScheduler.cpp
+++ b/lib/Target/SystemZ/SystemZMachineScheduler.cpp
@@ -129,7 +129,7 @@ SystemZPostRASchedStrategy::
SystemZPostRASchedStrategy(const MachineSchedContext *C)
: MLI(C->MLI),
TII(static_cast<const SystemZInstrInfo *>
- (C->MF->getSubtarget().getInstrInfo())),
+ (C->MF->getSubtarget().getInstrInfo())),
MBB(nullptr), HazardRec(nullptr) {
const TargetSubtargetInfo *ST = &C->MF->getSubtarget();
SchedModel.init(ST);
@@ -169,8 +169,7 @@ SUnit *SystemZPostRASchedStrategy::pickNode(bool &IsTopNode) {
return *Available.begin();
}
- // All nodes that are possible to schedule are stored by in the
- // Available set.
+ // All nodes that are possible to schedule are stored in the Available set.
LLVM_DEBUG(dbgs() << "** Available: "; Available.dump(*HazardRec););
Candidate Best;
diff --git a/lib/Target/SystemZ/SystemZMachineScheduler.h b/lib/Target/SystemZ/SystemZMachineScheduler.h
index cb0304825966..ab820e5d3e63 100644
--- a/lib/Target/SystemZ/SystemZMachineScheduler.h
+++ b/lib/Target/SystemZ/SystemZMachineScheduler.h
@@ -26,7 +26,7 @@
using namespace llvm;
namespace llvm {
-
+
/// A MachineSchedStrategy implementation for SystemZ post RA scheduling.
class SystemZPostRASchedStrategy : public MachineSchedStrategy {
@@ -37,7 +37,7 @@ class SystemZPostRASchedStrategy : public MachineSchedStrategy {
// non-scheduled instructions, so it would not always be possible to call
// DAG->getSchedClass(SU).
TargetSchedModel SchedModel;
-
+
/// A candidate during instruction evaluation.
struct Candidate {
SUnit *SU = nullptr;
diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td
index da682cb4e5ab..7bf32bf19a4a 100644
--- a/lib/Target/SystemZ/SystemZOperands.td
+++ b/lib/Target/SystemZ/SystemZOperands.td
@@ -357,6 +357,7 @@ def imm32zx16 : Immediate<i32, [{
}], UIMM16, "U16Imm">;
def imm32sx16trunc : Immediate<i32, [{}], SIMM16, "S16Imm">;
+def imm32zx16trunc : Immediate<i32, [{}], UIMM16, "U16Imm">;
// Full 32-bit immediates. We need both signed and unsigned versions
// because the assembler is picky. E.g. AFI requires signed operands
diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td
index 3cfe23aec417..5103867e2d9a 100644
--- a/lib/Target/SystemZ/SystemZOperators.td
+++ b/lib/Target/SystemZ/SystemZOperators.td
@@ -697,6 +697,16 @@ class storei<SDPatternOperator operator, SDPatternOperator store = store>
: PatFrag<(ops node:$addr),
(store (operator), node:$addr)>;
+// Create a shift operator that optionally ignores an AND of the
+// shift count with an immediate if the bottom 6 bits are all set.
+def imm32bottom6set : PatLeaf<(i32 imm), [{
+ return (N->getZExtValue() & 0x3f) == 0x3f;
+}]>;
+class shiftop<SDPatternOperator operator>
+ : PatFrags<(ops node:$val, node:$count),
+ [(operator node:$val, node:$count),
+ (operator node:$val, (and node:$count, imm32bottom6set))]>;
+
// Vector representation of all-zeros and all-ones.
def z_vzero : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 0))))>;
def z_vones : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 65535))))>;
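The wrapper's effect is easiest to see on a pair of DAG inputs (a sketch): with shiftop<shl>, both
  (shl node:$val, node:$count)
  (shl node:$val, (and node:$count, 63))
match the same SLL/SLLG patterns, because 63 satisfies imm32bottom6set; the AND is simply skipped during selection, which is sound because the shift instructions read only the low 6 bits of the count. This is the isel-time replacement for the combineSHIFTROT DAG combine removed from SystemZISelLowering.cpp above.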
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index e2a3efda5c5e..c5cdc22f2099 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -329,7 +329,7 @@ bool SystemZTTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) {
}
int SystemZTTIImpl::getArithmeticInstrCost(
- unsigned Opcode, Type *Ty,
+ unsigned Opcode, Type *Ty,
TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info,
TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo,
@@ -469,7 +469,7 @@ int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
assert (Tp->isVectorTy());
assert (ST->hasVector() && "getShuffleCost() called.");
unsigned NumVectors = getNumberOfParts(Tp);
-
+
// TODO: Since fp32 is expanded, the shuffle cost should always be 0.
// FP128 values are always in scalar registers, so there is no work
@@ -647,7 +647,7 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
return Cost;
}
}
-
+
if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP ||
Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI) {
// TODO: Fix base implementation which could simplify things a bit here
@@ -704,7 +704,7 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP)
return (SrcScalarBits >= 32 ? 1 : 2 /*i8/i16 extend*/);
-
+
if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
Src->isIntegerTy(1)) {
// This should be extension of a compare i1 result, which is done with
diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp
index 42d92622d6c8..f23ea72eb513 100644
--- a/lib/Target/Target.cpp
+++ b/lib/Target/Target.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the common infrastructure (including C bindings) for
+// This file implements the common infrastructure (including C bindings) for
// libLLVMTarget.a, which implements target information.
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index 907ecf46e8ff..6bcf60fafc3e 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -92,10 +92,10 @@ static bool IsNullTerminatedString(const Constant *C) {
if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(C)) {
unsigned NumElts = CDS->getNumElements();
assert(NumElts != 0 && "Can't have an empty CDS");
-
+
if (CDS->getElementAsInteger(NumElts-1) != 0)
return false; // Not null terminated.
-
+
// Verify that the null doesn't occur anywhere else in the string.
for (unsigned i = 0; i != NumElts-1; ++i)
if (CDS->getElementAsInteger(i) == 0)
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index b84c2d31a63e..fafbed0bd935 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -2603,11 +2603,11 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
bool HadVerifyError = false;
// Append default arguments to "ins[bwld]"
- if (Name.startswith("ins") &&
+ if (Name.startswith("ins") &&
(Operands.size() == 1 || Operands.size() == 3) &&
(Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
Name == "ins")) {
-
+
AddDefaultSrcDestOperands(TmpOperands,
X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
DefaultMemDIOperand(NameLoc));
@@ -2615,7 +2615,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
}
// Append default arguments to "outs[bwld]"
- if (Name.startswith("outs") &&
+ if (Name.startswith("outs") &&
(Operands.size() == 1 || Operands.size() == 3) &&
(Name == "outsb" || Name == "outsw" || Name == "outsl" ||
Name == "outsd" || Name == "outs")) {
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index 82e82fe1efd9..0e861d5ddbc9 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -92,7 +92,7 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
// the hex value of the immediate operand when it isn't in the range
// [-256,255].
if (CommentStream && !HasCustomInstComment && (Imm > 255 || Imm < -256)) {
- // Don't print unnecessary hex sign bits.
+ // Don't print unnecessary hex sign bits.
if (Imm == (int16_t)(Imm))
*CommentStream << format("imm = 0x%" PRIX16 "\n", (uint16_t)Imm);
else if (Imm == (int32_t)(Imm))
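For example (a sketch): Imm = 4096 lies outside [-256,255] and fits in int16_t, so the comment stream receives "imm = 0x1000"; values that only fit in 32 or 64 bits presumably take the wider formats in the later branches of this chain.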
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index d030f26d98de..f1d15e66918b 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -307,10 +307,84 @@ class X86MCInstrAnalysis : public MCInstrAnalysis {
public:
X86MCInstrAnalysis(const MCInstrInfo *MCII) : MCInstrAnalysis(MCII) {}
+ bool isDependencyBreaking(const MCSubtargetInfo &STI,
+ const MCInst &Inst) const override;
bool clearsSuperRegisters(const MCRegisterInfo &MRI, const MCInst &Inst,
APInt &Mask) const override;
};
+bool X86MCInstrAnalysis::isDependencyBreaking(const MCSubtargetInfo &STI,
+ const MCInst &Inst) const {
+ if (STI.getCPU() == "btver2") {
+ // Reference: Agner Fog's microarchitecture.pdf - Section 20 "AMD Bobcat and
+ // Jaguar pipeline", subsection 8 "Dependency-breaking instructions".
+ switch (Inst.getOpcode()) {
+ default:
+ return false;
+ case X86::SUB32rr:
+ case X86::SUB64rr:
+ case X86::SBB32rr:
+ case X86::SBB64rr:
+ case X86::XOR32rr:
+ case X86::XOR64rr:
+ case X86::XORPSrr:
+ case X86::XORPDrr:
+ case X86::VXORPSrr:
+ case X86::VXORPDrr:
+ case X86::ANDNPSrr:
+ case X86::VANDNPSrr:
+ case X86::ANDNPDrr:
+ case X86::VANDNPDrr:
+ case X86::PXORrr:
+ case X86::VPXORrr:
+ case X86::PANDNrr:
+ case X86::VPANDNrr:
+ case X86::PSUBBrr:
+ case X86::PSUBWrr:
+ case X86::PSUBDrr:
+ case X86::PSUBQrr:
+ case X86::VPSUBBrr:
+ case X86::VPSUBWrr:
+ case X86::VPSUBDrr:
+ case X86::VPSUBQrr:
+ case X86::PCMPEQBrr:
+ case X86::PCMPEQWrr:
+ case X86::PCMPEQDrr:
+ case X86::PCMPEQQrr:
+ case X86::VPCMPEQBrr:
+ case X86::VPCMPEQWrr:
+ case X86::VPCMPEQDrr:
+ case X86::VPCMPEQQrr:
+ case X86::PCMPGTBrr:
+ case X86::PCMPGTWrr:
+ case X86::PCMPGTDrr:
+ case X86::PCMPGTQrr:
+ case X86::VPCMPGTBrr:
+ case X86::VPCMPGTWrr:
+ case X86::VPCMPGTDrr:
+ case X86::VPCMPGTQrr:
+ case X86::MMX_PXORirr:
+ case X86::MMX_PANDNirr:
+ case X86::MMX_PSUBBirr:
+ case X86::MMX_PSUBDirr:
+ case X86::MMX_PSUBQirr:
+ case X86::MMX_PSUBWirr:
+ case X86::MMX_PCMPGTBirr:
+ case X86::MMX_PCMPGTDirr:
+ case X86::MMX_PCMPGTWirr:
+ case X86::MMX_PCMPEQBirr:
+ case X86::MMX_PCMPEQDirr:
+ case X86::MMX_PCMPEQWirr:
+ return Inst.getOperand(1).getReg() == Inst.getOperand(2).getReg();
+ case X86::CMP32rr:
+ case X86::CMP64rr:
+ return Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg();
+ }
+ }
+
+ return false;
+}
+
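For context, typical idioms recognized above (a sketch, AT&T syntax):
  xorl    %eax, %eax      # operands 1 and 2 are the same register: result is always 0
  pcmpeqd %xmm1, %xmm1    # a register compared with itself: result is always all-ones
On Bobcat/Jaguar these do not wait on the previous value of the register, so marking them dependency-breaking lets a tool such as llvm-mca (the apparent consumer of this hook) drop the false dependency; the CMP32rr/CMP64rr case checks operands 0 and 1 because a compare has no register def.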
bool X86MCInstrAnalysis::clearsSuperRegisters(const MCRegisterInfo &MRI,
const MCInst &Inst,
APInt &Mask) const {
diff --git a/lib/Target/X86/X86CallingConv.h b/lib/Target/X86/X86CallingConv.h
index c49a6838fa44..d0fcbd313312 100644
--- a/lib/Target/X86/X86CallingConv.h
+++ b/lib/Target/X86/X86CallingConv.h
@@ -66,7 +66,7 @@ inline bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT,
// not to split i64 and double between a register and stack
static const MCPhysReg RegList[] = {X86::EAX, X86::EDX, X86::ECX};
static const unsigned NumRegs = sizeof(RegList)/sizeof(RegList[0]);
-
+
SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
// If this is the first part of a double/i64/i128, or if we're already
diff --git a/lib/Target/X86/X86CmovConversion.cpp b/lib/Target/X86/X86CmovConversion.cpp
index f73455cc31b8..1c5f110d8c60 100644
--- a/lib/Target/X86/X86CmovConversion.cpp
+++ b/lib/Target/X86/X86CmovConversion.cpp
@@ -622,7 +622,7 @@ void X86CmovConverterPass::convertCmovInstsToBranches(
// If the CMOV group is not packed, e.g., there are debug instructions between
// first CMOV and last CMOV, then pack the group and make the CMOV instruction
- // consecutive by moving the debug instructions to after the last CMOV.
+ // consecutive by moving the debug instructions to after the last CMOV.
packCmovGroup(Group.front(), Group.back());
// To convert a CMOVcc instruction, we actually have to insert the diamond
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index de8b40f28a86..35a15577fe09 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -1195,7 +1195,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
if (Ret->getNumOperands() > 0) {
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
+ GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
@@ -2649,7 +2649,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(X86::VMOVPDI2DIrr), ResultReg)
.addReg(InputReg, RegState::Kill);
-
+
// The result value is in the lower 16-bits of ResultReg.
unsigned RegIdx = X86::sub_16bit;
ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, true, RegIdx);
@@ -3687,7 +3687,7 @@ X86FastISel::fastSelectInstruction(const Instruction *I) {
unsigned Reg = getRegForValue(I->getOperand(0));
if (Reg == 0)
return false;
-
+
// No instruction is needed for conversion. Reuse the register used by
// the first operand.
updateValueMap(I, Reg);
diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp
index d85389a0a7f1..f3f7f6a37360 100644
--- a/lib/Target/X86/X86FixupLEAs.cpp
+++ b/lib/Target/X86/X86FixupLEAs.cpp
@@ -578,7 +578,7 @@ bool FixupLEAPass::processBasicBlock(MachineFunction &MF,
continue;
if (OptLEA) {
- if (MF.getSubtarget<X86Subtarget>().isSLM())
+ if (MF.getSubtarget<X86Subtarget>().slowLEA())
processInstructionForSLM(I, MFI);
else {
diff --git a/lib/Target/X86/X86FlagsCopyLowering.cpp b/lib/Target/X86/X86FlagsCopyLowering.cpp
index 1ba08d39c595..c17c51a7aeac 100644
--- a/lib/Target/X86/X86FlagsCopyLowering.cpp
+++ b/lib/Target/X86/X86FlagsCopyLowering.cpp
@@ -730,9 +730,12 @@ CondRegArray X86FlagsCopyLoweringPass::collectCondsInRegs(
for (MachineInstr &MI :
llvm::reverse(llvm::make_range(MBB.begin(), TestPos))) {
X86::CondCode Cond = X86::getCondFromSETOpc(MI.getOpcode());
- if (Cond != X86::COND_INVALID && MI.getOperand(0).isReg() &&
- TRI->isVirtualRegister(MI.getOperand(0).getReg()))
+ if (Cond != X86::COND_INVALID && !MI.mayStore() && MI.getOperand(0).isReg() &&
+ TRI->isVirtualRegister(MI.getOperand(0).getReg())) {
+ assert(MI.getOperand(0).isDef() &&
+ "A non-storing SETcc should always define a register!");
CondRegs[Cond] = MI.getOperand(0).getReg();
+ }
// Stop scanning when we see the first definition of the EFLAGS as prior to
// this we would potentially capture the wrong flag state.
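The new `!MI.mayStore()` guard exists because SETcc also has a memory form whose first operand is part of the address, not a register being defined; only the register form materializes a condition value the pass can reuse. A minimal standalone model (hypothetical `FakeInstr` type, not the MachineInstr API) of the check:

    #include <cassert>

    // Hedged sketch: only a non-storing SETcc has a register *def* in
    // operand 0 that is safe to record as a captured condition value.
    struct FakeInstr {
      bool MayStore;    // memory form, e.g. "sete (%rdi)"
      bool Op0IsRegDef; // register form, e.g. "sete %al"
    };

    static bool canCaptureCond(const FakeInstr &MI) {
      return !MI.MayStore && MI.Op0IsRegDef;
    }

    int main() {
      assert(canCaptureCond({false, true}));   // sete %al
      assert(!canCaptureCond({true, false}));  // sete (%rdi)
      return 0;
    }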
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index ae748901164a..f330acff61a1 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -347,12 +347,12 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
LiveBundle &Bundle =
LiveBundles[Bundles->getBundle(Entry->getNumber(), false)];
-
+
// In regcall convention, some FP registers may not be passed through
// the stack, so they will need to be assigned to the stack first
if ((Entry->getParent()->getFunction().getCallingConv() ==
CallingConv::X86_RegCall) && (Bundle.Mask && !Bundle.FixCount)) {
- // In the register calling convention, up to one FP argument could be
+ // In the register calling convention, up to one FP argument could be
// saved in the first FP register.
// If bundle.mask is non-zero and Bundle.FixCount is zero, it means
// that the FP registers contain arguments.
@@ -991,7 +991,7 @@ void FPS::handleCall(MachineBasicBlock::iterator &I) {
assert(STReturns == 0 || (isMask_32(STReturns) && N <= 2));
// Reset the FP Stack - It is required because of possible leftovers from
- // passed arguments. The caller should assume that the FP stack is
+ // passed arguments. The caller should assume that the FP stack is
// returned empty (unless the callee returns values on FP stack).
while (StackTop > 0)
popReg();
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index a257ec41f75b..e207c343fac8 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -68,7 +68,7 @@ X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
// needsFrameIndexResolution - Do we need to perform FI resolution for
// this function. Normally, this is required only when the function
// has any stack objects. However, FI resolution actually has another job,
-// not apparent from the title - it resolves callframesetup/destroy
+// not apparent from the title - it resolves callframesetup/destroy
// that were not simplified earlier.
// So, this is required for x86 functions that have push sequences even
// when there are no stack objects.
@@ -607,8 +607,7 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
int64_t RCXShadowSlot = 0;
int64_t RDXShadowSlot = 0;
- // If inlining in the prolog, save RCX and RDX.
- // Future optimization: don't save or restore if not live in.
+ // If inlining in the prolog, save RCX and RDX.
if (InProlog) {
// Compute the offsets. We need to account for things already
// pushed onto the stack at this point: return address, frame
@@ -616,15 +615,30 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize();
const bool HasFP = hasFP(MF);
- RCXShadowSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
- RDXShadowSlot = RCXShadowSlot + 8;
- // Emit the saves.
- addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
- RCXShadowSlot)
- .addReg(X86::RCX);
- addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
- RDXShadowSlot)
- .addReg(X86::RDX);
+
+ // Check if we need to spill RCX and/or RDX.
+ // Here we assume that no earlier prologue instruction changes RCX and/or
+ // RDX, so checking the block live-ins is enough.
+ const bool IsRCXLiveIn = MBB.isLiveIn(X86::RCX);
+ const bool IsRDXLiveIn = MBB.isLiveIn(X86::RDX);
+ int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
+ // Assign the initial slot to both registers, then change RDX's slot if both
+ // need to be spilled.
+ if (IsRCXLiveIn)
+ RCXShadowSlot = InitSlot;
+ if (IsRDXLiveIn)
+ RDXShadowSlot = InitSlot;
+ if (IsRDXLiveIn && IsRCXLiveIn)
+ RDXShadowSlot += 8;
+ // Emit the saves if needed.
+ if (IsRCXLiveIn)
+ addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
+ RCXShadowSlot)
+ .addReg(X86::RCX);
+ if (IsRDXLiveIn)
+ addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
+ RDXShadowSlot)
+ .addReg(X86::RDX);
} else {
// Not in the prolog. Copy RAX to a virtual reg.
BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX);
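The slot bookkeeping above gives each live-in register the first free shadow slot and bumps RDX by 8 only when both registers must be saved; a zero slot doubles as the "not spilled" marker used by the restore code later in the function (InitSlot is always at least 8, so zero is unambiguous). A standalone sketch of the same arithmetic:

    #include <cassert>
    #include <cstdint>

    // Hedged sketch of the assignment above; InitSlot stands in for
    // 8 + CalleeSaveSize + (HasFP ? 8 : 0) from the real code.
    static void assignSlots(bool RCXLive, bool RDXLive, int64_t InitSlot,
                            int64_t &RCXSlot, int64_t &RDXSlot) {
      RCXSlot = RDXSlot = 0;                 // 0 means "not spilled"
      if (RCXLive) RCXSlot = InitSlot;
      if (RDXLive) RDXSlot = InitSlot;
      if (RCXLive && RDXLive) RDXSlot += 8;  // both live: RDX takes the next slot
    }

    int main() {
      int64_t RCX, RDX;
      assignSlots(true, true, 16, RCX, RDX);
      assert(RCX == 16 && RDX == 24);   // two distinct 8-byte slots
      assignSlots(false, true, 16, RCX, RDX);
      assert(RCX == 0 && RDX == 16);    // only RDX needs a slot
      return 0;
    }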
@@ -661,6 +675,7 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
BuildMI(&MBB, DL, TII.get(X86::JAE_1)).addMBB(ContinueMBB);
// Add code to roundMBB to round the final stack pointer to a page boundary.
+ RoundMBB->addLiveIn(FinalReg);
BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg)
.addReg(FinalReg)
.addImm(PageMask);
@@ -677,6 +692,7 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
.addMBB(LoopMBB);
}
+ LoopMBB->addLiveIn(JoinReg);
addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg,
false, -PageSize);
@@ -688,6 +704,8 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
.addImm(0)
.addReg(0)
.addImm(0);
+
+ LoopMBB->addLiveIn(RoundedReg);
BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr))
.addReg(RoundedReg)
.addReg(ProbeReg);
@@ -697,16 +715,19 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
// If in prolog, restore RDX and RCX.
if (InProlog) {
- addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::MOV64rm),
- X86::RCX),
- X86::RSP, false, RCXShadowSlot);
- addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::MOV64rm),
- X86::RDX),
- X86::RSP, false, RDXShadowSlot);
+ if (RCXShadowSlot) // A nonzero slot means we spilled RCX in the prologue.
+ addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
+ TII.get(X86::MOV64rm), X86::RCX),
+ X86::RSP, false, RCXShadowSlot);
+ if (RDXShadowSlot) // A nonzero slot means we spilled RDX in the prologue.
+ addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
+ TII.get(X86::MOV64rm), X86::RDX),
+ X86::RSP, false, RDXShadowSlot);
}
// Now that the probing is done, add code to continueMBB to update
// the stack pointer for real.
+ ContinueMBB->addLiveIn(SizeReg);
BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
.addReg(X86::RSP)
.addReg(SizeReg);
@@ -734,8 +755,6 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
CMBBI->setFlag(MachineInstr::FrameSetup);
}
}
-
- // Possible TODO: physreg liveness for InProlog case.
}
void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
@@ -2694,7 +2713,7 @@ bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
Regs[FoundRegs++] = Regs[0];
for (int i = 0; i < NumPops; ++i)
- BuildMI(MBB, MBBI, DL,
+ BuildMI(MBB, MBBI, DL,
TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r), Regs[i]);
return true;
@@ -2984,7 +3003,7 @@ struct X86FrameSortingComparator {
// in general. Something to keep in mind, though.
if (DensityAScaled == DensityBScaled)
return A.ObjectAlignment < B.ObjectAlignment;
-
+
return DensityAScaled < DensityBScaled;
}
};
@@ -3020,7 +3039,7 @@ void X86FrameLowering::orderFrameObjects(
if (ObjectSize == 0)
// Variable size. Just use 4.
SortingObjects[Obj].ObjectSize = 4;
- else
+ else
SortingObjects[Obj].ObjectSize = ObjectSize;
}
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 7dcdb7967058..2820004cfc6d 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1800,17 +1800,19 @@ X86TargetLowering::getPreferredVectorAction(EVT VT) const {
}
MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
EVT VT) const {
if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
return MVT::v32i8;
- return TargetLowering::getRegisterTypeForCallingConv(Context, VT);
+ return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
}
unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
EVT VT) const {
if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
return 1;
- return TargetLowering::getNumRegistersForCallingConv(Context, VT);
+ return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}
EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
@@ -23366,7 +23368,7 @@ static SDValue convertShiftLeftToScale(SDValue Amt, const SDLoc &dl,
return DAG.getBuildVector(VT, dl, Elts);
}
- // If the target doesn't support variable shifts, use either FP conversion
+ // If the target doesn't support variable shifts, use either FP conversion
// or integer multiplication to avoid shifting each element individually.
if (VT == MVT::v4i32) {
Amt = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, dl, VT));
@@ -23509,6 +23511,24 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
if (SDValue Scale = convertShiftLeftToScale(Amt, dl, Subtarget, DAG))
return DAG.getNode(ISD::MUL, dl, VT, R, Scale);
+ // Constant ISD::SRL can be performed efficiently on vXi8/vXi16 vectors as we
+ // can replace it with ISD::MULHU, creating a scale factor from (NumEltBits - Amt).
+ // TODO: Improve support for the shift by zero special case.
+ if (Op.getOpcode() == ISD::SRL && ConstantAmt &&
+ ((Subtarget.hasSSE41() && VT == MVT::v8i16) ||
+ DAG.isKnownNeverZero(Amt)) &&
+ (VT == MVT::v16i8 || VT == MVT::v8i16 ||
+ ((VT == MVT::v32i8 || VT == MVT::v16i16) && Subtarget.hasInt256()))) {
+ SDValue EltBits = DAG.getConstant(VT.getScalarSizeInBits(), dl, VT);
+ SDValue RAmt = DAG.getNode(ISD::SUB, dl, VT, EltBits, Amt);
+ if (SDValue Scale = convertShiftLeftToScale(RAmt, dl, Subtarget, DAG)) {
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue ZAmt = DAG.getSetCC(dl, VT, Amt, Zero, ISD::SETEQ);
+ SDValue Res = DAG.getNode(ISD::MULHU, dl, VT, R, Scale);
+ return DAG.getSelect(dl, VT, ZAmt, R, Res);
+ }
+ }
+
// v4i32 Non Uniform Shifts.
// If the shift amount is constant we can shift each lane using the SSE2
// immediate shifts, else we need to zero-extend each lane to the lower i64
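The new combine rests on an arithmetic identity: for unsigned W-bit lanes and a shift amount 1 <= c < W, a logical right shift equals the high half of a multiply by 2^(W-c). The c == 0 case would need the scale 2^W, which does not fit in a lane, hence the explicit select against a zero shift amount. A standalone exhaustive check for W = 16:

    #include <cassert>
    #include <cstdint>

    // Hedged sketch: x >> c == mulhu(x, 1 << (16 - c)) for c in 1..15.
    static uint16_t mulhu16(uint16_t A, uint16_t B) {
      return (uint16_t)(((uint32_t)A * B) >> 16);
    }

    int main() {
      for (uint32_t X = 0; X <= 0xFFFF; ++X)
        for (unsigned C = 1; C < 16; ++C)
          assert((uint16_t)(X >> C) ==
                 mulhu16((uint16_t)X, (uint16_t)(1u << (16 - C))));
      return 0;
    }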
@@ -33425,33 +33445,32 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
}
}
- // Handle (CMOV C-1, (ADD (CTTZ X), C), (X != 0)) ->
- // (ADD (CMOV (CTTZ X), -1, (X != 0)), C) or
- // (CMOV (ADD (CTTZ X), C), C-1, (X == 0)) ->
- // (ADD (CMOV C-1, (CTTZ X), (X == 0)), C)
- if (CC == X86::COND_NE || CC == X86::COND_E) {
- auto *Cnst = CC == X86::COND_E ? dyn_cast<ConstantSDNode>(TrueOp)
- : dyn_cast<ConstantSDNode>(FalseOp);
- SDValue Add = CC == X86::COND_E ? FalseOp : TrueOp;
-
- if (Cnst && Add.getOpcode() == ISD::ADD && Add.hasOneUse()) {
- auto *AddOp1 = dyn_cast<ConstantSDNode>(Add.getOperand(1));
- SDValue AddOp2 = Add.getOperand(0);
- if (AddOp1 && (AddOp2.getOpcode() == ISD::CTTZ_ZERO_UNDEF ||
- AddOp2.getOpcode() == ISD::CTTZ)) {
- APInt Diff = Cnst->getAPIntValue() - AddOp1->getAPIntValue();
- if (CC == X86::COND_E) {
- Add = DAG.getNode(X86ISD::CMOV, DL, Add.getValueType(), AddOp2,
- DAG.getConstant(Diff, DL, Add.getValueType()),
- DAG.getConstant(CC, DL, MVT::i8), Cond);
- } else {
- Add = DAG.getNode(X86ISD::CMOV, DL, Add.getValueType(),
- DAG.getConstant(Diff, DL, Add.getValueType()),
- AddOp2, DAG.getConstant(CC, DL, MVT::i8), Cond);
- }
- return DAG.getNode(X86ISD::ADD, DL, Add.getValueType(), Add,
- SDValue(AddOp1, 0));
- }
+ // Fold (CMOV C1, (ADD (CTTZ X), C2), (X != 0)) ->
+ // (ADD (CMOV C1-C2, (CTTZ X), (X != 0)), C2)
+ // Or (CMOV (ADD (CTTZ X), C2), C1, (X == 0)) ->
+ // (ADD (CMOV (CTTZ X), C1-C2, (X == 0)), C2)
+ if ((CC == X86::COND_NE || CC == X86::COND_E) &&
+ Cond.getOpcode() == X86ISD::CMP && isNullConstant(Cond.getOperand(1))) {
+ SDValue Add = TrueOp;
+ SDValue Const = FalseOp;
+ // Canonicalize the condition code for easier matching and output.
+ if (CC == X86::COND_E) {
+ std::swap(Add, Const);
+ CC = X86::COND_NE;
+ }
+
+ // Ok, now make sure that Add is (add (cttz X), C2) and Const is a constant.
+ if (isa<ConstantSDNode>(Const) && Add.getOpcode() == ISD::ADD &&
+ Add.hasOneUse() && isa<ConstantSDNode>(Add.getOperand(1)) &&
+ (Add.getOperand(0).getOpcode() == ISD::CTTZ_ZERO_UNDEF ||
+ Add.getOperand(0).getOpcode() == ISD::CTTZ) &&
+ Add.getOperand(0).getOperand(0) == Cond.getOperand(0)) {
+ EVT VT = N->getValueType(0);
+ // This should constant fold.
+ SDValue Diff = DAG.getNode(ISD::SUB, DL, VT, Const, Add.getOperand(1));
+ SDValue CMov = DAG.getNode(X86ISD::CMOV, DL, VT, Diff, Add.getOperand(0),
+ DAG.getConstant(CC, DL, MVT::i8), Cond);
+ return DAG.getNode(ISD::ADD, DL, VT, CMov, Add.getOperand(1));
}
}
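A concrete instance of the rewritten fold, assuming TZCNT semantics (a count of 32 for a zero input): `(x != 0) ? cttz(x) + 5 : 37` becomes `((x != 0) ? cttz(x) : 37 - 5) + 5`, and since 37 - 5 == 32 the inner select is exactly what TZCNT already computes, so the CMOV can disappear entirely. A standalone check of the equivalence (using the GCC/Clang `__builtin_ctz` builtin):

    #include <cassert>
    #include <cstdint>

    static unsigned cttz32(uint32_t X) {   // 32 on zero input, like TZCNT
      return X ? (unsigned)__builtin_ctz(X) : 32;
    }

    int main() {
      for (uint32_t X : {0u, 1u, 8u, 0x80000000u}) {
        unsigned Before = (X != 0) ? (unsigned)__builtin_ctz(X) + 5 : 37;
        unsigned After = cttz32(X) + 5;    // select folded into the count
        assert(Before == After);
      }
      return 0;
    }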
@@ -33873,31 +33892,42 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!C)
return SDValue();
- uint64_t MulAmt = C->getZExtValue();
- if (isPowerOf2_64(MulAmt))
+ if (isPowerOf2_64(C->getZExtValue()))
return SDValue();
+ int64_t SignMulAmt = C->getSExtValue();
+ assert(SignMulAmt != INT64_MIN && "Int min should have been handled!");
+ uint64_t AbsMulAmt = SignMulAmt < 0 ? -SignMulAmt : SignMulAmt;
+
SDLoc DL(N);
- if (MulAmt == 3 || MulAmt == 5 || MulAmt == 9)
- return DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
- N->getOperand(1));
+ if (AbsMulAmt == 3 || AbsMulAmt == 5 || AbsMulAmt == 9) {
+ SDValue NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
+ DAG.getConstant(AbsMulAmt, DL, VT));
+ if (SignMulAmt < 0)
+ NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ NewMul);
+
+ return NewMul;
+ }
uint64_t MulAmt1 = 0;
uint64_t MulAmt2 = 0;
- if ((MulAmt % 9) == 0) {
+ if ((AbsMulAmt % 9) == 0) {
MulAmt1 = 9;
- MulAmt2 = MulAmt / 9;
- } else if ((MulAmt % 5) == 0) {
+ MulAmt2 = AbsMulAmt / 9;
+ } else if ((AbsMulAmt % 5) == 0) {
MulAmt1 = 5;
- MulAmt2 = MulAmt / 5;
- } else if ((MulAmt % 3) == 0) {
+ MulAmt2 = AbsMulAmt / 5;
+ } else if ((AbsMulAmt % 3) == 0) {
MulAmt1 = 3;
- MulAmt2 = MulAmt / 3;
+ MulAmt2 = AbsMulAmt / 3;
}
SDValue NewMul;
+ // For negative multiply amounts, only allow MulAmt2 to be a power of 2.
if (MulAmt2 &&
- (isPowerOf2_64(MulAmt2) || MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)){
+ (isPowerOf2_64(MulAmt2) ||
+ (SignMulAmt >= 0 && (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)))) {
if (isPowerOf2_64(MulAmt2) &&
!(N->hasOneUse() && N->use_begin()->getOpcode() == ISD::ADD))
@@ -33919,17 +33949,19 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
else
NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul,
DAG.getConstant(MulAmt2, DL, VT));
+
+ // Negate the result.
+ if (SignMulAmt < 0)
+ NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ NewMul);
} else if (!Subtarget.slowLEA())
- NewMul = combineMulSpecial(MulAmt, N, DAG, VT, DL);
+ NewMul = combineMulSpecial(C->getZExtValue(), N, DAG, VT, DL);
if (!NewMul) {
- assert(MulAmt != 0 &&
- MulAmt != (VT == MVT::i64 ? UINT64_MAX : UINT32_MAX) &&
+ assert(C->getZExtValue() != 0 &&
+ C->getZExtValue() != (VT == MVT::i64 ? UINT64_MAX : UINT32_MAX) &&
"Both cases that could cause potential overflows should have "
"already been handled.");
- int64_t SignMulAmt = C->getSExtValue();
- assert(SignMulAmt != INT64_MIN && "Int min should have been handled!");
- uint64_t AbsMulAmt = SignMulAmt < 0 ? -SignMulAmt : SignMulAmt;
if (isPowerOf2_64(AbsMulAmt - 1)) {
// (mul x, 2^N + 1) => (add (shl x, N), x)
NewMul = DAG.getNode(
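The surrounding decomposition turns a constant multiply into LEA-friendly pieces: factors of 3, 5 and 9 map to `lea (reg,reg,2/4/8)`, an optional power-of-two factor becomes a shift, and, after this change, a negative amount is handled by negating the positive product at the end (e.g. x * -45 = -((x * 9) * 5)). A standalone sketch of the factoring (illustrative only; the real code emits DAG nodes and applies profitability checks):

    #include <cassert>
    #include <cstdint>

    // Hedged sketch: one LEA-friendly factor, then the remaining factor,
    // then a final negation for negative amounts.
    static int64_t mulByLEA(int64_t X, int64_t Amt) {
      uint64_t Abs = Amt < 0 ? -(uint64_t)Amt : (uint64_t)Amt;
      int64_t R = X;
      if (Abs % 9 == 0)      { R = R * 9; Abs /= 9; }  // lea (reg,reg,8)
      else if (Abs % 5 == 0) { R = R * 5; Abs /= 5; }  // lea (reg,reg,4)
      else if (Abs % 3 == 0) { R = R * 3; Abs /= 3; }  // lea (reg,reg,2)
      R = R * (int64_t)Abs;          // remaining factor: shift or second LEA
      return Amt < 0 ? 0 - R : R;    // negate the result when needed
    }

    int main() {
      for (int64_t X : {0, 1, -7, 123}) {
        assert(mulByLEA(X, 45) == X * 45);    // 45 = 9 * 5
        assert(mulByLEA(X, -45) == X * -45);  // negate the final product
        assert(mulByLEA(X, 24) == X * 24);    // 24 = 3 * 8 (shift)
      }
      return 0;
    }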
@@ -36738,6 +36770,145 @@ static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL,
return DAG.getNode(Opc, DL, VT, LHS, RHS);
}
+// Attempt to match PMADDUBSW, which multiplies corresponding unsigned bytes
+// from one vector with signed bytes from another vector, adds together
+// adjacent pairs of 16-bit products, and saturates the result before
+// truncating to 16 bits.
+//
+// Which looks something like this:
+// (i16 (ssat (add (mul (zext (even elts (i8 A))), (sext (even elts (i8 B)))),
+// (mul (zext (odd elts (i8 A)), (sext (odd elts (i8 B))))))))
+static SDValue detectPMADDUBSW(SDValue In, EVT VT, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget,
+ const SDLoc &DL) {
+ if (!VT.isVector() || !Subtarget.hasSSSE3())
+ return SDValue();
+
+ unsigned NumElems = VT.getVectorNumElements();
+ EVT ScalarVT = VT.getVectorElementType();
+ if (ScalarVT != MVT::i16 || NumElems < 8 || !isPowerOf2_32(NumElems))
+ return SDValue();
+
+ SDValue SSatVal = detectSSatPattern(In, VT);
+ if (!SSatVal || SSatVal.getOpcode() != ISD::ADD)
+ return SDValue();
+
+ // Ok, this is a signed saturation of an ADD. See if this ADD is adding pairs
+ // of multiplies from even/odd elements.
+ SDValue N0 = SSatVal.getOperand(0);
+ SDValue N1 = SSatVal.getOperand(1);
+
+ if (N0.getOpcode() != ISD::MUL || N1.getOpcode() != ISD::MUL)
+ return SDValue();
+
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ SDValue N10 = N1.getOperand(0);
+ SDValue N11 = N1.getOperand(1);
+
+ // TODO: Handle constant vectors and use knownbits/computenumsignbits?
+ // Canonicalize zero_extend to LHS.
+ if (N01.getOpcode() == ISD::ZERO_EXTEND)
+ std::swap(N00, N01);
+ if (N11.getOpcode() == ISD::ZERO_EXTEND)
+ std::swap(N10, N11);
+
+ // Ensure we have a zero_extend and a sign_extend.
+ if (N00.getOpcode() != ISD::ZERO_EXTEND ||
+ N01.getOpcode() != ISD::SIGN_EXTEND ||
+ N10.getOpcode() != ISD::ZERO_EXTEND ||
+ N11.getOpcode() != ISD::SIGN_EXTEND)
+ return SDValue();
+
+ // Peek through the extends.
+ N00 = N00.getOperand(0);
+ N01 = N01.getOperand(0);
+ N10 = N10.getOperand(0);
+ N11 = N11.getOperand(0);
+
+ // Ensure the extend is from vXi8.
+ if (N00.getValueType().getVectorElementType() != MVT::i8 ||
+ N01.getValueType().getVectorElementType() != MVT::i8 ||
+ N10.getValueType().getVectorElementType() != MVT::i8 ||
+ N11.getValueType().getVectorElementType() != MVT::i8)
+ return SDValue();
+
+ // All inputs should be build_vectors.
+ if (N00.getOpcode() != ISD::BUILD_VECTOR ||
+ N01.getOpcode() != ISD::BUILD_VECTOR ||
+ N10.getOpcode() != ISD::BUILD_VECTOR ||
+ N11.getOpcode() != ISD::BUILD_VECTOR)
+ return SDValue();
+
+ // N00/N10 are zero extended. N01/N11 are sign extended.
+
+ // For each element, we need to ensure we have an odd element from one vector
+ // multiplied by the odd element of another vector and the even element from
+ // one of the same vectors being multiplied by the even element from the
+ // other vector. So we need to make sure that for each element i, this
+ // operation is performed:
+ // A[2 * i] * B[2 * i] + A[2 * i + 1] * B[2 * i + 1]
+ SDValue ZExtIn, SExtIn;
+ for (unsigned i = 0; i != NumElems; ++i) {
+ SDValue N00Elt = N00.getOperand(i);
+ SDValue N01Elt = N01.getOperand(i);
+ SDValue N10Elt = N10.getOperand(i);
+ SDValue N11Elt = N11.getOperand(i);
+ // TODO: Be more tolerant to undefs.
+ if (N00Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ N01Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ N10Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ N11Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+ auto *ConstN00Elt = dyn_cast<ConstantSDNode>(N00Elt.getOperand(1));
+ auto *ConstN01Elt = dyn_cast<ConstantSDNode>(N01Elt.getOperand(1));
+ auto *ConstN10Elt = dyn_cast<ConstantSDNode>(N10Elt.getOperand(1));
+ auto *ConstN11Elt = dyn_cast<ConstantSDNode>(N11Elt.getOperand(1));
+ if (!ConstN00Elt || !ConstN01Elt || !ConstN10Elt || !ConstN11Elt)
+ return SDValue();
+ unsigned IdxN00 = ConstN00Elt->getZExtValue();
+ unsigned IdxN01 = ConstN01Elt->getZExtValue();
+ unsigned IdxN10 = ConstN10Elt->getZExtValue();
+ unsigned IdxN11 = ConstN11Elt->getZExtValue();
+ // Add is commutative so indices can be reordered.
+ if (IdxN00 > IdxN10) {
+ std::swap(IdxN00, IdxN10);
+ std::swap(IdxN01, IdxN11);
+ }
+ // N0 indices must be the even element. N1 indices must be the next odd element.
+ if (IdxN00 != 2 * i || IdxN10 != 2 * i + 1 ||
+ IdxN01 != 2 * i || IdxN11 != 2 * i + 1)
+ return SDValue();
+ SDValue N00In = N00Elt.getOperand(0);
+ SDValue N01In = N01Elt.getOperand(0);
+ SDValue N10In = N10Elt.getOperand(0);
+ SDValue N11In = N11Elt.getOperand(0);
+ // The first time we find an input, capture it.
+ if (!ZExtIn) {
+ ZExtIn = N00In;
+ SExtIn = N01In;
+ }
+ if (ZExtIn != N00In || SExtIn != N01In ||
+ ZExtIn != N10In || SExtIn != N11In)
+ return SDValue();
+ }
+
+ auto PMADDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
+ ArrayRef<SDValue> Ops) {
+ // Shrink by adding truncate nodes and let DAGCombine fold with the
+ // sources.
+ EVT InVT = Ops[0].getValueType();
+ assert(InVT.getScalarType() == MVT::i8 &&
+ "Unexpected scalar element type");
+ assert(InVT == Ops[1].getValueType() && "Operands' types mismatch");
+ EVT ResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
+ InVT.getVectorNumElements() / 2);
+ return DAG.getNode(X86ISD::VPMADDUBSW, DL, ResVT, Ops[0], Ops[1]);
+ };
+ return SplitOpsAndApply(DAG, Subtarget, DL, VT, { ZExtIn, SExtIn },
+ PMADDBuilder);
+}
+
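For reference while reading the matcher above, a scalar model of what PMADDUBSW computes per output lane (a sketch, not the intrinsic): unsigned bytes from A times signed bytes from B, adjacent products summed, and the sum signed-saturated to 16 bits.

    #include <cassert>
    #include <cstdint>

    static int16_t sat16(int32_t V) {
      if (V > INT16_MAX) return INT16_MAX;
      if (V < INT16_MIN) return INT16_MIN;
      return (int16_t)V;
    }

    // Hedged scalar reference for the 128-bit form.
    static void pmaddubsw(const uint8_t A[16], const int8_t B[16],
                          int16_t Out[8]) {
      for (int I = 0; I != 8; ++I)
        Out[I] = sat16((int32_t)A[2 * I] * B[2 * I] +
                       (int32_t)A[2 * I + 1] * B[2 * I + 1]);
    }

    int main() {
      uint8_t A[16] = {255, 255};   // remaining lanes zero
      int8_t B[16] = {127, 127};
      int16_t Out[8];
      pmaddubsw(A, B, Out);
      assert(Out[0] == INT16_MAX);  // 255*127 + 255*127 = 64770 saturates
      assert(Out[1] == 0);
      return 0;
    }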
static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
@@ -36752,6 +36923,10 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))
return Avg;
+ // Try to detect PMADD
+ if (SDValue PMAdd = detectPMADDUBSW(Src, VT, DAG, Subtarget, DL))
+ return PMAdd;
+
// Try to combine truncation with signed/unsigned saturation.
if (SDValue Val = combineTruncateWithSat(Src, VT, DL, DAG, Subtarget))
return Val;
@@ -36793,38 +36968,14 @@ static SDValue isFNEG(SDNode *N) {
if (!Op1.getValueType().isFloatingPoint())
return SDValue();
- SDValue Op0 = peekThroughBitcasts(Op.getOperand(0));
-
- unsigned EltBits = Op1.getScalarValueSizeInBits();
- auto isSignMask = [&](const ConstantFP *C) {
- return C->getValueAPF().bitcastToAPInt() == APInt::getSignMask(EltBits);
- };
-
- // There is more than one way to represent the same constant on
- // the different X86 targets. The type of the node may also depend on size.
- // - load scalar value and broadcast
- // - BUILD_VECTOR node
- // - load from a constant pool.
- // We check all variants here.
- if (Op1.getOpcode() == X86ISD::VBROADCAST) {
- if (auto *C = getTargetConstantFromNode(Op1.getOperand(0)))
- if (isSignMask(cast<ConstantFP>(C)))
- return Op0;
-
- } else if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op1)) {
- if (ConstantFPSDNode *CN = BV->getConstantFPSplatNode())
- if (isSignMask(CN->getConstantFPValue()))
- return Op0;
+ // Extract constant bits and see if they are all sign bit masks.
+ APInt UndefElts;
+ SmallVector<APInt, 16> EltBits;
+ if (getTargetConstantBitsFromNode(Op1, Op1.getScalarValueSizeInBits(),
+ UndefElts, EltBits, false, false))
+ if (llvm::all_of(EltBits, [](APInt &I) { return I.isSignMask(); }))
+ return peekThroughBitcasts(Op.getOperand(0));
- } else if (auto *C = getTargetConstantFromNode(Op1)) {
- if (C->getType()->isVectorTy()) {
- if (auto *SplatV = C->getSplatValue())
- if (isSignMask(cast<ConstantFP>(SplatV)))
- return Op0;
- } else if (auto *FPConst = dyn_cast<ConstantFP>(C))
- if (isSignMask(FPConst))
- return Op0;
- }
return SDValue();
}
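The simplified matcher relies on one bit-level fact: XOR with the sign mask (`APInt::getSignMask`, i.e. 0x80000000 for f32) flips only the sign bit, which is exactly FNEG. That is why it now suffices to extract the constant bits and confirm every element is such a mask, instead of enumerating broadcast, build_vector and constant-pool forms. A standalone check for 32-bit floats:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      for (float F : {0.0f, 1.5f, -3.25f}) {
        uint32_t Bits;
        std::memcpy(&Bits, &F, sizeof(F));
        Bits ^= 0x80000000u;        // the f32 sign-bit mask
        float Neg;
        std::memcpy(&Neg, &Bits, sizeof(Neg));
        assert(Neg == -F);          // only the sign changed
      }
      return 0;
    }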
@@ -37777,8 +37928,7 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
// Look through extract_vector_elts. If it comes from an FNEG, create a
// new extract from the FNEG input.
if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- isa<ConstantSDNode>(V.getOperand(1)) &&
- cast<ConstantSDNode>(V.getOperand(1))->getZExtValue() == 0) {
+ isNullConstant(V.getOperand(1))) {
if (SDValue NegVal = isFNEG(V.getOperand(0).getNode())) {
NegVal = DAG.getBitcast(V.getOperand(0).getValueType(), NegVal);
V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V), V.getValueType(),
@@ -38896,7 +39046,7 @@ static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDValue N0, SDValue N1,
std::swap(IdxN00, IdxN10);
std::swap(IdxN01, IdxN11);
}
- // N0 indices be the even elemtn. N1 indices must be the next odd element.
+ // N0 indices must be the even element. N1 indices must be the next odd element.
if (IdxN00 != 2 * i || IdxN10 != 2 * i + 1 ||
IdxN01 != 2 * i || IdxN11 != 2 * i + 1)
return SDValue();
@@ -39322,8 +39472,7 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
if ((IdxVal == OpVT.getVectorNumElements() / 2) &&
Vec.getOpcode() == ISD::INSERT_SUBVECTOR &&
OpVT.getSizeInBits() == SubVecVT.getSizeInBits() * 2) {
- auto *Idx2 = dyn_cast<ConstantSDNode>(Vec.getOperand(2));
- if (Idx2 && Idx2->getZExtValue() == 0) {
+ if (isNullConstant(Vec.getOperand(2))) {
SDValue SubVec2 = Vec.getOperand(1);
// If needed, look through bitcasts to get to the load.
if (auto *FirstLd = dyn_cast<LoadSDNode>(peekThroughBitcasts(SubVec2))) {
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 32215b170a8c..ff5006d208e5 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -1097,10 +1097,11 @@ namespace llvm {
/// Customize the preferred legalization strategy for certain types.
LegalizeTypeAction getPreferredVectorAction(EVT VT) const override;
- MVT getRegisterTypeForCallingConv(LLVMContext &Context,
+ MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
EVT VT) const override;
unsigned getNumRegistersForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
EVT VT) const override;
bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
@@ -1125,8 +1126,8 @@ namespace llvm {
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
unsigned Factor) const override;
- SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
- SDValue Addr, SelectionDAG &DAG)
+ SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
+ SDValue Addr, SelectionDAG &DAG)
const override;
protected:
diff --git a/lib/Target/X86/X86InstrFoldTables.cpp b/lib/Target/X86/X86InstrFoldTables.cpp
index 5d8400595bfa..7d31cfab4137 100644
--- a/lib/Target/X86/X86InstrFoldTables.cpp
+++ b/lib/Target/X86/X86InstrFoldTables.cpp
@@ -1576,7 +1576,7 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::SUBSDrr_Int, X86::SUBSDrm_Int, TB_NO_REVERSE },
{ X86::SUBSSrr, X86::SUBSSrm, 0 },
{ X86::SUBSSrr_Int, X86::SUBSSrm_Int, TB_NO_REVERSE },
- // FIXME: TEST*rr -> swapped operand of TEST *mr.
+ // FIXME: TEST*rr -> swapped operand of TEST *mr.
{ X86::UNPCKHPDrr, X86::UNPCKHPDrm, TB_ALIGN_16 },
{ X86::UNPCKHPSrr, X86::UNPCKHPSrm, TB_ALIGN_16 },
{ X86::UNPCKLPDrr, X86::UNPCKLPDrm, TB_ALIGN_16 },
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 1b61accfb42b..96db8b4e7585 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -7725,7 +7725,7 @@ X86InstrInfo::insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
if (C.CallConstructionID == MachineOutlinerTailCall) {
// Yes, just insert a JMP.
It = MBB.insert(It,
- BuildMI(MF, DebugLoc(), get(X86::JMP_1))
+ BuildMI(MF, DebugLoc(), get(X86::TAILJMPd64))
.addGlobalAddress(M.getNamedValue(MF.getName())));
} else {
// No, insert a call.
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 7509b312c100..bc7afd32d494 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -1750,7 +1750,7 @@ def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>, // AH = flags
// Bit tests instructions: BT, BTS, BTR, BTC.
let Defs = [EFLAGS] in {
-let SchedRW = [WriteALU] in {
+let SchedRW = [WriteBitTest] in {
def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))]>,
@@ -1783,7 +1783,7 @@ let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteALULd] in {
[]>, TB, NotMemoryFoldable;
}
-let SchedRW = [WriteALU] in {
+let SchedRW = [WriteBitTest] in {
def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt GR16:$src1, i16immSExt8:$src2))]>,
@@ -1818,7 +1818,7 @@ def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
} // SchedRW
let hasSideEffects = 0 in {
-let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
def BTC16rr : I<0xBB, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB, NotMemoryFoldable;
@@ -1842,7 +1842,7 @@ def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
NotMemoryFoldable;
}
-let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
def BTC16ri8 : Ii8<0xBA, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
def BTC32ri8 : Ii8<0xBA, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
@@ -1861,7 +1861,7 @@ def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
Requires<[In64BitMode]>;
}
-let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
def BTR16rr : I<0xB3, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB, NotMemoryFoldable;
@@ -1885,7 +1885,7 @@ def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
NotMemoryFoldable;
}
-let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
def BTR16ri8 : Ii8<0xBA, MRM6r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB;
@@ -1908,7 +1908,7 @@ def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
Requires<[In64BitMode]>;
}
-let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
def BTS16rr : I<0xAB, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB, NotMemoryFoldable;
@@ -1932,7 +1932,7 @@ def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
NotMemoryFoldable;
}
-let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
def BTS16ri8 : Ii8<0xBA, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
def BTS32ri8 : Ii8<0xBA, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
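For context on the SchedRW change above: the register forms of BT/BTC/BTR/BTS only read their operands and update CF (plus the destination for the read-modify-write variants), so they move from the generic WriteALU class to a dedicated WriteBitTest class, defined later in this patch in X86Schedule.td. A scalar model (a sketch) of plain BT on a register, where the bit index wraps modulo the operand width:

    #include <cassert>
    #include <cstdint>

    // Hedged sketch of BT32rr: CF = bit (Src2 mod 32) of Src1.
    static bool bt32(uint32_t Src1, uint32_t Src2) {
      return (Src1 >> (Src2 & 31)) & 1;
    }

    int main() {
      assert(bt32(0b1010, 1) == true);
      assert(bt32(0b1010, 2) == false);
      assert(bt32(1, 32) == true);   // index 32 wraps to bit 0
      return 0;
    }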
diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td
index ee3b01159174..023137634df1 100644
--- a/lib/Target/X86/X86InstrShiftRotate.td
+++ b/lib/Target/X86/X86InstrShiftRotate.td
@@ -650,9 +650,9 @@ def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst),
// Double shift instructions (generalizations of rotate)
//===----------------------------------------------------------------------===//
-let Constraints = "$src1 = $dst", SchedRW = [WriteShiftDouble] in {
+let Constraints = "$src1 = $dst" in {
-let Uses = [CL] in {
+let Uses = [CL], SchedRW = [WriteSHDrrcl] in {
def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst),
(ins GR16:$src1, GR16:$src2),
"shld{w}\t{%cl, $src2, $dst|$dst, $src2, cl}",
@@ -683,9 +683,9 @@ def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst),
"shrd{q}\t{%cl, $src2, $dst|$dst, $src2, cl}",
[(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))]>,
TB;
-}
+} // SchedRW
-let isCommutable = 1 in { // These instructions commute to each other.
+let isCommutable = 1, SchedRW = [WriteSHDrri] in { // These instructions commute to each other.
def SHLD16rri8 : Ii8<0xA4, MRMDestReg,
(outs GR16:$dst),
(ins GR16:$src1, GR16:$src2, u8imm:$src3),
@@ -728,11 +728,10 @@ def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
[(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2,
(i8 imm:$src3)))]>,
TB;
-}
-} // Constraints = "$src = $dst", SchedRW
+} // SchedRW
+} // Constraints = "$src1 = $dst"
-let SchedRW = [WriteShiftDoubleLd, WriteRMW] in {
-let Uses = [CL] in {
+let Uses = [CL], SchedRW = [WriteSHDmrcl] in {
def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
"shld{w}\t{%cl, $src2, $dst|$dst, $src2, cl}",
[(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL),
@@ -759,8 +758,9 @@ def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
"shrd{q}\t{%cl, $src2, $dst|$dst, $src2, cl}",
[(store (X86shrd (loadi64 addr:$dst), GR64:$src2, CL),
addr:$dst)]>, TB;
-}
+} // SchedRW
+let SchedRW = [WriteSHDmri] in {
def SHLD16mri8 : Ii8<0xA4, MRMDestMem,
(outs), (ins i16mem:$dst, GR16:$src2, u8imm:$src3),
"shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
diff --git a/lib/Target/X86/X86SchedBroadwell.td b/lib/Target/X86/X86SchedBroadwell.td
index c7713fea70fa..6334d9e89a60 100755
--- a/lib/Target/X86/X86SchedBroadwell.td
+++ b/lib/Target/X86/X86SchedBroadwell.td
@@ -119,8 +119,8 @@ defm : BWWriteResPair<WriteIDiv16, [BWPort0, BWDivider], 25, [1, 10]>;
defm : BWWriteResPair<WriteIDiv32, [BWPort0, BWDivider], 25, [1, 10]>;
defm : BWWriteResPair<WriteIDiv64, [BWPort0, BWDivider], 25, [1, 10]>;
-defm : BWWriteResPair<WriteBSWAP32,[BWPort15], 1>; //
-defm : BWWriteResPair<WriteBSWAP64,[BWPort06, BWPort15], 2, [1, 1], 2>; //
+defm : X86WriteRes<WriteBSWAP32, [BWPort15], 1, [1], 1>;
+defm : X86WriteRes<WriteBSWAP64, [BWPort06, BWPort15], 2, [1, 1], 2>;
defm : BWWriteResPair<WriteCRC32, [BWPort1], 3>;
def : WriteRes<WriteIMulH, []> { let Latency = 3; } // Integer multiplication, high part.
@@ -137,6 +137,7 @@ def : WriteRes<WriteSETCCStore, [BWPort06,BWPort4,BWPort237]> {
let NumMicroOps = 3;
}
def : WriteRes<WriteLAHFSAHF, [BWPort06]>;
+def : WriteRes<WriteBitTest,[BWPort06]>; // Bit Test instrs
// Bit counts.
defm : BWWriteResPair<WriteBSF, [BWPort1], 3>;
@@ -148,8 +149,11 @@ defm : BWWriteResPair<WritePOPCNT, [BWPort1], 3>;
// Integer shifts and rotates.
defm : BWWriteResPair<WriteShift, [BWPort06], 1>;
-// Double shift instructions.
-defm : BWWriteResPair<WriteShiftDouble, [BWPort06], 1>;
+// SHLD/SHRD.
+defm : X86WriteRes<WriteSHDrri, [BWPort1], 3, [1], 1>;
+defm : X86WriteRes<WriteSHDrrcl,[BWPort1,BWPort06,BWPort0156], 6, [1, 1, 2], 4>;
+defm : X86WriteRes<WriteSHDmri, [BWPort1,BWPort23,BWPort237,BWPort0156], 9, [1, 1, 1, 1], 4>;
+defm : X86WriteRes<WriteSHDmrcl,[BWPort1,BWPort23,BWPort237,BWPort06,BWPort0156], 11, [1, 1, 1, 1, 2], 6>;
// BMI1 BEXTR, BMI2 BZHI
defm : BWWriteResPair<WriteBEXTR, [BWPort06,BWPort15], 2, [1,1], 2>;
@@ -600,14 +604,6 @@ def BWWriteResGroup6 : SchedWriteRes<[BWPort06]> {
let ResourceCycles = [1];
}
def: InstRW<[BWWriteResGroup6], (instrs CDQ, CQO)>;
-def: InstRW<[BWWriteResGroup6], (instregex "BT(16|32|64)ri8",
- "BT(16|32|64)rr",
- "BTC(16|32|64)ri8",
- "BTC(16|32|64)rr",
- "BTR(16|32|64)ri8",
- "BTR(16|32|64)rr",
- "BTS(16|32|64)ri8",
- "BTS(16|32|64)rr")>;
def BWWriteResGroup7 : SchedWriteRes<[BWPort15]> {
let Latency = 1;
@@ -746,8 +742,6 @@ def BWWriteResGroup27 : SchedWriteRes<[BWPort1]> {
def: InstRW<[BWWriteResGroup27], (instregex "MMX_CVTPI2PSirr",
"PDEP(32|64)rr",
"PEXT(32|64)rr",
- "SHLD(16|32|64)rri8",
- "SHRD(16|32|64)rri8",
"(V?)CVTDQ2PS(Y?)rr")>;
def BWWriteResGroup27_16 : SchedWriteRes<[BWPort1, BWPort0156]> {
@@ -1055,14 +1049,6 @@ def BWWriteResGroup66 : SchedWriteRes<[BWPort23,BWPort0156]> {
def: InstRW<[BWWriteResGroup66], (instrs POP16r, POP32r, POP64r)>;
def: InstRW<[BWWriteResGroup66], (instregex "POP(16|32|64)rmr")>;
-def BWWriteResGroup67 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> {
- let Latency = 6;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[BWWriteResGroup67], (instregex "SHLD(16|32|64)rrCL",
- "SHRD(16|32|64)rrCL")>;
-
def BWWriteResGroup68 : SchedWriteRes<[BWPort1,BWPort6,BWPort06,BWPort0156]> {
let Latency = 6;
let NumMicroOps = 4;
@@ -1307,14 +1293,6 @@ def BWWriteResGroup108 : SchedWriteRes<[BWPort5,BWPort23,BWPort015]> {
def: InstRW<[BWWriteResGroup108], (instregex "VPBROADCASTB(Y?)rm",
"VPBROADCASTW(Y?)rm")>;
-def BWWriteResGroup111 : SchedWriteRes<[BWPort1,BWPort23,BWPort237,BWPort0156]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[BWWriteResGroup111], (instregex "SHLD(16|32|64)mri8",
- "SHRD(16|32|64)mri8")>;
-
def BWWriteResGroup112 : SchedWriteRes<[BWPort23,BWPort06,BWPort0156]> {
let Latency = 9;
let NumMicroOps = 5;
@@ -1380,14 +1358,6 @@ def BWWriteResGroup128 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> {
}
def: InstRW<[BWWriteResGroup128], (instregex "VCVTDQ2PDYrm")>;
-def BWWriteResGroup130 : SchedWriteRes<[BWPort1,BWPort23,BWPort237,BWPort06,BWPort0156]> {
- let Latency = 11;
- let NumMicroOps = 6;
- let ResourceCycles = [1,1,1,1,2];
-}
-def: InstRW<[BWWriteResGroup130], (instregex "SHLD(16|32|64)mrCL",
- "SHRD(16|32|64)mrCL")>;
-
def BWWriteResGroup131 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> {
let Latency = 11;
let NumMicroOps = 7;
diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td
index 189dd4183839..876c3e4162cf 100644
--- a/lib/Target/X86/X86SchedHaswell.td
+++ b/lib/Target/X86/X86SchedHaswell.td
@@ -118,17 +118,26 @@ defm : X86WriteRes<WriteLoad, [HWPort23], 5, [1], 1>;
defm : X86WriteRes<WriteMove, [HWPort0156], 1, [1], 1>;
def : WriteRes<WriteZero, []>;
+// Arithmetic.
defm : HWWriteResPair<WriteALU, [HWPort0156], 1>;
-defm : HWWriteResPair<WriteADC, [HWPort06,HWPort0156], 2, [1,1], 2>;
+defm : HWWriteResPair<WriteADC, [HWPort06, HWPort0156], 2, [1,1], 2>;
defm : HWWriteResPair<WriteIMul, [HWPort1], 3>;
defm : HWWriteResPair<WriteIMul64, [HWPort1], 3>;
-defm : HWWriteResPair<WriteBSWAP32,[HWPort15], 1>;
-defm : HWWriteResPair<WriteBSWAP64,[HWPort06, HWPort15], 2, [1,1], 2>;
+defm : X86WriteRes<WriteBSWAP32, [HWPort15], 1, [1], 1>;
+defm : X86WriteRes<WriteBSWAP64, [HWPort06, HWPort15], 2, [1,1], 2>;
def : WriteRes<WriteIMulH, []> { let Latency = 3; }
+
+// Integer shifts and rotates.
defm : HWWriteResPair<WriteShift, [HWPort06], 1>;
-defm : HWWriteResPair<WriteShiftDouble, [HWPort06], 1>;
+
+// SHLD/SHRD.
+defm : X86WriteRes<WriteSHDrri, [HWPort1], 3, [1], 1>;
+defm : X86WriteRes<WriteSHDrrcl,[HWPort1, HWPort06, HWPort0156], 6, [1, 1, 2], 4>;
+defm : X86WriteRes<WriteSHDmri, [HWPort1, HWPort23, HWPort237, HWPort0156], 10, [1, 1, 1, 1], 4>;
+defm : X86WriteRes<WriteSHDmrcl,[HWPort1, HWPort23, HWPort237, HWPort06, HWPort0156], 12, [1, 1, 1, 1, 2], 6>;
+
defm : HWWriteResPair<WriteJump, [HWPort06], 1>;
defm : HWWriteResPair<WriteCRC32, [HWPort1], 3>;
@@ -141,6 +150,7 @@ def : WriteRes<WriteSETCCStore, [HWPort06,HWPort4,HWPort237]> {
let NumMicroOps = 3;
}
def : WriteRes<WriteLAHFSAHF, [HWPort06]>;
+def : WriteRes<WriteBitTest,[HWPort06]>;
// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
@@ -886,14 +896,6 @@ def HWWriteResGroup7 : SchedWriteRes<[HWPort06]> {
let ResourceCycles = [1];
}
def: InstRW<[HWWriteResGroup7], (instrs CDQ, CQO)>;
-def: InstRW<[HWWriteResGroup7], (instregex "BT(16|32|64)ri8",
- "BT(16|32|64)rr",
- "BTC(16|32|64)ri8",
- "BTC(16|32|64)rr",
- "BTR(16|32|64)ri8",
- "BTR(16|32|64)rr",
- "BTS(16|32|64)ri8",
- "BTS(16|32|64)rr")>;
def HWWriteResGroup8 : SchedWriteRes<[HWPort15]> {
let Latency = 1;
@@ -1240,8 +1242,6 @@ def HWWriteResGroup50 : SchedWriteRes<[HWPort1]> {
def: InstRW<[HWWriteResGroup50], (instregex "MMX_CVTPI2PSirr",
"PDEP(32|64)rr",
"PEXT(32|64)rr",
- "SHLD(16|32|64)rri8",
- "SHRD(16|32|64)rri8",
"(V?)CVTDQ2PS(Y?)rr")>;
def HWWriteResGroup50_16i : SchedWriteRes<[HWPort1, HWPort0156]> {
@@ -1513,14 +1513,6 @@ def HWWriteResGroup83 : SchedWriteRes<[HWPort1,HWPort6,HWPort0156]> {
}
def: InstRW<[HWWriteResGroup83], (instregex "LAR(16|32|64)rr")>;
-def HWWriteResGroup86 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort0156]> {
- let Latency = 10;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[HWWriteResGroup86], (instregex "SHLD(16|32|64)mri8",
- "SHRD(16|32|64)mri8")>;
-
def HWWriteResGroup87 : SchedWriteRes<[HWPort1,HWPort6,HWPort23,HWPort0156]> {
let Latency = 9;
let NumMicroOps = 5;
@@ -1638,14 +1630,6 @@ def HWWriteResGroup104 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
}
def: InstRW<[HWWriteResGroup104], (instregex "VCVTDQ2PDYrm")>;
-def HWWriteResGroup105 : SchedWriteRes<[HWPort1,HWPort06,HWPort0156]> {
- let Latency = 6;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[HWWriteResGroup105], (instregex "SHLD(16|32|64)rrCL",
- "SHRD(16|32|64)rrCL")>;
-
def HWWriteResGroup107 : SchedWriteRes<[HWPort1,HWPort6,HWPort06,HWPort0156]> {
let Latency = 6;
let NumMicroOps = 4;
@@ -1660,14 +1644,6 @@ def HWWriteResGroup108 : SchedWriteRes<[HWPort6,HWPort0156]> {
}
def: InstRW<[HWWriteResGroup108], (instrs STD)>;
-def HWWriteResGroup109 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort06,HWPort0156]> {
- let Latency = 12;
- let NumMicroOps = 6;
- let ResourceCycles = [1,1,1,1,2];
-}
-def: InstRW<[HWWriteResGroup109], (instregex "SHLD(16|32|64)mrCL",
- "SHRD(16|32|64)mrCL")>;
-
def HWWriteResGroup114 : SchedWriteRes<[HWPort6,HWPort06,HWPort15,HWPort0156]> {
let Latency = 7;
let NumMicroOps = 7;
diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td
index 3b543c680ef4..6b7bbdea860a 100644
--- a/lib/Target/X86/X86SchedSandyBridge.td
+++ b/lib/Target/X86/X86SchedSandyBridge.td
@@ -106,13 +106,14 @@ def : WriteRes<WriteLoad, [SBPort23]> { let Latency = 5; }
def : WriteRes<WriteMove, [SBPort015]>;
def : WriteRes<WriteZero, []>;
+// Arithmetic.
defm : SBWriteResPair<WriteALU, [SBPort015], 1>;
defm : SBWriteResPair<WriteADC, [SBPort05,SBPort015], 2, [1,1], 2>;
defm : SBWriteResPair<WriteIMul, [SBPort1], 3>;
defm : SBWriteResPair<WriteIMul64, [SBPort1], 3>;
-defm : SBWriteResPair<WriteBSWAP32,[SBPort1], 1>;
-defm : SBWriteResPair<WriteBSWAP64,[SBPort1,SBPort05], 2, [1,1], 2>;
+defm : X86WriteRes<WriteBSWAP32, [SBPort1], 1, [1], 1>;
+defm : X86WriteRes<WriteBSWAP64, [SBPort1,SBPort05], 2, [1,1], 2>;
defm : SBWriteResPair<WriteDiv8, [SBPort0, SBDivider], 25, [1, 10]>;
defm : SBWriteResPair<WriteDiv16, [SBPort0, SBDivider], 25, [1, 10]>;
@@ -125,8 +126,13 @@ defm : SBWriteResPair<WriteIDiv64, [SBPort0, SBDivider], 25, [1, 10]>;
def : WriteRes<WriteIMulH, []> { let Latency = 3; }
+// SHLD/SHRD.
+defm : X86WriteRes<WriteSHDrri, [SBPort05, SBPort015], 2, [1, 1], 2>;
+defm : X86WriteRes<WriteSHDrrcl,[SBPort05, SBPort015], 4, [3, 1], 4>;
+defm : X86WriteRes<WriteSHDmri, [SBPort4,SBPort23,SBPort05,SBPort015], 8, [1, 2, 1, 1], 5>;
+defm : X86WriteRes<WriteSHDmrcl,[SBPort4,SBPort23,SBPort05,SBPort015], 10, [1, 2, 3, 1], 7>;
+
defm : SBWriteResPair<WriteShift, [SBPort05], 1>;
-defm : SBWriteResPair<WriteShiftDouble, [SBPort05], 1>;
defm : SBWriteResPair<WriteJump, [SBPort5], 1>;
defm : SBWriteResPair<WriteCRC32, [SBPort1], 3, [1], 1, 5>;
@@ -139,6 +145,7 @@ def : WriteRes<WriteSETCCStore, [SBPort05,SBPort4,SBPort23]> {
let NumMicroOps = 3;
}
def : WriteRes<WriteLAHFSAHF, [SBPort05]>;
+def : WriteRes<WriteBitTest,[SBPort05]>;
// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
@@ -564,14 +571,6 @@ def SBWriteResGroup4 : SchedWriteRes<[SBPort05]> {
let ResourceCycles = [1];
}
def: InstRW<[SBWriteResGroup4], (instrs CDQ, CQO)>;
-def: InstRW<[SBWriteResGroup4], (instregex "BT(16|32|64)ri8",
- "BT(16|32|64)rr",
- "BTC(16|32|64)ri8",
- "BTC(16|32|64)rr",
- "BTR(16|32|64)ri8",
- "BTR(16|32|64)rr",
- "BTS(16|32|64)ri8",
- "BTS(16|32|64)rr")>;
def SBWriteResGroup5 : SchedWriteRes<[SBPort15]> {
let Latency = 1;
@@ -630,14 +629,6 @@ def SBWriteResGroup18 : SchedWriteRes<[SBPort5,SBPort015]> {
def: InstRW<[SBWriteResGroup18], (instrs JCXZ, JECXZ, JRCXZ)>;
def: InstRW<[SBWriteResGroup18], (instregex "MMX_MOVDQ2Qrr")>;
-def SBWriteResGroup19 : SchedWriteRes<[SBPort05,SBPort015]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup19], (instregex "SHLD(16|32|64)rri8",
- "SHRD(16|32|64)rri8")>;
-
def SBWriteResGroup21 : SchedWriteRes<[SBPort1]> {
let Latency = 3;
let NumMicroOps = 1;
@@ -728,14 +719,6 @@ def SBWriteResGroup29_2 : SchedWriteRes<[SBPort5,SBPort015]> {
}
def: InstRW<[SBWriteResGroup29_2], (instrs PAUSE)>;
-def SBWriteResGroup29_3 : SchedWriteRes<[SBPort05,SBPort015]> {
- let Latency = 4;
- let NumMicroOps = 4;
- let ResourceCycles = [3,1];
-}
-def: InstRW<[SBWriteResGroup29_3], (instregex "SHLD(16|32|64)rrCL",
- "SHRD(16|32|64)rrCL")>;
-
def SBWriteResGroup30 : SchedWriteRes<[SBPort0]> {
let Latency = 5;
let NumMicroOps = 1;
@@ -1027,14 +1010,6 @@ def SBWriteResGroup87 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> {
}
def: InstRW<[SBWriteResGroup87], (instrs FARCALL64)>;
-def SBWriteResGroup88 : SchedWriteRes<[SBPort4,SBPort23,SBPort05,SBPort015]> {
- let Latency = 8;
- let NumMicroOps = 5;
- let ResourceCycles = [1,2,1,1];
-}
-def: InstRW<[SBWriteResGroup88], (instregex "SHLD(16|32|64)mri8",
- "SHRD(16|32|64)mri8")>;
-
def SBWriteResGroup93 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
let Latency = 9;
let NumMicroOps = 3;
@@ -1130,14 +1105,6 @@ def SBWriteResGroup101 : SchedWriteRes<[SBPort1,SBPort23]> {
def: InstRW<[SBWriteResGroup101], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
"ILD_F(16|32|64)m")>;
-def SBWriteResGroup103_2 : SchedWriteRes<[SBPort4,SBPort23,SBPort05,SBPort015]> {
- let Latency = 10;
- let NumMicroOps = 7;
- let ResourceCycles = [1,2,3,1];
-}
-def: InstRW<[SBWriteResGroup103_2], (instregex "SHLD(16|32|64)mrCL",
- "SHRD(16|32|64)mrCL")>;
-
def SBWriteResGroup104 : SchedWriteRes<[SBPort0,SBPort23]> {
let Latency = 11;
let NumMicroOps = 2;
diff --git a/lib/Target/X86/X86SchedSkylakeClient.td b/lib/Target/X86/X86SchedSkylakeClient.td
index 1417799d76be..bda088e1512f 100644
--- a/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/lib/Target/X86/X86SchedSkylakeClient.td
@@ -110,8 +110,8 @@ defm : SKLWriteResPair<WriteADC, [SKLPort06], 1>; // Integer ALU + flags op
defm : SKLWriteResPair<WriteIMul, [SKLPort1], 3>; // Integer multiplication.
defm : SKLWriteResPair<WriteIMul64, [SKLPort1], 3>; // Integer 64-bit multiplication.
-defm : SKLWriteResPair<WriteBSWAP32,[SKLPort15], 1>; //
-defm : SKLWriteResPair<WriteBSWAP64,[SKLPort06, SKLPort15], 2, [1,1], 2>; //
+defm : X86WriteRes<WriteBSWAP32, [SKLPort15], 1, [1], 1>;
+defm : X86WriteRes<WriteBSWAP64, [SKLPort06, SKLPort15], 2, [1,1], 2>;
defm : SKLWriteResPair<WriteDiv8, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
defm : SKLWriteResPair<WriteDiv16, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
@@ -136,6 +136,7 @@ def : WriteRes<WriteSETCCStore, [SKLPort06,SKLPort4,SKLPort237]> {
let NumMicroOps = 3;
}
def : WriteRes<WriteLAHFSAHF, [SKLPort06]>;
+def : WriteRes<WriteBitTest,[SKLPort06]>;
// Bit counts.
defm : SKLWriteResPair<WriteBSF, [SKLPort1], 3>;
@@ -147,8 +148,11 @@ defm : SKLWriteResPair<WritePOPCNT, [SKLPort1], 3>;
// Integer shifts and rotates.
defm : SKLWriteResPair<WriteShift, [SKLPort06], 1>;
-// Double shift instructions.
-defm : SKLWriteResPair<WriteShiftDouble, [SKLPort06], 1>;
+// SHLD/SHRD.
+defm : X86WriteRes<WriteSHDrri, [SKLPort1], 3, [1], 1>;
+defm : X86WriteRes<WriteSHDrrcl,[SKLPort1,SKLPort06,SKLPort0156], 6, [1, 2, 1], 4>;
+defm : X86WriteRes<WriteSHDmri, [SKLPort1,SKLPort23,SKLPort237,SKLPort0156], 9, [1, 1, 1, 1], 4>;
+defm : X86WriteRes<WriteSHDmrcl,[SKLPort1,SKLPort23,SKLPort237,SKLPort06,SKLPort0156], 11, [1, 1, 1, 2, 1], 6>;
// BMI1 BEXTR, BMI2 BZHI
defm : SKLWriteResPair<WriteBEXTR, [SKLPort06,SKLPort15], 2, [1,1], 2>;
@@ -602,14 +606,6 @@ def SKLWriteResGroup7 : SchedWriteRes<[SKLPort06]> {
let ResourceCycles = [1];
}
def: InstRW<[SKLWriteResGroup7], (instrs CDQ, CQO, CLAC, STAC)>;
-def: InstRW<[SKLWriteResGroup7], (instregex "BT(16|32|64)ri8",
- "BT(16|32|64)rr",
- "BTC(16|32|64)ri8",
- "BTC(16|32|64)rr",
- "BTR(16|32|64)ri8",
- "BTR(16|32|64)rr",
- "BTS(16|32|64)ri8",
- "BTS(16|32|64)rr")>;
def SKLWriteResGroup8 : SchedWriteRes<[SKLPort15]> {
let Latency = 1;
@@ -743,9 +739,7 @@ def SKLWriteResGroup29 : SchedWriteRes<[SKLPort1]> {
let ResourceCycles = [1];
}
def: InstRW<[SKLWriteResGroup29], (instregex "PDEP(32|64)rr",
- "PEXT(32|64)rr",
- "SHLD(16|32|64)rri8",
- "SHRD(16|32|64)rri8")>;
+ "PEXT(32|64)rr")>;
def SKLWriteResGroup29_16i : SchedWriteRes<[SKLPort1, SKLPort0156]> {
let Latency = 4;
@@ -1096,14 +1090,6 @@ def SKLWriteResGroup78 : SchedWriteRes<[SKLPort5,SKLPort01]> {
}
def: InstRW<[SKLWriteResGroup78], (instregex "(V?)CVTSI642SSrr")>;
-def SKLWriteResGroup79 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> {
- let Latency = 6;
- let NumMicroOps = 4;
- let ResourceCycles = [1,2,1];
-}
-def: InstRW<[SKLWriteResGroup79], (instregex "SHLD(16|32|64)rrCL",
- "SHRD(16|32|64)rrCL")>;
-
def SKLWriteResGroup80 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort06,SKLPort0156]> {
let Latency = 6;
let NumMicroOps = 4;
@@ -1392,14 +1378,6 @@ def SKLWriteResGroup128 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> {
def: InstRW<[SKLWriteResGroup128], (instregex "(V?)PHADDSWrm",
"(V?)PHSUBSWrm")>;
-def SKLWriteResGroup130 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort0156]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SKLWriteResGroup130], (instregex "SHLD(16|32|64)mri8",
- "SHRD(16|32|64)mri8")>;
-
def SKLWriteResGroup131 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort23,SKLPort0156]> {
let Latency = 9;
let NumMicroOps = 5;
@@ -1519,14 +1497,6 @@ def: InstRW<[SKLWriteResGroup152], (instregex "CVTPD2PSrm",
"CVT(T?)PD2DQrm",
"MMX_CVT(T?)PD2PIirm")>;
-def SKLWriteResGroup153 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> {
- let Latency = 11;
- let NumMicroOps = 6;
- let ResourceCycles = [1,1,1,2,1];
-}
-def: InstRW<[SKLWriteResGroup153], (instregex "SHLD(16|32|64)mrCL",
- "SHRD(16|32|64)mrCL")>;
-
def SKLWriteResGroup154 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> {
let Latency = 11;
let NumMicroOps = 7;
diff --git a/lib/Target/X86/X86SchedSkylakeServer.td b/lib/Target/X86/X86SchedSkylakeServer.td
index 7095ec081bd9..9d5f8555c505 100755
--- a/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/lib/Target/X86/X86SchedSkylakeServer.td
@@ -110,8 +110,8 @@ defm : SKXWriteResPair<WriteADC, [SKXPort06], 1>; // Integer ALU + flags op
defm : SKXWriteResPair<WriteIMul, [SKXPort1], 3>; // Integer multiplication.
defm : SKXWriteResPair<WriteIMul64, [SKXPort1], 3>; // Integer 64-bit multiplication.
-defm : SKXWriteResPair<WriteBSWAP32,[SKXPort15], 1>; //
-defm : SKXWriteResPair<WriteBSWAP64,[SKXPort06, SKXPort15], 2, [1,1], 2>; //
+defm : X86WriteRes<WriteBSWAP32, [SKXPort15], 1, [1], 1>;
+defm : X86WriteRes<WriteBSWAP64, [SKXPort06, SKXPort15], 2, [1,1], 2>;
defm : SKXWriteResPair<WriteDiv8, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
defm : SKXWriteResPair<WriteDiv16, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
@@ -136,12 +136,16 @@ def : WriteRes<WriteSETCCStore, [SKXPort06,SKXPort4,SKXPort237]> {
let NumMicroOps = 3;
}
def : WriteRes<WriteLAHFSAHF, [SKXPort06]>;
+def : WriteRes<WriteBitTest,[SKXPort06]>;
// Integer shifts and rotates.
defm : SKXWriteResPair<WriteShift, [SKXPort06], 1>;
-// Double shift instructions.
-defm : SKXWriteResPair<WriteShiftDouble, [SKXPort06], 1>;
+// SHLD/SHRD.
+defm : X86WriteRes<WriteSHDrri, [SKXPort1], 3, [1], 1>;
+defm : X86WriteRes<WriteSHDrrcl,[SKXPort1,SKXPort06,SKXPort0156], 6, [1, 2, 1], 4>;
+defm : X86WriteRes<WriteSHDmri, [SKXPort1,SKXPort23,SKXPort237,SKXPort0156], 9, [1, 1, 1, 1], 4>;
+defm : X86WriteRes<WriteSHDmrcl,[SKXPort1,SKXPort23,SKXPort237,SKXPort06,SKXPort0156], 11, [1, 1, 1, 2, 1], 6>;
// Bit counts.
defm : SKXWriteResPair<WriteBSF, [SKXPort1], 3>;
@@ -615,14 +619,6 @@ def SKXWriteResGroup7 : SchedWriteRes<[SKXPort06]> {
let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup7], (instrs CDQ, CQO, CLAC, STAC)>;
-def: InstRW<[SKXWriteResGroup7], (instregex "BT(16|32|64)ri8",
- "BT(16|32|64)rr",
- "BTC(16|32|64)ri8",
- "BTC(16|32|64)rr",
- "BTR(16|32|64)ri8",
- "BTR(16|32|64)rr",
- "BTS(16|32|64)ri8",
- "BTS(16|32|64)rr")>;
def SKXWriteResGroup8 : SchedWriteRes<[SKXPort15]> {
let Latency = 1;
@@ -783,9 +779,7 @@ def SKXWriteResGroup31 : SchedWriteRes<[SKXPort1]> {
let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup31], (instregex "PDEP(32|64)rr",
- "PEXT(32|64)rr",
- "SHLD(16|32|64)rri8",
- "SHRD(16|32|64)rri8")>;
+ "PEXT(32|64)rr")>;
def SKXWriteResGroup31_16i : SchedWriteRes<[SKXPort1, SKXPort0156]> {
let Latency = 4;
@@ -1270,14 +1264,6 @@ def: InstRW<[SKXWriteResGroup82], (instregex "(V?)CVTSI642SSrr",
"VCVTSI642SSZrr",
"VCVTUSI642SSZrr")>;
-def SKXWriteResGroup83 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> {
- let Latency = 6;
- let NumMicroOps = 4;
- let ResourceCycles = [1,2,1];
-}
-def: InstRW<[SKXWriteResGroup83], (instregex "SHLD(16|32|64)rrCL",
- "SHRD(16|32|64)rrCL")>;
-
def SKXWriteResGroup84 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort06,SKXPort0156]> {
let Latency = 6;
let NumMicroOps = 4;
@@ -1830,14 +1816,6 @@ def SKXWriteResGroup143 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23]> {
def: InstRW<[SKXWriteResGroup143], (instregex "(V?)PHADDSWrm",
"(V?)PHSUBSWrm")>;
-def SKXWriteResGroup145 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort0156]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SKXWriteResGroup145], (instregex "SHLD(16|32|64)mri8",
- "SHRD(16|32|64)mri8")>;
-
def SKXWriteResGroup146 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort23,SKXPort0156]> {
let Latency = 9;
let NumMicroOps = 5;
@@ -2033,14 +2011,6 @@ def SKXWriteResGroup167 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
}
def: InstRW<[SKXWriteResGroup167], (instregex "VPCONFLICTQZ128rm(b?)")>;
-def SKXWriteResGroup168 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
- let Latency = 11;
- let NumMicroOps = 6;
- let ResourceCycles = [1,1,1,2,1];
-}
-def: InstRW<[SKXWriteResGroup168], (instregex "SHLD(16|32|64)mrCL",
- "SHRD(16|32|64)mrCL")>;
-
def SKXWriteResGroup169 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> {
let Latency = 11;
let NumMicroOps = 7;
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
index d0167753ccd4..ef9ce94706df 100644
--- a/lib/Target/X86/X86Schedule.td
+++ b/lib/Target/X86/X86Schedule.td
@@ -118,8 +118,8 @@ defm WriteIMul64 : X86SchedWritePair; // Integer 64-bit multiplication.
def WriteIMulH : SchedWrite; // Integer multiplication, high part.
def WriteLEA : SchedWrite; // LEA instructions can't fold loads.
-defm WriteBSWAP32: X86SchedWritePair; // Byte Order (Endiannes) Swap
-defm WriteBSWAP64: X86SchedWritePair; // Byte Order (Endiannes) Swap
+def WriteBSWAP32 : SchedWrite; // Byte Order (Endianness) 32-bit Swap.
+def WriteBSWAP64 : SchedWrite; // Byte Order (Endianness) 64-bit Swap.
// Integer division.
defm WriteDiv8 : X86SchedWritePair;
@@ -142,11 +142,15 @@ def WriteFCMOV : SchedWrite; // X87 conditional move.
def WriteSETCC : SchedWrite; // Set register based on condition code.
def WriteSETCCStore : SchedWrite;
def WriteLAHFSAHF : SchedWrite; // Load/Store flags in AH.
+def WriteBitTest : SchedWrite; // Bit Test - TODO add memory folding support
// Integer shifts and rotates.
defm WriteShift : X86SchedWritePair;
// Double shift instructions.
-defm WriteShiftDouble : X86SchedWritePair;
+def WriteSHDrri : SchedWrite;
+def WriteSHDrrcl : SchedWrite;
+def WriteSHDmri : SchedWrite;
+def WriteSHDmrcl : SchedWrite;
// BMI1 BEXTR, BMI2 BZHI
defm WriteBEXTR : X86SchedWritePair;
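
Editorial note: the reason WriteShiftDouble is replaced by four separate writes is visible in the numbers. Unlike plain shifts, the register-immediate, register-CL, memory-immediate, and memory-CL forms of SHLD/SHRD have sharply different costs on each CPU, so a single WriteResPair cannot describe them. WriteBSWAP32/64 move the other way, from a pair to a plain SchedWrite, since BSWAP has register forms only and thus no folded-load variant to pair with; WriteBitTest similarly replaces the per-instruction BT/BTC/BTR/BTS regex entries removed above. As a cross-reference, here is a standalone C++ sketch (names hypothetical; the values are copied from the hunks in this patch: Skylake Server above; Atom, BtVer2, SLM, and Znver1 below) tabulating the latency/uop pairs assigned to the four new classes:

    // A minimal sketch, not part of the patch: the latency/uop pairs this
    // diff assigns to the four new SHLD/SHRD scheduling classes per CPU.
    // "Unsupported" (Znver1's non-rri forms) is encoded as {-1, -1}.
    #include <cstdio>

    struct SHDCosts { int Lat, Uops; };
    struct CpuSHD { const char *Cpu; SHDCosts Rri, Rrcl, Mri, Mrcl; };

    static const CpuSHD Table[] = {
        {"SkylakeServer", {3, 1}, {6, 4},   {9, 4},   {11, 6}},
        {"Atom",          {2, 1}, {2, 1},   {4, 1},   {4, 1}},
        {"BtVer2",        {3, 6}, {4, 7},   {9, 8},   {9, 8}},
        {"SLM",           {1, 1}, {1, 1},   {4, 2},   {4, 2}},
        {"Znver1",        {1, 1}, {-1, -1}, {-1, -1}, {-1, -1}},
    };

    int main() {
      std::puts("CPU            rri    rrcl   mri    mrcl   (latency/uops)");
      for (const CpuSHD &C : Table)
        std::printf("%-14s %d/%d    %d/%d    %d/%d    %d/%d\n", C.Cpu,
                    C.Rri.Lat, C.Rri.Uops, C.Rrcl.Lat, C.Rrcl.Uops,
                    C.Mri.Lat, C.Mri.Uops, C.Mrcl.Lat, C.Mrcl.Uops);
    }
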
diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td
index d1e902e6c43f..a7f461c456bd 100644
--- a/lib/Target/X86/X86ScheduleAtom.td
+++ b/lib/Target/X86/X86ScheduleAtom.td
@@ -81,8 +81,8 @@ defm : AtomWriteResPair<WriteADC, [AtomPort01], [AtomPort0]>;
defm : AtomWriteResPair<WriteIMul, [AtomPort01], [AtomPort01], 7, 7, [7], [7]>;
defm : AtomWriteResPair<WriteIMul64, [AtomPort01], [AtomPort01], 12, 12, [12], [12]>;
-defm : AtomWriteResPair<WriteBSWAP32, [AtomPort0], [AtomPort0]>;
-defm : AtomWriteResPair<WriteBSWAP64, [AtomPort0], [AtomPort0]>;
+defm : X86WriteRes<WriteBSWAP32, [AtomPort0], 1, [1], 1>;
+defm : X86WriteRes<WriteBSWAP64, [AtomPort0], 1, [1], 1>;
defm : AtomWriteResPair<WriteDiv8, [AtomPort01], [AtomPort01], 50, 68, [50], [68]>;
defm : AtomWriteResPair<WriteDiv16, [AtomPort01], [AtomPort01], 50, 50, [50], [50]>;
@@ -108,6 +108,7 @@ def : WriteRes<WriteLAHFSAHF, [AtomPort01]> {
let Latency = 2;
let ResourceCycles = [2];
}
+def : WriteRes<WriteBitTest,[AtomPort01]>;
defm : X86WriteResUnsupported<WriteIMulH>;
@@ -150,11 +151,10 @@ defm : X86WriteResPairUnsupported<WriteBZHI>;
defm : AtomWriteResPair<WriteShift, [AtomPort0], [AtomPort0]>;
-////////////////////////////////////////////////////////////////////////////////
-// Double shift instructions.
-////////////////////////////////////////////////////////////////////////////////
-
-defm : AtomWriteResPair<WriteShiftDouble, [AtomPort0], [AtomPort0]>;
+defm : X86WriteRes<WriteSHDrri, [AtomPort01], 2, [2], 1>;
+defm : X86WriteRes<WriteSHDrrcl,[AtomPort01], 2, [2], 1>;
+defm : X86WriteRes<WriteSHDmri, [AtomPort01], 4, [4], 1>;
+defm : X86WriteRes<WriteSHDmrcl,[AtomPort01], 4, [4], 1>;
////////////////////////////////////////////////////////////////////////////////
// Loads, stores, and moves, not folded with other operations.
@@ -562,9 +562,7 @@ def AtomWrite01_2 : SchedWriteRes<[AtomPort01]> {
def : InstRW<[AtomWrite01_2], (instrs LEAVE, LEAVE64, POP16r,
PUSH16rmm, PUSH32rmm, PUSH64rmm,
LODSB, LODSL, LODSQ, LODSW,
- SCASB, SCASL, SCASQ, SCASW,
- SHLD32rrCL, SHRD32rrCL,
- SHLD32rri8, SHRD32rri8)>;
+ SCASB, SCASL, SCASQ, SCASW)>;
def : InstRW<[AtomWrite01_2], (instregex "BT(C|R|S)(16|32|64)mi8",
"PUSH(CS|DS|ES|FS|GS|SS)(16|32|64)",
"XADD(8|16|32|64)rr",
@@ -598,8 +596,6 @@ def AtomWrite01_4 : SchedWriteRes<[AtomPort01]> {
}
def : InstRW<[AtomWrite01_4], (instrs CBW, CWD, CWDE, CDQ, CDQE, CQO,
JCXZ, JECXZ, JRCXZ,
- SHLD32mrCL, SHRD32mrCL,
- SHLD32mri8, SHRD32mri8,
LD_F80m)>;
def : InstRW<[AtomWrite01_4], (instregex "PH(ADD|SUB)Drm",
"(MMX_)?PEXTRWrr(_REV)?")>;
diff --git a/lib/Target/X86/X86ScheduleBtVer2.td b/lib/Target/X86/X86ScheduleBtVer2.td
index d78c343ebd5c..719e71cd25e5 100644
--- a/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/lib/Target/X86/X86ScheduleBtVer2.td
@@ -168,8 +168,8 @@ defm : JWriteResIntPair<WriteIMul, [JALU1, JMul], 3, [1, 1], 2>; // i8/i16/i32
defm : JWriteResIntPair<WriteIMul64, [JALU1, JMul], 6, [1, 4], 2>; // i64 multiplication
defm : X86WriteRes<WriteIMulH, [JALU1], 6, [4], 1>;
-defm : JWriteResIntPair<WriteBSWAP32,[JALU01], 1>;
-defm : JWriteResIntPair<WriteBSWAP64,[JALU01], 1>;
+defm : X86WriteRes<WriteBSWAP32, [JALU01], 1, [1], 1>;
+defm : X86WriteRes<WriteBSWAP64, [JALU01], 1, [1], 1>;
defm : JWriteResIntPair<WriteDiv8, [JALU1, JDiv], 12, [1, 12], 1>;
defm : JWriteResIntPair<WriteDiv16, [JALU1, JDiv], 17, [1, 17], 2>;
@@ -188,6 +188,7 @@ defm : X86WriteRes<WriteFCMOV, [JFPU0, JFPA], 3, [1,1], 1>; // x87 conditional m
def : WriteRes<WriteSETCC, [JALU01]>; // Setcc.
def : WriteRes<WriteSETCCStore, [JALU01,JSAGU]>;
def : WriteRes<WriteLAHFSAHF, [JALU01]>;
+def : WriteRes<WriteBitTest,[JALU01]>;
// This is for simple LEAs with one or two input operands.
def : WriteRes<WriteLEA, [JALU01]>;
@@ -209,33 +210,11 @@ defm : X86WriteResPairUnsupported<WriteBZHI>;
defm : JWriteResIntPair<WriteShift, [JALU01], 1>;
-defm : JWriteResIntPair<WriteShiftDouble, [JALU01], 1>;
-
-def JWriteSHLDrri : SchedWriteRes<[JALU01]> {
- let Latency = 3;
- let ResourceCycles = [6];
- let NumMicroOps = 6;
-}
-def: InstRW<[JWriteSHLDrri], (instrs SHLD16rri8, SHLD32rri8, SHLD64rri8,
- SHRD16rri8, SHRD32rri8, SHRD64rri8)>;
-
-def JWriteSHLDrrCL : SchedWriteRes<[JALU01]> {
- let Latency = 4;
- let ResourceCycles = [8];
- let NumMicroOps = 7;
-}
-def: InstRW<[JWriteSHLDrrCL], (instrs SHLD16rrCL, SHLD32rrCL, SHLD64rrCL,
- SHRD16rrCL, SHRD32rrCL, SHRD64rrCL)>;
-
-def JWriteSHLDm : SchedWriteRes<[JLAGU, JALU01]> {
- let Latency = 9;
- let ResourceCycles = [1, 22];
- let NumMicroOps = 8;
-}
-def: InstRW<[JWriteSHLDm],(instrs SHLD16mri8, SHLD32mri8, SHLD64mri8,
- SHLD16mrCL, SHLD32mrCL, SHLD64mrCL,
- SHRD16mri8, SHRD32mri8, SHRD64mri8,
- SHRD16mrCL, SHRD32mrCL, SHRD64mrCL)>;
+// SHLD/SHRD.
+defm : X86WriteRes<WriteSHDrri, [JALU01], 3, [6], 6>;
+defm : X86WriteRes<WriteSHDrrcl,[JALU01], 4, [8], 7>;
+defm : X86WriteRes<WriteSHDmri, [JLAGU, JALU01], 9, [1, 22], 8>;
+defm : X86WriteRes<WriteSHDmrcl,[JLAGU, JALU01], 9, [1, 22], 8>;
////////////////////////////////////////////////////////////////////////////////
// Loads, stores, and moves, not folded with other operations.
diff --git a/lib/Target/X86/X86ScheduleSLM.td b/lib/Target/X86/X86ScheduleSLM.td
index c938a4a8939e..b1e843013707 100644
--- a/lib/Target/X86/X86ScheduleSLM.td
+++ b/lib/Target/X86/X86ScheduleSLM.td
@@ -98,11 +98,16 @@ defm : SLMWriteResPair<WriteADC, [SLM_IEC_RSV01], 1>;
defm : SLMWriteResPair<WriteIMul, [SLM_IEC_RSV1], 3>;
defm : SLMWriteResPair<WriteIMul64, [SLM_IEC_RSV1], 3>;
-defm : SLMWriteResPair<WriteBSWAP32,[SLM_IEC_RSV01], 1>;
-defm : SLMWriteResPair<WriteBSWAP64,[SLM_IEC_RSV01], 1>;
+defm : X86WriteRes<WriteBSWAP32, [SLM_IEC_RSV01], 1, [1], 1>;
+defm : X86WriteRes<WriteBSWAP64, [SLM_IEC_RSV01], 1, [1], 1>;
defm : SLMWriteResPair<WriteShift, [SLM_IEC_RSV0], 1>;
-defm : SLMWriteResPair<WriteShiftDouble, [SLM_IEC_RSV0], 1>;
+
+defm : X86WriteRes<WriteSHDrri, [SLM_IEC_RSV0], 1, [1], 1>;
+defm : X86WriteRes<WriteSHDrrcl,[SLM_IEC_RSV0], 1, [1], 1>;
+defm : X86WriteRes<WriteSHDmri, [SLM_MEC_RSV, SLM_IEC_RSV0], 4, [2, 1], 2>;
+defm : X86WriteRes<WriteSHDmrcl,[SLM_MEC_RSV, SLM_IEC_RSV0], 4, [2, 1], 2>;
+
defm : SLMWriteResPair<WriteJump, [SLM_IEC_RSV1], 1>;
defm : SLMWriteResPair<WriteCRC32, [SLM_IEC_RSV1], 3>;
@@ -115,6 +120,7 @@ def : WriteRes<WriteSETCCStore, [SLM_IEC_RSV01, SLM_MEC_RSV]> {
let ResourceCycles = [2,1];
}
def : WriteRes<WriteLAHFSAHF, [SLM_IEC_RSV01]>;
+def : WriteRes<WriteBitTest,[SLM_IEC_RSV01]>;
// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
diff --git a/lib/Target/X86/X86ScheduleZnver1.td b/lib/Target/X86/X86ScheduleZnver1.td
index d28d58580752..7184b850a195 100644
--- a/lib/Target/X86/X86ScheduleZnver1.td
+++ b/lib/Target/X86/X86ScheduleZnver1.td
@@ -180,11 +180,16 @@ defm : ZnWriteResPair<WriteADC, [ZnALU], 1>;
defm : ZnWriteResPair<WriteIMul, [ZnALU1, ZnMultiplier], 4>;
defm : ZnWriteResPair<WriteIMul64, [ZnALU1, ZnMultiplier], 4, [1,1], 2>;
-defm : ZnWriteResPair<WriteBSWAP32,[ZnALU], 1, [4]>;
-defm : ZnWriteResPair<WriteBSWAP64,[ZnALU], 1, [4]>;
+defm : X86WriteRes<WriteBSWAP32, [ZnALU], 1, [4], 1>;
+defm : X86WriteRes<WriteBSWAP64, [ZnALU], 1, [4], 1>;
defm : ZnWriteResPair<WriteShift, [ZnALU], 1>;
-defm : ZnWriteResPair<WriteShiftDouble, [ZnALU], 1>;
+
+defm : X86WriteRes<WriteSHDrri, [ZnALU], 1, [1], 1>;
+defm : X86WriteResUnsupported<WriteSHDrrcl>;
+defm : X86WriteResUnsupported<WriteSHDmri>;
+defm : X86WriteResUnsupported<WriteSHDmrcl>;
+
defm : ZnWriteResPair<WriteJump, [ZnALU], 1>;
defm : ZnWriteResFpuPair<WriteCRC32, [ZnFPU0], 3>;
@@ -193,6 +198,7 @@ defm : ZnWriteResPair<WriteCMOV2, [ZnALU], 1>;
def : WriteRes<WriteSETCC, [ZnALU]>;
def : WriteRes<WriteSETCCStore, [ZnALU, ZnAGU]>;
defm : X86WriteRes<WriteLAHFSAHF, [ZnALU], 2, [1], 2>;
+def : WriteRes<WriteBitTest,[ZnALU]>;
// Bit counts.
defm : ZnWriteResPair<WriteBSF, [ZnALU], 3>;
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index fedb13f89e19..85e8256a6e94 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -51,7 +51,7 @@ enum Style {
} // end namespace PICStyles
class X86Subtarget final : public X86GenSubtargetInfo {
-public:
+public:
enum X86ProcFamilyEnum {
Others,
IntelAtom,
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index bae2ef80c365..865462622627 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2274,8 +2274,8 @@ int X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
// Sign-extend all constants to a multiple of 64-bit.
APInt ImmVal = Imm;
- if (BitSize & 0x3f)
- ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
+ if (BitSize % 64 != 0)
+ ImmVal = Imm.sext(alignTo(BitSize, 64));
// Split the constant into 64-bit chunks and calculate the cost for each
// chunk.
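
Editorial note: the switch from the opaque mask arithmetic to alignTo is behavior-preserving. `(BitSize + 63) & ~0x3fU` rounds up to the next multiple of 64 exactly as `alignTo(BitSize, 64)` does, and `BitSize & 0x3f` is the same test as `BitSize % 64 != 0`. A standalone check of both equivalences (plain C++, not LLVM code):

    // Sanity check: for the power-of-two alignment 64, the bit trick and the
    // arithmetic round-up agree, and the two "is misaligned" tests coincide.
    #include <cassert>

    int main() {
      for (unsigned BitSize = 0; BitSize <= 4096; ++BitSize) {
        unsigned BitTrick = (BitSize + 63) & ~0x3fU;
        unsigned RoundUp = ((BitSize + 63) / 64) * 64; // alignTo(BitSize, 64)
        assert(BitTrick == RoundUp);
        assert((BitSize % 64 != 0) == (BitTrick != BitSize));
      }
    }
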
@@ -2332,9 +2332,15 @@ int X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
// immediates here as the normal path expects bit 31 to be sign extended.
if (Idx == 1 && Imm.getBitWidth() == 64 && isUInt<32>(Imm.getZExtValue()))
return TTI::TCC_Free;
- LLVM_FALLTHROUGH;
+ ImmIdx = 1;
+ break;
case Instruction::Add:
case Instruction::Sub:
+ // For add/sub, we can use the opposite instruction for INT32_MIN.
+ if (Idx == 1 && Imm.getBitWidth() == 64 && Imm.getZExtValue() == 0x80000000)
+ return TTI::TCC_Free;
+ ImmIdx = 1;
+ break;
case Instruction::Mul:
case Instruction::UDiv:
case Instruction::SDiv:
@@ -2366,7 +2372,7 @@ int X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
}
if (Idx == ImmIdx) {
- int NumConstants = (BitSize + 63) / 64;
+ int NumConstants = divideCeil(BitSize, 64);
int Cost = X86TTIImpl::getIntImmCost(Imm, Ty);
return (Cost <= NumConstants * TTI::TCC_Basic)
? static_cast<int>(TTI::TCC_Free)
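
Editorial note, two points on this file. First, `divideCeil(BitSize, 64)` is simply the readable spelling of `(BitSize + 63) / 64`. Second, the new add/sub special case rests on x86-64 encoding: a 64-bit ADD/SUB immediate must be a sign-extended 32-bit value, which +0x80000000 is not, but its two's-complement negation 0xFFFFFFFF80000000 is, so the addition can be emitted as a subtraction of an encodable immediate (and vice versa). A standalone check of the wraparound identity:

    // Standalone check of the INT32_MIN trick: adding 0x80000000 is the same
    // (mod 2^64) as subtracting 0xFFFFFFFF80000000, and only the latter is a
    // sign-extended 32-bit immediate that the x86-64 encoder accepts.
    #include <cassert>
    #include <cstdint>

    static bool isSExtImm32(uint64_t V) {
      return V == (uint64_t)(int64_t)(int32_t)(uint32_t)V;
    }

    int main() {
      const uint64_t Imm = 0x80000000ULL; // not directly encodable
      const uint64_t NegImm = ~Imm + 1;   // 0xFFFFFFFF80000000, encodable
      assert(!isSExtImm32(Imm) && isSExtImm32(NegImm));
      for (uint64_t X : {0ULL, 1ULL, 0x7FFFFFFFULL, 0xDEADBEEFDEADBEEFULL})
        assert(X + Imm == X - NegImm); // ADD and SUB are interchangeable here
    }
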
diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp
index 8f7c8a82380a..916bca6392de 100644
--- a/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -146,7 +146,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
}
EmitAlignment(Align > 2 ? Align : 2, GV);
-
+
if (GV->isThreadLocal()) {
report_fatal_error("TLS is not supported by this target!");
}
@@ -162,7 +162,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// are padded to 32 bits.
if (Size < 4)
OutStreamer->EmitZeros(4 - Size);
-
+
// Mark the end of the global
getTargetStreamer().emitCCBottomData(GVSym->getName());
}
@@ -295,6 +295,6 @@ void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
// Force static initialization.
-extern "C" void LLVMInitializeXCoreAsmPrinter() {
+extern "C" void LLVMInitializeXCoreAsmPrinter() {
RegisterAsmPrinter<XCoreAsmPrinter> X(getTheXCoreTarget());
}
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index d5e276788f71..b0de048672df 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -63,7 +63,7 @@ static bool isZeroImm(const MachineOperand &op) {
unsigned XCoreInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
int Opcode = MI.getOpcode();
- if (Opcode == XCore::LDWFI)
+ if (Opcode == XCore::LDWFI)
{
if ((MI.getOperand(1).isFI()) && // is a stack slot
(MI.getOperand(2).isImm()) && // the imm is zero
@@ -74,7 +74,7 @@ unsigned XCoreInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
}
return 0;
}
-
+
/// isStoreToStackSlot - If the specified machine instruction is a direct
/// store to a stack slot, return the virtual or physical register number of
/// the source reg along with the FrameIndex of the loaded stack slot. If
@@ -129,9 +129,9 @@ static inline bool IsBR_JT(unsigned BrOpc) {
|| BrOpc == XCore::BR_JT32;
}
-/// GetCondFromBranchOpc - Return the XCore CC that matches
+/// GetCondFromBranchOpc - Return the XCore CC that matches
/// the correspondent Branch instruction opcode.
-static XCore::CondCode GetCondFromBranchOpc(unsigned BrOpc)
+static XCore::CondCode GetCondFromBranchOpc(unsigned BrOpc)
{
if (IsBRT(BrOpc)) {
return XCore::COND_TRUE;
@@ -144,7 +144,7 @@ static XCore::CondCode GetCondFromBranchOpc(unsigned BrOpc)
/// GetCondBranchFromCond - Return the Branch instruction
/// opcode that matches the cc.
-static inline unsigned GetCondBranchFromCond(XCore::CondCode CC)
+static inline unsigned GetCondBranchFromCond(XCore::CondCode CC)
{
switch (CC) {
default: llvm_unreachable("Illegal condition code!");
@@ -153,7 +153,7 @@ static inline unsigned GetCondBranchFromCond(XCore::CondCode CC)
}
}
-/// GetOppositeBranchCondition - Return the inverse of the specified
+/// GetOppositeBranchCondition - Return the inverse of the specified
/// condition, e.g. turning COND_E to COND_NE.
static inline XCore::CondCode GetOppositeBranchCondition(XCore::CondCode CC)
{
@@ -209,11 +209,11 @@ bool XCoreInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
TBB = LastInst->getOperand(0).getMBB();
return false;
}
-
+
XCore::CondCode BranchCode = GetCondFromBranchOpc(LastInst->getOpcode());
if (BranchCode == XCore::COND_INVALID)
return true; // Can't handle indirect branch.
-
+
// Conditional branch
// Block ends with fall-through condbranch.
@@ -222,17 +222,17 @@ bool XCoreInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
Cond.push_back(LastInst->getOperand(0));
return false;
}
-
+
// Get the instruction before it if it's a terminator.
MachineInstr *SecondLastInst = &*I;
// If there are three terminators, we don't know what sort of block this is.
if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
return true;
-
+
unsigned SecondLastOpc = SecondLastInst->getOpcode();
XCore::CondCode BranchCode = GetCondFromBranchOpc(SecondLastOpc);
-
+
// If the block ends with conditional branch followed by unconditional,
// handle it.
if (BranchCode != XCore::COND_INVALID
@@ -245,10 +245,10 @@ bool XCoreInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
FBB = LastInst->getOperand(0).getMBB();
return false;
}
-
+
// If the block ends with two unconditional branches, handle it. The second
// one is not executed, so remove it.
- if (IsBRU(SecondLastInst->getOpcode()) &&
+ if (IsBRU(SecondLastInst->getOpcode()) &&
IsBRU(LastInst->getOpcode())) {
TBB = SecondLastInst->getOperand(0).getMBB();
I = LastInst;
@@ -293,7 +293,7 @@ unsigned XCoreInstrInfo::insertBranch(MachineBasicBlock &MBB,
}
return 1;
}
-
+
// Two-way Conditional branch.
assert(Cond.size() == 2 && "Unexpected number of components!");
unsigned Opc = GetCondBranchFromCond((XCore::CondCode)Cond[0].getImm());
@@ -313,17 +313,17 @@ XCoreInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved) const {
if (!IsBRU(I->getOpcode()) && !IsCondBranch(I->getOpcode()))
return 0;
-
+
// Remove the branch.
I->eraseFromParent();
-
+
I = MBB.end();
if (I == MBB.begin()) return 1;
--I;
if (!IsCondBranch(I->getOpcode()))
return 1;
-
+
// Remove the branch.
I->eraseFromParent();
return 2;
@@ -342,7 +342,7 @@ void XCoreInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addImm(0);
return;
}
-
+
if (GRDest && SrcReg == XCore::SP) {
BuildMI(MBB, I, DL, get(XCore::LDAWSP_ru6), DestReg).addImm(0);
return;
diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.h b/lib/Target/XCore/XCoreMachineFunctionInfo.h
index cf469ec3cf1a..6c05ab3f10df 100644
--- a/lib/Target/XCore/XCoreMachineFunctionInfo.h
+++ b/lib/Target/XCore/XCoreMachineFunctionInfo.h
@@ -43,11 +43,11 @@ class XCoreFunctionInfo : public MachineFunctionInfo {
public:
XCoreFunctionInfo() = default;
-
+
explicit XCoreFunctionInfo(MachineFunction &MF) {}
-
+
~XCoreFunctionInfo() override = default;
-
+
void setVarArgsFrameIndex(int off) { VarArgsFrameIndex = off; }
int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index 1915aaedc35d..e119d9555f9d 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -296,12 +296,12 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// fold constant into offset.
Offset += MI.getOperand(FIOperandNum + 1).getImm();
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
-
+
assert(Offset%4 == 0 && "Misaligned stack offset");
LLVM_DEBUG(errs() << "Offset : " << Offset << "\n"
<< "<--------->\n");
Offset/=4;
-
+
unsigned Reg = MI.getOperand(0).getReg();
assert(XCore::GRRegsRegClass.contains(Reg) && "Unexpected register operand");
diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h
index c31f5d5a7c44..9451a05d8d58 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/lib/Target/XCore/XCoreRegisterInfo.h
@@ -32,7 +32,7 @@ public:
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
-
+
bool enableMultipleCopyHints() const override { return true; }
bool requiresRegisterScavenging(const MachineFunction &MF) const override;
diff --git a/lib/Target/XCore/XCoreSubtarget.h b/lib/Target/XCore/XCoreSubtarget.h
index 140ddba68aab..ed9936ebf2b8 100644
--- a/lib/Target/XCore/XCoreSubtarget.h
+++ b/lib/Target/XCore/XCoreSubtarget.h
@@ -43,7 +43,7 @@ public:
XCoreSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM);
- /// ParseSubtargetFeatures - Parses features string setting specified
+ /// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 31e771da3bd3..cd2bd734eb26 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -56,7 +56,7 @@ using namespace llvm;
STATISTIC(NumArgumentsEliminated, "Number of unread args removed");
STATISTIC(NumRetValsEliminated , "Number of unused return values removed");
-STATISTIC(NumArgumentsReplacedWithUndef,
+STATISTIC(NumArgumentsReplacedWithUndef,
"Number of unread args replaced with undef");
namespace {
@@ -109,7 +109,7 @@ namespace {
char DAH::ID = 0;
-INITIALIZE_PASS(DAH, "deadarghaX0r",
+INITIALIZE_PASS(DAH, "deadarghaX0r",
"Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)",
false, false)
@@ -256,7 +256,7 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) {
return true;
}
-/// RemoveDeadArgumentsFromCallers - Checks if the given function has any
+/// RemoveDeadArgumentsFromCallers - Checks if the given function has any
/// arguments that are unused, and changes the caller parameters to be undefined
/// instead.
bool DeadArgumentEliminationPass::RemoveDeadArgumentsFromCallers(Function &Fn) {
@@ -640,7 +640,7 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
Result = Live;
} else {
// See what the effect of this use is (recording any uses that cause
- // MaybeLive in MaybeLiveArgUses).
+ // MaybeLive in MaybeLiveArgUses).
Result = SurveyUses(&*AI, MaybeLiveArgUses);
}
@@ -777,7 +777,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
// argument.
// 2) Retain the 'returned' attribute and treat the return value (but not the
// entire function) as live so that it is not eliminated.
- //
+ //
// It's not clear in the general case which option is more profitable because,
// even in the absence of explicit uses of the return value, code generation
// is free to use the 'returned' attribute to do things like eliding
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 2797da6c0abd..010b0a29807d 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -617,7 +617,7 @@ static bool addArgumentAttrsFromCallsites(Function &F) {
if (!isGuaranteedToTransferExecutionToSuccessor(&I))
break;
}
-
+
return Changed;
}
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 1af7e6894777..1761d7faff57 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -357,6 +357,41 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
return Changed;
}
+static bool isSafeSROAElementUse(Value *V);
+
+/// Return true if the specified GEP is a safe user of a derived
+/// expression from a global that we want to SROA.
+static bool isSafeSROAGEP(User *U) {
+ // Check to see if this ConstantExpr GEP is SRA'able. In particular, we
+ // don't like < 3 operand CE's, and we don't like non-constant integer
+ // indices. This enforces that all uses are 'gep GV, 0, C, ...' for some
+ // value of C.
+ if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) ||
+ !cast<Constant>(U->getOperand(1))->isNullValue())
+ return false;
+
+ gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U);
+ ++GEPI; // Skip over the pointer index.
+
+ // For all other levels we require that the indices are constant and in range.
+ // In particular, consider: A[0][i]. We cannot know that the user isn't doing
+ // invalid things like allowing i to index an out-of-range subscript that
+ // accesses A[1]. This can also happen between different members of a struct
+ // in llvm IR.
+ for (; GEPI != E; ++GEPI) {
+ if (GEPI.isStruct())
+ continue;
+
+ ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
+ if (!IdxVal || (GEPI.isBoundedSequential() &&
+ IdxVal->getZExtValue() >= GEPI.getSequentialNumElements()))
+ return false;
+ }
+
+ return llvm::all_of(U->users(),
+ [](User *UU) { return isSafeSROAElementUse(UU); });
+}
+
/// Return true if the specified instruction is a safe user of a derived
/// expression from a global that we want to SROA.
static bool isSafeSROAElementUse(Value *V) {
@@ -374,84 +409,25 @@ static bool isSafeSROAElementUse(Value *V) {
if (StoreInst *SI = dyn_cast<StoreInst>(I))
return SI->getOperand(0) != V;
- // Otherwise, it must be a GEP.
- GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I);
- if (!GEPI) return false;
-
- if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) ||
- !cast<Constant>(GEPI->getOperand(1))->isNullValue())
- return false;
-
- for (User *U : GEPI->users())
- if (!isSafeSROAElementUse(U))
- return false;
- return true;
-}
-
-/// U is a direct user of the specified global value. Look at it and its uses
-/// and decide whether it is safe to SROA this global.
-static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
- // The user of the global must be a GEP Inst or a ConstantExpr GEP.
- if (!isa<GetElementPtrInst>(U) &&
- (!isa<ConstantExpr>(U) ||
- cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
- return false;
-
- // Check to see if this ConstantExpr GEP is SRA'able. In particular, we
- // don't like < 3 operand CE's, and we don't like non-constant integer
- // indices. This enforces that all uses are 'gep GV, 0, C, ...' for some
- // value of C.
- if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) ||
- !cast<Constant>(U->getOperand(1))->isNullValue() ||
- !isa<ConstantInt>(U->getOperand(2)))
- return false;
-
- gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U);
- ++GEPI; // Skip over the pointer index.
-
- // If this is a use of an array allocation, do a bit more checking for sanity.
- if (GEPI.isSequential()) {
- ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2));
-
- // Check to make sure that index falls within the array. If not,
- // something funny is going on, so we won't do the optimization.
- //
- if (GEPI.isBoundedSequential() &&
- Idx->getZExtValue() >= GEPI.getSequentialNumElements())
- return false;
-
- // We cannot scalar repl this level of the array unless any array
- // sub-indices are in-range constants. In particular, consider:
- // A[0][i]. We cannot know that the user isn't doing invalid things like
- // allowing i to index an out-of-range subscript that accesses A[1].
- //
- // Scalar replacing *just* the outer index of the array is probably not
- // going to be a win anyway, so just give up.
- for (++GEPI; // Skip array index.
- GEPI != E;
- ++GEPI) {
- if (GEPI.isStruct())
- continue;
-
- ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
- if (!IdxVal ||
- (GEPI.isBoundedSequential() &&
- IdxVal->getZExtValue() >= GEPI.getSequentialNumElements()))
- return false;
- }
- }
-
- return llvm::all_of(U->users(),
- [](User *UU) { return isSafeSROAElementUse(UU); });
+ // Otherwise, it must be a GEP. Check that it and its users are safe to SRA.
+ return isa<GetElementPtrInst>(I) && isSafeSROAGEP(I);
}
/// Look at all uses of the global and decide whether it is safe for us to
/// perform this transformation.
static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
- for (User *U : GV->users())
- if (!IsUserOfGlobalSafeForSRA(U, GV))
+ for (User *U : GV->users()) {
+ // The user of the global must be a GEP Inst or a ConstantExpr GEP.
+ if (!isa<GetElementPtrInst>(U) &&
+ (!isa<ConstantExpr>(U) ||
+ cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
return false;
+ // Check that the GEP and its users are safe to SRA.
+ if (!isSafeSROAGEP(U))
+ return false;
+ }
+
return true;
}
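
Editorial note: the in-range-constant requirement that isSafeSROAGEP enforces is not pedantry. With row-major layout, an out-of-range (or variable) inner index can address a neighbouring element, so splitting the aggregate would change the program. A standalone illustration of the A[0][i] hazard the comment describes (plain C++, not LLVM code):

    // Why variable or out-of-range inner indices block SROA: for an
    // int A[2][10], the flat offset of "A[0][12]" equals that of A[1][2],
    // so splitting A into separate A[0] and A[1] pieces would break the use.
    #include <cassert>
    #include <cstddef>

    int main() {
      const size_t Cols = 10, EltSize = sizeof(int);
      auto flatOffset = [&](size_t Row, size_t Col) {
        return (Row * Cols + Col) * EltSize;
      };
      assert(flatOffset(0, 12) == flatOffset(1, 2)); // A[0][12] aliases A[1][2]
    }
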
diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp
index f79b61037f1d..7d55ebecbf92 100644
--- a/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -61,12 +61,12 @@ static bool PropagateConstantsIntoArguments(Function &F) {
User *UR = U.getUser();
// Ignore blockaddress uses.
if (isa<BlockAddress>(UR)) continue;
-
+
// Used by a non-instruction, or not the callee of a function, do not
// transform.
if (!isa<CallInst>(UR) && !isa<InvokeInst>(UR))
return false;
-
+
CallSite CS(cast<Instruction>(UR));
if (!CS.isCallee(&U))
return false;
@@ -77,11 +77,11 @@ static bool PropagateConstantsIntoArguments(Function &F) {
Function::arg_iterator Arg = F.arg_begin();
for (unsigned i = 0, e = ArgumentConstants.size(); i != e;
++i, ++AI, ++Arg) {
-
+
// If this argument is known non-constant, ignore it.
if (ArgumentConstants[i].second)
continue;
-
+
Constant *C = dyn_cast<Constant>(*AI);
if (C && ArgumentConstants[i].first == nullptr) {
ArgumentConstants[i].first = C; // First constant seen.
@@ -108,7 +108,7 @@ static bool PropagateConstantsIntoArguments(Function &F) {
if (ArgumentConstants[i].second || AI->use_empty() ||
AI->hasInAllocaAttr() || (AI->hasByValAttr() && !F.onlyReadsMemory()))
continue;
-
+
Value *V = ArgumentConstants[i].first;
if (!V) V = UndefValue::get(AI->getType());
AI->replaceAllUsesWith(V);
@@ -147,7 +147,7 @@ static bool PropagateConstantReturn(Function &F) {
SmallVector<Value *,4> RetVals;
StructType *STy = dyn_cast<StructType>(F.getReturnType());
if (STy)
- for (unsigned i = 0, e = STy->getNumElements(); i < e; ++i)
+ for (unsigned i = 0, e = STy->getNumElements(); i < e; ++i)
RetVals.push_back(UndefValue::get(STy->getElementType(i)));
else
RetVals.push_back(UndefValue::get(F.getReturnType()));
@@ -172,7 +172,7 @@ static bool PropagateConstantReturn(Function &F) {
// Ignore undefs, we can change them into anything
if (isa<UndefValue>(V))
continue;
-
+
// Try to see if all the rets return the same constant or argument.
if (isa<Constant>(V) || isa<Argument>(V)) {
if (isa<UndefValue>(RV)) {
@@ -206,7 +206,7 @@ static bool PropagateConstantReturn(Function &F) {
// directly?
if (!Call || !CS.isCallee(&U))
continue;
-
+
// Call result not used?
if (Call->use_empty())
continue;
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 139941127dee..3bebb96c6d35 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -27,7 +27,7 @@
// -- We define Function* container class with custom "operator<" (FunctionPtr).
// -- "FunctionPtr" instances are stored in std::set collection, so every
// std::set::insert operation will give you result in log(N) time.
-//
+//
// As an optimization, a hash of the function structure is calculated first, and
// two functions are only compared if they have the same hash. This hash is
// cheap to compute, and has the property that if function F == G according to
@@ -383,7 +383,7 @@ bool MergeFunctions::runOnModule(Module &M) {
for (Function &Func : M) {
if (!Func.isDeclaration() && !Func.hasAvailableExternallyLinkage()) {
HashedFuncs.push_back({FunctionComparator::functionHash(Func), &Func});
- }
+ }
}
std::stable_sort(
@@ -402,7 +402,7 @@ bool MergeFunctions::runOnModule(Module &M) {
Deferred.push_back(WeakTrackingVH(I->second));
}
}
-
+
do {
std::vector<WeakTrackingVH> Worklist;
Deferred.swap(Worklist);
@@ -802,11 +802,11 @@ void MergeFunctions::replaceFunctionInTree(const FunctionNode &FN,
Function *F = FN.getFunc();
assert(FunctionComparator(F, G, &GlobalNumbers).compare() == 0 &&
"The two functions must be equal");
-
+
auto I = FNodesInTree.find(F);
assert(I != FNodesInTree.end() && "F should be in FNodesInTree");
assert(FNodesInTree.count(G) == 0 && "FNodesInTree should not contain G");
-
+
FnTreeType::iterator IterToFNInFnTree = I->second;
assert(&(*IterToFNInFnTree) == &FN && "F should map to FN in FNodesInTree.");
// Remove F -> FN and insert G -> FN
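
Editorial note: this hunk only trims trailing whitespace, but the surrounding comment describes the pass's hash-prefilter strategy, which deserves a sketch. Because equal functions are guaranteed equal hashes, the expensive comparator only ever needs to run inside runs of equal hash. A minimal standalone sketch of the idea (toy types and a toy hash, not the pass's real code):

    // Hash-prefilter dedup: sort by a cheap hash with the property
    // eq(F, G) implies cheapHash(F) == cheapHash(G), then run the
    // expensive comparison only within equal-hash runs.
    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <string>
    #include <vector>

    using Fn = std::string; // stand-in for Function*

    static uint64_t cheapHash(const Fn &F) { return F.size(); }
    static bool expensiveEqual(const Fn &A, const Fn &B) { return A == B; }

    static size_t countMergeablePairs(std::vector<Fn> Fns) {
      std::stable_sort(Fns.begin(), Fns.end(), [](const Fn &A, const Fn &B) {
        return cheapHash(A) < cheapHash(B);
      });
      size_t Pairs = 0;
      for (size_t I = 0; I < Fns.size(); ++I)
        for (size_t J = I + 1;
             J < Fns.size() && cheapHash(Fns[J]) == cheapHash(Fns[I]); ++J)
          Pairs += expensiveEqual(Fns[I], Fns[J]);
      return Pairs;
    }

    int main() { assert(countMergeablePairs({"f", "gg", "hh", "f", "gg"}) == 2); }
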
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index 27d791857314..2be654258aa8 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -77,13 +77,13 @@ static bool runImpl(CallGraphSCC &SCC, CallGraph &CG) {
// Next, check to see if any callees might throw or if there are any external
// functions in this SCC: if so, we cannot prune any functions in this SCC.
- // Definitions that are weak and not declared non-throwing might be
+ // Definitions that are weak and not declared non-throwing might be
// overridden at linktime with something that throws, so assume that.
// If this SCC includes the unwind instruction, we KNOW it throws, so
// obviously the SCC might throw.
//
bool SCCMightUnwind = false, SCCMightReturn = false;
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end();
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end();
(!SCCMightUnwind || !SCCMightReturn) && I != E; ++I) {
Function *F = (*I)->getFunction();
if (!F) {
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index aa31e0d850dd..83054588a9aa 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -926,7 +926,13 @@ Instruction *InstCombiner::foldAddWithConstant(BinaryOperator &Add) {
if (Instruction *NV = foldBinOpIntoSelectOrPhi(Add))
return NV;
- Value *X;
+ Value *X, *Y;
+
+ // add (sub X, Y), -1 --> add (not Y), X
+ if (match(Op0, m_OneUse(m_Sub(m_Value(X), m_Value(Y)))) &&
+ match(Op1, m_AllOnes()))
+ return BinaryOperator::CreateAdd(Builder.CreateNot(Y), X);
+
// zext(bool) + C -> bool ? C + 1 : C
if (match(Op0, m_ZExt(m_Value(X))) &&
X->getType()->getScalarSizeInBits() == 1)
@@ -1608,6 +1614,14 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
if (match(Op0, m_Not(m_Value(X))) && match(Op1, m_Not(m_Value(Y))))
return BinaryOperator::CreateSub(Y, X);
+ // (X + -1) - Y --> ~Y + X
+ if (match(Op0, m_OneUse(m_Add(m_Value(X), m_AllOnes()))))
+ return BinaryOperator::CreateAdd(Builder.CreateNot(Op1), X);
+
+ // Y - (X + 1) --> ~X + Y
+ if (match(Op1, m_OneUse(m_Add(m_Value(X), m_One()))))
+ return BinaryOperator::CreateAdd(Builder.CreateNot(X), Op0);
+
if (Constant *C = dyn_cast<Constant>(Op0)) {
bool IsNegate = match(C, m_ZeroInt());
Value *X;
@@ -1858,7 +1872,7 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
Constant *C;
if (match(Op1, m_Constant(C)) && !isa<ConstantExpr>(Op1))
return BinaryOperator::CreateFAddFMF(Op0, ConstantExpr::getFNeg(C), &I);
-
+
// X - (-Y) --> X + Y
if (match(Op1, m_FNeg(m_Value(Y))))
return BinaryOperator::CreateFAddFMF(Op0, Y, &I);
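
Editorial note: the three new integer folds in this file all come down to the two's-complement identity ~A == -A - 1. Each of `(X - Y) + -1`, `(X + -1) - Y`, and `Y - (X + 1)` hides a `not` that, once exposed, becomes a cheap operand for further combines. An exhaustive standalone check over i8:

    // Exhaustive i8 verification of the three rewrites added above; all
    // follow from ~A == -A - 1 in two's-complement arithmetic.
    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned Xi = 0; Xi < 256; ++Xi)
        for (unsigned Yi = 0; Yi < 256; ++Yi) {
          uint8_t X = Xi, Y = Yi;
          // add (sub X, Y), -1 --> add (not Y), X
          assert((uint8_t)((uint8_t)(X - Y) + 0xFF) == (uint8_t)((uint8_t)~Y + X));
          // (X + -1) - Y --> ~Y + X
          assert((uint8_t)((uint8_t)(X + 0xFF) - Y) == (uint8_t)((uint8_t)~Y + X));
          // Y - (X + 1) --> ~X + Y
          assert((uint8_t)(Y - (uint8_t)(X + 1)) == (uint8_t)((uint8_t)~X + Y));
        }
    }
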
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 372bc41f780e..3d758e2fe7c9 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1550,31 +1550,13 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
return DeMorgan;
{
- Value *A = nullptr, *B = nullptr, *C = nullptr;
- // A&(A^B) => A & ~B
- {
- Value *tmpOp0 = Op0;
- Value *tmpOp1 = Op1;
- if (match(Op0, m_OneUse(m_Xor(m_Value(A), m_Value(B))))) {
- if (A == Op1 || B == Op1 ) {
- tmpOp1 = Op0;
- tmpOp0 = Op1;
- // Simplify below
- }
- }
-
- if (match(tmpOp1, m_OneUse(m_Xor(m_Value(A), m_Value(B))))) {
- if (B == tmpOp0) {
- std::swap(A, B);
- }
- // Notice that the pattern (A&(~B)) is actually (A&(-1^B)), so if
- // A is originally -1 (or a vector of -1 and undefs), then we enter
- // an endless loop. By checking that A is non-constant we ensure that
- // we will never get to the loop.
- if (A == tmpOp0 && !isa<Constant>(A)) // A&(A^B) -> A & ~B
- return BinaryOperator::CreateAnd(A, Builder.CreateNot(B));
- }
- }
+ Value *A, *B, *C;
+ // A & (A ^ B) --> A & ~B
+ if (match(Op1, m_OneUse(m_c_Xor(m_Specific(Op0), m_Value(B)))))
+ return BinaryOperator::CreateAnd(Op0, Builder.CreateNot(B));
+ // (A ^ B) & A --> A & ~B
+ if (match(Op0, m_OneUse(m_c_Xor(m_Specific(Op1), m_Value(B)))))
+ return BinaryOperator::CreateAnd(Op1, Builder.CreateNot(B));
// (A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C
if (match(Op0, m_Xor(m_Value(A), m_Value(B))))
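
Editorial note: the replacement collapses a hand-rolled operand-swapping loop into two commutative matches, and the underlying identity is unchanged: in any bit where A is clear both sides are 0, and where A is set, A^B flips to ~B. An exhaustive standalone check over i8:

    // Exhaustive i8 verification of A & (A ^ B) == A & ~B.
    #include <cassert>

    int main() {
      for (unsigned A = 0; A < 256; ++A)
        for (unsigned B = 0; B < 256; ++B)
          assert((A & (A ^ B)) == (A & (~B & 0xFF)));
    }
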
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index e8ea7396a96a..fd59c3a7c0c3 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -2243,6 +2243,12 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
Type *DstElTy = DstPTy->getElementType();
Type *SrcElTy = SrcPTy->getElementType();
+ // Casting pointers between the same type, but with different address spaces
+ // is an addrspace cast rather than a bitcast.
+ if ((DstElTy == SrcElTy) &&
+ (DstPTy->getAddressSpace() != SrcPTy->getAddressSpace()))
+ return new AddrSpaceCastInst(Src, DestTy);
+
// If we are casting a alloca to a pointer to a type of the same
// size, rewrite the allocation instruction to allocate the "right" type.
// There is no need to modify malloc calls because it is their bitcast that
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 742caf649007..62769f077b47 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -518,7 +518,7 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value *V) {
assert((!SI.isAtomic() || isSupportedAtomicType(V->getType())) &&
"can't fold an atomic store of requested type");
-
+
Value *Ptr = SI.getPointerOperand();
unsigned AS = SI.getPointerAddressSpace();
SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 4867808478a3..796b4021d273 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -54,6 +54,36 @@ static Value *createMinMax(InstCombiner::BuilderTy &Builder,
return Builder.CreateSelect(Builder.CreateICmp(Pred, A, B), A, B);
}
+/// Fold
+/// %A = icmp eq/ne i8 %x, 0
+/// %B = op i8 %x, %z
+/// %C = select i1 %A, i8 %B, i8 %y
+/// To
+/// %C = select i1 %A, i8 %z, i8 %y
+/// OP: binop with an identity constant
+/// TODO: support for non-commutative and FP opcodes
+static Instruction *foldSelectBinOpIdentity(SelectInst &Sel) {
+
+ Value *Cond = Sel.getCondition();
+ Value *X, *Z;
+ Constant *C;
+ CmpInst::Predicate Pred;
+ if (!match(Cond, m_ICmp(Pred, m_Value(X), m_Constant(C))) ||
+ !ICmpInst::isEquality(Pred))
+ return nullptr;
+
+ bool IsEq = Pred == ICmpInst::ICMP_EQ;
+ auto *BO =
+ dyn_cast<BinaryOperator>(IsEq ? Sel.getTrueValue() : Sel.getFalseValue());
+ // TODO: support for undefs
+ if (BO && match(BO, m_c_BinOp(m_Specific(X), m_Value(Z))) &&
+ ConstantExpr::getBinOpIdentity(BO->getOpcode(), X->getType()) == C) {
+ Sel.setOperand(IsEq ? 1 : 2, Z);
+ return &Sel;
+ }
+ return nullptr;
+}
+
/// This folds:
/// select (icmp eq (and X, C1)), TC, FC
/// iff C1 is a power 2 and the difference between TC and FC is a power-of-2.
@@ -1961,5 +1991,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (Instruction *Select = foldSelectCmpXchg(SI))
return Select;
+ if (Instruction *Select = foldSelectBinOpIdentity(SI))
+ return Select;
+
return nullptr;
}
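
Editorial note: foldSelectBinOpIdentity is sound because the icmp pins X to the identity constant on exactly the arm where the binop is selected, so `X op Z` collapses to `Z` there. A standalone i8 check for add and or, both of which have identity 0 (the case the comment's example uses):

    // The selected arm computes X op Z only when X == identity(op), so the
    // select can return Z directly; checked exhaustively for i8 add and or.
    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned Xi = 0; Xi < 256; ++Xi)
        for (unsigned Zi = 0; Zi < 256; ++Zi) {
          uint8_t X = Xi, Z = Zi, Y = 0x5A; // Y is an arbitrary "other" arm
          uint8_t Folded = (X == 0) ? Z : Y;
          assert(((X == 0) ? (uint8_t)(X + Z) : Y) == Folded);
          assert(((X == 0) ? (uint8_t)(X | Z) : Y) == Folded);
        }
    }
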
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 34f8037e519f..1ca75f3989d4 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -570,7 +570,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
m_OneUse(m_BinOp(FBO))))) {
const APInt *C;
if (!isa<Constant>(TrueVal) && FBO->getOperand(0) == TrueVal &&
- match(FBO->getOperand(1), m_APInt(C)) &&
+ match(FBO->getOperand(1), m_APInt(C)) &&
canShiftBinOpWithConstantRHS(I, FBO, *C)) {
Constant *NewRHS = ConstantExpr::get(I.getOpcode(),
cast<Constant>(FBO->getOperand(1)), Op1);
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 2560feb37d66..1c2de6352fa5 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -605,7 +605,7 @@ static Instruction *foldInsSequenceIntoBroadcast(InsertElementInst &InsElt) {
return nullptr;
Value *SplatVal = InsElt.getOperand(1);
- InsertElementInst *CurrIE = &InsElt;
+ InsertElementInst *CurrIE = &InsElt;
SmallVector<bool, 16> ElementPresent(NumElements, false);
InsertElementInst *FirstIE = nullptr;
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 12fcc8752ea9..cff0d5447290 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1424,7 +1424,7 @@ Instruction *InstCombiner::foldShuffledBinop(BinaryOperator &Inst) {
bool ConstOp1 = isa<Constant>(Inst.getOperand(1));
if (Inst.isIntDivRem() || (Inst.isShift() && ConstOp1))
NewC = getSafeVectorConstantForBinop(Inst.getOpcode(), NewC, ConstOp1);
-
+
// Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
// Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
Value *NewLHS = isa<Constant>(LHS) ? NewC : V1;
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index b3f659194558..6af44354225c 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -2464,10 +2464,10 @@ bool AddressSanitizer::runOnFunction(Function &F) {
// If needed, insert __asan_init before checking for SanitizeAddress attr.
// This function needs to be called even if the function body is not
- // instrumented.
+ // instrumented.
if (maybeInsertAsanInitAtFunctionEntry(F))
FunctionModified = true;
-
+
// Leave if the function doesn't need instrumentation.
if (!F.hasFnAttribute(Attribute::SanitizeAddress)) return FunctionModified;
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index acd27c2e226f..132e8089fe3b 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -148,7 +148,7 @@ public:
}
StringRef getPassName() const override { return "GCOV Profiler"; }
- bool runOnModule(Module &M) override {
+ bool runOnModule(Module &M) override {
auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
return Profiler.runOnModule(M, TLI);
}
diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 22076f04d6ad..4d5dfb0aa66b 100644
--- a/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -898,7 +898,7 @@ void InstrProfiling::emitRegistration() {
IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", RegisterF));
for (Value *Data : UsedVars)
- if (Data != NamesVar)
+ if (Data != NamesVar && !isa<Function>(Data))
IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy));
if (NamesVar) {
diff --git a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index fa7bcec677f7..0830ff5dd042 100644
--- a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -280,7 +280,7 @@ bool AlignmentFromAssumptionsPass::extractAlignmentInfo(CallInst *I,
return false;
// Sign extend the offset to 64 bits (so that it is like all of the other
- // expressions).
+ // expressions).
unsigned OffSCEVBits = OffSCEV->getType()->getPrimitiveSizeInBits();
if (OffSCEVBits < 64)
OffSCEV = SE->getSignExtendExpr(OffSCEV, Int64Ty);
diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp
index 3a675b979017..55759e8b1661 100644
--- a/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -781,7 +781,7 @@ bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI,
this->TTI = &TTI;
this->DT = &DT;
this->BFI = BFI;
- this->Entry = &Entry;
+ this->Entry = &Entry;
// Collect all constant candidates.
collectConstantCandidates(Fn);
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index ea148b728a10..2f2d7f620a29 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -473,7 +473,7 @@ static bool processCallSite(CallSite CS, LazyValueInfo *LVI) {
// relatively expensive analysis for constants which are obviously either
// null or non-null to start with.
if (Type && !CS.paramHasAttr(ArgNo, Attribute::NonNull) &&
- !isa<Constant>(V) &&
+ !isa<Constant>(V) &&
LVI->getPredicateAt(ICmpInst::ICMP_EQ, V,
ConstantPointerNull::get(Type),
CS.getInstruction()) == LazyValueInfo::False)
@@ -670,12 +670,12 @@ static Constant *getConstantAt(Value *V, Instruction *At, LazyValueInfo *LVI) {
Value *Op0 = C->getOperand(0);
Constant *Op1 = dyn_cast<Constant>(C->getOperand(1));
if (!Op1) return nullptr;
-
+
LazyValueInfo::Tristate Result =
LVI->getPredicateAt(C->getPredicate(), Op0, Op1, At);
if (Result == LazyValueInfo::Unknown)
return nullptr;
-
+
return (Result == LazyValueInfo::True) ?
ConstantInt::getTrue(C->getContext()) :
ConstantInt::getFalse(C->getContext());
@@ -747,7 +747,7 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT,
if (auto *C = getConstantAt(RetVal, RI, LVI)) {
++NumReturns;
RI->replaceUsesOfWith(RetVal, C);
- BBChanged = true;
+ BBChanged = true;
}
}
}
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index dd1a2a6adb82..9a7405e98e7d 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -188,7 +188,7 @@ static bool hasAnalyzableMemoryWrite(Instruction *I,
/// returns true, this function and getLocForRead completely describe the memory
/// operations for this instruction.
static MemoryLocation getLocForWrite(Instruction *Inst) {
-
+
if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
return MemoryLocation::get(SI);
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index 565745d12e99..533d16e088c8 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -384,7 +384,7 @@ public:
LoadMapAllocator>;
LoadHTType AvailableLoads;
-
+
// A scoped hash table mapping memory locations (represented as typed
// addresses) to generation numbers at which that memory location became
// (henceforth indefinitely) invariant.
@@ -844,7 +844,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// start a scope in the current generaton which is true for all future
// generations. Also, we dont need to consume the last store since the
// semantics of invariant.start allow us to perform DSE of the last
- // store, if there was a store following invariant.start. Consider:
+ // store, if there was a store following invariant.start. Consider:
//
// store 30, i8* p
// invariant.start(p)
@@ -852,7 +852,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// We can DSE the store to 30, since the store 40 to invariant location p
// causes undefined behaviour.
if (match(Inst, m_Intrinsic<Intrinsic::invariant_start>())) {
- // If there are any uses, the scope might end.
+ // If there are any uses, the scope might end.
if (!Inst->use_empty())
continue;
auto *CI = cast<CallInst>(Inst);
diff --git a/lib/Transforms/Scalar/GVNSink.cpp b/lib/Transforms/Scalar/GVNSink.cpp
index 28c5940db1e0..8959038de596 100644
--- a/lib/Transforms/Scalar/GVNSink.cpp
+++ b/lib/Transforms/Scalar/GVNSink.cpp
@@ -568,7 +568,7 @@ public:
ReversePostOrderTraversal<Function*> RPOT(&F);
for (auto *N : RPOT)
NumSunk += sinkBB(N);
-
+
return NumSunk > 0;
}
diff --git a/lib/Transforms/Scalar/GuardWidening.cpp b/lib/Transforms/Scalar/GuardWidening.cpp
index ad1598d7b8bf..055fcbc8436f 100644
--- a/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/lib/Transforms/Scalar/GuardWidening.cpp
@@ -43,6 +43,7 @@
#include <functional>
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/PostDominators.h"
@@ -61,6 +62,8 @@ using namespace llvm;
#define DEBUG_TYPE "guard-widening"
+STATISTIC(GuardsEliminated, "Number of eliminated guards");
+
namespace {
class GuardWideningImpl {
@@ -75,21 +78,33 @@ class GuardWideningImpl {
/// The set of guards whose conditions have been widened into dominating
/// guards.
- SmallVector<IntrinsicInst *, 16> EliminatedGuards;
+ SmallVector<Instruction *, 16> EliminatedGuards;
/// The set of guards which have been widened to include conditions to other
/// guards.
- DenseSet<IntrinsicInst *> WidenedGuards;
+ DenseSet<Instruction *> WidenedGuards;
/// Try to eliminate guard \p Guard by widening it into an earlier dominating
/// guard. \p DFSI is the DFS iterator on the dominator tree that is
/// currently visiting the block containing \p Guard, and \p GuardsPerBlock
/// maps BasicBlocks to the set of guards seen in that block.
bool eliminateGuardViaWidening(
- IntrinsicInst *Guard, const df_iterator<DomTreeNode *> &DFSI,
- const DenseMap<BasicBlock *, SmallVector<IntrinsicInst *, 8>> &
+ Instruction *Guard, const df_iterator<DomTreeNode *> &DFSI,
+ const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> &
GuardsPerBlock);
+ // Get the condition from \p GuardInst.
+ Value *getGuardCondition(Instruction *GuardInst);
+
+ // Set the condition for \p GuardInst.
+ void setGuardCondition(Instruction *GuardInst, Value *NewCond);
+
+ // Return true if \p I is a guard instruction.
+ bool isGuard(const Instruction *I);
+
+ // Erase \p GuardInst and update the eliminated-guard statistic.
+ void eliminateGuard(Instruction *GuardInst);
+
/// Used to keep track of which widening potential is more effective.
enum WideningScore {
/// Don't widen.
@@ -113,9 +128,9 @@ class GuardWideningImpl {
/// Compute the score for widening the condition in \p DominatedGuard
/// (contained in \p DominatedGuardLoop) into \p DominatingGuard (contained in
/// \p DominatingGuardLoop).
- WideningScore computeWideningScore(IntrinsicInst *DominatedGuard,
+ WideningScore computeWideningScore(Instruction *DominatedGuard,
Loop *DominatedGuardLoop,
- IntrinsicInst *DominatingGuard,
+ Instruction *DominatingGuard,
Loop *DominatingGuardLoop);
/// Helper to check if \p V can be hoisted to \p InsertPos.
@@ -206,10 +221,10 @@ class GuardWideningImpl {
/// Widen \p ToWiden to fail if \p NewCondition is false (in addition to
/// whatever it is already checking).
- void widenGuard(IntrinsicInst *ToWiden, Value *NewCondition) {
+ void widenGuard(Instruction *ToWiden, Value *NewCondition) {
Value *Result;
- widenCondCommon(ToWiden->getArgOperand(0), NewCondition, ToWiden, Result);
- ToWiden->setArgOperand(0, Result);
+ widenCondCommon(ToWiden->getOperand(0), NewCondition, ToWiden, Result);
+ setGuardCondition(ToWiden, Result);
}
public:
@@ -225,9 +240,7 @@ public:
}
bool GuardWideningImpl::run() {
- using namespace llvm::PatternMatch;
-
- DenseMap<BasicBlock *, SmallVector<IntrinsicInst *, 8>> GuardsInBlock;
+ DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> GuardsInBlock;
bool Changed = false;
for (auto DFI = df_begin(Root), DFE = df_end(Root);
@@ -239,8 +252,8 @@ bool GuardWideningImpl::run() {
auto &CurrentList = GuardsInBlock[BB];
for (auto &I : *BB)
- if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>()))
- CurrentList.push_back(cast<IntrinsicInst>(&I));
+ if (isGuard(&I))
+ CurrentList.push_back(cast<Instruction>(&I));
for (auto *II : CurrentList)
Changed |= eliminateGuardViaWidening(II, DFI, GuardsInBlock);
@@ -249,16 +262,16 @@ bool GuardWideningImpl::run() {
assert(EliminatedGuards.empty() || Changed);
for (auto *II : EliminatedGuards)
if (!WidenedGuards.count(II))
- II->eraseFromParent();
+ eliminateGuard(II);
return Changed;
}
bool GuardWideningImpl::eliminateGuardViaWidening(
- IntrinsicInst *GuardInst, const df_iterator<DomTreeNode *> &DFSI,
- const DenseMap<BasicBlock *, SmallVector<IntrinsicInst *, 8>> &
+ Instruction *GuardInst, const df_iterator<DomTreeNode *> &DFSI,
+ const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> &
GuardsInBlock) {
- IntrinsicInst *BestSoFar = nullptr;
+ Instruction *BestSoFar = nullptr;
auto BestScoreSoFar = WS_IllegalOrNegative;
auto *GuardInstLoop = LI.getLoopFor(GuardInst->getParent());
@@ -302,8 +315,8 @@ bool GuardWideningImpl::eliminateGuardViaWidening(
for (auto *Candidate : make_range(I, E)) {
auto Score =
computeWideningScore(GuardInst, GuardInstLoop, Candidate, CurLoop);
- LLVM_DEBUG(dbgs() << "Score between " << *GuardInst->getArgOperand(0)
- << " and " << *Candidate->getArgOperand(0) << " is "
+ LLVM_DEBUG(dbgs() << "Score between " << *getGuardCondition(GuardInst)
+ << " and " << *getGuardCondition(Candidate) << " is "
<< scoreTypeToString(Score) << "\n");
if (Score > BestScoreSoFar) {
BestScoreSoFar = Score;
@@ -323,16 +336,41 @@ bool GuardWideningImpl::eliminateGuardViaWidening(
LLVM_DEBUG(dbgs() << "Widening " << *GuardInst << " into " << *BestSoFar
<< " with score " << scoreTypeToString(BestScoreSoFar)
<< "\n");
- widenGuard(BestSoFar, GuardInst->getArgOperand(0));
- GuardInst->setArgOperand(0, ConstantInt::getTrue(GuardInst->getContext()));
+ widenGuard(BestSoFar, getGuardCondition(GuardInst));
+ setGuardCondition(GuardInst, ConstantInt::getTrue(GuardInst->getContext()));
EliminatedGuards.push_back(GuardInst);
WidenedGuards.insert(BestSoFar);
return true;
}
+Value *GuardWideningImpl::getGuardCondition(Instruction *GuardInst) {
+ IntrinsicInst *GI = cast<IntrinsicInst>(GuardInst);
+ assert(GI->getIntrinsicID() == Intrinsic::experimental_guard &&
+ "Bad guard intrinsic?");
+ return GI->getArgOperand(0);
+}
+
+void GuardWideningImpl::setGuardCondition(Instruction *GuardInst,
+ Value *NewCond) {
+ IntrinsicInst *GI = cast<IntrinsicInst>(GuardInst);
+ assert(GI->getIntrinsicID() == Intrinsic::experimental_guard &&
+ "Bad guard intrinsic?");
+ GI->setArgOperand(0, NewCond);
+}
+
+bool GuardWideningImpl::isGuard(const Instruction* I) {
+ using namespace llvm::PatternMatch;
+ return match(I, m_Intrinsic<Intrinsic::experimental_guard>());
+}
+
+void GuardWideningImpl::eliminateGuard(Instruction *GuardInst) {
+ GuardInst->eraseFromParent();
+ ++GuardsEliminated;
+}
+
GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
- IntrinsicInst *DominatedGuard, Loop *DominatedGuardLoop,
- IntrinsicInst *DominatingGuard, Loop *DominatingGuardLoop) {
+ Instruction *DominatedGuard, Loop *DominatedGuardLoop,
+ Instruction *DominatingGuard, Loop *DominatingGuardLoop) {
bool HoistingOutOfLoop = false;
if (DominatingGuardLoop != DominatedGuardLoop) {
@@ -345,7 +383,7 @@ GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
HoistingOutOfLoop = true;
}
- if (!isAvailableAt(DominatedGuard->getArgOperand(0), DominatingGuard))
+ if (!isAvailableAt(getGuardCondition(DominatedGuard), DominatingGuard))
return WS_IllegalOrNegative;
// If the guard was conditional executed, it may never be reached
@@ -355,9 +393,9 @@ GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
// case. At the moment, we really only consider the second in our heuristic
// here. TODO: evaluate cost model for spurious deopt
// NOTE: As written, this also lets us hoist right over another guard which
- // is essentially just another spelling for control flow.
- if (isWideningCondProfitable(DominatedGuard->getArgOperand(0),
- DominatingGuard->getArgOperand(0)))
+ // is essentially just another spelling for control flow.
+ if (isWideningCondProfitable(getGuardCondition(DominatedGuard),
+ getGuardCondition(DominatingGuard)))
return HoistingOutOfLoop ? WS_VeryPositive : WS_Positive;
if (HoistingOutOfLoop)
@@ -369,7 +407,7 @@ GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
auto MaybeHoistingOutOfIf = [&]() {
auto *DominatingBlock = DominatingGuard->getParent();
auto *DominatedBlock = DominatedGuard->getParent();
-
+
// Same Block?
if (DominatedBlock == DominatingBlock)
return false;
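
Editorial note: beyond the whitespace fixes, the GuardWidening change is a pure interface refactor. Every place that assumed a guard is an `llvm.experimental.guard` intrinsic call now goes through isGuard/getGuardCondition/setGuardCondition/eliminateGuard, so the widening logic itself only sees Instruction*. Presumably this is groundwork for other guard representations later; the patch itself does not say. A minimal standalone sketch of the pattern (hypothetical toy types, not LLVM code):

    // All representation-specific access goes through a narrow helper layer;
    // the core widening logic never names the concrete guard form, so only
    // the helpers change if a second representation is added.
    #include <cassert>
    #include <string>

    struct Inst {          // stand-in for llvm::Instruction
      bool IsGuard;
      std::string Cond;    // stand-in for the condition Value*
    };

    static bool isGuard(const Inst &I) { return I.IsGuard; }
    static std::string getGuardCondition(const Inst &G) { return G.Cond; }
    static void setGuardCondition(Inst &G, std::string C) { G.Cond = std::move(C); }

    // Core logic, written only against the helper interface.
    static void widenInto(Inst &Dominating, const Inst &Dominated) {
      assert(isGuard(Dominating) && isGuard(Dominated));
      setGuardCondition(Dominating, "(" + getGuardCondition(Dominating) +
                                        " && " + getGuardCondition(Dominated) + ")");
    }

    int main() {
      Inst A{true, "x < n"}, B{true, "y < n"};
      widenInto(A, B);
      assert(getGuardCondition(A) == "(x < n && y < n)");
    }
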
diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index e2f29705f2dd..c5ed6d5c1b87 100644
--- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -735,7 +735,7 @@ static bool isSafeDecreasingBound(const SCEV *Start,
assert(LatchBrExitIdx == 0 &&
"LatchBrExitIdx should be either 0 or 1");
-
+
const SCEV *StepPlusOne = SE.getAddExpr(Step, SE.getOne(Step->getType()));
unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth();
APInt Min = IsSigned ? APInt::getSignedMinValue(BitWidth) :
@@ -786,7 +786,7 @@ static bool isSafeIncreasingBound(const SCEV *Start,
const SCEV *StepMinusOne =
SE.getMinusSCEV(Step, SE.getOne(Step->getType()));
unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth();
- APInt Max = IsSigned ? APInt::getSignedMaxValue(BitWidth) :
+ APInt Max = IsSigned ? APInt::getSignedMaxValue(BitWidth) :
APInt::getMaxValue(BitWidth);
const SCEV *Limit = SE.getMinusSCEV(SE.getConstant(Max), StepMinusOne);
@@ -798,7 +798,7 @@ static bool isSafeIncreasingBound(const SCEV *Start,
static bool CannotBeMinInLoop(const SCEV *BoundSCEV, Loop *L,
ScalarEvolution &SE, bool Signed) {
unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth();
- APInt Min = Signed ? APInt::getSignedMinValue(BitWidth) :
+ APInt Min = Signed ? APInt::getSignedMinValue(BitWidth) :
APInt::getMinValue(BitWidth);
auto Predicate = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
return SE.isAvailableAtLoopEntry(BoundSCEV, L) &&
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index ff66632f0391..c4ea43a43249 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -455,7 +455,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
// Keep track of whether the prefix of instructions visited so far are such
// that the next instruction visited is guaranteed to execute if the loop
- // is entered.
+ // is entered.
bool IsMustExecute = CurLoop->getHeader() == BB;
for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
@@ -1186,9 +1186,9 @@ bool isKnownNonEscaping(Value *Object, const TargetLibraryInfo *TLI) {
if (isa<AllocaInst>(Object))
// Since the alloca goes out of scope, we know the caller can't retain a
// reference to it and be well defined. Thus, we don't need to check for
- // capture.
+ // capture.
return true;
-
+
// For all other objects we need to know that the caller can't possibly
// have gotten a reference to the object. There are two components of
// that:
@@ -1282,7 +1282,7 @@ bool llvm::promoteLoopAccessesToScalars(
// That said, we can't actually make the unwind edge explicit. Therefore,
// we have to prove that the store is dead along the unwind edge. We do
// this by proving that the caller can't have a reference to the object
- // after return and thus can't possibly load from the object.
+ // after return and thus can't possibly load from the object.
Value *Object = GetUnderlyingObject(SomePtr, MDL);
if (!isKnownNonEscaping(Object, TLI))
return false;
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index d8692198f7a3..653948717fb9 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1573,7 +1573,7 @@ void LoopIdiomRecognize::transformLoopToCountable(
InitXNext =
Builder.CreateLShr(InitX, ConstantInt::get(InitX->getType(), 1));
else
- llvm_unreachable("Unexpected opcode!");
+ llvm_unreachable("Unexpected opcode!");
} else
InitXNext = InitX;
CTLZ = createCTLZIntrinsic(Builder, InitXNext, DL, ZeroCheck);
diff --git a/lib/Transforms/Scalar/LoopPredication.cpp b/lib/Transforms/Scalar/LoopPredication.cpp
index 561ceea1d880..cbb6594cf8f4 100644
--- a/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/lib/Transforms/Scalar/LoopPredication.cpp
@@ -74,7 +74,7 @@
// }
//
// One solution for M is M = forall X . (G(X) && B(X)) => G(X + Step)
-//
+//
// Informal proof that the transformation above is correct:
//
// By the definition of guards we can rewrite the guard condition to:
@@ -83,7 +83,7 @@
// Let's prove that for each iteration of the loop:
// G(0) && M => G(I)
// And the condition above can be simplified to G(Start) && M.
-//
+//
// Induction base.
// G(0) && M => G(0)
//
@@ -379,7 +379,7 @@ Value *LoopPredication::expandCheck(SCEVExpander &Expander,
ICmpInst::Predicate Pred, const SCEV *LHS,
const SCEV *RHS, Instruction *InsertAt) {
// TODO: we can check isLoopEntryGuardedByCond before emitting the check
-
+
Type *Ty = LHS->getType();
assert(Ty == RHS->getType() && "expandCheck operands have different types?");
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 634215c9770f..e955821effa0 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -888,7 +888,7 @@ bool llvm::computeUnrollCount(
UP.Count = 0;
return false;
}
-
+
// Check if the runtime trip count is too small when profile is available.
if (L->getHeader()->getParent()->hasProfileData()) {
if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
@@ -897,7 +897,7 @@ bool llvm::computeUnrollCount(
else
UP.AllowExpensiveTripCount = true;
}
- }
+ }
// Reduce count based on the type of unrolling and the threshold values.
UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index b12586758925..6aad077ff19e 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -708,7 +708,7 @@ bool LoopUnswitch::processCurrentLoop() {
// Unswitch only those branches that are reachable.
if (isUnreachableDueToPreviousUnswitching(*I))
continue;
-
+
// If this isn't branching on an invariant condition, we can't unswitch
// it.
if (BI->isConditional()) {
@@ -754,7 +754,7 @@ bool LoopUnswitch::processCurrentLoop() {
// We are unswitching ~0 out.
UnswitchVal = AllOne;
} else {
- assert(OpChain == OC_OpChainNone &&
+ assert(OpChain == OC_OpChainNone &&
"Expect to unswitch on trivial chain");
// Do not process same value again and again.
// At this point we have some cases already unswitched and
@@ -1440,11 +1440,11 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
// This in-loop instruction has been simplified w.r.t. its context,
// i.e. LIC != Val, make sure we propagate its replacement value to
// all its users.
- //
+ //
// We cannot delete UI, the LIC user, yet, because that would invalidate
// the LIC->users() iterator. However, we can make this instruction
// dead by replacing all its users and push it onto the worklist so that
- // it can be properly deleted and its operands simplified.
+ // it can be properly deleted and its operands simplified.
UI->replaceAllUsesWith(Replacement);
}
}
@@ -1609,7 +1609,7 @@ Value *LoopUnswitch::SimplifyInstructionWithNotEqual(Instruction *Inst,
LLVMContext &Ctx = Inst->getContext();
if (CI->getPredicate() == CmpInst::ICMP_EQ)
return ConstantInt::getFalse(Ctx);
- else
+ else
return ConstantInt::getTrue(Ctx);
}
}
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index 2eb887c986be..3e47e9441d15 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -2007,7 +2007,7 @@ NewGVN::performSymbolicEvaluation(Value *V,
case Instruction::Load:
E = performSymbolicLoadEvaluation(I);
break;
- case Instruction::BitCast:
+ case Instruction::BitCast:
E = createExpression(I);
break;
case Instruction::ICmp:
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index c81ac70d99e6..1df0a9c49fb1 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -1179,7 +1179,7 @@ static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd,
// and both "Res" and "ConstOpnd" remain unchanged.
bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
APInt &ConstOpnd, Value *&Res) {
- // Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2
+ // Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2
// = ((x | c1) ^ c1) ^ (c1 ^ c2)
// = (x & ~c1) ^ (c1 ^ c2)
// It is useful only when c1 == c2.
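// Editorial aside, not part of this patch: a quick check of Xor-Rule 1 with
// concrete values. Let x = 0b0110 and c1 = c2 = 0b0011:
//   (x | c1) ^ c2         = 0b0111 ^ 0b0011          = 0b0100
//   (x & ~c1) ^ (c1 ^ c2) = (0b0110 & 0b1100) ^ 0b0  = 0b0100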
@@ -1202,12 +1202,12 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
RedoInsts.insert(T);
return true;
}
-
+
// Helper function of OptimizeXor(). It tries to simplify
// "Opnd1 ^ Opnd2 ^ ConstOpnd" into "R ^ C", where C would be 0, and R is a
-// symbolic value.
-//
-// If it was successful, true is returned, and the "R" and "C" is returned
+// symbolic value.
+//
+// If it was successful, true is returned, and "R" and "C" are returned
// via "Res" and "ConstOpnd", respectively (if the entire expression is
// evaluated to a constant, Res is set to NULL); otherwise, false is
// returned, and both "Res" and "ConstOpnd" remain unchanged.
@@ -1254,7 +1254,7 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
const APInt &C1 = Opnd1->getConstPart();
const APInt &C2 = Opnd2->getConstPart();
APInt C3 = C1 ^ C2;
-
+
// Do not increase code size
if (!C3.isNullValue() && !C3.isAllOnesValue()) {
int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
@@ -1290,7 +1290,7 @@ Value *ReassociatePass::OptimizeXor(Instruction *I,
SmallVectorImpl<ValueEntry> &Ops) {
if (Value *V = OptimizeAndOrXor(Instruction::Xor, Ops))
return V;
-
+
if (Ops.size() == 1)
return nullptr;
@@ -1365,7 +1365,7 @@ Value *ReassociatePass::OptimizeXor(Instruction *I,
}
// step 3.2: When previous and current operands share the same symbolic
- // value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd"
+ // value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd"
if (CombineXorOpnd(I, CurrOpnd, PrevOpnd, ConstOpnd, CV)) {
// Remove previous operand
PrevOpnd->Invalidate();
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 391e43f79121..0de2bc72b522 100644
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -401,7 +401,7 @@ namespace {
/// defining value. The 'base defining value' for 'Def' is the transitive
/// closure of this relation stopping at the first instruction which has no
/// immediate base defining value. The b.d.v. might itself be a base pointer,
-/// but it can also be an arbitrary derived pointer.
+/// but it can also be an arbitrary derived pointer.
struct BaseDefiningValueResult {
/// Contains the value which is the base defining value.
Value * const BDV;
@@ -427,13 +427,13 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I);
/// Return a base defining value for the 'Index' element of the given vector
/// instruction 'I'. If Index is null, returns a BDV for the entire vector
-/// 'I'. As an optimization, this method will try to determine when the
+/// 'I'. As an optimization, this method will try to determine when the
/// element is known to already be a base pointer. If this can be established,
/// the second value in the returned pair will be true. Note that either a
/// vector or a pointer typed value can be returned. For the former, the
/// vector returned is a BDV (and possibly a base) of the entire vector 'I'.
/// If the latter, the returned pointer is a BDV (or possibly a base) for the
-/// particular element in 'I'.
+/// particular element in 'I'.
static BaseDefiningValueResult
findBaseDefiningValueOfVector(Value *I) {
// Each case parallels findBaseDefiningValue below, see that code for
@@ -444,7 +444,7 @@ findBaseDefiningValueOfVector(Value *I) {
return BaseDefiningValueResult(I, true);
if (isa<Constant>(I))
- // Base of constant vector consists only of constant null pointers.
+ // The base of a constant vector consists only of constant null pointers.
// For reasoning see similar case inside 'findBaseDefiningValue' function.
return BaseDefiningValueResult(ConstantAggregateZero::get(I->getType()),
true);
@@ -508,11 +508,11 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
if (isa<Constant>(I)) {
// We assume that objects with a constant base (e.g. a global) can't move
// and don't need to be reported to the collector because they are always
- // live. Besides global references, all kinds of constants (e.g. undef,
+ // live. Besides global references, all kinds of constants (e.g. undef,
// constant expressions, null pointers) can be introduced by the inliner or
// the optimizer, especially on dynamically dead paths.
// Here we treat all of them as having a single null base. By doing this we
- // trying to avoid problems reporting various conflicts in a form of
+ // try to avoid problems reporting various conflicts in the form of
// "phi (const1, const2)" or "phi (const, regular gc ptr)".
// See constant.ll file for relevant test cases.
@@ -1285,14 +1285,14 @@ static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
return Index;
};
Module *M = StatepointToken->getModule();
-
+
// All gc_relocate are generated as i8 addrspace(1)* (or a vector type whose
// element type is i8 addrspace(1)*). We originally generated unique
// declarations for each pointer type, but this proved problematic because
// the intrinsic mangling code is incomplete and fragile. Since we're moving
// towards a single unified pointer type anyways, we can just cast everything
// to an i8* of the right address space. A bitcast is added later to convert
- // gc_relocate to the actual value's type.
+ // gc_relocate to the actual value's type.
auto getGCRelocateDecl = [&] (Type *Ty) {
assert(isHandledGCPointerType(Ty));
auto AS = Ty->getScalarType()->getPointerAddressSpace();
@@ -1413,7 +1413,7 @@ static StringRef getDeoptLowering(CallSite CS) {
}
return "live-through";
}
-
+
static void
makeStatepointExplicitImpl(const CallSite CS, /* to replace */
const SmallVectorImpl<Value *> &BasePtrs,
@@ -2570,7 +2570,7 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
}
// Before we start introducing relocations, we want to tweak the IR a bit to
- // avoid unfortunate code generation effects. The main example is that we
+ // avoid unfortunate code generation effects. The main example is that we
// want to try to make sure the comparison feeding a branch is after any
// safepoints. Otherwise, we end up with a comparison of pre-relocation
// values feeding a branch after relocation. This is semantically correct,
@@ -2593,7 +2593,7 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
TerminatorInst *TI = BB.getTerminator();
if (auto *Cond = getConditionInst(TI))
// TODO: Handle more than just ICmps here. We should be able to move
- // most instructions without side effects or memory access.
+ // most instructions without side effects or memory access.
if (isa<ICmpInst>(Cond) && Cond->hasOneUse()) {
MadeChange = true;
Cond->moveBefore(TI);
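// Editorial aside, not part of this patch: the tweak above in IR terms. Given
//   %c = icmp ult i8* %a, %b
//   ...                        ; a statepoint will be inserted here
//   br i1 %c, ...
// moving the icmp to just before the branch keeps the compare below the
// future safepoint, so it consumes relocated pointers instead of forcing
// pre-relocation values to stay live across the call.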
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 6c3f012c6280..de16b608f752 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -3730,7 +3730,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
PartPtrTy, BasePtr->getName() + "."),
getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
LI->getName());
- PLoad->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access);
+ PLoad->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access);
// Append this load onto the list of split loads so we can find it later
// to rewrite the stores.
diff --git a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 34510cb40732..5834b619046b 100644
--- a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -459,9 +459,11 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
*ParentBB, *OldPH, FullUnswitch);
// Now we need to update the dominator tree.
- DT.insertEdge(OldPH, UnswitchedBB);
+ SmallVector<DominatorTree::UpdateType, 2> DTUpdates;
+ DTUpdates.push_back({DT.Insert, OldPH, UnswitchedBB});
if (FullUnswitch)
- DT.deleteEdge(ParentBB, UnswitchedBB);
+ DTUpdates.push_back({DT.Delete, ParentBB, LoopExitBB});
+ DT.applyUpdates(DTUpdates);
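// Editorial aside, not part of this patch: applyUpdates() treats the edge
// insertion and deletion as one batch, so the incremental DomTree updater
// never observes the half-updated CFG that separate insertEdge()/deleteEdge()
// calls would expose. Schematically, after a full unswitch:
//   Insert: OldPH -> UnswitchedBB   (the new preheader branch)
//   Delete: ParentBB -> LoopExitBB  (the in-loop branch no longer exits here)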
// The constant we can replace all of our invariants with inside the loop
// body. If any of the invariants have a value other than this the loop won't
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index 5f5c4150d3bb..d0396e6ce47d 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -911,7 +911,7 @@ static void appendTypeSuffix(Value *Op, StringRef &Name,
NameBuffer += 'l';
Name = NameBuffer;
- }
+ }
}
Value *llvm::emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
diff --git a/lib/Transforms/Utils/CallPromotionUtils.cpp b/lib/Transforms/Utils/CallPromotionUtils.cpp
index 4d9c22e57a68..6d18d0614611 100644
--- a/lib/Transforms/Utils/CallPromotionUtils.cpp
+++ b/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -392,7 +392,7 @@ Instruction *llvm::promoteCall(CallSite CS, Function *Callee,
auto CalleeType = Callee->getFunctionType();
auto CalleeParamNum = CalleeType->getNumParams();
for (unsigned ArgNo = 0; ArgNo < CalleeParamNum; ++ArgNo) {
- auto *Arg = CS.getArgument(ArgNo);
+ auto *Arg = CS.getArgument(ArgNo);
Type *FormalTy = CalleeType->getParamType(ArgNo);
Type *ActualTy = Arg->getType();
if (FormalTy != ActualTy) {
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 61448e9acb57..807360340055 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -290,7 +290,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
// Have we already cloned this block?
if (BBEntry) return;
-
+
// Nope, clone it now.
BasicBlock *NewBB;
BBEntry = NewBB = BasicBlock::Create(BB->getContext());
@@ -363,7 +363,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
hasDynamicAllocas = true;
}
}
-
+
// Finally, clone over the terminator.
const TerminatorInst *OldTI = BB->getTerminator();
bool TerminatorDone = false;
@@ -400,7 +400,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
TerminatorDone = true;
}
}
-
+
if (!TerminatorDone) {
Instruction *NewInst = OldTI->clone();
if (OldTI->hasName())
@@ -418,11 +418,11 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
for (const BasicBlock *Succ : TI->successors())
ToClone.push_back(Succ);
}
-
+
if (CodeInfo) {
CodeInfo->ContainsCalls |= hasCalls;
CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas;
- CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&
+ CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&
BB != &BB->getParent()->front();
}
}
@@ -468,7 +468,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
CloneWorklist.pop_back();
PFC.CloneBlock(BB, BB->begin(), CloneWorklist);
}
-
+
// Loop over all of the basic blocks in the old function. If the block was
// reachable, we have cloned it and the old block is now in the value map:
// insert it into the new function in the right order. If not, ignore it.
@@ -500,7 +500,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
TypeMapper, Materializer);
}
-
+
// Defer PHI resolution until the rest of the function is resolved; PHI resolution
// requires the CFG to be up-to-date.
for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) {
@@ -519,7 +519,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
Value *V = VMap.lookup(PN->getIncomingBlock(pred));
if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) {
Value *InVal = MapValue(PN->getIncomingValue(pred),
- VMap,
+ VMap,
ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
assert(InVal && "Unknown input value?");
PN->setIncomingValue(pred, InVal);
@@ -529,9 +529,9 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
--pred; // Revisit the next entry.
--e;
}
- }
+ }
}
-
+
// The loop above has removed PHI entries for those blocks that are dead
// and has updated others. However, if a block is live (i.e. copied over)
// but its terminator has been changed to not go to this block, then our
@@ -546,11 +546,11 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB);
PI != E; ++PI)
--PredCount[*PI];
-
+
// Figure out how many entries to remove from each PHI.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
++PredCount[PN->getIncomingBlock(i)];
-
+
// At this point, the excess predecessor entries are positive in the
// map. Loop over all of the PHIs and remove excess predecessor
// entries.
@@ -563,7 +563,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
}
}
}
-
+
// If the loops above have made these phi nodes have 0 or 1 operand,
// replace them with undef or the input value. We must do this for
// correctness, because 0-operand phis are not valid.
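// Editorial aside, not part of this patch: schematic shapes of the cleanup
// described above:
//   %p = phi i32 [ %v, %pred ]   ; one operand   -> replace uses with %v
//   %p = phi i32                 ; zero operands -> replace uses with undef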
@@ -655,7 +655,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
if (!BI || BI->isConditional()) { ++I; continue; }
-
+
BasicBlock *Dest = BI->getSuccessor(0);
if (!Dest->getSinglePredecessor()) {
++I; continue;
@@ -668,16 +668,16 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
// We know all single-entry PHI nodes in the inlined function have been
// removed, so we just need to splice the blocks.
BI->eraseFromParent();
-
+
// Make all PHI nodes that referred to Dest now refer to I as their source.
Dest->replaceAllUsesWith(&*I);
// Move all the instructions in the succ to the pred.
I->getInstList().splice(I->end(), Dest->getInstList());
-
+
// Remove the dest block.
Dest->eraseFromParent();
-
+
// Do not increment I, iteratively merge all things this block branches to.
}
@@ -703,7 +703,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
ValueToValueMapTy &VMap,
bool ModuleLevelChanges,
SmallVectorImpl<ReturnInst*> &Returns,
- const char *NameSuffix,
+ const char *NameSuffix,
ClonedCodeInfo *CodeInfo,
Instruction *TheCall) {
CloneAndPruneIntoFromInst(NewFunc, OldFunc, &OldFunc->front().front(), VMap,
@@ -730,7 +730,7 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
const Twine &NameSuffix, LoopInfo *LI,
DominatorTree *DT,
SmallVectorImpl<BasicBlock *> &Blocks) {
- assert(OrigLoop->getSubLoops().empty() &&
+ assert(OrigLoop->getSubLoops().empty() &&
"Loop to be cloned cannot have inner loop");
Function *F = OrigLoop->getHeader()->getParent();
Loop *ParentLoop = OrigLoop->getParentLoop();
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index 35c7511a24b9..c7d68bab8170 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -61,7 +61,7 @@ std::unique_ptr<Module> llvm::CloneModule(
//
for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
I != E; ++I) {
- GlobalVariable *GV = new GlobalVariable(*New,
+ GlobalVariable *GV = new GlobalVariable(*New,
I->getValueType(),
I->isConstant(), I->getLinkage(),
(Constant*) nullptr, I->getName(),
@@ -110,7 +110,7 @@ std::unique_ptr<Module> llvm::CloneModule(
GA->copyAttributesFrom(&*I);
VMap[&*I] = GA;
}
-
+
// Now that all of the things that global variable initializer can refer to
// have been created, loop through and copy the global variable referrers
// over... We also set the attributes on the global now.
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index f31dab9f96af..cb349e34606c 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -1020,7 +1020,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
} else {
// Otherwise we must have code extracted an unwind or something, just
// return whatever we want.
- ReturnInst::Create(Context,
+ ReturnInst::Create(Context,
Constant::getNullValue(OldFnRetTy), TheSwitch);
}
@@ -1158,13 +1158,13 @@ Function *CodeExtractor::extractCodeRegion() {
splitReturnBlocks();
// This takes the place of the original loop
- BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(),
+ BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(),
"codeRepl", oldFunction,
header);
// The new function needs a root node because other nodes can branch to the
// head of the region, but the entry node of a function cannot have preds.
- BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(),
+ BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(),
"newFuncRoot");
auto *BranchI = BranchInst::Create(header);
// If the original function has debug info, we have to add a debug location
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 0315aac1cf84..ddc6e07e2f59 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -1199,7 +1199,7 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
// Only copy the edge if the call was inlined!
if (VMI == VMap.end() || VMI->second == nullptr)
continue;
-
+
// If the call was inlined, but then constant folded, there is no edge to
// add. Check for this case.
Instruction *NewCall = dyn_cast<Instruction>(VMI->second);
@@ -1211,7 +1211,7 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
CallSite CS = CallSite(NewCall);
if (CS && CS.getCalledFunction() && CS.getCalledFunction()->isIntrinsic())
continue;
-
+
// Remember that this call site got inlined for the client of
// InlineFunction.
IFI.InlinedCalls.push_back(NewCall);
@@ -1231,7 +1231,7 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
CallerNode->addCalledFunction(CallSite(NewCall), I->second);
}
-
+
// Update the call graph by deleting the edge from Callee to Caller. We must
// do this after the loop above in case Caller and Callee are the same.
CallerNode->removeCallEdgeFor(CS);
@@ -1380,7 +1380,7 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
if (CalleeHasDebugInfo)
continue;
-
+
// If the inlined instruction has no line number, make it look as if it
// originates from the call location. This is important for
// ((__always_inline__, __nodebug__)) functions which must use caller
@@ -1777,7 +1777,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
E = FirstNewBlock->end(); I != E; ) {
AllocaInst *AI = dyn_cast<AllocaInst>(I++);
if (!AI) continue;
-
+
// If the alloca is now dead, remove it. This often occurs due to code
// specialization.
if (AI->use_empty()) {
@@ -1787,10 +1787,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
if (!allocaWouldBeStaticInEntry(AI))
continue;
-
+
// Keep track of the static allocas that we inline into the caller.
IFI.StaticAllocas.push_back(AI);
-
+
// Scan for the block of allocas that we can move over, and move them
// all at once.
while (isa<AllocaInst>(I) &&
diff --git a/lib/Transforms/Utils/IntegerDivision.cpp b/lib/Transforms/Utils/IntegerDivision.cpp
index 3fbb3487884b..4a359b99bebd 100644
--- a/lib/Transforms/Utils/IntegerDivision.cpp
+++ b/lib/Transforms/Utils/IntegerDivision.cpp
@@ -476,10 +476,10 @@ bool llvm::expandDivision(BinaryOperator *Div) {
return true;
}
-/// Generate code to compute the remainder of two integers of bitwidth up to
+/// Generate code to compute the remainder of two integers of bitwidth up to
/// 32 bits. Uses the above routines and extends the inputs/truncates the
/// outputs to operate in 32 bits; that is, these routines are good for targets
-/// that have no or very little suppport for smaller than 32 bit integer
+/// that have no or very little support for smaller-than-32-bit integer
/// arithmetic.
///
/// Replace Rem with emulation code.
@@ -527,7 +527,7 @@ bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) {
return expandRemainder(cast<BinaryOperator>(ExtRem));
}
-/// Generate code to compute the remainder of two integers of bitwidth up to
+/// Generate code to compute the remainder of two integers of bitwidth up to
/// 64 bits. Uses the above routines and extends the inputs/truncates the
/// outputs to operate in 64 bits.
///
@@ -613,7 +613,7 @@ bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) {
} else {
ExtDividend = Builder.CreateZExt(Div->getOperand(0), Int32Ty);
ExtDivisor = Builder.CreateZExt(Div->getOperand(1), Int32Ty);
- ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor);
+ ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor);
}
Trunc = Builder.CreateTrunc(ExtDiv, DivTy);
@@ -662,7 +662,7 @@ bool llvm::expandDivisionUpTo64Bits(BinaryOperator *Div) {
} else {
ExtDividend = Builder.CreateZExt(Div->getOperand(0), Int64Ty);
ExtDivisor = Builder.CreateZExt(Div->getOperand(1), Int64Ty);
- ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor);
+ ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor);
}
Trunc = Builder.CreateTrunc(ExtDiv, DivTy);
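// Editorial aside, not part of this patch: the widen/operate/truncate pattern
// used by both expansions above, as a free-standing sketch (expandNarrowUDiv
// is a hypothetical name; the real code also handles the signed case):
//
//   Value *expandNarrowUDiv(BinaryOperator *Div, IRBuilder<> &B) {
//     Type *Int32Ty = B.getInt32Ty();
//     Value *Dividend = B.CreateZExt(Div->getOperand(0), Int32Ty);
//     Value *Divisor  = B.CreateZExt(Div->getOperand(1), Int32Ty);
//     Value *Quotient = B.CreateUDiv(Dividend, Divisor);
//     return B.CreateTrunc(Quotient, Div->getType());
//   }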
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index 956d0387c7a8..a1f8e7484bcf 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -10,7 +10,7 @@
// This pass transforms loops by placing phi nodes at the end of the loops for
// all values that are live across the loop boundary. For example, it turns
// the left into the right code:
-//
+//
// for (...) for (...)
// if (c) if (c)
// X1 = ... X1 = ...
@@ -21,8 +21,8 @@
// ... = X4 + 4
//
// This is still valid LLVM; the extra phi nodes are purely redundant, and will
-// be trivially eliminated by InstCombine. The major benefit of this
-// transformation is that it makes many other loop optimizations, such as
+// be trivially eliminated by InstCombine. The major benefit of this
+// transformation is that it makes many other loop optimizations, such as
// LoopUnswitching, simpler.
//
//===----------------------------------------------------------------------===//
@@ -144,7 +144,8 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
PHINode *PN = PHINode::Create(I->getType(), PredCache.size(ExitBB),
I->getName() + ".lcssa", &ExitBB->front());
-
+ // Get the debug location from the original instruction.
+ PN->setDebugLoc(I->getDebugLoc());
// Add inputs from inside the loop for this PHI.
for (BasicBlock *Pred : PredCache.get(ExitBB)) {
PN->addIncoming(I, Pred);
diff --git a/lib/Transforms/Utils/LoopUnrollPeel.cpp b/lib/Transforms/Utils/LoopUnrollPeel.cpp
index 13794c53f24b..78afe748e596 100644
--- a/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -344,7 +344,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
/// Update the branch weights of the latch of a peeled-off loop
/// iteration.
/// This sets the branch weights for the latch of the recently peeled off loop
-/// iteration correctly.
+/// iteration correctly.
/// Our goal is to make sure that:
/// a) The total weight of all the copies of the loop body is preserved.
/// b) The total weight of the loop exit is preserved.
@@ -544,7 +544,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
//
// Each following iteration will split the current bottom anchor in two,
// and put the new copy of the loop body between these two blocks. That is,
- // after peeling another iteration from the example above, we'll split
+ // after peeling another iteration from the example above, we'll split
// InsertBot, and get:
//
// InsertTop:
diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp
index 323f2552ca80..88d595ee02ab 100644
--- a/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/lib/Transforms/Utils/MetaRenamer.cpp
@@ -68,7 +68,7 @@ namespace {
PRNG prng;
};
-
+
struct MetaRenamer : public ModulePass {
// Pass identification, replacement for typeid
static char ID;
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index ca184ed7c4e3..4a1fd8d571aa 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -201,13 +201,13 @@ void SSAUpdater::RewriteUse(Use &U) {
void SSAUpdater::RewriteUseAfterInsertions(Use &U) {
Instruction *User = cast<Instruction>(U.getUser());
-
+
Value *V;
if (PHINode *UserPN = dyn_cast<PHINode>(User))
V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U));
else
V = GetValueAtEndOfBlock(User->getParent());
-
+
U.set(V);
}
@@ -235,7 +235,7 @@ public:
PHI_iterator(PHINode *P, bool) // end iterator
: PHI(P), idx(PHI->getNumIncomingValues()) {}
- PHI_iterator &operator++() { ++idx; return *this; }
+ PHI_iterator &operator++() { ++idx; return *this; }
bool operator==(const PHI_iterator& x) const { return idx == x.idx; }
bool operator!=(const PHI_iterator& x) const { return !operator==(x); }
@@ -333,7 +333,7 @@ LoadAndStorePromoter::
LoadAndStorePromoter(ArrayRef<const Instruction *> Insts,
SSAUpdater &S, StringRef BaseName) : SSA(S) {
if (Insts.empty()) return;
-
+
const Value *SomeVal;
if (const LoadInst *LI = dyn_cast<LoadInst>(Insts[0]))
SomeVal = LI;
@@ -354,7 +354,7 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {
for (Instruction *User : Insts)
UsesByBlock[User->getParent()].push_back(User);
-
+
// Okay, now we can iterate over all the blocks in the function with uses,
// processing them. Keep track of which loads are loading a live-in value.
// Walk the uses in the use-list order to be deterministic.
@@ -364,10 +364,10 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {
for (Instruction *User : Insts) {
BasicBlock *BB = User->getParent();
TinyPtrVector<Instruction *> &BlockUses = UsesByBlock[BB];
-
+
// If this block has already been processed, ignore this repeat use.
if (BlockUses.empty()) continue;
-
+
// Okay, this is the first use in the block. If this block just has a
// single user in it, we can rewrite it trivially.
if (BlockUses.size() == 1) {
@@ -375,13 +375,13 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {
if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
updateDebugInfo(SI);
SSA.AddAvailableValue(BB, SI->getOperand(0));
- } else
+ } else
// Otherwise it is a load, queue it to rewrite as a live-in load.
LiveInLoads.push_back(cast<LoadInst>(User));
BlockUses.clear();
continue;
}
-
+
// Otherwise, check to see if this block is all loads.
bool HasStore = false;
for (Instruction *I : BlockUses) {
@@ -390,7 +390,7 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {
break;
}
}
-
+
// If so, we can queue them all as live in loads. We don't have an
// efficient way to tell which one is first in the block and don't want to
// scan large blocks, so just add all loads as live ins.
@@ -400,7 +400,7 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {
BlockUses.clear();
continue;
}
-
+
// Otherwise, we have mixed loads and stores (or just a bunch of stores).
// Since SSAUpdater is purely for cross-block values, we need to determine
// the order of these instructions in the block. If the first use in the
@@ -411,7 +411,7 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {
if (LoadInst *L = dyn_cast<LoadInst>(&I)) {
// If this is a load from an unrelated pointer, ignore it.
if (!isInstInList(L, Insts)) continue;
-
+
// If we haven't seen a store yet, this is a live in use, otherwise
// use the stored value.
if (StoredValue) {
@@ -433,13 +433,13 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {
StoredValue = SI->getOperand(0);
}
}
-
+
// The last stored value that happened is the live-out for the block.
assert(StoredValue && "Already checked that there is a store in block");
SSA.AddAvailableValue(BB, StoredValue);
BlockUses.clear();
}
-
+
// Okay, now we rewrite all loads that use live-in values in the loop,
// inserting PHI nodes as necessary.
for (LoadInst *ALoad : LiveInLoads) {
@@ -451,10 +451,10 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {
ALoad->replaceAllUsesWith(NewVal);
ReplacedLoads[ALoad] = NewVal;
}
-
+
// Allow the client to do stuff before we start nuking things.
doExtraRewritesBeforeFinalDeletion();
-
+
// Now that everything is rewritten, delete the old instructions from the
// function. They should all be dead now.
for (Instruction *User : Insts) {
@@ -465,7 +465,7 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {
if (!User->use_empty()) {
Value *NewVal = ReplacedLoads[User];
assert(NewVal && "not a replaced load?");
-
+
// Propagate down to the ultimate replacee. The intermediate loads
// could theoretically already have been deleted, so we don't want to
// dereference the Value*'s.
@@ -474,11 +474,11 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {
NewVal = RLI->second;
RLI = ReplacedLoads.find(NewVal);
}
-
+
replaceLoadWithValue(cast<LoadInst>(User), NewVal);
User->replaceAllUsesWith(NewVal);
}
-
+
instructionDeleted(User);
User->eraseFromParent();
}
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index e381fbc34ab4..65b23f4d94a1 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -196,7 +196,7 @@ bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp,
SmallDenseMap<const SCEV*, Value*> CheapExpansions;
CheapExpansions[S] = ICmp->getOperand(IVOperIdx);
CheapExpansions[X] = ICmp->getOperand(1 - IVOperIdx);
-
+
// TODO: Support multiple entry loops? (We currently bail out of these in
// the IndVarSimplify pass)
if (auto *BB = L->getLoopPredecessor()) {
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 8c48597fc2e4..15e035874002 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -890,7 +890,7 @@ static Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B,
return nullptr;
// Replace the malloc with a calloc. We need the data layout to know what the
- // actual size of a 'size_t' parameter is.
+ // actual size of a 'size_t' parameter is.
B.SetInsertPoint(Malloc->getParent(), ++Malloc->getIterator());
const DataLayout &DL = Malloc->getModule()->getDataLayout();
IntegerType *SizeType = DL.getIntPtrType(B.GetInsertBlock()->getContext());
@@ -970,7 +970,7 @@ static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
Value *V = valueHasFloatPrecision(CI->getArgOperand(0));
if (V == nullptr)
return nullptr;
-
+
// If call isn't an intrinsic, check that it isn't within a function with the
// same name as the float version of this call.
//
@@ -1126,165 +1126,164 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) {
if (!Pow->isFast())
return nullptr;
- const APFloat *Arg1C;
- if (!match(Pow->getArgOperand(1), m_APFloat(Arg1C)))
- return nullptr;
- if (!Arg1C->isExactlyValue(0.5) && !Arg1C->isExactlyValue(-0.5))
+ Value *Sqrt, *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
+ Type *Ty = Pow->getType();
+
+ const APFloat *ExpoF;
+ if (!match(Expo, m_APFloat(ExpoF)) ||
+ (!ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)))
return nullptr;
- // Fast-math flags from the pow() are propagated to all replacement ops.
- IRBuilder<>::FastMathFlagGuard Guard(B);
- B.setFastMathFlags(Pow->getFastMathFlags());
- Type *Ty = Pow->getType();
- Value *Sqrt;
+ // If errno is never set, then use the intrinsic for sqrt().
if (Pow->hasFnAttr(Attribute::ReadNone)) {
- // We know that errno is never set, so replace with an intrinsic:
- // pow(x, 0.5) --> llvm.sqrt(x)
- // llvm.pow(x, 0.5) --> llvm.sqrt(x)
- auto *F = Intrinsic::getDeclaration(Pow->getModule(), Intrinsic::sqrt, Ty);
- Sqrt = B.CreateCall(F, Pow->getArgOperand(0));
- } else if (hasUnaryFloatFn(TLI, Ty, LibFunc_sqrt, LibFunc_sqrtf,
- LibFunc_sqrtl)) {
- // Errno could be set, so we must use a sqrt libcall.
- // TODO: We also should check that the target can in fact lower the sqrt
- // libcall. We currently have no way to ask this question, so we ask
- // whether the target has a sqrt libcall which is not exactly the same.
- Sqrt = emitUnaryFloatFnCall(Pow->getArgOperand(0),
- TLI->getName(LibFunc_sqrt), B,
+ Function *SqrtFn = Intrinsic::getDeclaration(Pow->getModule(),
+ Intrinsic::sqrt, Ty);
+ Sqrt = B.CreateCall(SqrtFn, Base);
+ }
+ // Otherwise, use the libcall for sqrt().
+ else if (hasUnaryFloatFn(TLI, Ty, LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl))
+ // TODO: We also should check that the target can in fact lower the sqrt()
+ // libcall. We currently have no way to ask this question, so we ask if
+ // the target has a sqrt() libcall, which is not exactly the same.
+ Sqrt = emitUnaryFloatFnCall(Base, TLI->getName(LibFunc_sqrt), B,
Pow->getCalledFunction()->getAttributes());
- } else {
- // We can't replace with an intrinsic or a libcall.
+ else
return nullptr;
- }
- // If this is pow(x, -0.5), get the reciprocal.
- if (Arg1C->isExactlyValue(-0.5))
- Sqrt = B.CreateFDiv(ConstantFP::get(Ty, 1.0), Sqrt);
+ // If the exponent is negative, then get the reciprocal.
+ if (ExpoF->isNegative())
+ Sqrt = B.CreateFDiv(ConstantFP::get(Ty, 1.0), Sqrt, "reciprocal");
return Sqrt;
}
-Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- Value *Ret = nullptr;
+Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
+ Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
+ Function *Callee = Pow->getCalledFunction();
+ AttributeList Attrs = Callee->getAttributes();
StringRef Name = Callee->getName();
- if (UnsafeFPShrink && Name == "pow" && hasFloatVersion(Name))
- Ret = optimizeUnaryDoubleFP(CI, B, true);
+ Module *Module = Pow->getModule();
+ Type *Ty = Pow->getType();
+ Value *Shrunk = nullptr;
+ bool Ignored;
+
+ if (UnsafeFPShrink &&
+ Name == TLI->getName(LibFunc_pow) && hasFloatVersion(Name))
+ Shrunk = optimizeUnaryDoubleFP(Pow, B, true);
+
+ // Propagate the math semantics from the call to any created instructions.
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ B.setFastMathFlags(Pow->getFastMathFlags());
- Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1);
+ // Evaluate special cases related to the base.
// pow(1.0, x) -> 1.0
- if (match(Op1, m_SpecificFP(1.0)))
- return Op1;
- // pow(2.0, x) -> llvm.exp2(x)
- if (match(Op1, m_SpecificFP(2.0))) {
- Value *Exp2 = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::exp2,
- CI->getType());
- return B.CreateCall(Exp2, Op2, "exp2");
- }
-
- // There's no llvm.exp10 intrinsic yet, but, maybe, some day there will
- // be one.
- if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
- // pow(10.0, x) -> exp10(x)
- if (Op1C->isExactlyValue(10.0) &&
- hasUnaryFloatFn(TLI, Op1->getType(), LibFunc_exp10, LibFunc_exp10f,
- LibFunc_exp10l))
- return emitUnaryFloatFnCall(Op2, TLI->getName(LibFunc_exp10), B,
- Callee->getAttributes());
+ if (match(Base, m_SpecificFP(1.0)))
+ return Base;
+
+ // pow(2.0, x) -> exp2(x)
+ if (match(Base, m_SpecificFP(2.0))) {
+ Value *Exp2 = Intrinsic::getDeclaration(Module, Intrinsic::exp2, Ty);
+ return B.CreateCall(Exp2, Expo, "exp2");
}
+ // pow(10.0, x) -> exp10(x)
+ if (ConstantFP *BaseC = dyn_cast<ConstantFP>(Base))
+ // There's no exp10 intrinsic yet, but perhaps some day there will be one.
+ if (BaseC->isExactlyValue(10.0) &&
+ hasUnaryFloatFn(TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l))
+ return emitUnaryFloatFnCall(Expo, TLI->getName(LibFunc_exp10), B, Attrs);
+
// pow(exp(x), y) -> exp(x * y)
// pow(exp2(x), y) -> exp2(x * y)
// We enable these only with fast-math. Besides rounding differences, the
// transformation changes overflow and underflow behavior quite dramatically.
// Example: x = 1000, y = 0.001.
// pow(exp(x), y) = pow(inf, 0.001) = inf, whereas exp(x*y) = exp(1).
- auto *OpC = dyn_cast<CallInst>(Op1);
- if (OpC && OpC->isFast() && CI->isFast()) {
- LibFunc Func;
- Function *OpCCallee = OpC->getCalledFunction();
- if (OpCCallee && TLI->getLibFunc(OpCCallee->getName(), Func) &&
- TLI->has(Func) && (Func == LibFunc_exp || Func == LibFunc_exp2)) {
+ auto *BaseFn = dyn_cast<CallInst>(Base);
+ if (BaseFn && BaseFn->isFast() && Pow->isFast()) {
+ LibFunc LibFn;
+ Function *CalleeFn = BaseFn->getCalledFunction();
+ if (CalleeFn && TLI->getLibFunc(CalleeFn->getName(), LibFn) &&
+ (LibFn == LibFunc_exp || LibFn == LibFunc_exp2) && TLI->has(LibFn)) {
IRBuilder<>::FastMathFlagGuard Guard(B);
- B.setFastMathFlags(CI->getFastMathFlags());
- Value *FMul = B.CreateFMul(OpC->getArgOperand(0), Op2, "mul");
- return emitUnaryFloatFnCall(FMul, OpCCallee->getName(), B,
- OpCCallee->getAttributes());
+ B.setFastMathFlags(Pow->getFastMathFlags());
+
+ Value *FMul = B.CreateFMul(BaseFn->getArgOperand(0), Expo, "mul");
+ return emitUnaryFloatFnCall(FMul, CalleeFn->getName(), B,
+ CalleeFn->getAttributes());
}
}
- if (Value *Sqrt = replacePowWithSqrt(CI, B))
+ // Evaluate special cases related to the exponent.
+
+ if (Value *Sqrt = replacePowWithSqrt(Pow, B))
return Sqrt;
- ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2);
- if (!Op2C)
- return Ret;
+ ConstantFP *ExpoC = dyn_cast<ConstantFP>(Expo);
+ if (!ExpoC)
+ return Shrunk;
- if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0
- return ConstantFP::get(CI->getType(), 1.0);
+ // pow(x, -1.0) -> 1.0 / x
+ if (ExpoC->isExactlyValue(-1.0))
+ return B.CreateFDiv(ConstantFP::get(Ty, 1.0), Base, "reciprocal");
- // FIXME: Correct the transforms and pull this into replacePowWithSqrt().
- if (Op2C->isExactlyValue(0.5) &&
- hasUnaryFloatFn(TLI, Op2->getType(), LibFunc_sqrt, LibFunc_sqrtf,
- LibFunc_sqrtl)) {
- // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))).
- // This is faster than calling pow, and still handles negative zero
- // and negative infinity correctly.
- // TODO: In finite-only mode, this could be just fabs(sqrt(x)).
- Value *Inf = ConstantFP::getInfinity(CI->getType());
- Value *NegInf = ConstantFP::getInfinity(CI->getType(), true);
+ // pow(x, 0.0) -> 1.0
+ if (ExpoC->getValueAPF().isZero())
+ return ConstantFP::get(Ty, 1.0);
- // TODO: As above, we should lower to the sqrt intrinsic if the pow is an
- // intrinsic, to match errno semantics.
- Value *Sqrt = emitUnaryFloatFnCall(Op1, "sqrt", B, Callee->getAttributes());
+ // pow(x, 1.0) -> x
+ if (ExpoC->isExactlyValue(1.0))
+ return Base;
- Module *M = Callee->getParent();
- Function *FabsF = Intrinsic::getDeclaration(M, Intrinsic::fabs,
- CI->getType());
- Value *FAbs = B.CreateCall(FabsF, Sqrt);
+ // pow(x, 2.0) -> x * x
+ if (ExpoC->isExactlyValue(2.0))
+ return B.CreateFMul(Base, Base, "square");
- Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf);
- Value *Sel = B.CreateSelect(FCmp, Inf, FAbs);
- return Sel;
+ // FIXME: Correct the transforms and pull this into replacePowWithSqrt().
+ if (ExpoC->isExactlyValue(0.5) &&
+ hasUnaryFloatFn(TLI, Ty, LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl)) {
+ // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))).
+ // This is faster than calling pow(), and still handles -0.0 and
+ // negative infinity correctly.
+ // TODO: In finite-only mode, this could be just fabs(sqrt(x)).
+ Value *PosInf = ConstantFP::getInfinity(Ty);
+ Value *NegInf = ConstantFP::getInfinity(Ty, true);
+
+ // TODO: As above, we should lower to the sqrt() intrinsic if the pow() is
+ // an intrinsic, to match errno semantics.
+ Value *Sqrt = emitUnaryFloatFnCall(Base, TLI->getName(LibFunc_sqrt),
+ B, Attrs);
+ Function *FAbsFn = Intrinsic::getDeclaration(Module, Intrinsic::fabs, Ty);
+ Value *FAbs = B.CreateCall(FAbsFn, Sqrt, "abs");
+ Value *FCmp = B.CreateFCmpOEQ(Base, NegInf, "isinf");
+ Sqrt = B.CreateSelect(FCmp, PosInf, FAbs);
+ return Sqrt;
}
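// Editorial aside, not part of this patch: why the select is needed. Under
// IEEE-754, pow(-0.0, 0.5) is +0.0 but sqrt(-0.0) is -0.0, and
// pow(-inf, 0.5) is +inf but sqrt(-inf) is NaN; fabs() repairs the first
// case and the explicit compare against -infinity repairs the second.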
- // Propagate fast-math-flags from the call to any created instructions.
- IRBuilder<>::FastMathFlagGuard Guard(B);
- B.setFastMathFlags(CI->getFastMathFlags());
- // pow(x, 1.0) --> x
- if (Op2C->isExactlyValue(1.0))
- return Op1;
- // pow(x, 2.0) --> x * x
- if (Op2C->isExactlyValue(2.0))
- return B.CreateFMul(Op1, Op1, "pow2");
- // pow(x, -1.0) --> 1.0 / x
- if (Op2C->isExactlyValue(-1.0))
- return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip");
-
- // In -ffast-math, generate repeated fmul instead of generating pow(x, n).
- if (CI->isFast()) {
- APFloat V = abs(Op2C->getValueAPF());
- // We limit to a max of 7 fmul(s). Thus max exponent is 32.
+ // pow(x, n) -> x * x * x * ....
+ if (Pow->isFast()) {
+ APFloat ExpoA = abs(ExpoC->getValueAPF());
+ // We limit to a max of 7 fmul(s). Thus the maximum exponent is 32.
// This transformation applies to integer exponents only.
- if (V.compare(APFloat(V.getSemantics(), 32.0)) == APFloat::cmpGreaterThan ||
- !V.isInteger())
+ if (!ExpoA.isInteger() ||
+ ExpoA.compare(APFloat(ExpoA.getSemantics(), 32.0)) ==
+ APFloat::cmpGreaterThan)
return nullptr;
// We will memoize intermediate products of the Addition Chain.
Value *InnerChain[33] = {nullptr};
- InnerChain[1] = Op1;
- InnerChain[2] = B.CreateFMul(Op1, Op1);
+ InnerChain[1] = Base;
+ InnerChain[2] = B.CreateFMul(Base, Base, "square");
// We cannot readily convert a non-double type (like float) to a double.
- // So we first convert V to something which could be converted to double.
- bool Ignored;
- V.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored);
-
- Value *FMul = getPow(InnerChain, V.convertToDouble(), B);
- // For negative exponents simply compute the reciprocal.
- if (Op2C->isNegative())
- FMul = B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), FMul);
+ // So we first convert it to something which could be converted to double.
+ ExpoA.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored);
+ Value *FMul = getPow(InnerChain, ExpoA.convertToDouble(), B);
+
+ // If the exponent is negative, then get the reciprocal.
+ if (ExpoC->isNegative())
+ FMul = B.CreateFDiv(ConstantFP::get(Ty, 1.0), FMul, "reciprocal");
return FMul;
}
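// Editorial aside, not part of this patch: the addition chain in action for
// pow(x, 7.0) under fast-math. getPow() memoizes intermediate squares in
// InnerChain, roughly:
//   x2 = x * x        ; InnerChain[2]
//   x3 = x2 * x
//   x6 = x3 * x3
//   x7 = x6 * x       ; four fmuls in total, within the limit of 7
// and pow(x, -7.0) additionally emits 1.0 / x7.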
diff --git a/lib/Transforms/Utils/SymbolRewriter.cpp b/lib/Transforms/Utils/SymbolRewriter.cpp
index 3640541e63cc..fd0da79487f1 100644
--- a/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -536,7 +536,7 @@ private:
char RewriteSymbolsLegacyPass::ID = 0;
RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass() : ModulePass(ID) {
- initializeRewriteSymbolsLegacyPassPass(*PassRegistry::getPassRegistry());
+ initializeRewriteSymbolsLegacyPassPass(*PassRegistry::getPassRegistry());
}
RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass(
diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index e633ac0c874d..d49b26472548 100644
--- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -61,7 +61,7 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
} else if (UnreachableBlocks.size() == 1) {
UnreachableBlock = UnreachableBlocks.front();
} else {
- UnreachableBlock = BasicBlock::Create(F.getContext(),
+ UnreachableBlock = BasicBlock::Create(F.getContext(),
"UnifiedUnreachableBlock", &F);
new UnreachableInst(F.getContext(), UnreachableBlock);
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3c693f5d5ee0..859d0c92ca5a 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -535,13 +535,13 @@ protected:
/// Returns true if we should generate a scalar version of \p IV.
bool needsScalarInduction(Instruction *IV) const;
- /// If there is a cast involved in the induction variable \p ID, which should
- /// be ignored in the vectorized loop body, this function records the
- /// VectorLoopValue of the respective Phi also as the VectorLoopValue of the
- /// cast. We had already proved that the casted Phi is equal to the uncasted
- /// Phi in the vectorized loop (under a runtime guard), and therefore
- /// there is no need to vectorize the cast - the same value can be used in the
- /// vector loop for both the Phi and the cast.
+ /// If there is a cast involved in the induction variable \p ID, which should
+ /// be ignored in the vectorized loop body, this function records the
+ /// VectorLoopValue of the respective Phi also as the VectorLoopValue of the
+ /// cast. We had already proved that the casted Phi is equal to the uncasted
+ /// Phi in the vectorized loop (under a runtime guard), and therefore
+ /// there is no need to vectorize the cast - the same value can be used in the
+ /// vector loop for both the Phi and the cast.
/// If \p VectorLoopValue is a scalarized value, \p Lane is also specified;
/// otherwise, \p VectorLoopValue is a widened/vectorized value.
///
@@ -5443,7 +5443,7 @@ bool LoopVectorizationCostModel::useEmulatedMaskMemRefHack(Instruction *I){
// high enough value to practically disable vectorization with such
// operations, except where previously deployed legality hack allowed
// using very low cost values. This is to avoid regressions coming simply
- // from moving "masked load/store" check from legality to cost model.
+ // from moving "masked load/store" check from legality to cost model.
// Masked Load/Gather emulation was previously never allowed.
// Limited number of Masked Store/Scatter emulation was allowed.
assert(isScalarWithPredication(I) &&
@@ -6412,12 +6412,12 @@ void LoopVectorizationPlanner::collectTriviallyDeadInstructions(
}))
DeadInstructions.insert(IndUpdate);
- // We record as "Dead" also the type-casting instructions we had identified
+ // We also record as "Dead" the type-casting instructions we had identified
// during induction analysis. We don't need any handling for them in the
- // vectorized loop because we have proven that, under a proper runtime
- // test guarding the vectorized loop, the value of the phi, and the casted
+ // vectorized loop because we have proven that, under a proper runtime
+ // test guarding the vectorized loop, the value of the phi, and the casted
// value of the phi, are the same. The last instruction in this casting chain
- // will get its scalar/vector/widened def from the scalar/vector/widened def
+ // will get its scalar/vector/widened def from the scalar/vector/widened def
// of the respective phi node. Any other casts in the induction def-use chain
// have no other uses outside the phi update chain, and will be ignored.
InductionDescriptor &IndDes = Induction.second;
@@ -7060,8 +7060,8 @@ LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
auto Plan = llvm::make_unique<VPlan>();
// Build hierarchical CFG
- VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI);
- HCFGBuilder.buildHierarchicalCFG(*Plan.get());
+ VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
+ HCFGBuilder.buildHierarchicalCFG();
return Plan;
}
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index ac8c4f046c6f..5c2efe885e22 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -345,7 +345,7 @@ static Value *isOneOf(const InstructionsState &S, Value *Op) {
}
/// \returns analysis of the Instructions in \p VL described in
-/// InstructionsState, the Opcode that we suppose the whole list
+/// InstructionsState, i.e. the Opcode under which we suppose the whole list
/// could be vectorized even if its structure is diverse.
static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
unsigned BaseIndex = 0) {
@@ -3111,6 +3111,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
// TODO: Merge this shuffle with the ReorderShuffleMask.
if (!E->ReorderIndices.empty())
Builder.SetInsertPoint(VL0);
+ else if (auto *I = dyn_cast<Instruction>(V))
+ Builder.SetInsertPoint(I->getParent(),
+ std::next(I->getIterator()));
+ else
+ Builder.SetInsertPoint(&F->getEntryBlock(),
+ F->getEntryBlock().getFirstInsertionPt());
V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
E->ReuseShuffleIndices, "shuffle");
}
diff --git a/lib/Transforms/Vectorize/VPlan.cpp b/lib/Transforms/Vectorize/VPlan.cpp
index f7b07b722bb1..0780e70809d0 100644
--- a/lib/Transforms/Vectorize/VPlan.cpp
+++ b/lib/Transforms/Vectorize/VPlan.cpp
@@ -18,6 +18,7 @@
//===----------------------------------------------------------------------===//
#include "VPlan.h"
+#include "VPlanDominatorTree.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallVector.h"
@@ -25,7 +26,6 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
@@ -34,6 +34,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GenericDomTreeConstruction.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -576,3 +577,5 @@ void VPWidenMemoryInstructionRecipe::print(raw_ostream &O,
}
O << "\\l\"";
}
+
+template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT);
diff --git a/lib/Transforms/Vectorize/VPlan.h b/lib/Transforms/Vectorize/VPlan.h
index 866951cb79a4..883e6f52369a 100644
--- a/lib/Transforms/Vectorize/VPlan.h
+++ b/lib/Transforms/Vectorize/VPlan.h
@@ -26,8 +26,10 @@
#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
+#include "VPlanLoopInfo.h"
#include "VPlanValue.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -51,7 +53,6 @@ class BasicBlock;
class DominatorTree;
class InnerLoopVectorizer;
class InterleaveGroup;
-class LoopInfo;
class raw_ostream;
class Value;
class VPBasicBlock;
@@ -516,6 +517,23 @@ public:
/// Delete all blocks reachable from a given VPBlockBase, inclusive.
static void deleteCFG(VPBlockBase *Entry);
+
+ void printAsOperand(raw_ostream &OS, bool PrintType) const {
+ OS << getName();
+ }
+
+ void print(raw_ostream &OS) const {
+ // TODO: Only printing the VPBB name for now, since we only have dot
+ // printing support for VPInstructions/Recipes.
+ printAsOperand(OS, false);
+ }
+
+ /// Return true if it is legal to hoist instructions into this block.
+ bool isLegalToHoistInto() {
+ // There are currently no constraints that prevent an instruction from
+ // being hoisted into a VPBlockBase.
+ return true;
+ }
};
/// VPRecipeBase is a base class modeling a sequence of one or more output IR
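isLegalToHoistInto mirrors BasicBlock::isLegalToHoistInto so that block-type-agnostic utilities can query hoisting legality uniformly. A hypothetical sketch (canHoistInto is an illustrative name, not an LLVM API):

  template <typename BlockTy>
  static bool canHoistInto(BlockTy *Dest) {
    // Compiles for BasicBlock and VPBlockBase alike; VPBlockBase currently
    // imposes no constraints, so for it this always returns true.
    return Dest->isLegalToHoistInto();
  }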
@@ -1037,6 +1055,12 @@ public:
EntryBlock->setParent(this);
}
+ // FIXME: DominatorTreeBase is doing 'A->getParent()->front()'. 'front' is a
+ // specific interface of llvm::Function that it uses instead of
+ // GraphTraits::getEntryNode, so we must provide it here. We should add a new
+ // template parameter to DominatorTreeBase representing the Graph type.
+ VPBlockBase &front() const { return *Entry; }
+
const VPBlockBase *getExit() const { return Exit; }
VPBlockBase *getExit() { return Exit; }
@@ -1087,6 +1111,9 @@ private:
/// VPlan.
Value2VPValueTy Value2VPValue;
+ /// Holds the VPLoopInfo analysis for this VPlan.
+ VPLoopInfo VPLInfo;
+
public:
VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) {}
@@ -1133,6 +1160,10 @@ public:
return Value2VPValue[V];
}
+ /// Return the VPLoopInfo analysis for this VPlan.
+ VPLoopInfo &getVPLoopInfo() { return VPLInfo; }
+ const VPLoopInfo &getVPLoopInfo() const { return VPLInfo; }
+
private:
/// Add to the given dominator tree the header block and every new basic block
/// that was created between it and the latch block, inclusive.
@@ -1210,12 +1241,15 @@ inline raw_ostream &operator<<(raw_ostream &OS, VPlan &Plan) {
return OS;
}
-//===--------------------------------------------------------------------===//
-// GraphTraits specializations for VPlan/VPRegionBlock Control-Flow Graphs //
-//===--------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// GraphTraits specializations for VPlan Hierarchical Control-Flow Graphs //
+//===----------------------------------------------------------------------===//
-// Provide specializations of GraphTraits to be able to treat a VPBlockBase as a
-// graph of VPBlockBase nodes...
+// The following set of template specializations implements GraphTraits to
+// treat any VPBlockBase as a node in a graph of VPBlockBases. It's important
+// to note that VPBlockBase traits don't recurse into VPRegionBlocks, i.e., if
+// the VPBlockBase is a VPRegionBlock, this specialization provides access to
+// its successors/predecessors but not to the blocks inside the region.
template <> struct GraphTraits<VPBlockBase *> {
using NodeRef = VPBlockBase *;
@@ -1247,17 +1281,13 @@ template <> struct GraphTraits<const VPBlockBase *> {
}
};
-// Provide specializations of GraphTraits to be able to treat a VPBlockBase as a
-// graph of VPBlockBase nodes... and to walk it in inverse order. Inverse order
-// for a VPBlockBase is considered to be when traversing the predecessors of a
-// VPBlockBase instead of its successors.
+// Inverse order specialization for VPBlockBases. Predecessors are used instead
+// of successors for the inverse traversal.
template <> struct GraphTraits<Inverse<VPBlockBase *>> {
using NodeRef = VPBlockBase *;
using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::iterator;
- static Inverse<VPBlockBase *> getEntryNode(Inverse<VPBlockBase *> B) {
- return B;
- }
+ static NodeRef getEntryNode(Inverse<NodeRef> B) { return B.Graph; }
static inline ChildIteratorType child_begin(NodeRef N) {
return N->getPredecessors().begin();
@@ -1268,6 +1298,71 @@ template <> struct GraphTraits<Inverse<VPBlockBase *>> {
}
};
+// The following set of template specializations implements GraphTraits to
+// treat VPRegionBlock as a graph and recurse inside its nodes. It's important
+// to note that the blocks inside the VPRegionBlock are treated as VPBlockBases
+// (i.e., no dyn_cast is performed, the VPBlockBase specialization is used), so
+// there won't be automatic recursion into other VPBlockBases that turn out to
+// be VPRegionBlocks.
+
+template <>
+struct GraphTraits<VPRegionBlock *> : public GraphTraits<VPBlockBase *> {
+ using GraphRef = VPRegionBlock *;
+ using nodes_iterator = df_iterator<NodeRef>;
+
+ static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); }
+
+ static nodes_iterator nodes_begin(GraphRef N) {
+ return nodes_iterator::begin(N->getEntry());
+ }
+
+ static nodes_iterator nodes_end(GraphRef N) {
+ // df_iterator::end() returns an empty iterator so the node used doesn't
+ // matter.
+ return nodes_iterator::end(N);
+ }
+};
+
+template <>
+struct GraphTraits<const VPRegionBlock *>
+ : public GraphTraits<const VPBlockBase *> {
+ using GraphRef = const VPRegionBlock *;
+ using nodes_iterator = df_iterator<NodeRef>;
+
+ static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); }
+
+ static nodes_iterator nodes_begin(GraphRef N) {
+ return nodes_iterator::begin(N->getEntry());
+ }
+
+ static nodes_iterator nodes_end(GraphRef N) {
+ // df_iterator::end() returns an empty iterator so the node used doesn't
+ // matter.
+ return nodes_iterator::end(N);
+ }
+};
+
+template <>
+struct GraphTraits<Inverse<VPRegionBlock *>>
+ : public GraphTraits<Inverse<VPBlockBase *>> {
+ using GraphRef = VPRegionBlock *;
+ using nodes_iterator = df_iterator<NodeRef>;
+
+ static NodeRef getEntryNode(Inverse<GraphRef> N) {
+ return N.Graph->getExit();
+ }
+
+ static nodes_iterator nodes_begin(GraphRef N) {
+ return nodes_iterator::begin(N->getExit());
+ }
+
+ static nodes_iterator nodes_end(GraphRef N) {
+ // df_iterator::end() returns an empty iterator so the node used doesn't
+ // matter.
+ return nodes_iterator::end(N);
+ }
+};
+
//===----------------------------------------------------------------------===//
// VPlan Utilities
//===----------------------------------------------------------------------===//
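With the region specializations above, generic graph algorithms can walk a region's blocks one level deep. A sketch, assuming Region is a VPRegionBlock*; the traversal stays shallow because nodes are visited as VPBlockBases:

  using RegionTraits = GraphTraits<VPRegionBlock *>;
  for (VPBlockBase *VPB :
       make_range(RegionTraits::nodes_begin(Region),
                  RegionTraits::nodes_end(Region)))
    dbgs() << VPB->getName() << "\n"; // Nested regions appear as single nodes.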
diff --git a/lib/Transforms/Vectorize/VPlanDominatorTree.h b/lib/Transforms/Vectorize/VPlanDominatorTree.h
new file mode 100644
index 000000000000..1b81097b6d31
--- /dev/null
+++ b/lib/Transforms/Vectorize/VPlanDominatorTree.h
@@ -0,0 +1,41 @@
+//===-- VPlanDominatorTree.h ------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements dominator tree analysis for a single level of a VPlan's
+/// H-CFG.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANDOMINATORTREE_H
+#define LLVM_TRANSFORMS_VECTORIZE_VPLANDOMINATORTREE_H
+
+#include "VPlan.h"
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/IR/Dominators.h"
+
+namespace llvm {
+
+/// Template specialization of the standard LLVM dominator tree utility for
+/// VPBlockBases.
+using VPDominatorTree = DomTreeBase<VPBlockBase>;
+
+using VPDomTreeNode = DomTreeNodeBase<VPBlockBase>;
+
+/// Template specializations of GraphTraits for VPDomTreeNode.
+template <>
+struct GraphTraits<VPDomTreeNode *>
+ : public DomTreeGraphTraitsBase<VPDomTreeNode, VPDomTreeNode::iterator> {};
+
+template <>
+struct GraphTraits<const VPDomTreeNode *>
+ : public DomTreeGraphTraitsBase<const VPDomTreeNode,
+ VPDomTreeNode::const_iterator> {};
+} // namespace llvm
+#endif // LLVM_TRANSFORMS_VECTORIZE_VPLANDOMINATORTREE_H
diff --git a/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
index 08129b74cddf..b6307acb9474 100644
--- a/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
+++ b/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
@@ -324,13 +324,28 @@ VPRegionBlock *PlainCFGBuilder::buildPlainCFG() {
return TopRegion;
}
+VPRegionBlock *VPlanHCFGBuilder::buildPlainCFG() {
+ PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan);
+ return PCFGBuilder.buildPlainCFG();
+}
+
// Public interface to build a H-CFG.
-void VPlanHCFGBuilder::buildHierarchicalCFG(VPlan &Plan) {
+void VPlanHCFGBuilder::buildHierarchicalCFG() {
// Build Top Region enclosing the plain CFG and set it as VPlan entry.
- PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan);
- VPRegionBlock *TopRegion = PCFGBuilder.buildPlainCFG();
+ VPRegionBlock *TopRegion = buildPlainCFG();
Plan.setEntry(TopRegion);
LLVM_DEBUG(Plan.setName("HCFGBuilder: Plain CFG\n"); dbgs() << Plan);
Verifier.verifyHierarchicalCFG(TopRegion);
+
+ // Compute plain CFG dom tree for VPLInfo.
+ VPDomTree.recalculate(*TopRegion);
+ LLVM_DEBUG(dbgs() << "Dominator Tree after building the plain CFG.\n";
+ VPDomTree.print(dbgs()));
+
+ // Compute VPLInfo and keep it in Plan.
+ VPLoopInfo &VPLInfo = Plan.getVPLoopInfo();
+ VPLInfo.analyze(VPDomTree);
+ LLVM_DEBUG(dbgs() << "VPLoop Info After buildPlainCFG:\n";
+ VPLInfo.print(dbgs()));
}
diff --git a/lib/Transforms/Vectorize/VPlanHCFGBuilder.h b/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
index c4e69843615a..3f11dcb5164d 100644
--- a/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
+++ b/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
@@ -26,14 +26,18 @@
#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_VPLANHCFGBUILDER_H
#include "VPlan.h"
+#include "VPlanDominatorTree.h"
#include "VPlanVerifier.h"
namespace llvm {
class Loop;
+class VPlanTestBase;
/// Main class to build the VPlan H-CFG for an incoming IR.
class VPlanHCFGBuilder {
+ friend VPlanTestBase;
+
private:
// The outermost loop of the input loop nest considered for vectorization.
Loop *TheLoop;
@@ -41,14 +45,27 @@ private:
// Loop Info analysis.
LoopInfo *LI;
+ // The VPlan that will contain the H-CFG we are building.
+ VPlan &Plan;
+
// VPlan verifier utility.
VPlanVerifier Verifier;
+ // Dominator analysis for the VPlan plain CFG, to be used in the
+ // construction of the H-CFG. This analysis is no longer valid once regions
+ // are introduced.
+ VPDominatorTree VPDomTree;
+
+ /// Build plain CFG for TheLoop. Return a new VPRegionBlock (TopRegion)
+ /// enclosing the plain CFG.
+ VPRegionBlock *buildPlainCFG();
+
public:
- VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI) : TheLoop(Lp), LI(LI) {}
+ VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P)
+ : TheLoop(Lp), LI(LI), Plan(P) {}
- /// Build H-CFG for TheLoop and update \p Plan accordingly.
- void buildHierarchicalCFG(VPlan &Plan);
+ /// Build H-CFG for TheLoop and update Plan accordingly.
+ void buildHierarchicalCFG();
};
} // namespace llvm
diff --git a/lib/Transforms/Vectorize/VPlanLoopInfo.h b/lib/Transforms/Vectorize/VPlanLoopInfo.h
new file mode 100644
index 000000000000..5c2485fc2145
--- /dev/null
+++ b/lib/Transforms/Vectorize/VPlanLoopInfo.h
@@ -0,0 +1,45 @@
+//===-- VPlanLoopInfo.h ------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines VPLoopInfo analysis and VPLoop class. VPLoopInfo is a
+/// specialization of LoopInfoBase for VPBlockBase. VPLoop is a specialization
+/// of LoopBase that is used to hold loop metadata from VPLoopInfo. Further
+/// information can be found in VectorizationPlanner.rst.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLOOPINFO_H
+#define LLVM_TRANSFORMS_VECTORIZE_VPLOOPINFO_H
+
+#include "llvm/Analysis/LoopInfoImpl.h"
+
+namespace llvm {
+class VPBlockBase;
+
+/// Holds analysis information for every loop detected by VPLoopInfo. It is an
+/// instantiation of LoopBase.
+class VPLoop : public LoopBase<VPBlockBase, VPLoop> {
+private:
+ friend class LoopInfoBase<VPBlockBase, VPLoop>;
+ explicit VPLoop(VPBlockBase *VPB) : LoopBase<VPBlockBase, VPLoop>(VPB) {}
+};
+
+/// VPLoopInfo provides analysis of natural loops for the VPBlockBase-based
+/// Hierarchical CFG. It is a specialization of the LoopInfoBase class.
+// TODO: VPLoopInfo is initially computed on top of the VPlan plain CFG, which
+// is the same as the incoming IR CFG. If it's more efficient than running the
+// whole loop detection algorithm, we may want to create a mechanism to
+// translate LoopInfo into VPLoopInfo. However, that would require significant
+// changes in LoopInfoBase class.
+typedef LoopInfoBase<VPBlockBase, VPLoop> VPLoopInfo;
+
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_VECTORIZE_VPLOOPINFO_H
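Once VPlanHCFGBuilder::buildHierarchicalCFG has run, VPLoopInfo can be queried much like the IR-level LoopInfo. A minimal sketch, assuming Plan is a VPlan with a built H-CFG:

  VPLoopInfo &VPLInfo = Plan.getVPLoopInfo();
  for (VPLoop *L : VPLInfo)               // Iterates the top-level VPLoops.
    if (VPBlockBase *Header = L->getHeader())
      dbgs() << "VPLoop header: " << Header->getName() << "\n";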