aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2018-08-02 17:32:43 +0000
committerDimitry Andric <dim@FreeBSD.org>2018-08-02 17:32:43 +0000
commitb7eb8e35e481a74962664b63dfb09483b200209a (patch)
tree1937fb4a348458ce2d02ade03ac3bb0aa18d2fcd
parenteb11fae6d08f479c0799db45860a98af528fa6e7 (diff)
downloadsrc-b7eb8e35e481a74962664b63dfb09483b200209a.tar.gz
src-b7eb8e35e481a74962664b63dfb09483b200209a.zip
Vendor import of llvm trunk r338536:vendor/llvm/llvm-trunk-r338536
Notes
Notes: svn path=/vendor/llvm/dist/; revision=337137 svn path=/vendor/llvm/llvm-trunk-r338536/; revision=337138; tag=vendor/llvm/llvm-trunk-r338536
-rwxr-xr-xcmake/modules/AddLLVM.cmake1
-rw-r--r--docs/CommandGuide/llvm-mca.rst238
-rw-r--r--docs/GettingStarted.rst2
-rw-r--r--docs/LangRef.rst37
-rw-r--r--docs/SourceLevelDebugging.rst5
-rw-r--r--include/llvm/ADT/DenseSet.h2
-rw-r--r--include/llvm/Analysis/BasicAliasAnalysis.h12
-rw-r--r--include/llvm/Analysis/LoopAccessAnalysis.h4
-rw-r--r--include/llvm/Analysis/MemoryDependenceAnalysis.h6
-rw-r--r--include/llvm/Analysis/MustExecute.h4
-rw-r--r--include/llvm/Analysis/TargetTransformInfoImpl.h2
-rw-r--r--include/llvm/Analysis/ValueTracking.h2
-rw-r--r--include/llvm/BinaryFormat/Dwarf.def1
-rw-r--r--include/llvm/BinaryFormat/ELF.h6
-rw-r--r--include/llvm/CodeGen/GCStrategy.h6
-rw-r--r--include/llvm/CodeGen/GlobalISel/LegalizerInfo.h4
-rw-r--r--include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h10
-rw-r--r--include/llvm/CodeGen/MachORelocation.h10
-rw-r--r--include/llvm/CodeGen/MachineModuleInfo.h2
-rw-r--r--include/llvm/CodeGen/MachineOutliner.h14
-rw-r--r--include/llvm/CodeGen/ScheduleDAG.h2
-rw-r--r--include/llvm/CodeGen/StackMaps.h2
-rw-r--r--include/llvm/CodeGen/TargetLowering.h13
-rw-r--r--include/llvm/CodeGen/TargetPassConfig.h2
-rw-r--r--include/llvm/CodeGen/TargetRegisterInfo.h4
-rw-r--r--include/llvm/DebugInfo/CodeView/CodeViewSymbols.def2
-rw-r--r--include/llvm/DebugInfo/CodeView/SymbolRecord.h13
-rw-r--r--include/llvm/DebugInfo/DIContext.h2
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFContext.h4
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h2
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h98
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFDie.h129
-rw-r--r--include/llvm/ExecutionEngine/Orc/RPCSerialization.h145
-rw-r--r--include/llvm/IR/Attributes.td1
-rw-r--r--include/llvm/IR/Instruction.h2
-rw-r--r--include/llvm/IR/Instructions.h2
-rw-r--r--include/llvm/IR/Intrinsics.td2
-rw-r--r--include/llvm/IR/IntrinsicsAMDGPU.td35
-rw-r--r--include/llvm/IR/IntrinsicsARM.td2
-rw-r--r--include/llvm/IR/IntrinsicsPowerPC.td36
-rw-r--r--include/llvm/IR/LegacyPassManagers.h2
-rw-r--r--include/llvm/IR/Statepoint.h2
-rw-r--r--include/llvm/IR/User.h12
-rw-r--r--include/llvm/LinkAllIR.h2
-rw-r--r--include/llvm/MC/MCDwarf.h7
-rw-r--r--include/llvm/MC/MCFragment.h16
-rw-r--r--include/llvm/MC/MCInstrAnalysis.h15
-rw-r--r--include/llvm/MC/MCParser/AsmCond.h2
-rw-r--r--include/llvm/MC/MCStreamer.h4
-rw-r--r--include/llvm/Object/MachO.h2
-rw-r--r--include/llvm/PassAnalysisSupport.h2
-rw-r--r--include/llvm/PassRegistry.h2
-rw-r--r--include/llvm/ProfileData/Coverage/CoverageMapping.h2
-rw-r--r--include/llvm/Support/ARMBuildAttributes.h2
-rw-r--r--include/llvm/Support/DataExtractor.h2
-rw-r--r--include/llvm/Support/GenericDomTree.h18
-rw-r--r--include/llvm/Support/MemoryBuffer.h4
-rw-r--r--include/llvm/Support/SmallVectorMemoryBuffer.h4
-rw-r--r--include/llvm/Support/TargetOpcodes.def5
-rw-r--r--include/llvm/Support/xxhash.h2
-rw-r--r--include/llvm/Target/GenericOpcodes.td7
-rw-r--r--include/llvm/Target/TargetCallingConv.td4
-rw-r--r--include/llvm/Target/TargetInstrPredicate.td4
-rw-r--r--include/llvm/Transforms/Scalar/SpeculativeExecution.h2
-rw-r--r--include/llvm/Transforms/Utils/CodeExtractor.h2
-rw-r--r--include/llvm/Transforms/Utils/FunctionComparator.h2
-rw-r--r--include/llvm/Transforms/Utils/SymbolRewriter.h2
-rw-r--r--lib/Analysis/AliasSetTracker.cpp16
-rw-r--r--lib/Analysis/BasicAliasAnalysis.cpp92
-rw-r--r--lib/Analysis/CFGPrinter.cpp2
-rw-r--r--lib/Analysis/CallGraph.cpp2
-rw-r--r--lib/Analysis/CallGraphSCCPass.cpp98
-rw-r--r--lib/Analysis/DemandedBits.cpp4
-rw-r--r--lib/Analysis/GlobalsModRef.cpp12
-rw-r--r--lib/Analysis/InstructionSimplify.cpp325
-rw-r--r--lib/Analysis/LazyValueInfo.cpp2
-rw-r--r--lib/Analysis/LoopAccessAnalysis.cpp52
-rw-r--r--lib/Analysis/MemDepPrinter.cpp2
-rw-r--r--lib/Analysis/MemoryDependenceAnalysis.cpp17
-rw-r--r--lib/Analysis/MustExecute.cpp6
-rw-r--r--lib/Analysis/ScalarEvolution.cpp10
-rw-r--r--lib/Analysis/TargetTransformInfo.cpp18
-rw-r--r--lib/Analysis/ValueTracking.cpp12
-rw-r--r--lib/AsmParser/LLParser.cpp4
-rw-r--r--lib/Bitcode/Writer/BitcodeWriter.cpp2
-rw-r--r--lib/CodeGen/AntiDepBreaker.h2
-rw-r--r--lib/CodeGen/AsmPrinter/AddressPool.cpp18
-rw-r--r--lib/CodeGen/AsmPrinter/AddressPool.h3
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.cpp15
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.h3
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfExpression.h2
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfFile.cpp2
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfUnit.cpp4
-rw-r--r--lib/CodeGen/AtomicExpandPass.cpp16
-rw-r--r--lib/CodeGen/BuiltinGCs.cpp2
-rw-r--r--lib/CodeGen/CriticalAntiDepBreaker.cpp2
-rw-r--r--lib/CodeGen/GCMetadata.cpp8
-rw-r--r--lib/CodeGen/GlobalISel/IRTranslator.cpp15
-rw-r--r--lib/CodeGen/GlobalISel/MachineIRBuilder.cpp9
-rw-r--r--lib/CodeGen/GlobalMerge.cpp2
-rw-r--r--lib/CodeGen/IntrinsicLowering.cpp28
-rw-r--r--lib/CodeGen/LiveDebugValues.cpp4
-rw-r--r--lib/CodeGen/MachineModuleInfo.cpp4
-rw-r--r--lib/CodeGen/MachineOutliner.cpp47
-rw-r--r--lib/CodeGen/MachineRegisterInfo.cpp2
-rw-r--r--lib/CodeGen/MachineSSAUpdater.cpp4
-rw-r--r--lib/CodeGen/MachineSink.cpp2
-rw-r--r--lib/CodeGen/MachineTraceMetrics.cpp4
-rw-r--r--lib/CodeGen/MachineVerifier.cpp4
-rw-r--r--lib/CodeGen/RegisterScavenging.cpp3
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp231
-rw-r--r--lib/CodeGen/SelectionDAG/FastISel.cpp8
-rw-r--r--lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp8
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp9
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.h2
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp18
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp14
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp246
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h10
-rw-r--r--lib/CodeGen/SelectionDAG/StatepointLowering.cpp11
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp39
-rw-r--r--lib/CodeGen/ShadowStackGCLowering.cpp4
-rw-r--r--lib/CodeGen/SplitKit.h2
-rw-r--r--lib/CodeGen/TargetLoweringBase.cpp13
-rw-r--r--lib/CodeGen/TargetLoweringObjectFileImpl.cpp67
-rw-r--r--lib/CodeGen/TargetPassConfig.cpp2
-rw-r--r--lib/CodeGen/WinEHPrepare.cpp2
-rw-r--r--lib/DebugInfo/CodeView/RecordName.cpp3
-rw-r--r--lib/DebugInfo/CodeView/SymbolDumper.cpp6
-rw-r--r--lib/DebugInfo/CodeView/SymbolRecordMapping.cpp8
-rw-r--r--lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp3
-rw-r--r--lib/DebugInfo/CodeView/TypeStreamMerger.cpp5
-rw-r--r--lib/DebugInfo/DWARF/CMakeLists.txt1
-rw-r--r--lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp2
-rw-r--r--lib/DebugInfo/DWARF/DWARFContext.cpp65
-rw-r--r--lib/DebugInfo/DWARF/DWARFDebugAddr.cpp198
-rw-r--r--lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp2
-rw-r--r--lib/Demangle/ItaniumDemangle.cpp8
-rw-r--r--lib/Demangle/MicrosoftDemangle.cpp1048
-rw-r--r--lib/Demangle/StringView.h24
-rw-r--r--lib/ExecutionEngine/ExecutionEngineBindings.cpp16
-rw-r--r--lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h6
-rw-r--r--lib/ExecutionEngine/IntelJITEvents/jitprofiling.h86
-rw-r--r--lib/ExecutionEngine/Interpreter/Execution.cpp42
-rw-r--r--lib/ExecutionEngine/Interpreter/Interpreter.h6
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp6
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp2
-rw-r--r--lib/FuzzMutate/FuzzerCLI.cpp6
-rw-r--r--lib/IR/Attributes.cpp9
-rw-r--r--lib/IR/AutoUpgrade.cpp2
-rw-r--r--lib/IR/Function.cpp2
-rw-r--r--lib/IR/InlineAsm.cpp32
-rw-r--r--lib/IR/Instructions.cpp182
-rw-r--r--lib/IR/LLVMContextImpl.h16
-rw-r--r--lib/IR/SymbolTableListTraitsImpl.h10
-rw-r--r--lib/IR/ValueSymbolTable.cpp4
-rw-r--r--lib/LTO/ThinLTOCodeGenerator.cpp10
-rw-r--r--lib/MC/MCAsmStreamer.cpp4
-rw-r--r--lib/MC/MCAssembler.cpp44
-rw-r--r--lib/MC/MCDisassembler/Disassembler.cpp2
-rw-r--r--lib/MC/MCDisassembler/Disassembler.h4
-rw-r--r--lib/MC/MCDwarf.cpp53
-rw-r--r--lib/MC/MCInstrAnalysis.cpp5
-rw-r--r--lib/MC/MCObjectFileInfo.cpp15
-rw-r--r--lib/MC/MCParser/ELFAsmParser.cpp2
-rw-r--r--lib/MC/MCStreamer.cpp2
-rw-r--r--lib/MC/MachObjectWriter.cpp2
-rw-r--r--lib/Object/COFFObjectFile.cpp2
-rw-r--r--lib/ObjectYAML/CodeViewYAMLSymbols.cpp4
-rw-r--r--lib/Support/APFloat.cpp2
-rw-r--r--lib/Support/ConvertUTF.cpp28
-rw-r--r--lib/Support/CrashRecoveryContext.cpp4
-rw-r--r--lib/Support/DAGDeltaAlgorithm.cpp6
-rw-r--r--lib/Support/Errno.cpp2
-rw-r--r--lib/Support/FoldingSet.cpp40
-rw-r--r--lib/Support/FormattedStream.cpp2
-rw-r--r--lib/Support/ManagedStatic.cpp6
-rw-r--r--lib/Support/MemoryBuffer.cpp5
-rw-r--r--lib/Support/Path.cpp10
-rw-r--r--lib/Support/PrettyStackTrace.cpp20
-rw-r--r--lib/Support/SourceMgr.cpp24
-rw-r--r--lib/Support/StringPool.cpp4
-rw-r--r--lib/Support/StringRef.cpp2
-rw-r--r--lib/Support/TargetRegistry.cpp2
-rw-r--r--lib/Support/Windows/Path.inc2
-rw-r--r--lib/Support/YAMLParser.cpp2
-rw-r--r--lib/Support/regex_impl.h2
-rw-r--r--lib/Support/xxhash.cpp4
-rw-r--r--lib/TableGen/StringMatcher.cpp38
-rw-r--r--lib/Target/AArch64/AArch64FastISel.cpp2
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.cpp13
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.h2
-rw-r--r--lib/Target/AArch64/AArch64InstrFormats.td15
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.cpp254
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.h31
-rw-r--r--lib/Target/AArch64/AArch64InstructionSelector.cpp96
-rw-r--r--lib/Target/AArch64/AArch64LegalizerInfo.cpp2
-rw-r--r--lib/Target/AArch64/AArch64MachineFunctionInfo.h2
-rw-r--r--lib/Target/AArch64/AArch64RegisterInfo.td69
-rw-r--r--lib/Target/AArch64/AArch64SVEInstrInfo.td106
-rw-r--r--lib/Target/AArch64/AArch64TargetMachine.cpp3
-rw-r--r--lib/Target/AArch64/AArch64TargetTransformInfo.cpp2
-rw-r--r--lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp161
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp14
-rw-r--r--lib/Target/AArch64/SVEInstrFormats.td365
-rw-r--r--lib/Target/AMDGPU/AMDGPUISelLowering.cpp17
-rw-r--r--lib/Target/AMDGPU/AMDGPUInstrInfo.td5
-rw-r--r--lib/Target/AMDGPU/AMDGPUInstructions.td1
-rw-r--r--lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp26
-rw-r--r--lib/Target/AMDGPU/MIMGInstructions.td26
-rw-r--r--lib/Target/AMDGPU/SIISelLowering.cpp147
-rw-r--r--lib/Target/AMDGPU/SIISelLowering.h13
-rw-r--r--lib/Target/AMDGPU/SIInsertSkips.cpp22
-rw-r--r--lib/Target/AMDGPU/SIInstrInfo.cpp30
-rw-r--r--lib/Target/AMDGPU/SIInstrInfo.h3
-rw-r--r--lib/Target/AMDGPU/SIInstructions.td10
-rw-r--r--lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp1
-rw-r--r--lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h9
-rw-r--r--lib/Target/AMDGPU/VOP3PInstructions.td31
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.cpp9
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp2
-rw-r--r--lib/Target/ARM/ARMCallingConv.h11
-rw-r--r--lib/Target/ARM/ARMConstantIslandPass.cpp2
-rw-r--r--lib/Target/ARM/ARMConstantPoolValue.h2
-rw-r--r--lib/Target/ARM/ARMFastISel.cpp2
-rw-r--r--lib/Target/ARM/ARMFrameLowering.cpp2
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp8
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp10
-rw-r--r--lib/Target/ARM/ARMLoadStoreOptimizer.cpp2
-rw-r--r--lib/Target/ARM/ARMMachineFunctionInfo.h2
-rw-r--r--lib/Target/ARM/ARMSelectionDAGInfo.cpp8
-rw-r--r--lib/Target/ARM/ARMTargetTransformInfo.cpp2
-rw-r--r--lib/Target/ARM/ARMTargetTransformInfo.h2
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmParser.cpp51
-rw-r--r--lib/Target/ARM/Disassembler/ARMDisassembler.cpp10
-rw-r--r--lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp6
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp38
-rw-r--r--lib/Target/ARM/MLxExpansionPass.cpp2
-rw-r--r--lib/Target/ARM/Thumb1FrameLowering.cpp2
-rw-r--r--lib/Target/AVR/AVRISelLowering.cpp7
-rw-r--r--lib/Target/Hexagon/HexagonBitSimplify.cpp123
-rw-r--r--lib/Target/Hexagon/HexagonBitTracker.cpp36
-rw-r--r--lib/Target/Hexagon/HexagonBitTracker.h1
-rw-r--r--lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h2
-rw-r--r--lib/Target/Mips/AsmParser/MipsAsmParser.cpp6
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h2
-rw-r--r--lib/Target/Mips/MipsAsmPrinter.cpp2
-rw-r--r--lib/Target/Mips/MipsCallLowering.cpp3
-rw-r--r--lib/Target/Mips/MipsConstantIslandPass.cpp12
-rw-r--r--lib/Target/Mips/MipsFastISel.cpp2
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp6
-rw-r--r--lib/Target/Mips/MipsISelLowering.h5
-rw-r--r--lib/Target/Mips/MipsInstructionSelector.cpp27
-rw-r--r--lib/Target/Mips/MipsLegalizerInfo.cpp3
-rw-r--r--lib/Target/Mips/MipsRegisterBankInfo.cpp1
-rw-r--r--lib/Target/Mips/MipsSubtarget.h2
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.h2
-rw-r--r--lib/Target/NVPTX/NVPTXImageOptimizer.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXMachineFunctionInfo.h2
-rw-r--r--lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp2
-rw-r--r--lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h4
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp4
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp22
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h2
-rw-r--r--lib/Target/PowerPC/PPC.h4
-rw-r--r--lib/Target/PowerPC/PPCBranchSelector.cpp16
-rw-r--r--lib/Target/PowerPC/PPCEarlyReturn.cpp2
-rw-r--r--lib/Target/PowerPC/PPCFastISel.cpp2
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.cpp6
-rw-r--r--lib/Target/PowerPC/PPCHazardRecognizers.cpp4
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp12
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h6
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.cpp4
-rw-r--r--lib/Target/PowerPC/PPCLoopPreIncPrep.cpp2
-rw-r--r--lib/Target/PowerPC/PPCMCInstLower.cpp6
-rw-r--r--lib/Target/PowerPC/PPCMIPeephole.cpp2
-rw-r--r--lib/Target/PowerPC/PPCMachineFunctionInfo.h4
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp2
-rw-r--r--lib/Target/PowerPC/PPCTargetTransformInfo.cpp2
-rw-r--r--lib/Target/PowerPC/PPCVSXSwapRemoval.cpp2
-rw-r--r--lib/Target/Sparc/AsmParser/SparcAsmParser.cpp8
-rw-r--r--lib/Target/Sparc/Disassembler/SparcDisassembler.cpp8
-rw-r--r--lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp6
-rw-r--r--lib/Target/Sparc/Sparc.h2
-rw-r--r--lib/Target/Sparc/SparcISelLowering.h4
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.cpp2
-rw-r--r--lib/Target/Sparc/SparcTargetMachine.cpp4
-rw-r--r--lib/Target/SystemZ/SystemZHazardRecognizer.cpp45
-rw-r--r--lib/Target/SystemZ/SystemZHazardRecognizer.h17
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.cpp80
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.h1
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.td48
-rw-r--r--lib/Target/SystemZ/SystemZMachineScheduler.cpp5
-rw-r--r--lib/Target/SystemZ/SystemZMachineScheduler.h4
-rw-r--r--lib/Target/SystemZ/SystemZOperands.td1
-rw-r--r--lib/Target/SystemZ/SystemZOperators.td10
-rw-r--r--lib/Target/SystemZ/SystemZTargetTransformInfo.cpp8
-rw-r--r--lib/Target/Target.cpp2
-rw-r--r--lib/Target/TargetLoweringObjectFile.cpp4
-rw-r--r--lib/Target/X86/AsmParser/X86AsmParser.cpp6
-rw-r--r--lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp2
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp74
-rw-r--r--lib/Target/X86/X86CallingConv.h2
-rw-r--r--lib/Target/X86/X86CmovConversion.cpp2
-rw-r--r--lib/Target/X86/X86FastISel.cpp6
-rw-r--r--lib/Target/X86/X86FixupLEAs.cpp2
-rw-r--r--lib/Target/X86/X86FlagsCopyLowering.cpp7
-rw-r--r--lib/Target/X86/X86FloatingPoint.cpp6
-rw-r--r--lib/Target/X86/X86FrameLowering.cpp65
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp317
-rw-r--r--lib/Target/X86/X86ISelLowering.h7
-rw-r--r--lib/Target/X86/X86InstrFoldTables.cpp2
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp2
-rw-r--r--lib/Target/X86/X86InstrInfo.td16
-rw-r--r--lib/Target/X86/X86InstrShiftRotate.td18
-rwxr-xr-xlib/Target/X86/X86SchedBroadwell.td46
-rw-r--r--lib/Target/X86/X86SchedHaswell.td52
-rw-r--r--lib/Target/X86/X86SchedSandyBridge.td53
-rw-r--r--lib/Target/X86/X86SchedSkylakeClient.td48
-rwxr-xr-xlib/Target/X86/X86SchedSkylakeServer.td48
-rw-r--r--lib/Target/X86/X86Schedule.td10
-rw-r--r--lib/Target/X86/X86ScheduleAtom.td20
-rw-r--r--lib/Target/X86/X86ScheduleBtVer2.td37
-rw-r--r--lib/Target/X86/X86ScheduleSLM.td12
-rw-r--r--lib/Target/X86/X86ScheduleZnver1.td12
-rw-r--r--lib/Target/X86/X86Subtarget.h2
-rw-r--r--lib/Target/X86/X86TargetTransformInfo.cpp14
-rw-r--r--lib/Target/XCore/XCoreAsmPrinter.cpp6
-rw-r--r--lib/Target/XCore/XCoreInstrInfo.cpp36
-rw-r--r--lib/Target/XCore/XCoreMachineFunctionInfo.h6
-rw-r--r--lib/Target/XCore/XCoreRegisterInfo.cpp4
-rw-r--r--lib/Target/XCore/XCoreRegisterInfo.h2
-rw-r--r--lib/Target/XCore/XCoreSubtarget.h2
-rw-r--r--lib/Transforms/IPO/DeadArgumentElimination.cpp10
-rw-r--r--lib/Transforms/IPO/FunctionAttrs.cpp2
-rw-r--r--lib/Transforms/IPO/GlobalOpt.cpp118
-rw-r--r--lib/Transforms/IPO/IPConstantPropagation.cpp16
-rw-r--r--lib/Transforms/IPO/MergeFunctions.cpp10
-rw-r--r--lib/Transforms/IPO/PruneEH.cpp4
-rw-r--r--lib/Transforms/InstCombine/InstCombineAddSub.cpp18
-rw-r--r--lib/Transforms/InstCombine/InstCombineAndOrXor.cpp32
-rw-r--r--lib/Transforms/InstCombine/InstCombineCasts.cpp6
-rw-r--r--lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp2
-rw-r--r--lib/Transforms/InstCombine/InstCombineSelect.cpp33
-rw-r--r--lib/Transforms/InstCombine/InstCombineShifts.cpp2
-rw-r--r--lib/Transforms/InstCombine/InstCombineVectorOps.cpp2
-rw-r--r--lib/Transforms/InstCombine/InstructionCombining.cpp2
-rw-r--r--lib/Transforms/Instrumentation/AddressSanitizer.cpp4
-rw-r--r--lib/Transforms/Instrumentation/GCOVProfiling.cpp2
-rw-r--r--lib/Transforms/Instrumentation/InstrProfiling.cpp2
-rw-r--r--lib/Transforms/Scalar/AlignmentFromAssumptions.cpp2
-rw-r--r--lib/Transforms/Scalar/ConstantHoisting.cpp2
-rw-r--r--lib/Transforms/Scalar/CorrelatedValuePropagation.cpp8
-rw-r--r--lib/Transforms/Scalar/DeadStoreElimination.cpp2
-rw-r--r--lib/Transforms/Scalar/EarlyCSE.cpp6
-rw-r--r--lib/Transforms/Scalar/GVNSink.cpp2
-rw-r--r--lib/Transforms/Scalar/GuardWidening.cpp96
-rw-r--r--lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp6
-rw-r--r--lib/Transforms/Scalar/LICM.cpp8
-rw-r--r--lib/Transforms/Scalar/LoopIdiomRecognize.cpp2
-rw-r--r--lib/Transforms/Scalar/LoopPredication.cpp6
-rw-r--r--lib/Transforms/Scalar/LoopUnrollPass.cpp4
-rw-r--r--lib/Transforms/Scalar/LoopUnswitch.cpp10
-rw-r--r--lib/Transforms/Scalar/NewGVN.cpp2
-rw-r--r--lib/Transforms/Scalar/Reassociate.cpp16
-rw-r--r--lib/Transforms/Scalar/RewriteStatepointsForGC.cpp22
-rw-r--r--lib/Transforms/Scalar/SROA.cpp2
-rw-r--r--lib/Transforms/Scalar/SimpleLoopUnswitch.cpp6
-rw-r--r--lib/Transforms/Utils/BuildLibCalls.cpp2
-rw-r--r--lib/Transforms/Utils/CallPromotionUtils.cpp2
-rw-r--r--lib/Transforms/Utils/CloneFunction.cpp38
-rw-r--r--lib/Transforms/Utils/CloneModule.cpp4
-rw-r--r--lib/Transforms/Utils/CodeExtractor.cpp6
-rw-r--r--lib/Transforms/Utils/InlineFunction.cpp14
-rw-r--r--lib/Transforms/Utils/IntegerDivision.cpp10
-rw-r--r--lib/Transforms/Utils/LCSSA.cpp9
-rw-r--r--lib/Transforms/Utils/LoopUnrollPeel.cpp4
-rw-r--r--lib/Transforms/Utils/MetaRenamer.cpp2
-rw-r--r--lib/Transforms/Utils/SSAUpdater.cpp38
-rw-r--r--lib/Transforms/Utils/SimplifyIndVar.cpp2
-rw-r--r--lib/Transforms/Utils/SimplifyLibCalls.cpp237
-rw-r--r--lib/Transforms/Utils/SymbolRewriter.cpp2
-rw-r--r--lib/Transforms/Utils/UnifyFunctionExitNodes.cpp2
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp28
-rw-r--r--lib/Transforms/Vectorize/SLPVectorizer.cpp8
-rw-r--r--lib/Transforms/Vectorize/VPlan.cpp5
-rw-r--r--lib/Transforms/Vectorize/VPlan.h121
-rw-r--r--lib/Transforms/Vectorize/VPlanDominatorTree.h41
-rw-r--r--lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp21
-rw-r--r--lib/Transforms/Vectorize/VPlanHCFGBuilder.h23
-rw-r--r--lib/Transforms/Vectorize/VPlanLoopInfo.h45
-rw-r--r--test/Analysis/BasicAA/invalidation.ll12
-rw-r--r--test/Analysis/BasicAA/phi-aa.ll40
-rw-r--r--test/Analysis/BasicAA/phi-values-usage.ll50
-rw-r--r--test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll4
-rw-r--r--test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll18
-rw-r--r--test/CodeGen/AArch64/GlobalISel/irtranslator-block-order.ll19
-rw-r--r--test/CodeGen/AArch64/GlobalISel/legalize-blockaddress.mir45
-rw-r--r--test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir64
-rw-r--r--test/CodeGen/AArch64/O3-pipeline.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-cse.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-memset-to-bzero.ll6
-rw-r--r--test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll14
-rw-r--r--test/CodeGen/AArch64/cond-sel.ll4
-rw-r--r--test/CodeGen/AArch64/machine-outliner-default.mir71
-rw-r--r--test/CodeGen/AArch64/machine-outliner-flags.ll8
-rw-r--r--test/CodeGen/AArch64/machine-outliner-regsave.mir112
-rw-r--r--test/CodeGen/AArch64/machine-outliner.ll12
-rw-r--r--test/CodeGen/AArch64/machine-outliner.mir6
-rw-r--r--test/CodeGen/AArch64/max-jump-table.ll1
-rw-r--r--test/CodeGen/AArch64/rotate-extract.ll21
-rw-r--r--test/CodeGen/AArch64/signbit-shift.ll18
-rw-r--r--test/CodeGen/AMDGPU/bfi_int.ll2
-rw-r--r--test/CodeGen/AMDGPU/call-argument-types.ll192
-rw-r--r--test/CodeGen/AMDGPU/fcanonicalize-elimination.ll2
-rw-r--r--test/CodeGen/AMDGPU/fcanonicalize.f16.ll205
-rw-r--r--test/CodeGen/AMDGPU/fcanonicalize.ll13
-rw-r--r--test/CodeGen/AMDGPU/fmax3.ll32
-rw-r--r--test/CodeGen/AMDGPU/fmaxnum.ll357
-rw-r--r--test/CodeGen/AMDGPU/fmaxnum.r600.ll203
-rw-r--r--test/CodeGen/AMDGPU/fmin3.ll17
-rw-r--r--test/CodeGen/AMDGPU/fminnum.ll345
-rw-r--r--test/CodeGen/AMDGPU/fminnum.r600.ll202
-rw-r--r--test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll6
-rw-r--r--test/CodeGen/AMDGPU/fneg-combines.ll21
-rw-r--r--test/CodeGen/AMDGPU/function-args.ll39
-rw-r--r--test/CodeGen/AMDGPU/function-returns.ll39
-rw-r--r--test/CodeGen/AMDGPU/kernel-args.ll337
-rw-r--r--test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll60
-rw-r--r--test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll26
-rw-r--r--test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ltolz.ll113
-rw-r--r--test/CodeGen/AMDGPU/llvm.amdgcn.sdot2.ll26
-rw-r--r--test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll28
-rw-r--r--test/CodeGen/AMDGPU/llvm.amdgcn.sdot8.ll28
-rw-r--r--test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.ll15
-rw-r--r--test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll26
-rw-r--r--test/CodeGen/AMDGPU/llvm.amdgcn.udot4.ll28
-rw-r--r--test/CodeGen/AMDGPU/llvm.amdgcn.udot8.ll28
-rw-r--r--test/CodeGen/AMDGPU/lower-kernargs.ll32
-rw-r--r--test/CodeGen/AMDGPU/mad-mix-lo.ll25
-rw-r--r--test/CodeGen/AMDGPU/mad-mix.ll58
-rw-r--r--test/CodeGen/AMDGPU/mul.i16.ll2
-rw-r--r--test/CodeGen/AMDGPU/r600.extract-lowbits.ll369
-rw-r--r--test/CodeGen/AMDGPU/skip-if-dead.ll10
-rw-r--r--test/CodeGen/AMDGPU/store-global.ll48
-rw-r--r--test/CodeGen/AMDGPU/store-private.ll8
-rw-r--r--test/CodeGen/AMDGPU/zero_extend.ll10
-rw-r--r--test/CodeGen/ARM/aggregate-padding.ll16
-rw-r--r--test/CodeGen/ARM/inline-asm-operand-implicit-cast.ll80
-rw-r--r--test/CodeGen/ARM/inlineasm-64bit.ll8
-rw-r--r--test/CodeGen/ARM/machine-cse-cmp.ll4
-rw-r--r--test/CodeGen/Hexagon/bit-cmp0.mir154
-rw-r--r--test/CodeGen/Mips/GlobalISel/instruction-select/gloal_address.mir46
-rw-r--r--test/CodeGen/Mips/GlobalISel/irtranslator/global_address.ll26
-rw-r--r--test/CodeGen/Mips/GlobalISel/legalizer/global_address.mir43
-rw-r--r--test/CodeGen/Mips/GlobalISel/llvm-ir/global_address.ll34
-rw-r--r--test/CodeGen/Mips/GlobalISel/regbankselect/global_address.mir44
-rw-r--r--test/CodeGen/Mips/const-mult.ll48
-rw-r--r--test/CodeGen/PowerPC/signbit-shift.ll16
-rw-r--r--test/CodeGen/RISCV/tail-calls.ll2
-rw-r--r--test/CodeGen/SystemZ/shift-12.ll12
-rw-r--r--test/CodeGen/SystemZ/vec-cmp-cmp-logic-select.ll4
-rw-r--r--test/CodeGen/X86/atom-fixup-lea2.ll1
-rw-r--r--test/CodeGen/X86/combine-sdiv.ll604
-rw-r--r--test/CodeGen/X86/combine-shl.ll46
-rw-r--r--test/CodeGen/X86/dagcombine-select.ll72
-rw-r--r--test/CodeGen/X86/fast-isel-fold-mem.ll9
-rw-r--r--test/CodeGen/X86/fast-isel-select.ll19
-rw-r--r--test/CodeGen/X86/fast-isel-sext-zext.ll40
-rw-r--r--test/CodeGen/X86/flags-copy-lowering.mir119
-rw-r--r--test/CodeGen/X86/lea-opt.ll151
-rw-r--r--test/CodeGen/X86/machine-outliner-tailcalls.ll2
-rw-r--r--test/CodeGen/X86/mul-constant-i16.ll44
-rw-r--r--test/CodeGen/X86/mul-constant-i32.ll112
-rw-r--r--test/CodeGen/X86/mul-constant-i64.ll138
-rw-r--r--test/CodeGen/X86/pku.ll16
-rw-r--r--test/CodeGen/X86/pmaddubsw.ll553
-rw-r--r--test/CodeGen/X86/rem.ll8
-rw-r--r--test/CodeGen/X86/rotate-extract-vector.ll122
-rw-r--r--test/CodeGen/X86/rotate-extract.ll52
-rw-r--r--test/CodeGen/X86/signbit-shift.ll36
-rw-r--r--test/CodeGen/X86/speculative-load-hardening.ll60
-rw-r--r--test/CodeGen/X86/vector-idiv-sdiv-128.ll36
-rw-r--r--test/CodeGen/X86/vector-idiv-sdiv-256.ll48
-rw-r--r--test/CodeGen/X86/vector-idiv-sdiv-512.ll48
-rw-r--r--test/CodeGen/X86/vector-idiv-udiv-128.ll36
-rw-r--r--test/CodeGen/X86/vector-idiv-udiv-256.ll48
-rw-r--r--test/CodeGen/X86/vector-idiv-udiv-512.ll48
-rw-r--r--test/CodeGen/X86/vector-shift-lshr-128.ll49
-rw-r--r--test/CodeGen/X86/vector-shift-lshr-256.ll40
-rw-r--r--test/CodeGen/X86/win_coreclr_chkstk.ll7
-rw-r--r--test/CodeGen/X86/win_coreclr_chkstk_liveins.mir24
-rw-r--r--test/DebugInfo/PDB/pdb-invalid-type.test15
-rw-r--r--test/DebugInfo/PDB/using-namespace.test51
-rw-r--r--test/DebugInfo/RISCV/lit.local.cfg2
-rw-r--r--test/DebugInfo/RISCV/relax-debug-line.ll75
-rw-r--r--test/DebugInfo/X86/accel-tables-dwarf5.ll5
-rw-r--r--test/DebugInfo/X86/accel-tables.ll9
-rw-r--r--test/DebugInfo/X86/debug_addr.ll79
-rw-r--r--test/Demangle/ms-cxx11.test148
-rw-r--r--test/Demangle/ms-mangle.test14
-rw-r--r--test/Demangle/ms-nested-scopes.test146
-rw-r--r--test/Demangle/ms-return-qualifiers.test184
-rw-r--r--test/Demangle/ms-template-callback.test53
-rw-r--r--test/Instrumentation/InstrProfiling/linkage.ll33
-rw-r--r--test/Instrumentation/InstrProfiling/platform.ll37
-rw-r--r--test/MC/AArch64/SVE/abs.s28
-rw-r--r--test/MC/AArch64/SVE/add-diagnostics.s22
-rw-r--r--test/MC/AArch64/SVE/add.s40
-rw-r--r--test/MC/AArch64/SVE/adr-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/and-diagnostics.s22
-rw-r--r--test/MC/AArch64/SVE/and.s40
-rw-r--r--test/MC/AArch64/SVE/andv-diagnostics.s17
-rw-r--r--test/MC/AArch64/SVE/asr-diagnostics.s28
-rw-r--r--test/MC/AArch64/SVE/asr.s52
-rw-r--r--test/MC/AArch64/SVE/asrd.s28
-rw-r--r--test/MC/AArch64/SVE/asrr.s28
-rw-r--r--test/MC/AArch64/SVE/bic-diagnostics.s22
-rw-r--r--test/MC/AArch64/SVE/bic.s40
-rw-r--r--test/MC/AArch64/SVE/brka-diagnostics.s10
-rw-r--r--test/MC/AArch64/SVE/brka.s20
-rw-r--r--test/MC/AArch64/SVE/brkas-diagnostics.s19
-rw-r--r--test/MC/AArch64/SVE/brkas.s14
-rw-r--r--test/MC/AArch64/SVE/brkb-diagnostics.s10
-rw-r--r--test/MC/AArch64/SVE/brkb.s20
-rw-r--r--test/MC/AArch64/SVE/brkbs-diagnostics.s19
-rw-r--r--test/MC/AArch64/SVE/brkbs.s14
-rw-r--r--test/MC/AArch64/SVE/brkn-diagnostics.s28
-rw-r--r--test/MC/AArch64/SVE/brkn.s20
-rw-r--r--test/MC/AArch64/SVE/brkns-diagnostics.s28
-rw-r--r--test/MC/AArch64/SVE/brkns.s20
-rw-r--r--test/MC/AArch64/SVE/brkpa-diagnostics.s11
-rw-r--r--test/MC/AArch64/SVE/brkpa.s20
-rw-r--r--test/MC/AArch64/SVE/brkpas-diagnostics.s11
-rw-r--r--test/MC/AArch64/SVE/brkpas.s20
-rw-r--r--test/MC/AArch64/SVE/brkpb-diagnostics.s11
-rw-r--r--test/MC/AArch64/SVE/brkpb.s20
-rw-r--r--test/MC/AArch64/SVE/brkpbs-diagnostics.s11
-rw-r--r--test/MC/AArch64/SVE/brkpbs.s20
-rw-r--r--test/MC/AArch64/SVE/clasta-diagnostics.s34
-rw-r--r--test/MC/AArch64/SVE/clasta.s16
-rw-r--r--test/MC/AArch64/SVE/clastb-diagnostics.s34
-rw-r--r--test/MC/AArch64/SVE/clastb.s16
-rw-r--r--test/MC/AArch64/SVE/cls.s28
-rw-r--r--test/MC/AArch64/SVE/clz.s28
-rw-r--r--test/MC/AArch64/SVE/cmpeq-diagnostics.s28
-rw-r--r--test/MC/AArch64/SVE/cmpge-diagnostics.s28
-rw-r--r--test/MC/AArch64/SVE/cmpgt-diagnostics.s28
-rw-r--r--test/MC/AArch64/SVE/cmphi-diagnostics.s28
-rw-r--r--test/MC/AArch64/SVE/cmphs-diagnostics.s28
-rw-r--r--test/MC/AArch64/SVE/cmple-diagnostics.s28
-rw-r--r--test/MC/AArch64/SVE/cmplo-diagnostics.s28
-rw-r--r--test/MC/AArch64/SVE/cmpls-diagnostics.s28
-rw-r--r--test/MC/AArch64/SVE/cmplt-diagnostics.s28
-rw-r--r--test/MC/AArch64/SVE/cmpne-diagnostics.s28
-rw-r--r--test/MC/AArch64/SVE/cnot.s28
-rw-r--r--test/MC/AArch64/SVE/cnt.s28
-rw-r--r--test/MC/AArch64/SVE/compact-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/cpy.s76
-rw-r--r--test/MC/AArch64/SVE/ctermeq-diagnostics.s25
-rw-r--r--test/MC/AArch64/SVE/ctermeq.s32
-rw-r--r--test/MC/AArch64/SVE/ctermne-diagnostics.s25
-rw-r--r--test/MC/AArch64/SVE/ctermne.s32
-rw-r--r--test/MC/AArch64/SVE/decp-diagnostics.s10
-rw-r--r--test/MC/AArch64/SVE/decp.s16
-rw-r--r--test/MC/AArch64/SVE/dup-diagnostics.s40
-rw-r--r--test/MC/AArch64/SVE/dupm-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/eon-diagnostics.s10
-rw-r--r--test/MC/AArch64/SVE/eon.s16
-rw-r--r--test/MC/AArch64/SVE/eor-diagnostics.s22
-rw-r--r--test/MC/AArch64/SVE/eor.s40
-rw-r--r--test/MC/AArch64/SVE/eorv-diagnostics.s17
-rw-r--r--test/MC/AArch64/SVE/ext-diagnostics.s10
-rw-r--r--test/MC/AArch64/SVE/ext.s16
-rw-r--r--test/MC/AArch64/SVE/fabd.s28
-rw-r--r--test/MC/AArch64/SVE/fabs.s28
-rw-r--r--test/MC/AArch64/SVE/facge-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/facgt-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/facle-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/faclt-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/fadd-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/fadd.s52
-rw-r--r--test/MC/AArch64/SVE/fadda-diagnostics.s17
-rw-r--r--test/MC/AArch64/SVE/faddv-diagnostics.s17
-rw-r--r--test/MC/AArch64/SVE/fcadd.s28
-rw-r--r--test/MC/AArch64/SVE/fcmeq-diagnostics.s28
-rw-r--r--test/MC/AArch64/SVE/fcmge-diagnostics.s28
-rw-r--r--test/MC/AArch64/SVE/fcmgt-diagnostics.s28
-rw-r--r--test/MC/AArch64/SVE/fcmla-diagnostics.s10
-rw-r--r--test/MC/AArch64/SVE/fcmla.s40
-rw-r--r--test/MC/AArch64/SVE/fcmle-diagnostics.s28
-rw-r--r--test/MC/AArch64/SVE/fcmlt-diagnostics.s28
-rw-r--r--test/MC/AArch64/SVE/fcmne-diagnostics.s28
-rw-r--r--test/MC/AArch64/SVE/fcmuo-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/fcpy.s28
-rw-r--r--test/MC/AArch64/SVE/fcvt.s28
-rw-r--r--test/MC/AArch64/SVE/fcvtzs.s28
-rw-r--r--test/MC/AArch64/SVE/fcvtzu.s28
-rw-r--r--test/MC/AArch64/SVE/fdiv.s28
-rw-r--r--test/MC/AArch64/SVE/fdivr.s28
-rw-r--r--test/MC/AArch64/SVE/fdup-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/fexpa-diagnostics.s17
-rw-r--r--test/MC/AArch64/SVE/fmad.s28
-rw-r--r--test/MC/AArch64/SVE/fmax.s52
-rw-r--r--test/MC/AArch64/SVE/fmaxnm.s52
-rw-r--r--test/MC/AArch64/SVE/fmaxnmv-diagnostics.s17
-rw-r--r--test/MC/AArch64/SVE/fmaxv-diagnostics.s17
-rw-r--r--test/MC/AArch64/SVE/fmin.s52
-rw-r--r--test/MC/AArch64/SVE/fminnm.s52
-rw-r--r--test/MC/AArch64/SVE/fminnmv-diagnostics.s17
-rw-r--r--test/MC/AArch64/SVE/fminv-diagnostics.s17
-rw-r--r--test/MC/AArch64/SVE/fmla-diagnostics.s10
-rw-r--r--test/MC/AArch64/SVE/fmla.s40
-rw-r--r--test/MC/AArch64/SVE/fmls-diagnostics.s10
-rw-r--r--test/MC/AArch64/SVE/fmls.s40
-rw-r--r--test/MC/AArch64/SVE/fmov-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/fmov.s28
-rw-r--r--test/MC/AArch64/SVE/fmsb.s28
-rw-r--r--test/MC/AArch64/SVE/fmul-diagnostics.s28
-rw-r--r--test/MC/AArch64/SVE/fmul.s52
-rw-r--r--test/MC/AArch64/SVE/fmulx.s28
-rw-r--r--test/MC/AArch64/SVE/fneg.s28
-rw-r--r--test/MC/AArch64/SVE/fnmad.s28
-rw-r--r--test/MC/AArch64/SVE/fnmla.s28
-rw-r--r--test/MC/AArch64/SVE/fnmls.s28
-rw-r--r--test/MC/AArch64/SVE/fnmsb.s28
-rw-r--r--test/MC/AArch64/SVE/frecpe-diagnostics.s17
-rw-r--r--test/MC/AArch64/SVE/frecps-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/frecpx.s28
-rw-r--r--test/MC/AArch64/SVE/frinta.s28
-rw-r--r--test/MC/AArch64/SVE/frinti.s28
-rw-r--r--test/MC/AArch64/SVE/frintm.s28
-rw-r--r--test/MC/AArch64/SVE/frintn.s28
-rw-r--r--test/MC/AArch64/SVE/frintp.s28
-rw-r--r--test/MC/AArch64/SVE/frintx.s28
-rw-r--r--test/MC/AArch64/SVE/frintz.s28
-rw-r--r--test/MC/AArch64/SVE/frsqrte-diagnostics.s17
-rw-r--r--test/MC/AArch64/SVE/frsqrts-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/fscale.s28
-rw-r--r--test/MC/AArch64/SVE/fsqrt.s28
-rw-r--r--test/MC/AArch64/SVE/fsub-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/fsub.s52
-rw-r--r--test/MC/AArch64/SVE/fsubr.s52
-rw-r--r--test/MC/AArch64/SVE/ftmad-diagnostics.s10
-rw-r--r--test/MC/AArch64/SVE/ftmad.s16
-rw-r--r--test/MC/AArch64/SVE/ftsmul-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ftssel-diagnostics.s17
-rw-r--r--test/MC/AArch64/SVE/incd-diagnostics.s22
-rw-r--r--test/MC/AArch64/SVE/incd.s40
-rw-r--r--test/MC/AArch64/SVE/inch-diagnostics.s22
-rw-r--r--test/MC/AArch64/SVE/inch.s40
-rw-r--r--test/MC/AArch64/SVE/incp-diagnostics.s10
-rw-r--r--test/MC/AArch64/SVE/incp.s16
-rw-r--r--test/MC/AArch64/SVE/incw-diagnostics.s22
-rw-r--r--test/MC/AArch64/SVE/incw.s40
-rw-r--r--test/MC/AArch64/SVE/index-diagnostics.s40
-rw-r--r--test/MC/AArch64/SVE/insr-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/insr.s28
-rw-r--r--test/MC/AArch64/SVE/lasta-diagnostics.s28
-rw-r--r--test/MC/AArch64/SVE/lastb-diagnostics.s28
-rw-r--r--test/MC/AArch64/SVE/ld1b-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ld1d-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ld1h-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ld1rb-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ld1rd-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ld1rh-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ld1rqb-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ld1rqd-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ld1rqh-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ld1rqw-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ld1rsb-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ld1rsh-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ld1rsw-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ld1rw-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ld1sb-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ld1sh-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ld1sw-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ld1w-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ld2b-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ld2d-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ld2h-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ld2w-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ld3b-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ld3d-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ld3h-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ld3w-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ld4b-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ld4d-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ld4h-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ld4w-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ldff1b-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ldff1d-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ldff1h-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ldff1sb-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ldff1sh-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ldff1sw-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ldff1w-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ldnf1b-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ldnf1d-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ldnf1h-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ldnf1sb-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ldnf1sh-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ldnf1sw-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ldnf1w-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ldnt1b-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ldnt1d-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ldnt1h-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/ldnt1w-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/lsl-diagnostics.s28
-rw-r--r--test/MC/AArch64/SVE/lsl.s52
-rw-r--r--test/MC/AArch64/SVE/lslr.s28
-rw-r--r--test/MC/AArch64/SVE/lsr-diagnostics.s28
-rw-r--r--test/MC/AArch64/SVE/lsr.s52
-rw-r--r--test/MC/AArch64/SVE/lsrr.s28
-rw-r--r--test/MC/AArch64/SVE/mad.s28
-rw-r--r--test/MC/AArch64/SVE/mla.s28
-rw-r--r--test/MC/AArch64/SVE/mls.s28
-rw-r--r--test/MC/AArch64/SVE/mov-diagnostics.s76
-rw-r--r--test/MC/AArch64/SVE/mov.s76
-rw-r--r--test/MC/AArch64/SVE/movprfx-diagnostics.s193
-rw-r--r--test/MC/AArch64/SVE/movprfx.s97
-rw-r--r--test/MC/AArch64/SVE/msb.s28
-rw-r--r--test/MC/AArch64/SVE/mul-diagnostics.s10
-rw-r--r--test/MC/AArch64/SVE/mul.s40
-rw-r--r--test/MC/AArch64/SVE/neg.s28
-rw-r--r--test/MC/AArch64/SVE/not.s28
-rw-r--r--test/MC/AArch64/SVE/orn-diagnostics.s10
-rw-r--r--test/MC/AArch64/SVE/orn.s16
-rw-r--r--test/MC/AArch64/SVE/orr-diagnostics.s34
-rw-r--r--test/MC/AArch64/SVE/orr.s40
-rw-r--r--test/MC/AArch64/SVE/orv-diagnostics.s17
-rw-r--r--test/MC/AArch64/SVE/pfalse-diagnostics.s10
-rw-r--r--test/MC/AArch64/SVE/pfalse.s14
-rw-r--r--test/MC/AArch64/SVE/pfirst-diagnostics.s19
-rw-r--r--test/MC/AArch64/SVE/pfirst.s20
-rw-r--r--test/MC/AArch64/SVE/pnext-diagnostics.s10
-rw-r--r--test/MC/AArch64/SVE/pnext.s38
-rw-r--r--test/MC/AArch64/SVE/prfb-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/prfd-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/prfh-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/prfw-diagnostics.s28
-rw-r--r--test/MC/AArch64/SVE/ptest-diagnostics.s10
-rw-r--r--test/MC/AArch64/SVE/ptest.s20
-rw-r--r--test/MC/AArch64/SVE/rbit.s28
-rw-r--r--test/MC/AArch64/SVE/rev-diagnostics.s17
-rw-r--r--test/MC/AArch64/SVE/revb.s28
-rw-r--r--test/MC/AArch64/SVE/revh.s28
-rw-r--r--test/MC/AArch64/SVE/revw.s28
-rw-r--r--test/MC/AArch64/SVE/sabd.s28
-rw-r--r--test/MC/AArch64/SVE/saddv-diagnostics.s17
-rw-r--r--test/MC/AArch64/SVE/scvtf.s28
-rw-r--r--test/MC/AArch64/SVE/sdiv.s28
-rw-r--r--test/MC/AArch64/SVE/sdivr.s28
-rw-r--r--test/MC/AArch64/SVE/sdot-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/sdot.s28
-rw-r--r--test/MC/AArch64/SVE/sel-diagnostics.s17
-rw-r--r--test/MC/AArch64/SVE/smax-diagnostics.s10
-rw-r--r--test/MC/AArch64/SVE/smax.s40
-rw-r--r--test/MC/AArch64/SVE/smaxv-diagnostics.s17
-rw-r--r--test/MC/AArch64/SVE/smin-diagnostics.s10
-rw-r--r--test/MC/AArch64/SVE/smin.s40
-rw-r--r--test/MC/AArch64/SVE/sminv-diagnostics.s17
-rw-r--r--test/MC/AArch64/SVE/smulh.s28
-rw-r--r--test/MC/AArch64/SVE/splice-diagnostics.s10
-rw-r--r--test/MC/AArch64/SVE/splice.s16
-rw-r--r--test/MC/AArch64/SVE/sqadd-diagnostics.s22
-rw-r--r--test/MC/AArch64/SVE/sqadd.s16
-rw-r--r--test/MC/AArch64/SVE/sqdecd-diagnostics.s22
-rw-r--r--test/MC/AArch64/SVE/sqdecd.s40
-rw-r--r--test/MC/AArch64/SVE/sqdech-diagnostics.s22
-rw-r--r--test/MC/AArch64/SVE/sqdech.s40
-rw-r--r--test/MC/AArch64/SVE/sqdecp-diagnostics.s10
-rw-r--r--test/MC/AArch64/SVE/sqdecp.s16
-rw-r--r--test/MC/AArch64/SVE/sqdecw-diagnostics.s22
-rw-r--r--test/MC/AArch64/SVE/sqdecw.s40
-rw-r--r--test/MC/AArch64/SVE/sqincd-diagnostics.s22
-rw-r--r--test/MC/AArch64/SVE/sqincd.s40
-rw-r--r--test/MC/AArch64/SVE/sqinch-diagnostics.s22
-rw-r--r--test/MC/AArch64/SVE/sqinch.s40
-rw-r--r--test/MC/AArch64/SVE/sqincp-diagnostics.s10
-rw-r--r--test/MC/AArch64/SVE/sqincp.s16
-rw-r--r--test/MC/AArch64/SVE/sqincw-diagnostics.s22
-rw-r--r--test/MC/AArch64/SVE/sqincw.s40
-rw-r--r--test/MC/AArch64/SVE/sqsub-diagnostics.s22
-rw-r--r--test/MC/AArch64/SVE/sqsub.s16
-rw-r--r--test/MC/AArch64/SVE/st1b-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/st1d-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/st1h-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/st1w-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/st2b-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/st2d-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/st2h-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/st2w-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/st3b-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/st3d-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/st3h-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/st3w-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/st4b-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/st4d-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/st4h-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/st4w-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/stnt1b-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/stnt1d-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/stnt1h-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/stnt1w-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/sub-diagnostics.s22
-rw-r--r--test/MC/AArch64/SVE/sub.s40
-rw-r--r--test/MC/AArch64/SVE/subr-diagnostics.s10
-rw-r--r--test/MC/AArch64/SVE/subr.s40
-rw-r--r--test/MC/AArch64/SVE/sunpkhi-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/sunpklo-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/sxtb.s28
-rw-r--r--test/MC/AArch64/SVE/sxth.s28
-rw-r--r--test/MC/AArch64/SVE/sxtw.s28
-rw-r--r--test/MC/AArch64/SVE/tbl-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/trn1-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/trn2-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/uabd.s28
-rw-r--r--test/MC/AArch64/SVE/uaddv-diagnostics.s17
-rw-r--r--test/MC/AArch64/SVE/ucvtf.s28
-rw-r--r--test/MC/AArch64/SVE/udiv.s28
-rw-r--r--test/MC/AArch64/SVE/udivr.s28
-rw-r--r--test/MC/AArch64/SVE/udot-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/udot.s28
-rw-r--r--test/MC/AArch64/SVE/umax-diagnostics.s10
-rw-r--r--test/MC/AArch64/SVE/umax.s40
-rw-r--r--test/MC/AArch64/SVE/umaxv-diagnostics.s17
-rw-r--r--test/MC/AArch64/SVE/umin-diagnostics.s10
-rw-r--r--test/MC/AArch64/SVE/umin.s40
-rw-r--r--test/MC/AArch64/SVE/uminv-diagnostics.s17
-rw-r--r--test/MC/AArch64/SVE/umulh.s28
-rw-r--r--test/MC/AArch64/SVE/uqadd-diagnostics.s22
-rw-r--r--test/MC/AArch64/SVE/uqadd.s16
-rw-r--r--test/MC/AArch64/SVE/uqdecd-diagnostics.s22
-rw-r--r--test/MC/AArch64/SVE/uqdecd.s40
-rw-r--r--test/MC/AArch64/SVE/uqdech-diagnostics.s22
-rw-r--r--test/MC/AArch64/SVE/uqdech.s40
-rw-r--r--test/MC/AArch64/SVE/uqdecp-diagnostics.s11
-rw-r--r--test/MC/AArch64/SVE/uqdecp.s16
-rw-r--r--test/MC/AArch64/SVE/uqdecw-diagnostics.s22
-rw-r--r--test/MC/AArch64/SVE/uqdecw.s40
-rw-r--r--test/MC/AArch64/SVE/uqincd-diagnostics.s22
-rw-r--r--test/MC/AArch64/SVE/uqincd.s40
-rw-r--r--test/MC/AArch64/SVE/uqinch-diagnostics.s22
-rw-r--r--test/MC/AArch64/SVE/uqinch.s40
-rw-r--r--test/MC/AArch64/SVE/uqincp-diagnostics.s10
-rw-r--r--test/MC/AArch64/SVE/uqincp.s16
-rw-r--r--test/MC/AArch64/SVE/uqincw-diagnostics.s22
-rw-r--r--test/MC/AArch64/SVE/uqincw.s40
-rw-r--r--test/MC/AArch64/SVE/uqsub-diagnostics.s22
-rw-r--r--test/MC/AArch64/SVE/uqsub.s16
-rw-r--r--test/MC/AArch64/SVE/uunpkhi-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/uunpklo-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/uxtb.s28
-rw-r--r--test/MC/AArch64/SVE/uxth.s28
-rw-r--r--test/MC/AArch64/SVE/uxtw.s28
-rw-r--r--test/MC/AArch64/SVE/uzp1-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/uzp2-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/whilele-diagnostics.s20
-rw-r--r--test/MC/AArch64/SVE/whilele.s68
-rw-r--r--test/MC/AArch64/SVE/whilelo-diagnostics.s20
-rw-r--r--test/MC/AArch64/SVE/whilelo.s68
-rw-r--r--test/MC/AArch64/SVE/whilels-diagnostics.s20
-rw-r--r--test/MC/AArch64/SVE/whilels.s68
-rw-r--r--test/MC/AArch64/SVE/whilelt-diagnostics.s20
-rw-r--r--test/MC/AArch64/SVE/whilelt.s68
-rw-r--r--test/MC/AArch64/SVE/zip1-diagnostics.s16
-rw-r--r--test/MC/AArch64/SVE/zip2-diagnostics.s16
-rw-r--r--test/MC/AArch64/arm64-directive_loh.s4
-rw-r--r--test/MC/AArch64/inst-directive-other.s42
-rw-r--r--test/MC/ARM/directive-unsupported.s18
-rw-r--r--test/MC/ARM/inst-directive-other.s47
-rw-r--r--test/MC/ARM/inst-thumb-suffixes-auto.s16
-rw-r--r--test/MC/ARM/inst-thumb-suffixes.s2
-rw-r--r--test/MC/WebAssembly/debug-info.ll32
-rw-r--r--test/Other/new-pm-defaults.ll5
-rw-r--r--test/Other/new-pm-lto-defaults.ll1
-rw-r--r--test/Other/new-pm-thinlto-defaults.ll5
-rw-r--r--test/Other/opt-O2-pipeline.ll7
-rw-r--r--test/Other/opt-O3-pipeline.ll7
-rw-r--r--test/Other/opt-Os-pipeline.ll7
-rw-r--r--test/Transforms/GVN/PRE/pre-after-rle.ll6
-rw-r--r--test/Transforms/GlobalOpt/globalsra-multigep.ll16
-rw-r--r--test/Transforms/GlobalOpt/globalsra-partial.ll5
-rw-r--r--test/Transforms/Inline/attributes.ll20
-rw-r--r--test/Transforms/InstCombine/and-xor-or.ll95
-rw-r--r--test/Transforms/InstCombine/and2.ll12
-rw-r--r--test/Transforms/InstCombine/double-float-shrink-1.ll601
-rw-r--r--test/Transforms/InstCombine/gep-addrspace.ll77
-rw-r--r--test/Transforms/InstCombine/pow-1.ll95
-rw-r--r--test/Transforms/InstCombine/pow-cbrt.ll117
-rw-r--r--test/Transforms/InstCombine/pow-sqrt.ll6
-rw-r--r--test/Transforms/InstCombine/select-binop-icmp.ll391
-rw-r--r--test/Transforms/InstCombine/sub-not.ll51
-rw-r--r--test/Transforms/InstCombine/xor.ll11
-rw-r--r--test/Transforms/InstSimplify/AndOrXor.ll76
-rw-r--r--test/Transforms/InstSimplify/call.ll70
-rw-r--r--test/Transforms/InstSimplify/select-and-cmp.ll339
-rw-r--r--test/Transforms/InstSimplify/select-or-cmp.ll339
-rw-r--r--test/Transforms/InstSimplify/shift.ll52
-rw-r--r--test/Transforms/LCSSA/basictest.ll7
-rw-r--r--test/Transforms/SCCP/preserve-analysis.ll2
-rw-r--r--test/Transforms/SLPVectorizer/AArch64/PR38339.ll29
-rw-r--r--test/Transforms/SimplifyCFG/merge-cond-stores.ll37
-rw-r--r--test/tools/dsymutil/X86/accelerator.test4
-rw-r--r--test/tools/dsymutil/X86/update-one-CU.test4
-rw-r--r--test/tools/dsymutil/X86/update.test4
-rw-r--r--test/tools/llvm-dwarfdump/X86/debug_addr.s38
-rw-r--r--test/tools/llvm-dwarfdump/X86/debug_addr_64bit_address.s29
-rw-r--r--test/tools/llvm-dwarfdump/X86/debug_addr_absent.s4
-rw-r--r--test/tools/llvm-dwarfdump/X86/debug_addr_address_size_mismatch.s42
-rw-r--r--test/tools/llvm-dwarfdump/X86/debug_addr_address_size_not_multiple.s18
-rw-r--r--test/tools/llvm-dwarfdump/X86/debug_addr_dwarf4.s20
-rw-r--r--test/tools/llvm-dwarfdump/X86/debug_addr_dwarf64.s19
-rw-r--r--test/tools/llvm-dwarfdump/X86/debug_addr_empty.s7
-rw-r--r--test/tools/llvm-dwarfdump/X86/debug_addr_invalid_addr_size.s18
-rw-r--r--test/tools/llvm-dwarfdump/X86/debug_addr_segment_selector.s17
-rw-r--r--test/tools/llvm-dwarfdump/X86/debug_addr_small_length_field.s18
-rw-r--r--test/tools/llvm-dwarfdump/X86/debug_addr_too_small_for_length_field.s13
-rw-r--r--test/tools/llvm-dwarfdump/X86/debug_addr_too_small_for_section.s16
-rw-r--r--test/tools/llvm-dwarfdump/X86/debug_addr_unsupported_version.s42
-rw-r--r--test/tools/llvm-dwarfdump/X86/debug_addr_version_mismatch.s42
-rw-r--r--test/tools/llvm-mca/X86/Atom/resources-x86_32.s72
-rw-r--r--test/tools/llvm-mca/X86/Atom/resources-x86_64.s61
-rw-r--r--test/tools/llvm-mca/X86/Broadwell/resources-x86_32.s80
-rw-r--r--test/tools/llvm-mca/X86/Broadwell/resources-x86_64.s61
-rw-r--r--test/tools/llvm-mca/X86/BtVer2/dependency-breaking-cmp.s22
-rw-r--r--test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpeq.s39
-rw-r--r--test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-2.s36
-rw-r--r--test/tools/llvm-mca/X86/BtVer2/one-idioms.s112
-rw-r--r--test/tools/llvm-mca/X86/BtVer2/resources-x86_32.s84
-rw-r--r--test/tools/llvm-mca/X86/BtVer2/resources-x86_64.s61
-rw-r--r--test/tools/llvm-mca/X86/Generic/resources-x86_32.s78
-rw-r--r--test/tools/llvm-mca/X86/Generic/resources-x86_64.s61
-rw-r--r--test/tools/llvm-mca/X86/Haswell/resources-x86_32.s80
-rw-r--r--test/tools/llvm-mca/X86/Haswell/resources-x86_64.s61
-rw-r--r--test/tools/llvm-mca/X86/SLM/resources-x86_32.s78
-rw-r--r--test/tools/llvm-mca/X86/SLM/resources-x86_64.s61
-rw-r--r--test/tools/llvm-mca/X86/SandyBridge/resources-x86_32.s78
-rw-r--r--test/tools/llvm-mca/X86/SandyBridge/resources-x86_64.s61
-rw-r--r--test/tools/llvm-mca/X86/SkylakeClient/resources-x86_32.s80
-rw-r--r--test/tools/llvm-mca/X86/SkylakeClient/resources-x86_64.s61
-rw-r--r--test/tools/llvm-mca/X86/SkylakeServer/resources-x86_32.s80
-rw-r--r--test/tools/llvm-mca/X86/SkylakeServer/resources-x86_64.s61
-rw-r--r--test/tools/llvm-mca/X86/Znver1/resources-x86_32.s82
-rw-r--r--test/tools/llvm-mca/X86/Znver1/resources-x86_64.s61
-rw-r--r--test/tools/llvm-objcopy/strip-debug.test8
-rw-r--r--tools/dsymutil/DwarfLinker.cpp160
-rw-r--r--tools/dsymutil/DwarfLinker.h2
-rw-r--r--tools/dsymutil/MachOUtils.cpp42
-rw-r--r--tools/dsymutil/MachOUtils.h17
-rw-r--r--tools/dsymutil/dsymutil.cpp40
-rw-r--r--tools/llvm-mca/DispatchStage.cpp12
-rw-r--r--tools/llvm-mca/DispatchStage.h2
-rw-r--r--tools/llvm-mca/InstrBuilder.cpp4
-rw-r--r--tools/llvm-mca/Instruction.h13
-rw-r--r--tools/llvm-mca/README.txt865
-rw-r--r--tools/llvm-mca/RetireControlUnit.h2
-rw-r--r--tools/llvm-mca/RetireStage.cpp8
-rw-r--r--tools/llvm-mca/llvm-mca.cpp2
-rw-r--r--tools/llvm-objcopy/llvm-objcopy.cpp18
-rw-r--r--tools/llvm-pdbutil/MinimalSymbolDumper.cpp6
-rw-r--r--unittests/DebugInfo/CodeView/TypeIndexDiscoveryTest.cpp9
-rw-r--r--unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp71
-rw-r--r--unittests/ExecutionEngine/Orc/RPCUtilsTest.cpp81
-rw-r--r--unittests/Transforms/Vectorize/CMakeLists.txt2
-rw-r--r--unittests/Transforms/Vectorize/VPlanDominatorTreeTest.cpp196
-rw-r--r--unittests/Transforms/Vectorize/VPlanLoopInfoTest.cpp87
-rw-r--r--unittests/Transforms/Vectorize/VPlanTestBase.h15
-rw-r--r--utils/LLVMVisualizers/llvm.natvis4
-rw-r--r--utils/TableGen/CodeGenDAGPatterns.cpp44
973 files changed, 25287 insertions, 6430 deletions
diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake
index 5ea2024d1c81..ce2057f803c8 100755
--- a/cmake/modules/AddLLVM.cmake
+++ b/cmake/modules/AddLLVM.cmake
@@ -867,6 +867,7 @@ if(NOT LLVM_TOOLCHAIN_TOOLS)
llvm-ranlib
llvm-lib
llvm-objdump
+ llvm-rc
)
endif()
diff --git a/docs/CommandGuide/llvm-mca.rst b/docs/CommandGuide/llvm-mca.rst
index dd2320b15ffb..e44eb2f8ce9b 100644
--- a/docs/CommandGuide/llvm-mca.rst
+++ b/docs/CommandGuide/llvm-mca.rst
@@ -114,8 +114,8 @@ option specifies "``-``", then the output will also be sent to standard output.
.. option:: -register-file-size=<size>
Specify the size of the register file. When specified, this flag limits how
- many temporary registers are available for register renaming purposes. A value
- of zero for this flag means "unlimited number of temporary registers".
+ many physical registers are available for register renaming purposes. A value
+ of zero for this flag means "unlimited number of physical registers".
.. option:: -iterations=<number of iterations>
@@ -207,23 +207,23 @@ EXIT STATUS
:program:`llvm-mca` returns 0 on success. Otherwise, an error message is printed
to standard error, and the tool returns 1.
-HOW MCA WORKS
--------------
+HOW LLVM-MCA WORKS
+------------------
-MCA takes assembly code as input. The assembly code is parsed into a sequence
-of MCInst with the help of the existing LLVM target assembly parsers. The
-parsed sequence of MCInst is then analyzed by a ``Pipeline`` module to generate
-a performance report.
+:program:`llvm-mca` takes assembly code as input. The assembly code is parsed
+into a sequence of MCInst with the help of the existing LLVM target assembly
+parsers. The parsed sequence of MCInst is then analyzed by a ``Pipeline`` module
+to generate a performance report.
The Pipeline module simulates the execution of the machine code sequence in a
loop of iterations (default is 100). During this process, the pipeline collects
a number of execution related statistics. At the end of this process, the
pipeline generates and prints a report from the collected statistics.
-Here is an example of a performance report generated by MCA for a dot-product
-of two packed float vectors of four elements. The analysis is conducted for
-target x86, cpu btver2. The following result can be produced via the following
-command using the example located at
+Here is an example of a performance report generated by the tool for a
+dot-product of two packed float vectors of four elements. The analysis is
+conducted for target x86, cpu btver2. The following result can be produced via
+the following command using the example located at
``test/tools/llvm-mca/X86/BtVer2/dot-product.s``:
.. code-block:: bash
@@ -287,10 +287,30 @@ for a total of 900 dynamically executed instructions.
The report is structured in three main sections. The first section collects a
few performance numbers; the goal of this section is to give a very quick
overview of the performance throughput. In this example, the two important
-performance indicators are the predicted total number of cycles, and the IPC.
-IPC is probably the most important throughput indicator. A big delta between
-the Dispatch Width and the computed IPC is an indicator of potential
-performance issues.
+performance indicators are **IPC** and **Block RThroughput** (Block Reciprocal
+Throughput).
+
+IPC is computed dividing the total number of simulated instructions by the total
+number of cycles. A delta between Dispatch Width and IPC is an indicator of a
+performance issue. In the absence of loop-carried data dependencies, the
+observed IPC tends to a theoretical maximum which can be computed by dividing
+the number of instructions of a single iteration by the *Block RThroughput*.
+
+IPC is bounded from above by the dispatch width. That is because the dispatch
+width limits the maximum size of a dispatch group. IPC is also limited by the
+amount of hardware parallelism. The availability of hardware resources affects
+the resource pressure distribution, and it limits the number of instructions
+that can be executed in parallel every cycle. A delta between Dispatch
+Width and the theoretical maximum IPC is an indicator of a performance
+bottleneck caused by the lack of hardware resources. In general, the lower the
+Block RThroughput, the better.
+
+In this example, ``Instructions per iteration/Block RThroughput`` is 1.50. Since
+there are no loop-carried dependencies, the observed IPC is expected to approach
+1.50 when the number of iterations tends to infinity. The delta between the
+Dispatch Width (2.00), and the theoretical maximum IPC (1.50) is an indicator of
+a performance bottleneck caused by the lack of hardware resources, and the
+*Resource pressure view* can help to identify the problematic resource usage.
The second section of the report shows the latency and reciprocal
throughput of every instruction in the sequence. That section also reports
@@ -316,7 +336,7 @@ pressure should be uniformly distributed between multiple resources.
Timeline View
^^^^^^^^^^^^^
-MCA's timeline view produces a detailed report of each instruction's state
+The timeline view produces a detailed report of each instruction's state
transitions through an instruction pipeline. This view is enabled by the
command line option ``-timeline``. As instructions transition through the
various stages of the pipeline, their states are depicted in the view report.
@@ -331,7 +351,7 @@ These states are represented by the following characters:
Below is the timeline view for a subset of the dot-product example located in
``test/tools/llvm-mca/X86/BtVer2/dot-product.s`` and processed by
-MCA using the following command:
+:program:`llvm-mca` using the following command:
.. code-block:: bash
@@ -366,7 +386,7 @@ MCA using the following command:
2. 3 5.7 0.0 0.0 vhaddps %xmm3, %xmm3, %xmm4
The timeline view is interesting because it shows instruction state changes
-during execution. It also gives an idea of how MCA processes instructions
+during execution. It also gives an idea of how the tool processes instructions
executed on the target, and how their timing information might be calculated.
The timeline view is structured in two tables. The first table shows
@@ -411,12 +431,12 @@ Parallelism).
In the dot-product example, there are anti-dependencies introduced by
instructions from different iterations. However, those dependencies can be
removed at register renaming stage (at the cost of allocating register aliases,
-and therefore consuming temporary registers).
+and therefore consuming physical registers).
Table *Average Wait times* helps diagnose performance issues that are caused by
the presence of long latency instructions and potentially long data dependencies
-which may limit the ILP. Note that MCA, by default, assumes at least 1cy
-between the dispatch event and the issue event.
+which may limit the ILP. Note that :program:`llvm-mca`, by default, assumes at
+least 1cy between the dispatch event and the issue event.
When the performance is limited by data dependencies and/or long latency
instructions, the number of cycles spent while in the *ready* state is expected
@@ -549,3 +569,177 @@ statistics are displayed by using the command option ``-all-stats`` or
In this example, we can conclude that the IPC is mostly limited by data
dependencies, and not by resource pressure.
+
+Instruction Flow
+^^^^^^^^^^^^^^^^
+This section describes the instruction flow through MCA's default out-of-order
+pipeline, as well as the functional units involved in the process.
+
+The default pipeline implements the following sequence of stages used to
+process instructions.
+
+* Dispatch (Instruction is dispatched to the schedulers).
+* Issue (Instruction is issued to the processor pipelines).
+* Write Back (Instruction is executed, and results are written back).
+* Retire (Instruction is retired; writes are architecturally committed).
+
+The default pipeline only models the out-of-order portion of a processor.
+Therefore, the instruction fetch and decode stages are not modeled. Performance
+bottlenecks in the frontend are not diagnosed. MCA assumes that instructions
+have all been decoded and placed into a queue. Also, MCA does not model branch
+prediction.
+
+Instruction Dispatch
+""""""""""""""""""""
+During the dispatch stage, instructions are picked in program order from a
+queue of already decoded instructions, and dispatched in groups to the
+simulated hardware schedulers.
+
+The size of a dispatch group depends on the availability of the simulated
+hardware resources. The processor dispatch width defaults to the value
+of the ``IssueWidth`` in LLVM's scheduling model.
+
+An instruction can be dispatched if:
+
+* The size of the dispatch group is smaller than processor's dispatch width.
+* There are enough entries in the reorder buffer.
+* There are enough physical registers to do register renaming.
+* The schedulers are not full.
+
+Scheduling models can optionally specify which register files are available on
+the processor. MCA uses that information to initialize register file
+descriptors. Users can limit the number of physical registers that are
+globally available for register renaming by using the command option
+``-register-file-size``. A value of zero for this option means *unbounded*.
+By knowing how many registers are available for renaming, MCA can predict
+dispatch stalls caused by the lack of registers.
+
+The number of reorder buffer entries consumed by an instruction depends on the
+number of micro-opcodes specified by the target scheduling model. MCA's
+reorder buffer's purpose is to track the progress of instructions that are
+"in-flight," and to retire instructions in program order. The number of
+entries in the reorder buffer defaults to the `MicroOpBufferSize` provided by
+the target scheduling model.
+
+Instructions that are dispatched to the schedulers consume scheduler buffer
+entries. :program:`llvm-mca` queries the scheduling model to determine the set
+of buffered resources consumed by an instruction. Buffered resources are
+treated like scheduler resources.
+
+Instruction Issue
+"""""""""""""""""
+Each processor scheduler implements a buffer of instructions. An instruction
+has to wait in the scheduler's buffer until input register operands become
+available. Only at that point, does the instruction becomes eligible for
+execution and may be issued (potentially out-of-order) for execution.
+Instruction latencies are computed by :program:`llvm-mca` with the help of the
+scheduling model.
+
+:program:`llvm-mca`'s scheduler is designed to simulate multiple processor
+schedulers. The scheduler is responsible for tracking data dependencies, and
+dynamically selecting which processor resources are consumed by instructions.
+It delegates the management of processor resource units and resource groups to a
+resource manager. The resource manager is responsible for selecting resource
+units that are consumed by instructions. For example, if an instruction
+consumes 1cy of a resource group, the resource manager selects one of the
+available units from the group; by default, the resource manager uses a
+round-robin selector to guarantee that resource usage is uniformly distributed
+between all units of a group.
+
+:program:`llvm-mca`'s scheduler implements three instruction queues:
+
+* WaitQueue: a queue of instructions whose operands are not ready.
+* ReadyQueue: a queue of instructions ready to execute.
+* IssuedQueue: a queue of instructions executing.
+
+Depending on the operand availability, instructions that are dispatched to the
+scheduler are either placed into the WaitQueue or into the ReadyQueue.
+
+Every cycle, the scheduler checks if instructions can be moved from the
+WaitQueue to the ReadyQueue, and if instructions from the ReadyQueue can be
+issued to the underlying pipelines. The algorithm prioritizes older instructions
+over younger instructions.
+
+Write-Back and Retire Stage
+"""""""""""""""""""""""""""
+Issued instructions are moved from the ReadyQueue to the IssuedQueue. There,
+instructions wait until they reach the write-back stage. At that point, they
+get removed from the queue and the retire control unit is notified.
+
+When instructions are executed, the retire control unit flags the
+instruction as "ready to retire."
+
+Instructions are retired in program order. The register file is notified of
+the retirement so that it can free the physical registers that were allocated
+for the instruction during the register renaming stage.
+
+Load/Store Unit and Memory Consistency Model
+""""""""""""""""""""""""""""""""""""""""""""
+To simulate an out-of-order execution of memory operations, :program:`llvm-mca`
+utilizes a simulated load/store unit (LSUnit) to simulate the speculative
+execution of loads and stores.
+
+Each load (or store) consumes an entry in the load (or store) queue. Users can
+specify flags ``-lqueue`` and ``-squeue`` to limit the number of entries in the
+load and store queues respectively. The queues are unbounded by default.
+
+The LSUnit implements a relaxed consistency model for memory loads and stores.
+The rules are:
+
+1. A younger load is allowed to pass an older load only if there are no
+ intervening stores or barriers between the two loads.
+2. A younger load is allowed to pass an older store provided that the load does
+ not alias with the store.
+3. A younger store is not allowed to pass an older store.
+4. A younger store is not allowed to pass an older load.
+
+By default, the LSUnit optimistically assumes that loads do not alias
+(`-noalias=true`) store operations. Under this assumption, younger loads are
+always allowed to pass older stores. Essentially, the LSUnit does not attempt
+to run any alias analysis to predict when loads and stores do not alias with
+each other.
+
+Note that, in the case of write-combining memory, rule 3 could be relaxed to
+allow reordering of non-aliasing store operations. That being said, at the
+moment, there is no way to further relax the memory model (``-noalias`` is the
+only option). Essentially, there is no option to specify a different memory
+type (e.g., write-back, write-combining, write-through; etc.) and consequently
+to weaken, or strengthen, the memory model.
+
+Other limitations are:
+
+* The LSUnit does not know when store-to-load forwarding may occur.
+* The LSUnit does not know anything about cache hierarchy and memory types.
+* The LSUnit does not know how to identify serializing operations and memory
+ fences.
+
+The LSUnit does not attempt to predict if a load or store hits or misses the L1
+cache. It only knows if an instruction "MayLoad" and/or "MayStore." For
+loads, the scheduling model provides an "optimistic" load-to-use latency (which
+usually matches the load-to-use latency for when there is a hit in the L1D).
+
+:program:`llvm-mca` does not know about serializing operations or memory-barrier
+like instructions. The LSUnit conservatively assumes that an instruction which
+has both "MayLoad" and unmodeled side effects behaves like a "soft"
+load-barrier. That means, it serializes loads without forcing a flush of the
+load queue. Similarly, instructions that "MayStore" and have unmodeled side
+effects are treated like store barriers. A full memory barrier is a "MayLoad"
+and "MayStore" instruction with unmodeled side effects. This is inaccurate, but
+it is the best that we can do at the moment with the current information
+available in LLVM.
+
+A load/store barrier consumes one entry of the load/store queue. A load/store
+barrier enforces ordering of loads/stores. A younger load cannot pass a load
+barrier. Also, a younger store cannot pass a store barrier. A younger load
+has to wait for the memory/load barrier to execute. A load/store barrier is
+"executed" when it becomes the oldest entry in the load/store queue(s). That
+also means, by construction, all of the older loads/stores have been executed.
+
+In conclusion, the full set of load/store consistency rules are:
+
+#. A store may not pass a previous store.
+#. A store may not pass a previous load (regardless of ``-noalias``).
+#. A store has to wait until an older store barrier is fully executed.
+#. A load may pass a previous load.
+#. A load may not pass a previous store unless ``-noalias`` is set.
+#. A load has to wait until an older load barrier is fully executed.
diff --git a/docs/GettingStarted.rst b/docs/GettingStarted.rst
index 7cfd67ce7157..16660acc2e5c 100644
--- a/docs/GettingStarted.rst
+++ b/docs/GettingStarted.rst
@@ -838,7 +838,7 @@ To configure LLVM, follow these steps:
.. code-block:: console
- % cmake -G "Unix Makefiles" -DCMAKE_INSTALL_PREFIX=prefix=/install/path
+ % cmake -G "Unix Makefiles" -DCMAKE_INSTALL_PREFIX=/install/path
[other options] SRC_ROOT
Compiling the LLVM Suite Source Code
diff --git a/docs/LangRef.rst b/docs/LangRef.rst
index 38bed417104d..a60c9b515abc 100644
--- a/docs/LangRef.rst
+++ b/docs/LangRef.rst
@@ -4588,9 +4588,12 @@ DIExpression
``DIExpression`` nodes represent expressions that are inspired by the DWARF
expression language. They are used in :ref:`debug intrinsics<dbg_intrinsics>`
(such as ``llvm.dbg.declare`` and ``llvm.dbg.value``) to describe how the
-referenced LLVM variable relates to the source language variable.
+referenced LLVM variable relates to the source language variable. Debug
+intrinsics are interpreted left-to-right: start by pushing the value/address
+operand of the intrinsic onto a stack, then repeatedly push and evaluate
+opcodes from the DIExpression until the final variable description is produced.
-The current supported vocabulary is limited:
+The current supported opcode vocabulary is limited:
- ``DW_OP_deref`` dereferences the top of the expression stack.
- ``DW_OP_plus`` pops the last two entries from the expression stack, adds
@@ -4610,12 +4613,30 @@ The current supported vocabulary is limited:
- ``DW_OP_stack_value`` marks a constant value.
DWARF specifies three kinds of simple location descriptions: Register, memory,
-and implicit location descriptions. Register and memory location descriptions
-describe the *location* of a source variable (in the sense that a debugger might
-modify its value), whereas implicit locations describe merely the *value* of a
-source variable. DIExpressions also follow this model: A DIExpression that
-doesn't have a trailing ``DW_OP_stack_value`` will describe an *address* when
-combined with a concrete location.
+and implicit location descriptions. Note that a location description is
+defined over certain ranges of a program, i.e. the location of a variable may
+change over the course of the program. Register and memory location
+descriptions describe the *concrete location* of a source variable (in the
+sense that a debugger might modify its value), whereas *implicit locations*
+describe merely the actual *value* of a source variable which might not exist
+in registers or in memory (see ``DW_OP_stack_value``).
+
+A ``llvm.dbg.addr`` or ``llvm.dbg.declare`` intrinsic describes an indirect
+value (the address) of a source variable. The first operand of the intrinsic
+must be an address of some kind. A DIExpression attached to the intrinsic
+refines this address to produce a concrete location for the source variable.
+
+A ``llvm.dbg.value`` intrinsic describes the direct value of a source variable.
+The first operand of the intrinsic may be a direct or indirect value. A
+DIExpression attached to the intrinsic refines the first operand to produce a
+direct value. For example, if the first operand is an indirect value, it may be
+necessary to insert ``DW_OP_deref`` into the DIExpression in order to produce a
+valid debug intrinsic.
+
+.. note::
+
+ A DIExpression is interpreted in the same way regardless of which kind of
+ debug intrinsic it's attached to.
.. code-block:: text
diff --git a/docs/SourceLevelDebugging.rst b/docs/SourceLevelDebugging.rst
index 3fa738c7e442..7064c403ef1e 100644
--- a/docs/SourceLevelDebugging.rst
+++ b/docs/SourceLevelDebugging.rst
@@ -244,6 +244,11 @@ argument is a `local variable <LangRef.html#dilocalvariable>`_ containing a
description of the variable. The third argument is a `complex expression
<LangRef.html#diexpression>`_.
+An `llvm.dbg.value` intrinsic describes the *value* of a source variable
+directly, not its address. Note that the value operand of this intrinsic may
+be indirect (i.e., a pointer to the source variable), provided that interpreting
+the complex expression derives the direct value.
+
Object lifetimes and scoping
============================
diff --git a/include/llvm/ADT/DenseSet.h b/include/llvm/ADT/DenseSet.h
index 7e5171c3f3a4..b495e25dd5e5 100644
--- a/include/llvm/ADT/DenseSet.h
+++ b/include/llvm/ADT/DenseSet.h
@@ -17,7 +17,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/Support/type_traits.h"
-#include <algorithm>
+#include <algorithm>
#include <cstddef>
#include <initializer_list>
#include <iterator>
diff --git a/include/llvm/Analysis/BasicAliasAnalysis.h b/include/llvm/Analysis/BasicAliasAnalysis.h
index fa81539a9d6f..6344e84b58eb 100644
--- a/include/llvm/Analysis/BasicAliasAnalysis.h
+++ b/include/llvm/Analysis/BasicAliasAnalysis.h
@@ -43,6 +43,7 @@ class LoopInfo;
class PHINode;
class SelectInst;
class TargetLibraryInfo;
+class PhiValues;
class Value;
/// This is the AA result object for the basic, local, and stateless alias
@@ -60,19 +61,22 @@ class BasicAAResult : public AAResultBase<BasicAAResult> {
AssumptionCache &AC;
DominatorTree *DT;
LoopInfo *LI;
+ PhiValues *PV;
public:
BasicAAResult(const DataLayout &DL, const Function &F,
const TargetLibraryInfo &TLI, AssumptionCache &AC,
- DominatorTree *DT = nullptr, LoopInfo *LI = nullptr)
- : AAResultBase(), DL(DL), F(F), TLI(TLI), AC(AC), DT(DT), LI(LI) {}
+ DominatorTree *DT = nullptr, LoopInfo *LI = nullptr,
+ PhiValues *PV = nullptr)
+ : AAResultBase(), DL(DL), F(F), TLI(TLI), AC(AC), DT(DT), LI(LI), PV(PV)
+ {}
BasicAAResult(const BasicAAResult &Arg)
: AAResultBase(Arg), DL(Arg.DL), F(Arg.F), TLI(Arg.TLI), AC(Arg.AC),
- DT(Arg.DT), LI(Arg.LI) {}
+ DT(Arg.DT), LI(Arg.LI), PV(Arg.PV) {}
BasicAAResult(BasicAAResult &&Arg)
: AAResultBase(std::move(Arg)), DL(Arg.DL), F(Arg.F), TLI(Arg.TLI),
- AC(Arg.AC), DT(Arg.DT), LI(Arg.LI) {}
+ AC(Arg.AC), DT(Arg.DT), LI(Arg.LI), PV(Arg.PV) {}
/// Handle invalidation events in the new pass manager.
bool invalidate(Function &Fn, const PreservedAnalyses &PA,
diff --git a/include/llvm/Analysis/LoopAccessAnalysis.h b/include/llvm/Analysis/LoopAccessAnalysis.h
index 0f3f2be9aeb4..d27b3e42bbeb 100644
--- a/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -682,7 +682,7 @@ bool sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
SmallVectorImpl<unsigned> &SortedIndices);
/// Returns true if the memory operations \p A and \p B are consecutive.
-/// This is a simple API that does not depend on the analysis pass.
+/// This is a simple API that does not depend on the analysis pass.
bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
ScalarEvolution &SE, bool CheckType = true);
@@ -734,7 +734,7 @@ private:
/// accesses of a loop.
///
/// It runs the analysis for a loop on demand. This can be initiated by
-/// querying the loop access info via AM.getResult<LoopAccessAnalysis>.
+/// querying the loop access info via AM.getResult<LoopAccessAnalysis>.
/// getResult return a LoopAccessInfo object. See this class for the
/// specifics of what information is provided.
class LoopAccessAnalysis
diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h
index 1c6ec98dfedc..1c40cffc7f67 100644
--- a/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -44,6 +44,7 @@ class Instruction;
class LoadInst;
class PHITransAddr;
class TargetLibraryInfo;
+class PhiValues;
class Value;
/// A memory dependence query can return one of three different answers.
@@ -360,13 +361,14 @@ private:
AssumptionCache &AC;
const TargetLibraryInfo &TLI;
DominatorTree &DT;
+ PhiValues &PV;
PredIteratorCache PredCache;
public:
MemoryDependenceResults(AliasAnalysis &AA, AssumptionCache &AC,
const TargetLibraryInfo &TLI,
- DominatorTree &DT)
- : AA(AA), AC(AC), TLI(TLI), DT(DT) {}
+ DominatorTree &DT, PhiValues &PV)
+ : AA(AA), AC(AC), TLI(TLI), DT(DT), PV(PV) {}
/// Handle invalidation in the new PM.
bool invalidate(Function &F, const PreservedAnalyses &PA,
diff --git a/include/llvm/Analysis/MustExecute.h b/include/llvm/Analysis/MustExecute.h
index 8daf156567cd..97ad76d451ca 100644
--- a/include/llvm/Analysis/MustExecute.h
+++ b/include/llvm/Analysis/MustExecute.h
@@ -10,7 +10,7 @@
/// Contains a collection of routines for determining if a given instruction is
/// guaranteed to execute if a given point in control flow is reached. The most
/// common example is an instruction within a loop being provably executed if we
-/// branch to the header of it's containing loop.
+/// branch to the header of it's containing loop.
///
//===----------------------------------------------------------------------===//
@@ -58,7 +58,7 @@ void computeLoopSafetyInfo(LoopSafetyInfo *, Loop *);
bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT,
const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo);
-
+
}
#endif
diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h
index e14e2bd44034..d80ae1d6845d 100644
--- a/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -326,7 +326,7 @@ public:
bool haveFastSqrt(Type *Ty) { return false; }
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { return true; }
-
+
unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; }
int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h
index e6a219a8045b..c1a91a8e5981 100644
--- a/include/llvm/Analysis/ValueTracking.h
+++ b/include/llvm/Analysis/ValueTracking.h
@@ -464,7 +464,7 @@ class Value;
/// This is equivelent to saying that all instructions within the basic block
/// are guaranteed to transfer execution to their successor within the basic
/// block. This has the same assumptions w.r.t. undefined behavior as the
- /// instruction variant of this function.
+ /// instruction variant of this function.
bool isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB);
/// Return true if this function can prove that the instruction I
diff --git a/include/llvm/BinaryFormat/Dwarf.def b/include/llvm/BinaryFormat/Dwarf.def
index 57e259615d0c..944c5dd1c157 100644
--- a/include/llvm/BinaryFormat/Dwarf.def
+++ b/include/llvm/BinaryFormat/Dwarf.def
@@ -856,6 +856,7 @@ HANDLE_DW_UT(0x06, split_type)
// TODO: Add Mach-O and COFF names.
// Official DWARF sections.
HANDLE_DWARF_SECTION(DebugAbbrev, ".debug_abbrev", "debug-abbrev")
+HANDLE_DWARF_SECTION(DebugAddr, ".debug_addr", "debug-addr")
HANDLE_DWARF_SECTION(DebugAranges, ".debug_aranges", "debug-aranges")
HANDLE_DWARF_SECTION(DebugInfo, ".debug_info", "debug-info")
HANDLE_DWARF_SECTION(DebugTypes, ".debug_types", "debug-types")
diff --git a/include/llvm/BinaryFormat/ELF.h b/include/llvm/BinaryFormat/ELF.h
index 0f3f1939ce68..2e778779117b 100644
--- a/include/llvm/BinaryFormat/ELF.h
+++ b/include/llvm/BinaryFormat/ELF.h
@@ -413,8 +413,10 @@ enum {
// ARM Specific e_flags
enum : unsigned {
- EF_ARM_SOFT_FLOAT = 0x00000200U,
- EF_ARM_VFP_FLOAT = 0x00000400U,
+ EF_ARM_SOFT_FLOAT = 0x00000200U, // Legacy pre EABI_VER5
+ EF_ARM_ABI_FLOAT_SOFT = 0x00000200U, // EABI_VER5
+ EF_ARM_VFP_FLOAT = 0x00000400U, // Legacy pre EABI_VER5
+ EF_ARM_ABI_FLOAT_HARD = 0x00000400U, // EABI_VER5
EF_ARM_EABI_UNKNOWN = 0x00000000U,
EF_ARM_EABI_VER1 = 0x01000000U,
EF_ARM_EABI_VER2 = 0x02000000U,
diff --git a/include/llvm/CodeGen/GCStrategy.h b/include/llvm/CodeGen/GCStrategy.h
index 91604fd2df87..f835bacfb548 100644
--- a/include/llvm/CodeGen/GCStrategy.h
+++ b/include/llvm/CodeGen/GCStrategy.h
@@ -104,12 +104,12 @@ public:
const std::string &getName() const { return Name; }
/// By default, write barriers are replaced with simple store
- /// instructions. If true, you must provide a custom pass to lower
+ /// instructions. If true, you must provide a custom pass to lower
/// calls to \@llvm.gcwrite.
bool customWriteBarrier() const { return CustomWriteBarriers; }
/// By default, read barriers are replaced with simple load
- /// instructions. If true, you must provide a custom pass to lower
+ /// instructions. If true, you must provide a custom pass to lower
/// calls to \@llvm.gcread.
bool customReadBarrier() const { return CustomReadBarriers; }
@@ -146,7 +146,7 @@ public:
}
/// By default, roots are left for the code generator so it can generate a
- /// stack map. If true, you must provide a custom pass to lower
+ /// stack map. If true, you must provide a custom pass to lower
/// calls to \@llvm.gcroot.
bool customRoots() const { return CustomRoots; }
diff --git a/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
index 713d72eb4c9b..a8c26082f221 100644
--- a/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
+++ b/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
@@ -786,7 +786,7 @@ public:
/// setAction ({G_ADD, 0, LLT::scalar(32)}, Legal);
/// setLegalizeScalarToDifferentSizeStrategy(
/// G_ADD, 0, widenToLargerTypesAndNarrowToLargest);
- /// will end up defining getAction({G_ADD, 0, T}) to return the following
+ /// will end up defining getAction({G_ADD, 0, T}) to return the following
/// actions for different scalar types T:
/// LLT::scalar(1)..LLT::scalar(31): {WidenScalar, 0, LLT::scalar(32)}
/// LLT::scalar(32): {Legal, 0, LLT::scalar(32)}
@@ -814,7 +814,7 @@ public:
VectorElementSizeChangeStrategies[OpcodeIdx][TypeIdx] = S;
}
- /// A SizeChangeStrategy for the common case where legalization for a
+ /// A SizeChangeStrategy for the common case where legalization for a
/// particular operation consists of only supporting a specific set of type
/// sizes. E.g.
/// setAction ({G_DIV, 0, LLT::scalar(32)}, Legal);
diff --git a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index 983a4e680d5c..ac1673de5f3f 100644
--- a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -942,6 +942,16 @@ public:
/// \return a MachineInstrBuilder for the newly created instruction.
MachineInstrBuilder buildAtomicRMWUmin(unsigned OldValRes, unsigned Addr,
unsigned Val, MachineMemOperand &MMO);
+
+ /// Build and insert \p Res = G_BLOCK_ADDR \p BA
+ ///
+ /// G_BLOCK_ADDR computes the address of a basic block.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre \p Res must be a generic virtual register of a pointer type.
+ ///
+ /// \return The newly created instruction.
+ MachineInstrBuilder buildBlockAddress(unsigned Res, const BlockAddress *BA);
};
/// A CRTP class that contains methods for building instructions that can
diff --git a/include/llvm/CodeGen/MachORelocation.h b/include/llvm/CodeGen/MachORelocation.h
index 8c9b7a84e5b8..cbb49695af75 100644
--- a/include/llvm/CodeGen/MachORelocation.h
+++ b/include/llvm/CodeGen/MachORelocation.h
@@ -27,15 +27,15 @@ namespace llvm {
uint32_t r_symbolnum; // symbol index if r_extern == 1 else section index
bool r_pcrel; // was relocated pc-relative already
uint8_t r_length; // length = 2 ^ r_length
- bool r_extern; //
+ bool r_extern; //
uint8_t r_type; // if not 0, machine-specific relocation type.
bool r_scattered; // 1 = scattered, 0 = non-scattered
int32_t r_value; // the value the item to be relocated is referring
// to.
- public:
+ public:
uint32_t getPackedFields() const {
if (r_scattered)
- return (1 << 31) | (r_pcrel << 30) | ((r_length & 3) << 28) |
+ return (1 << 31) | (r_pcrel << 30) | ((r_length & 3) << 28) |
((r_type & 15) << 24) | (r_address & 0x00FFFFFF);
else
return (r_symbolnum << 8) | (r_pcrel << 7) | ((r_length & 3) << 5) |
@@ -45,8 +45,8 @@ namespace llvm {
uint32_t getRawAddress() const { return r_address; }
MachORelocation(uint32_t addr, uint32_t index, bool pcrel, uint8_t len,
- bool ext, uint8_t type, bool scattered = false,
- int32_t value = 0) :
+ bool ext, uint8_t type, bool scattered = false,
+ int32_t value = 0) :
r_address(addr), r_symbolnum(index), r_pcrel(pcrel), r_length(len),
r_extern(ext), r_type(type), r_scattered(scattered), r_value(value) {}
};
diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h
index 6be304fa368b..554e89019b76 100644
--- a/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/include/llvm/CodeGen/MachineModuleInfo.h
@@ -105,7 +105,7 @@ class MachineModuleInfo : public ImmutablePass {
/// basic block's address of label.
MMIAddrLabelMap *AddrLabelSymbols;
- // TODO: Ideally, what we'd like is to have a switch that allows emitting
+ // TODO: Ideally, what we'd like is to have a switch that allows emitting
// synchronous (precise at call-sites only) CFA into .eh_frame. However,
// even under this switch, we'd like .debug_frame to be precise when using
// -g. At this moment, there's no way to specify that some CFI directives
diff --git a/include/llvm/CodeGen/MachineOutliner.h b/include/llvm/CodeGen/MachineOutliner.h
index 4249a99a891b..95bfc24b57ff 100644
--- a/include/llvm/CodeGen/MachineOutliner.h
+++ b/include/llvm/CodeGen/MachineOutliner.h
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
namespace llvm {
namespace outliner {
@@ -74,6 +75,13 @@ public:
/// cost model information.
LiveRegUnits LRU;
+ /// Contains the accumulated register liveness information for the
+ /// instructions in this \p Candidate.
+ ///
+ /// This is optionally used by the target to determine which registers have
+ /// been used across the sequence.
+ LiveRegUnits UsedInSequence;
+
/// Return the number of instructions in this Candidate.
unsigned getLength() const { return Len; }
@@ -137,6 +145,12 @@ public:
// outlining candidate.
std::for_each(MBB->rbegin(), (MachineBasicBlock::reverse_iterator)front(),
[this](MachineInstr &MI) { LRU.stepBackward(MI); });
+
+ // Walk over the sequence itself and figure out which registers were used
+ // in the sequence.
+ UsedInSequence.init(TRI);
+ std::for_each(front(), std::next(back()),
+ [this](MachineInstr &MI) { UsedInSequence.accumulate(MI); });
}
};
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h
index 5e7837834ec8..56adc2e2fbfa 100644
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -252,7 +252,7 @@ class TargetRegisterInfo;
MachineInstr *Instr = nullptr; ///< Alternatively, a MachineInstr.
public:
- SUnit *OrigNode = nullptr; ///< If not this, the node from which this node
+ SUnit *OrigNode = nullptr; ///< If not this, the node from which this node
/// was cloned. (SD scheduling only)
const MCSchedClassDesc *SchedClass =
diff --git a/include/llvm/CodeGen/StackMaps.h b/include/llvm/CodeGen/StackMaps.h
index 3c9850265737..e584a4136e4f 100644
--- a/include/llvm/CodeGen/StackMaps.h
+++ b/include/llvm/CodeGen/StackMaps.h
@@ -156,7 +156,7 @@ class StatepointOpers {
// TODO:: we should change the STATEPOINT representation so that CC and
// Flags should be part of meta operands, with args and deopt operands, and
// gc operands all prefixed by their length and a type code. This would be
- // much more consistent.
+ // much more consistent.
public:
// These values are aboolute offsets into the operands of the statepoint
// instruction.
diff --git a/include/llvm/CodeGen/TargetLowering.h b/include/llvm/CodeGen/TargetLowering.h
index d5ff71cf9ac2..40540bd6e1ff 100644
--- a/include/llvm/CodeGen/TargetLowering.h
+++ b/include/llvm/CodeGen/TargetLowering.h
@@ -718,7 +718,7 @@ public:
/// always broken down into scalars in some contexts. This occurs even if the
/// vector type is legal.
virtual unsigned getVectorTypeBreakdownForCallingConv(
- LLVMContext &Context, EVT VT, EVT &IntermediateVT,
+ LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
unsigned &NumIntermediates, MVT &RegisterVT) const {
return getVectorTypeBreakdown(Context, VT, IntermediateVT, NumIntermediates,
RegisterVT);
@@ -1174,7 +1174,7 @@ public:
/// are legal for some operations and not for other operations.
/// For MIPS all vector types must be passed through the integer register set.
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context,
- EVT VT) const {
+ CallingConv::ID CC, EVT VT) const {
return getRegisterType(Context, VT);
}
@@ -1182,6 +1182,7 @@ public:
/// this occurs when a vector type is used, as vector are passed through the
/// integer register set.
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
EVT VT) const {
return getNumRegisters(Context, VT);
}
@@ -3489,10 +3490,10 @@ public:
//
SDValue BuildSDIV(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
bool IsAfterLegalization,
- std::vector<SDNode *> *Created) const;
+ SmallVectorImpl<SDNode *> &Created) const;
SDValue BuildUDIV(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
bool IsAfterLegalization,
- std::vector<SDNode *> *Created) const;
+ SmallVectorImpl<SDNode *> &Created) const;
/// Targets may override this function to provide custom SDIV lowering for
/// power-of-2 denominators. If the target returns an empty SDValue, LLVM
@@ -3500,7 +3501,7 @@ public:
/// operations.
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG,
- std::vector<SDNode *> *Created) const;
+ SmallVectorImpl<SDNode *> &Created) const;
/// Indicate whether this target prefers to combine FDIVs with the same
/// divisor. If the transform should never be done, return zero. If the
@@ -3690,7 +3691,7 @@ private:
/// Given an LLVM IR type and return type attributes, compute the return value
/// EVTs and flags, and optionally also the offsets, if the return value is
/// being lowered to memory.
-void GetReturnInfo(Type *ReturnType, AttributeList attr,
+void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr,
SmallVectorImpl<ISD::OutputArg> &Outs,
const TargetLowering &TLI, const DataLayout &DL);
diff --git a/include/llvm/CodeGen/TargetPassConfig.h b/include/llvm/CodeGen/TargetPassConfig.h
index 5918c524d11c..8f5c9cb8c3fa 100644
--- a/include/llvm/CodeGen/TargetPassConfig.h
+++ b/include/llvm/CodeGen/TargetPassConfig.h
@@ -16,7 +16,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
-#include <cassert>
+#include <cassert>
#include <string>
namespace llvm {
diff --git a/include/llvm/CodeGen/TargetRegisterInfo.h b/include/llvm/CodeGen/TargetRegisterInfo.h
index 538a5845466c..55a8ba630a59 100644
--- a/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -456,7 +456,7 @@ public:
/// stack frame offset. The first register is closest to the incoming stack
/// pointer if stack grows down, and vice versa.
/// Notice: This function does not take into account disabled CSRs.
- /// In most cases you will want to use instead the function
+ /// In most cases you will want to use instead the function
/// getCalleeSavedRegs that is implemented in MachineRegisterInfo.
virtual const MCPhysReg*
getCalleeSavedRegs(const MachineFunction *MF) const = 0;
@@ -518,7 +518,7 @@ public:
/// guaranteed to be restored before any uses. This is useful for targets that
/// have call sequences where a GOT register may be updated by the caller
/// prior to a call and is guaranteed to be restored (also by the caller)
- /// after the call.
+ /// after the call.
virtual bool isCallerPreservedPhysReg(unsigned PhysReg,
const MachineFunction &MF) const {
return false;
diff --git a/include/llvm/DebugInfo/CodeView/CodeViewSymbols.def b/include/llvm/DebugInfo/CodeView/CodeViewSymbols.def
index 41c538076798..b5f1cc0198dc 100644
--- a/include/llvm/DebugInfo/CodeView/CodeViewSymbols.def
+++ b/include/llvm/DebugInfo/CodeView/CodeViewSymbols.def
@@ -143,7 +143,6 @@ CV_SYMBOL(S_MANSLOT , 0x1120)
CV_SYMBOL(S_MANMANYREG , 0x1121)
CV_SYMBOL(S_MANREGREL , 0x1122)
CV_SYMBOL(S_MANMANYREG2 , 0x1123)
-CV_SYMBOL(S_UNAMESPACE , 0x1124)
CV_SYMBOL(S_DATAREF , 0x1126)
CV_SYMBOL(S_ANNOTATIONREF , 0x1128)
CV_SYMBOL(S_TOKENREF , 0x1129)
@@ -255,6 +254,7 @@ SYMBOL_RECORD_ALIAS(S_GMANDATA , 0x111d, ManagedGlobalData, DataSym)
SYMBOL_RECORD(S_LTHREAD32 , 0x1112, ThreadLocalDataSym)
SYMBOL_RECORD_ALIAS(S_GTHREAD32 , 0x1113, GlobalTLS, ThreadLocalDataSym)
+SYMBOL_RECORD(S_UNAMESPACE , 0x1124, UsingNamespaceSym)
#undef CV_SYMBOL
#undef SYMBOL_RECORD
diff --git a/include/llvm/DebugInfo/CodeView/SymbolRecord.h b/include/llvm/DebugInfo/CodeView/SymbolRecord.h
index cf267f23967b..93306824012e 100644
--- a/include/llvm/DebugInfo/CodeView/SymbolRecord.h
+++ b/include/llvm/DebugInfo/CodeView/SymbolRecord.h
@@ -942,6 +942,19 @@ public:
uint32_t RecordOffset;
};
+// S_UNAMESPACE
+class UsingNamespaceSym : public SymbolRecord {
+public:
+ explicit UsingNamespaceSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
+ explicit UsingNamespaceSym(uint32_t RecordOffset)
+ : SymbolRecord(SymbolRecordKind::RegRelativeSym),
+ RecordOffset(RecordOffset) {}
+
+ StringRef Name;
+
+ uint32_t RecordOffset;
+};
+
// S_ANNOTATION
using CVSymbol = CVRecord<SymbolKind>;
diff --git a/include/llvm/DebugInfo/DIContext.h b/include/llvm/DebugInfo/DIContext.h
index f89eb34fdd77..bbdd5e0d9c3f 100644
--- a/include/llvm/DebugInfo/DIContext.h
+++ b/include/llvm/DebugInfo/DIContext.h
@@ -154,6 +154,8 @@ enum DIDumpType : unsigned {
struct DIDumpOptions {
unsigned DumpType = DIDT_All;
unsigned RecurseDepth = -1U;
+ uint16_t Version = 0; // DWARF version to assume when extracting.
+ uint8_t AddrSize = 4; // Address byte size to assume when extracting.
bool ShowAddresses = true;
bool ShowChildren = false;
bool ShowParents = false;
diff --git a/include/llvm/DebugInfo/DWARF/DWARFContext.h b/include/llvm/DebugInfo/DWARF/DWARFContext.h
index fe7430c9f04c..f5419fe02421 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFContext.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFContext.h
@@ -323,6 +323,10 @@ public:
/// have initialized the relevant target descriptions.
Error loadRegisterInfo(const object::ObjectFile &Obj);
+ /// Get address size from CUs.
+ /// TODO: refactor compile_units() to make this const.
+ uint8_t getCUAddrSize();
+
private:
/// Return the compile unit which contains instruction with provided
/// address.
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h b/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h
index 10e146b70ec7..1ed087520b30 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h
@@ -51,6 +51,8 @@ public:
/// reflect the absolute address of this pointer.
Optional<uint64_t> getEncodedPointer(uint32_t *Offset, uint8_t Encoding,
uint64_t AbsPosOffset = 0) const;
+
+ size_t size() const { return Section == nullptr ? 0 : Section->Data.size(); }
};
} // end namespace llvm
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h b/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h
new file mode 100644
index 000000000000..ffbd1b06d1e2
--- /dev/null
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h
@@ -0,0 +1,98 @@
+//===- DWARFDebugAddr.h -------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_DWARFDEBUGADDR_H
+#define LLVM_DEBUGINFO_DWARFDEBUGADDR_H
+
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/DebugInfo/DIContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include <cstdint>
+#include <map>
+#include <vector>
+
+namespace llvm {
+
+class Error;
+class raw_ostream;
+
+/// A class representing an address table as specified in DWARF v5.
+/// The table consists of a header followed by an array of address values from
+/// .debug_addr section.
+class DWARFDebugAddrTable {
+public:
+ struct Header {
+ /// The total length of the entries for this table, not including the length
+ /// field itself.
+ uint32_t Length = 0;
+ /// The DWARF version number.
+ uint16_t Version = 5;
+ /// The size in bytes of an address on the target architecture. For
+ /// segmented addressing, this is the size of the offset portion of the
+ /// address.
+ uint8_t AddrSize;
+ /// The size in bytes of a segment selector on the target architecture.
+ /// If the target system uses a flat address space, this value is 0.
+ uint8_t SegSize = 0;
+ };
+
+private:
+ dwarf::DwarfFormat Format;
+ uint32_t HeaderOffset;
+ Header HeaderData;
+ uint32_t DataSize = 0;
+ std::vector<uint64_t> Addrs;
+
+public:
+ void clear();
+
+ /// Extract an entire table, including all addresses.
+ Error extract(DWARFDataExtractor Data, uint32_t *OffsetPtr,
+ uint16_t Version, uint8_t AddrSize,
+ std::function<void(Error)> WarnCallback);
+
+ uint32_t getHeaderOffset() const { return HeaderOffset; }
+ uint8_t getAddrSize() const { return HeaderData.AddrSize; }
+ void dump(raw_ostream &OS, DIDumpOptions DumpOpts = {}) const;
+
+ /// Return the address based on a given index.
+ Expected<uint64_t> getAddrEntry(uint32_t Index) const;
+
+ /// Return the size of the table header including the length
+ /// but not including the addresses.
+ uint8_t getHeaderSize() const {
+ switch (Format) {
+ case dwarf::DwarfFormat::DWARF32:
+ return 8; // 4 + 2 + 1 + 1
+ case dwarf::DwarfFormat::DWARF64:
+ return 16; // 12 + 2 + 1 + 1
+ }
+ llvm_unreachable("Invalid DWARF format (expected DWARF32 or DWARF64)");
+ }
+
+ /// Returns the length of this table, including the length field, or 0 if the
+ /// length has not been determined (e.g. because the table has not yet been
+ /// parsed, or there was a problem in parsing).
+ uint32_t getLength() const;
+
+ /// Verify that the given length is valid for this table.
+ bool hasValidLength() const { return getLength() != 0; }
+
+ /// Invalidate Length field to stop further processing.
+ void invalidateLength() { HeaderData.Length = 0; }
+
+ /// Returns the length of the array of addresses.
+ uint32_t getDataSize() const;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_DWARFDEBUGADDR_H
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDie.h b/include/llvm/DebugInfo/DWARF/DWARFDie.h
index 6e6b57cbcbd4..c77034f6348f 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDie.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDie.h
@@ -46,7 +46,7 @@ class DWARFDie {
public:
DWARFDie() = default;
- DWARFDie(DWARFUnit *Unit, const DWARFDebugInfoEntry * D) : U(Unit), Die(D) {}
+ DWARFDie(DWARFUnit *Unit, const DWARFDebugInfoEntry *D) : U(Unit), Die(D) {}
bool isValid() const { return U && Die; }
explicit operator bool() const { return isValid(); }
@@ -82,9 +82,7 @@ public:
}
/// Returns true for a valid DIE that terminates a sibling chain.
- bool isNULL() const {
- return getAbbreviationDeclarationPtr() == nullptr;
- }
+ bool isNULL() const { return getAbbreviationDeclarationPtr() == nullptr; }
/// Returns true if DIE represents a subprogram (not inlined).
bool isSubprogramDIE() const;
@@ -129,7 +127,6 @@ public:
void dump(raw_ostream &OS, unsigned indent = 0,
DIDumpOptions DumpOpts = DIDumpOptions()) const;
-
/// Convenience zero-argument overload for debugging.
LLVM_DUMP_METHOD void dump() const;
@@ -275,12 +272,16 @@ public:
iterator begin() const;
iterator end() const;
+
+ std::reverse_iterator<iterator> rbegin() const;
+ std::reverse_iterator<iterator> rend() const;
+
iterator_range<iterator> children() const;
};
-class DWARFDie::attribute_iterator :
- public iterator_facade_base<attribute_iterator, std::forward_iterator_tag,
- const DWARFAttribute> {
+class DWARFDie::attribute_iterator
+ : public iterator_facade_base<attribute_iterator, std::forward_iterator_tag,
+ const DWARFAttribute> {
/// The DWARF DIE we are extracting attributes from.
DWARFDie Die;
/// The value vended to clients via the operator*() or operator->().
@@ -288,6 +289,9 @@ class DWARFDie::attribute_iterator :
/// The attribute index within the abbreviation declaration in Die.
uint32_t Index;
+ friend bool operator==(const attribute_iterator &LHS,
+ const attribute_iterator &RHS);
+
/// Update the attribute index and attempt to read the attribute value. If the
/// attribute is able to be read, update AttrValue and the Index member
/// variable. If the attribute value is not able to be read, an appropriate
@@ -303,12 +307,21 @@ public:
attribute_iterator &operator--();
explicit operator bool() const { return AttrValue.isValid(); }
const DWARFAttribute &operator*() const { return AttrValue; }
- bool operator==(const attribute_iterator &X) const { return Index == X.Index; }
};
+inline bool operator==(const DWARFDie::attribute_iterator &LHS,
+ const DWARFDie::attribute_iterator &RHS) {
+ return LHS.Index == RHS.Index;
+}
+
+inline bool operator!=(const DWARFDie::attribute_iterator &LHS,
+ const DWARFDie::attribute_iterator &RHS) {
+ return !(LHS == RHS);
+}
+
inline bool operator==(const DWARFDie &LHS, const DWARFDie &RHS) {
return LHS.getDebugInfoEntry() == RHS.getDebugInfoEntry() &&
- LHS.getDwarfUnit() == RHS.getDwarfUnit();
+ LHS.getDwarfUnit() == RHS.getDwarfUnit();
}
inline bool operator!=(const DWARFDie &LHS, const DWARFDie &RHS) {
@@ -323,11 +336,15 @@ class DWARFDie::iterator
: public iterator_facade_base<iterator, std::bidirectional_iterator_tag,
const DWARFDie> {
DWARFDie Die;
+
+ friend std::reverse_iterator<llvm::DWARFDie::iterator>;
+ friend bool operator==(const DWARFDie::iterator &LHS,
+ const DWARFDie::iterator &RHS);
+
public:
iterator() = default;
- explicit iterator(DWARFDie D) : Die(D) {
- }
+ explicit iterator(DWARFDie D) : Die(D) {}
iterator &operator++() {
Die = Die.getSibling();
@@ -339,11 +356,19 @@ public:
return *this;
}
- explicit operator bool() const { return Die.isValid(); }
const DWARFDie &operator*() const { return Die; }
- bool operator==(const iterator &X) const { return Die == X.Die; }
};
+inline bool operator==(const DWARFDie::iterator &LHS,
+ const DWARFDie::iterator &RHS) {
+ return LHS.Die == RHS.Die;
+}
+
+inline bool operator!=(const DWARFDie::iterator &LHS,
+ const DWARFDie::iterator &RHS) {
+ return !(LHS == RHS);
+}
+
// These inline functions must follow the DWARFDie::iterator definition above
// as they use functions from that class.
inline DWARFDie::iterator DWARFDie::begin() const {
@@ -360,4 +385,80 @@ inline iterator_range<DWARFDie::iterator> DWARFDie::children() const {
} // end namespace llvm
+namespace std {
+
+template <>
+class reverse_iterator<llvm::DWARFDie::iterator>
+ : public llvm::iterator_facade_base<
+ reverse_iterator<llvm::DWARFDie::iterator>,
+ bidirectional_iterator_tag, const llvm::DWARFDie> {
+
+private:
+ llvm::DWARFDie Die;
+ bool AtEnd;
+
+public:
+ reverse_iterator(llvm::DWARFDie::iterator It)
+ : Die(It.Die), AtEnd(!It.Die.getPreviousSibling()) {
+ if (!AtEnd)
+ Die = Die.getPreviousSibling();
+ }
+
+ reverse_iterator<llvm::DWARFDie::iterator> &operator++() {
+ assert(!AtEnd && "Incrementing rend");
+ llvm::DWARFDie D = Die.getPreviousSibling();
+ if (D)
+ Die = D;
+ else
+ AtEnd = true;
+ return *this;
+ }
+
+ reverse_iterator<llvm::DWARFDie::iterator> &operator--() {
+ if (AtEnd) {
+ AtEnd = false;
+ return *this;
+ }
+ Die = Die.getSibling();
+ assert(!Die.isNULL() && "Decrementing rbegin");
+ return *this;
+ }
+
+ const llvm::DWARFDie &operator*() const {
+ assert(Die.isValid());
+ return Die;
+ }
+
+ // FIXME: We should be able to specify the equals operator as a friend, but
+ // that causes the compiler to think the operator overload is ambiguous
+ // with the friend declaration and the actual definition as candidates.
+ bool equals(const reverse_iterator<llvm::DWARFDie::iterator> &RHS) const {
+ return Die == RHS.Die && AtEnd == RHS.AtEnd;
+ }
+};
+
+} // namespace std
+
+namespace llvm {
+
+inline bool operator==(const std::reverse_iterator<DWARFDie::iterator> &LHS,
+ const std::reverse_iterator<DWARFDie::iterator> &RHS) {
+ return LHS.equals(RHS);
+}
+
+inline bool operator!=(const std::reverse_iterator<DWARFDie::iterator> &LHS,
+ const std::reverse_iterator<DWARFDie::iterator> &RHS) {
+ return !(LHS == RHS);
+}
+
+inline std::reverse_iterator<DWARFDie::iterator> DWARFDie::rbegin() const {
+ return llvm::make_reverse_iterator(end());
+}
+
+inline std::reverse_iterator<DWARFDie::iterator> DWARFDie::rend() const {
+ return llvm::make_reverse_iterator(begin());
+}
+
+} // end namespace llvm
+
#endif // LLVM_DEBUGINFO_DWARFDIE_H
diff --git a/include/llvm/ExecutionEngine/Orc/RPCSerialization.h b/include/llvm/ExecutionEngine/Orc/RPCSerialization.h
index 569c50602f3a..1e5f6ced597a 100644
--- a/include/llvm/ExecutionEngine/Orc/RPCSerialization.h
+++ b/include/llvm/ExecutionEngine/Orc/RPCSerialization.h
@@ -14,7 +14,10 @@
#include "llvm/Support/thread.h"
#include <map>
#include <mutex>
+#include <set>
#include <sstream>
+#include <string>
+#include <vector>
namespace llvm {
namespace orc {
@@ -205,6 +208,42 @@ std::mutex RPCTypeName<std::vector<T>>::NameMutex;
template <typename T>
std::string RPCTypeName<std::vector<T>>::Name;
+template <typename T> class RPCTypeName<std::set<T>> {
+public:
+ static const char *getName() {
+ std::lock_guard<std::mutex> Lock(NameMutex);
+ if (Name.empty())
+ raw_string_ostream(Name)
+ << "std::set<" << RPCTypeName<T>::getName() << ">";
+ return Name.data();
+ }
+
+private:
+ static std::mutex NameMutex;
+ static std::string Name;
+};
+
+template <typename T> std::mutex RPCTypeName<std::set<T>>::NameMutex;
+template <typename T> std::string RPCTypeName<std::set<T>>::Name;
+
+template <typename K, typename V> class RPCTypeName<std::map<K, V>> {
+public:
+ static const char *getName() {
+ std::lock_guard<std::mutex> Lock(NameMutex);
+ if (Name.empty())
+ raw_string_ostream(Name)
+ << "std::map<" << RPCTypeNameSequence<K, V>() << ">";
+ return Name.data();
+ }
+
+private:
+ static std::mutex NameMutex;
+ static std::string Name;
+};
+
+template <typename K, typename V>
+std::mutex RPCTypeName<std::map<K, V>>::NameMutex;
+template <typename K, typename V> std::string RPCTypeName<std::map<K, V>>::Name;
/// The SerializationTraits<ChannelT, T> class describes how to serialize and
/// deserialize an instance of type T to/from an abstract channel of type
@@ -527,15 +566,20 @@ public:
};
/// SerializationTraits default specialization for std::pair.
-template <typename ChannelT, typename T1, typename T2>
-class SerializationTraits<ChannelT, std::pair<T1, T2>> {
+template <typename ChannelT, typename T1, typename T2, typename T3, typename T4>
+class SerializationTraits<ChannelT, std::pair<T1, T2>, std::pair<T3, T4>> {
public:
- static Error serialize(ChannelT &C, const std::pair<T1, T2> &V) {
- return serializeSeq(C, V.first, V.second);
+ static Error serialize(ChannelT &C, const std::pair<T3, T4> &V) {
+ if (auto Err = SerializationTraits<ChannelT, T1, T3>::serialize(C, V.first))
+ return Err;
+ return SerializationTraits<ChannelT, T2, T4>::serialize(C, V.second);
}
- static Error deserialize(ChannelT &C, std::pair<T1, T2> &V) {
- return deserializeSeq(C, V.first, V.second);
+ static Error deserialize(ChannelT &C, std::pair<T3, T4> &V) {
+ if (auto Err =
+ SerializationTraits<ChannelT, T1, T3>::deserialize(C, V.first))
+ return Err;
+ return SerializationTraits<ChannelT, T2, T4>::deserialize(C, V.second);
}
};
@@ -589,6 +633,9 @@ public:
/// Deserialize a std::vector<T> to a std::vector<T>.
static Error deserialize(ChannelT &C, std::vector<T> &V) {
+ assert(V.empty() &&
+ "Expected default-constructed vector to deserialize into");
+
uint64_t Count = 0;
if (auto Err = deserializeSeq(C, Count))
return Err;
@@ -602,6 +649,92 @@ public:
}
};
+template <typename ChannelT, typename T, typename T2>
+class SerializationTraits<ChannelT, std::set<T>, std::set<T2>> {
+public:
+ /// Serialize a std::set<T> from std::set<T2>.
+ static Error serialize(ChannelT &C, const std::set<T2> &S) {
+ if (auto Err = serializeSeq(C, static_cast<uint64_t>(S.size())))
+ return Err;
+
+ for (const auto &E : S)
+ if (auto Err = SerializationTraits<ChannelT, T, T2>::serialize(C, E))
+ return Err;
+
+ return Error::success();
+ }
+
+ /// Deserialize a std::set<T> to a std::set<T>.
+ static Error deserialize(ChannelT &C, std::set<T2> &S) {
+ assert(S.empty() && "Expected default-constructed set to deserialize into");
+
+ uint64_t Count = 0;
+ if (auto Err = deserializeSeq(C, Count))
+ return Err;
+
+ while (Count-- != 0) {
+ T2 Val;
+ if (auto Err = SerializationTraits<ChannelT, T, T2>::deserialize(C, Val))
+ return Err;
+
+ auto Added = S.insert(Val).second;
+ if (!Added)
+ return make_error<StringError>("Duplicate element in deserialized set",
+ orcError(OrcErrorCode::UnknownORCError));
+ }
+
+ return Error::success();
+ }
+};
+
+template <typename ChannelT, typename K, typename V, typename K2, typename V2>
+class SerializationTraits<ChannelT, std::map<K, V>, std::map<K2, V2>> {
+public:
+ /// Serialize a std::map<K, V> from std::map<K2, V2>.
+ static Error serialize(ChannelT &C, const std::map<K2, V2> &M) {
+ if (auto Err = serializeSeq(C, static_cast<uint64_t>(M.size())))
+ return Err;
+
+ for (const auto &E : M) {
+ if (auto Err =
+ SerializationTraits<ChannelT, K, K2>::serialize(C, E.first))
+ return Err;
+ if (auto Err =
+ SerializationTraits<ChannelT, V, V2>::serialize(C, E.second))
+ return Err;
+ }
+
+ return Error::success();
+ }
+
+ /// Deserialize a std::map<K, V> to a std::map<K, V>.
+ static Error deserialize(ChannelT &C, std::map<K2, V2> &M) {
+ assert(M.empty() && "Expected default-constructed map to deserialize into");
+
+ uint64_t Count = 0;
+ if (auto Err = deserializeSeq(C, Count))
+ return Err;
+
+ while (Count-- != 0) {
+ std::pair<K2, V2> Val;
+ if (auto Err =
+ SerializationTraits<ChannelT, K, K2>::deserialize(C, Val.first))
+ return Err;
+
+ if (auto Err =
+ SerializationTraits<ChannelT, V, V2>::deserialize(C, Val.second))
+ return Err;
+
+ auto Added = M.insert(Val).second;
+ if (!Added)
+ return make_error<StringError>("Duplicate element in deserialized map",
+ orcError(OrcErrorCode::UnknownORCError));
+ }
+
+ return Error::success();
+ }
+};
+
} // end namespace rpc
} // end namespace orc
} // end namespace llvm
diff --git a/include/llvm/IR/Attributes.td b/include/llvm/IR/Attributes.td
index 1019f867aab0..39978c41ac72 100644
--- a/include/llvm/IR/Attributes.td
+++ b/include/llvm/IR/Attributes.td
@@ -236,3 +236,4 @@ def : MergeRule<"adjustCallerSSPLevel">;
def : MergeRule<"adjustCallerStackProbes">;
def : MergeRule<"adjustCallerStackProbeSize">;
def : MergeRule<"adjustMinLegalVectorWidth">;
+def : MergeRule<"adjustNullPointerValidAttr">;
diff --git a/include/llvm/IR/Instruction.h b/include/llvm/IR/Instruction.h
index a3bf25056ee5..643c2a0761d1 100644
--- a/include/llvm/IR/Instruction.h
+++ b/include/llvm/IR/Instruction.h
@@ -547,7 +547,7 @@ public:
/// may have side effects cannot be removed without semantically changing the
/// generated program.
bool isSafeToRemove() const;
-
+
/// Return true if the instruction is a variety of EH-block.
bool isEHPad() const {
switch (getOpcode()) {
diff --git a/include/llvm/IR/Instructions.h b/include/llvm/IR/Instructions.h
index a2cb84a071f2..9be8bd1a07bc 100644
--- a/include/llvm/IR/Instructions.h
+++ b/include/llvm/IR/Instructions.h
@@ -4016,7 +4016,7 @@ public:
void setDoesNotThrow() {
addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
}
-
+
/// Return the function called, or null if this is an
/// indirect function invocation.
///
diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td
index 64455573ff19..0cec754dd649 100644
--- a/include/llvm/IR/Intrinsics.td
+++ b/include/llvm/IR/Intrinsics.td
@@ -541,7 +541,7 @@ let IntrProperties = [IntrInaccessibleMemOnly] in {
[ LLVMMatchType<0>,
llvm_metadata_ty,
llvm_metadata_ty ]>;
- def int_experimental_constrained_exp : Intrinsic<[ llvm_anyfloat_ty ],
+ def int_experimental_constrained_exp : Intrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
llvm_metadata_ty,
llvm_metadata_ty ]>;
diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td
index 8555db01645f..9f361410b9b8 100644
--- a/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1191,7 +1191,7 @@ def int_amdgcn_ds_bpermute :
// Deep learning intrinsics.
//===----------------------------------------------------------------------===//
-// f32 %r = llvm.amdgcn.fdot2(v2f16 %a, v2f16 %b, f32 %c)
+// f32 %r = llvm.amdgcn.fdot2(v2f16 %a, v2f16 %b, f32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
def int_amdgcn_fdot2 :
GCCBuiltin<"__builtin_amdgcn_fdot2">,
@@ -1200,12 +1200,13 @@ def int_amdgcn_fdot2 :
[
llvm_v2f16_ty, // %a
llvm_v2f16_ty, // %b
- llvm_float_ty // %c
+ llvm_float_ty, // %c
+ llvm_i1_ty // %clamp
],
[IntrNoMem, IntrSpeculatable]
>;
-// i32 %r = llvm.amdgcn.sdot2(v2i16 %a, v2i16 %b, i32 %c)
+// i32 %r = llvm.amdgcn.sdot2(v2i16 %a, v2i16 %b, i32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
def int_amdgcn_sdot2 :
GCCBuiltin<"__builtin_amdgcn_sdot2">,
@@ -1214,12 +1215,13 @@ def int_amdgcn_sdot2 :
[
llvm_v2i16_ty, // %a
llvm_v2i16_ty, // %b
- llvm_i32_ty // %c
+ llvm_i32_ty, // %c
+ llvm_i1_ty // %clamp
],
[IntrNoMem, IntrSpeculatable]
>;
-// u32 %r = llvm.amdgcn.udot2(v2u16 %a, v2u16 %b, u32 %c)
+// u32 %r = llvm.amdgcn.udot2(v2u16 %a, v2u16 %b, u32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
def int_amdgcn_udot2 :
GCCBuiltin<"__builtin_amdgcn_udot2">,
@@ -1228,12 +1230,13 @@ def int_amdgcn_udot2 :
[
llvm_v2i16_ty, // %a
llvm_v2i16_ty, // %b
- llvm_i32_ty // %c
+ llvm_i32_ty, // %c
+ llvm_i1_ty // %clamp
],
[IntrNoMem, IntrSpeculatable]
>;
-// i32 %r = llvm.amdgcn.sdot4(v4i8 (as i32) %a, v4i8 (as i32) %b, i32 %c)
+// i32 %r = llvm.amdgcn.sdot4(v4i8 (as i32) %a, v4i8 (as i32) %b, i32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c
def int_amdgcn_sdot4 :
GCCBuiltin<"__builtin_amdgcn_sdot4">,
@@ -1242,12 +1245,13 @@ def int_amdgcn_sdot4 :
[
llvm_i32_ty, // %a
llvm_i32_ty, // %b
- llvm_i32_ty // %c
+ llvm_i32_ty, // %c
+ llvm_i1_ty // %clamp
],
[IntrNoMem, IntrSpeculatable]
>;
-// u32 %r = llvm.amdgcn.udot4(v4u8 (as u32) %a, v4u8 (as u32) %b, u32 %c)
+// u32 %r = llvm.amdgcn.udot4(v4u8 (as u32) %a, v4u8 (as u32) %b, u32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c
def int_amdgcn_udot4 :
GCCBuiltin<"__builtin_amdgcn_udot4">,
@@ -1256,12 +1260,13 @@ def int_amdgcn_udot4 :
[
llvm_i32_ty, // %a
llvm_i32_ty, // %b
- llvm_i32_ty // %c
+ llvm_i32_ty, // %c
+ llvm_i1_ty // %clamp
],
[IntrNoMem, IntrSpeculatable]
>;
-// i32 %r = llvm.amdgcn.sdot8(v8i4 (as i32) %a, v8i4 (as i32) %b, i32 %c)
+// i32 %r = llvm.amdgcn.sdot8(v8i4 (as i32) %a, v8i4 (as i32) %b, i32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] +
// %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c
def int_amdgcn_sdot8 :
@@ -1271,12 +1276,13 @@ def int_amdgcn_sdot8 :
[
llvm_i32_ty, // %a
llvm_i32_ty, // %b
- llvm_i32_ty // %c
+ llvm_i32_ty, // %c
+ llvm_i1_ty // %clamp
],
[IntrNoMem, IntrSpeculatable]
>;
-// u32 %r = llvm.amdgcn.udot8(v8u4 (as u32) %a, v8u4 (as u32) %b, u32 %c)
+// u32 %r = llvm.amdgcn.udot8(v8u4 (as u32) %a, v8u4 (as u32) %b, u32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] +
// %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c
def int_amdgcn_udot8 :
@@ -1286,7 +1292,8 @@ def int_amdgcn_udot8 :
[
llvm_i32_ty, // %a
llvm_i32_ty, // %b
- llvm_i32_ty // %c
+ llvm_i32_ty, // %c
+ llvm_i1_ty // %clamp
],
[IntrNoMem, IntrSpeculatable]
>;
diff --git a/include/llvm/IR/IntrinsicsARM.td b/include/llvm/IR/IntrinsicsARM.td
index f25d2f1dbb5d..4e11f9c29dd0 100644
--- a/include/llvm/IR/IntrinsicsARM.td
+++ b/include/llvm/IR/IntrinsicsARM.td
@@ -275,7 +275,7 @@ def int_arm_stc : GCCBuiltin<"__builtin_arm_stc">,
Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], []>;
def int_arm_stcl : GCCBuiltin<"__builtin_arm_stcl">,
Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], []>;
-def int_arm_stc2 : GCCBuiltin<"__builtin_arm_stc2">,
+def int_arm_stc2 : GCCBuiltin<"__builtin_arm_stc2">,
Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], []>;
def int_arm_stc2l : GCCBuiltin<"__builtin_arm_stc2l">,
Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], []>;
diff --git a/include/llvm/IR/IntrinsicsPowerPC.td b/include/llvm/IR/IntrinsicsPowerPC.td
index c4e753af25ca..3433aaa402eb 100644
--- a/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1,10 +1,10 @@
//===- IntrinsicsPowerPC.td - Defines PowerPC intrinsics ---*- tablegen -*-===//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file defines all of the PowerPC-specific intrinsics.
@@ -122,21 +122,21 @@ class PowerPC_Vec_FFF_Intrinsic<string GCCIntSuffix>
/// PowerPC_Vec_BBB_Intrinsic - A PowerPC intrinsic that takes two v16i8
/// vectors and returns one. These intrinsics have no side effects.
-class PowerPC_Vec_BBB_Intrinsic<string GCCIntSuffix>
+class PowerPC_Vec_BBB_Intrinsic<string GCCIntSuffix>
: PowerPC_Vec_Intrinsic<GCCIntSuffix,
[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
/// PowerPC_Vec_HHH_Intrinsic - A PowerPC intrinsic that takes two v8i16
/// vectors and returns one. These intrinsics have no side effects.
-class PowerPC_Vec_HHH_Intrinsic<string GCCIntSuffix>
+class PowerPC_Vec_HHH_Intrinsic<string GCCIntSuffix>
: PowerPC_Vec_Intrinsic<GCCIntSuffix,
[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
/// PowerPC_Vec_WWW_Intrinsic - A PowerPC intrinsic that takes two v4i32
/// vectors and returns one. These intrinsics have no side effects.
-class PowerPC_Vec_WWW_Intrinsic<string GCCIntSuffix>
+class PowerPC_Vec_WWW_Intrinsic<string GCCIntSuffix>
: PowerPC_Vec_Intrinsic<GCCIntSuffix,
[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
@@ -267,7 +267,7 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
def int_ppc_altivec_vcmpgtud : GCCBuiltin<"__builtin_altivec_vcmpgtud">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
-
+
def int_ppc_altivec_vcmpequw : GCCBuiltin<"__builtin_altivec_vcmpequw">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
@@ -283,7 +283,7 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
def int_ppc_altivec_vcmpnezw : GCCBuiltin<"__builtin_altivec_vcmpnezw">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
-
+
def int_ppc_altivec_vcmpequh : GCCBuiltin<"__builtin_altivec_vcmpequh">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
@@ -355,7 +355,7 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
def int_ppc_altivec_vcmpnezw_p : GCCBuiltin<"__builtin_altivec_vcmpnezw_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4i32_ty,llvm_v4i32_ty],
[IntrNoMem]>;
-
+
def int_ppc_altivec_vcmpequh_p : GCCBuiltin<"__builtin_altivec_vcmpequh_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v8i16_ty,llvm_v8i16_ty],
[IntrNoMem]>;
@@ -474,10 +474,10 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
llvm_v4i32_ty], [IntrNoMem]>;
def int_ppc_altivec_vmsumshs : GCCBuiltin<"__builtin_altivec_vmsumshs">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
llvm_v4i32_ty], [IntrNoMem]>;
def int_ppc_altivec_vmsumubm : GCCBuiltin<"__builtin_altivec_vmsumubm">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
llvm_v4i32_ty], [IntrNoMem]>;
def int_ppc_altivec_vmsumuhm : GCCBuiltin<"__builtin_altivec_vmsumuhm">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
@@ -544,7 +544,7 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
// Other multiplies.
def int_ppc_altivec_vmladduhm : GCCBuiltin<"__builtin_altivec_vmladduhm">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
// Packs.
@@ -626,21 +626,21 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
// Add Extended Quadword
def int_ppc_altivec_vaddeuqm : GCCBuiltin<"__builtin_altivec_vaddeuqm">,
- Intrinsic<[llvm_v1i128_ty],
+ Intrinsic<[llvm_v1i128_ty],
[llvm_v1i128_ty, llvm_v1i128_ty, llvm_v1i128_ty],
[IntrNoMem]>;
def int_ppc_altivec_vaddecuq : GCCBuiltin<"__builtin_altivec_vaddecuq">,
- Intrinsic<[llvm_v1i128_ty],
+ Intrinsic<[llvm_v1i128_ty],
[llvm_v1i128_ty, llvm_v1i128_ty, llvm_v1i128_ty],
[IntrNoMem]>;
// Sub Extended Quadword
def int_ppc_altivec_vsubeuqm : GCCBuiltin<"__builtin_altivec_vsubeuqm">,
- Intrinsic<[llvm_v1i128_ty],
+ Intrinsic<[llvm_v1i128_ty],
[llvm_v1i128_ty, llvm_v1i128_ty, llvm_v1i128_ty],
[IntrNoMem]>;
def int_ppc_altivec_vsubecuq : GCCBuiltin<"__builtin_altivec_vsubecuq">,
- Intrinsic<[llvm_v1i128_ty],
+ Intrinsic<[llvm_v1i128_ty],
[llvm_v1i128_ty, llvm_v1i128_ty, llvm_v1i128_ty],
[IntrNoMem]>;
}
@@ -657,7 +657,7 @@ def int_ppc_altivec_vslw : PowerPC_Vec_WWW_Intrinsic<"vslw">;
// Right Shifts.
def int_ppc_altivec_vsr : PowerPC_Vec_WWW_Intrinsic<"vsr">;
def int_ppc_altivec_vsro : PowerPC_Vec_WWW_Intrinsic<"vsro">;
-
+
def int_ppc_altivec_vsrb : PowerPC_Vec_BBB_Intrinsic<"vsrb">;
def int_ppc_altivec_vsrh : PowerPC_Vec_HHH_Intrinsic<"vsrh">;
def int_ppc_altivec_vsrw : PowerPC_Vec_WWW_Intrinsic<"vsrw">;
@@ -679,10 +679,10 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty], [IntrNoMem]>;
def int_ppc_altivec_vperm : GCCBuiltin<"__builtin_altivec_vperm_4si">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_v4i32_ty, llvm_v16i8_ty], [IntrNoMem]>;
def int_ppc_altivec_vsel : GCCBuiltin<"__builtin_altivec_vsel_4si">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
def int_ppc_altivec_vgbbd : GCCBuiltin<"__builtin_altivec_vgbbd">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
diff --git a/include/llvm/IR/LegacyPassManagers.h b/include/llvm/IR/LegacyPassManagers.h
index 90036c6ce248..f6752f2817ba 100644
--- a/include/llvm/IR/LegacyPassManagers.h
+++ b/include/llvm/IR/LegacyPassManagers.h
@@ -285,7 +285,7 @@ private:
SpecificBumpPtrAllocator<AUFoldingSetNode> AUFoldingSetNodeAllocator;
// Maps from a pass to it's associated entry in UniqueAnalysisUsages. Does
- // not own the storage associated with either key or value..
+ // not own the storage associated with either key or value.
DenseMap<Pass *, AnalysisUsage*> AnUsageMap;
/// Collection of PassInfo objects found via analysis IDs and in this top
diff --git a/include/llvm/IR/Statepoint.h b/include/llvm/IR/Statepoint.h
index c8e905b21a30..8908e1b0d090 100644
--- a/include/llvm/IR/Statepoint.h
+++ b/include/llvm/IR/Statepoint.h
@@ -325,7 +325,7 @@ public:
explicit Statepoint(CallSite CS) : Base(CS) {}
};
-/// Common base class for representing values projected from a statepoint.
+/// Common base class for representing values projected from a statepoint.
/// Currently, the only projections available are gc.result and gc.relocate.
class GCProjectionInst : public IntrinsicInst {
public:
diff --git a/include/llvm/IR/User.h b/include/llvm/IR/User.h
index d6a603ce845d..aea31467f2fa 100644
--- a/include/llvm/IR/User.h
+++ b/include/llvm/IR/User.h
@@ -101,10 +101,10 @@ public:
void operator delete(void *Usr);
/// Placement delete - required by std, called if the ctor throws.
void operator delete(void *Usr, unsigned) {
- // Note: If a subclass manipulates the information which is required to calculate the
- // Usr memory pointer, e.g. NumUserOperands, the operator delete of that subclass has
+ // Note: If a subclass manipulates the information which is required to calculate the
+ // Usr memory pointer, e.g. NumUserOperands, the operator delete of that subclass has
// to restore the changed information to the original value, since the dtor of that class
- // is not called if the ctor fails.
+ // is not called if the ctor fails.
User::operator delete(Usr);
#ifndef LLVM_ENABLE_EXCEPTIONS
@@ -113,10 +113,10 @@ public:
}
/// Placement delete - required by std, called if the ctor throws.
void operator delete(void *Usr, unsigned, bool) {
- // Note: If a subclass manipulates the information which is required to calculate the
- // Usr memory pointer, e.g. NumUserOperands, the operator delete of that subclass has
+ // Note: If a subclass manipulates the information which is required to calculate the
+ // Usr memory pointer, e.g. NumUserOperands, the operator delete of that subclass has
// to restore the changed information to the original value, since the dtor of that class
- // is not called if the ctor fails.
+ // is not called if the ctor fails.
User::operator delete(Usr);
#ifndef LLVM_ENABLE_EXCEPTIONS
diff --git a/include/llvm/LinkAllIR.h b/include/llvm/LinkAllIR.h
index 9a9f3d3a677f..4f4af7187be4 100644
--- a/include/llvm/LinkAllIR.h
+++ b/include/llvm/LinkAllIR.h
@@ -44,7 +44,7 @@ namespace {
llvm::LLVMContext Context;
(void)new llvm::Module("", Context);
(void)new llvm::UnreachableInst(Context);
- (void) llvm::createVerifierPass();
+ (void) llvm::createVerifierPass();
}
} ForceVMCoreLinking;
}
diff --git a/include/llvm/MC/MCDwarf.h b/include/llvm/MC/MCDwarf.h
index 785f42d2f9d7..2bfaf19cf2c6 100644
--- a/include/llvm/MC/MCDwarf.h
+++ b/include/llvm/MC/MCDwarf.h
@@ -362,6 +362,13 @@ public:
static void Encode(MCContext &Context, MCDwarfLineTableParams Params,
int64_t LineDelta, uint64_t AddrDelta, raw_ostream &OS);
+ /// Utility function to encode a Dwarf pair of LineDelta and AddrDeltas using
+ /// fixed length operands.
+ static bool FixedEncode(MCContext &Context,
+ MCDwarfLineTableParams Params,
+ int64_t LineDelta, uint64_t AddrDelta,
+ raw_ostream &OS, uint32_t *Offset, uint32_t *Size);
+
/// Utility function to emit the encoding to a streamer.
static void Emit(MCStreamer *MCOS, MCDwarfLineTableParams Params,
int64_t LineDelta, uint64_t AddrDelta);
diff --git a/include/llvm/MC/MCFragment.h b/include/llvm/MC/MCFragment.h
index 47b35175fec8..c999c9fc4f17 100644
--- a/include/llvm/MC/MCFragment.h
+++ b/include/llvm/MC/MCFragment.h
@@ -149,6 +149,7 @@ public:
case MCFragment::FT_Relaxable:
case MCFragment::FT_CompactEncodedInst:
case MCFragment::FT_Data:
+ case MCFragment::FT_Dwarf:
return true;
}
}
@@ -232,7 +233,7 @@ public:
static bool classof(const MCFragment *F) {
MCFragment::FragmentType Kind = F->getKind();
return Kind == MCFragment::FT_Relaxable || Kind == MCFragment::FT_Data ||
- Kind == MCFragment::FT_CVDefRange;
           Kind == MCFragment::FT_CVDefRange || Kind == MCFragment::FT_Dwarf;
}
};
@@ -514,7 +515,7 @@ public:
}
};
-class MCDwarfLineAddrFragment : public MCFragment {
+class MCDwarfLineAddrFragment : public MCEncodedFragmentWithFixups<8, 1> {
/// LineDelta - the value of the difference between the two line numbers
/// between two .loc dwarf directives.
int64_t LineDelta;
@@ -523,15 +524,11 @@ class MCDwarfLineAddrFragment : public MCFragment {
/// make up the address delta between two .loc dwarf directives.
const MCExpr *AddrDelta;
- SmallString<8> Contents;
-
public:
MCDwarfLineAddrFragment(int64_t LineDelta, const MCExpr &AddrDelta,
MCSection *Sec = nullptr)
- : MCFragment(FT_Dwarf, false, Sec), LineDelta(LineDelta),
- AddrDelta(&AddrDelta) {
- Contents.push_back(0);
- }
+ : MCEncodedFragmentWithFixups<8, 1>(FT_Dwarf, false, Sec),
+ LineDelta(LineDelta), AddrDelta(&AddrDelta) {}
/// \name Accessors
/// @{
@@ -540,9 +537,6 @@ public:
const MCExpr &getAddrDelta() const { return *AddrDelta; }
- SmallString<8> &getContents() { return Contents; }
- const SmallString<8> &getContents() const { return Contents; }
-
/// @}
static bool classof(const MCFragment *F) {
diff --git a/include/llvm/MC/MCInstrAnalysis.h b/include/llvm/MC/MCInstrAnalysis.h
index 484f03b4d854..e1673208d875 100644
--- a/include/llvm/MC/MCInstrAnalysis.h
+++ b/include/llvm/MC/MCInstrAnalysis.h
@@ -64,7 +64,7 @@ public:
/// Returns true if at least one of the register writes performed by
/// \param Inst implicitly clears the upper portion of all super-registers.
- ///
+ ///
/// Example: on X86-64, a write to EAX implicitly clears the upper half of
/// RAX. Also (still on x86) an XMM write perfomed by an AVX 128-bit
/// instruction implicitly clears the upper portion of the correspondent
@@ -87,6 +87,19 @@ public:
const MCInst &Inst,
APInt &Writes) const;
+ /// Returns true if \param Inst is a dependency breaking instruction for the
+ /// given subtarget.
+ ///
+ /// The value computed by a dependency breaking instruction is not dependent
+ /// on the inputs. An example of dependency breaking instruction on X86 is
+ /// `XOR %eax, %eax`.
+ /// TODO: In future, we could implement an alternative approach where this
+ /// method returns `true` if the input instruction is not dependent on
+ /// some/all of its input operands. An APInt mask could then be used to
+ /// identify independent operands.
+ virtual bool isDependencyBreaking(const MCSubtargetInfo &STI,
+ const MCInst &Inst) const;
+
/// Given a branch instruction try to get the address the branch
/// targets. Return true on success, and the address in Target.
virtual bool
diff --git a/include/llvm/MC/MCParser/AsmCond.h b/include/llvm/MC/MCParser/AsmCond.h
index 8e7bfc521556..a6e0fbd7f337 100644
--- a/include/llvm/MC/MCParser/AsmCond.h
+++ b/include/llvm/MC/MCParser/AsmCond.h
@@ -15,7 +15,7 @@ namespace llvm {
/// AsmCond - Class to support conditional assembly
///
/// The conditional assembly feature (.if, .else, .elseif and .endif) is
-/// implemented with AsmCond that tells us what we are in the middle of
+/// implemented with AsmCond that tells us what we are in the middle of
/// processing. Ignore can be either true or false. When true we are ignoring
/// the block of code in the middle of a conditional.
diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h
index 0a5d80c6d778..e4d0dc03b87c 100644
--- a/include/llvm/MC/MCStreamer.h
+++ b/include/llvm/MC/MCStreamer.h
@@ -297,8 +297,8 @@ public:
/// If the comment includes embedded \n's, they will each get the comment
/// prefix as appropriate. The added comment should not end with a \n.
/// By default, each comment is terminated with an end of line, i.e. the
- /// EOL param is set to true by default. If one prefers not to end the
- /// comment with a new line then the EOL param should be passed
+ /// EOL param is set to true by default. If one prefers not to end the
+ /// comment with a new line then the EOL param should be passed
/// with a false value.
virtual void AddComment(const Twine &T, bool EOL = true) {}
diff --git a/include/llvm/Object/MachO.h b/include/llvm/Object/MachO.h
index 531b3d249035..159c1765ab86 100644
--- a/include/llvm/Object/MachO.h
+++ b/include/llvm/Object/MachO.h
@@ -333,7 +333,7 @@ public:
relocation_iterator locrel_begin() const;
relocation_iterator locrel_end() const;
-
+
void moveRelocationNext(DataRefImpl &Rel) const override;
uint64_t getRelocationOffset(DataRefImpl Rel) const override;
symbol_iterator getRelocationSymbol(DataRefImpl Rel) const override;
diff --git a/include/llvm/PassAnalysisSupport.h b/include/llvm/PassAnalysisSupport.h
index 118718747659..a075eb557472 100644
--- a/include/llvm/PassAnalysisSupport.h
+++ b/include/llvm/PassAnalysisSupport.h
@@ -231,7 +231,7 @@ AnalysisType &Pass::getAnalysisID(AnalysisID PI) const {
// should be a small number, we just do a linear search over a (dense)
// vector.
Pass *ResultPass = Resolver->findImplPass(PI);
- assert(ResultPass &&
+ assert(ResultPass &&
"getAnalysis*() called on an analysis that was not "
"'required' by pass!");
diff --git a/include/llvm/PassRegistry.h b/include/llvm/PassRegistry.h
index 93edc12bdc7b..57462138c5ae 100644
--- a/include/llvm/PassRegistry.h
+++ b/include/llvm/PassRegistry.h
@@ -9,7 +9,7 @@
//
// This file defines PassRegistry, a class that is used in the initialization
// and registration of passes. At application startup, passes are registered
-// with the PassRegistry, which is later provided to the PassManager for
+// with the PassRegistry, which is later provided to the PassManager for
// dependency resolution and similar tasks.
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/ProfileData/Coverage/CoverageMapping.h b/include/llvm/ProfileData/Coverage/CoverageMapping.h
index 1ca56dcaf9c5..ecb284d30de0 100644
--- a/include/llvm/ProfileData/Coverage/CoverageMapping.h
+++ b/include/llvm/ProfileData/Coverage/CoverageMapping.h
@@ -207,7 +207,7 @@ struct CounterMappingRegion {
/// A CodeRegion associates some code with a counter
CodeRegion,
- /// An ExpansionRegion represents a file expansion region that associates
+ /// An ExpansionRegion represents a file expansion region that associates
/// a source range with the expansion of a virtual source file, such as
/// for a macro instantiation or #include file.
ExpansionRegion,
diff --git a/include/llvm/Support/ARMBuildAttributes.h b/include/llvm/Support/ARMBuildAttributes.h
index 6c83e447cb24..b8a03765a7c0 100644
--- a/include/llvm/Support/ARMBuildAttributes.h
+++ b/include/llvm/Support/ARMBuildAttributes.h
@@ -213,6 +213,8 @@ enum {
// Tag_ABI_VFP_args, (=28), uleb128
BaseAAPCS = 0,
HardFPAAPCS = 1,
+ ToolChainFPPCS = 2,
+ CompatibleFPAAPCS = 3,
// Tag_FP_HP_extension, (=36), uleb128
AllowHPFP = 1, // Allow use of Half Precision FP
diff --git a/include/llvm/Support/DataExtractor.h b/include/llvm/Support/DataExtractor.h
index 3a6ada6c77df..2b1639856e79 100644
--- a/include/llvm/Support/DataExtractor.h
+++ b/include/llvm/Support/DataExtractor.h
@@ -15,7 +15,7 @@
namespace llvm {
-/// An auxiliary type to facilitate extraction of 3-byte entities.
+/// An auxiliary type to facilitate extraction of 3-byte entities.
struct Uint24 {
uint8_t Bytes[3];
Uint24(uint8_t U) {
diff --git a/include/llvm/Support/GenericDomTree.h b/include/llvm/Support/GenericDomTree.h
index 115abc23e2c6..c716e4a4d300 100644
--- a/include/llvm/Support/GenericDomTree.h
+++ b/include/llvm/Support/GenericDomTree.h
@@ -530,11 +530,10 @@ protected:
/// CFG about its children and inverse children. This implies that deletions
/// of CFG edges must not delete the CFG nodes before calling this function.
///
- /// Batch updates should be generally faster when performing longer sequences
- /// of updates than calling insertEdge/deleteEdge manually multiple times, as
- /// it can reorder the updates and remove redundant ones internally.
- /// The batch updater is also able to detect sequences of zero and exactly one
- /// update -- it's optimized to do less work in these cases.
+ /// The applyUpdates function can reorder the updates and remove redundant
+ /// ones internally. The batch updater is also able to detect sequences of
+ /// zero and exactly one update -- it's optimized to do less work in these
+ /// cases.
///
/// Note that for postdominators it automatically takes care of applying
/// updates on reverse edges internally (so there's no need to swap the
@@ -854,10 +853,15 @@ protected:
assert(isReachableFromEntry(B));
assert(isReachableFromEntry(A));
+ const unsigned ALevel = A->getLevel();
const DomTreeNodeBase<NodeT> *IDom;
- while ((IDom = B->getIDom()) != nullptr && IDom != A && IDom != B)
+
+ // Don't walk nodes above A's subtree. When we reach A's level, we must
+ // either find A or be in some other subtree not dominated by A.
+ while ((IDom = B->getIDom()) != nullptr && IDom->getLevel() >= ALevel)
B = IDom; // Walk up the tree
- return IDom != nullptr;
+
+ return B == A;
}
/// Wipe this tree's state without releasing any resources.
diff --git a/include/llvm/Support/MemoryBuffer.h b/include/llvm/Support/MemoryBuffer.h
index 535579ecff53..8933295d4ea4 100644
--- a/include/llvm/Support/MemoryBuffer.h
+++ b/include/llvm/Support/MemoryBuffer.h
@@ -43,7 +43,6 @@ class MemoryBuffer {
const char *BufferStart; // Start of the buffer.
const char *BufferEnd; // End of the buffer.
-
protected:
MemoryBuffer() = default;
@@ -148,9 +147,6 @@ public:
virtual BufferKind getBufferKind() const = 0;
MemoryBufferRef getMemBufferRef() const;
-
-private:
- virtual void anchor();
};
/// This class is an extension of MemoryBuffer, which allows copy-on-write
diff --git a/include/llvm/Support/SmallVectorMemoryBuffer.h b/include/llvm/Support/SmallVectorMemoryBuffer.h
index f43c2fb8f826..c4a600e7f37d 100644
--- a/include/llvm/Support/SmallVectorMemoryBuffer.h
+++ b/include/llvm/Support/SmallVectorMemoryBuffer.h
@@ -49,6 +49,9 @@ public:
init(this->SV.begin(), this->SV.end(), false);
}
+ // Key function.
+ ~SmallVectorMemoryBuffer() override;
+
StringRef getBufferIdentifier() const override { return BufferName; }
BufferKind getBufferKind() const override { return MemoryBuffer_Malloc; }
@@ -56,7 +59,6 @@ public:
private:
SmallVector<char, 0> SV;
std::string BufferName;
- void anchor() override;
};
} // namespace llvm
diff --git a/include/llvm/Support/TargetOpcodes.def b/include/llvm/Support/TargetOpcodes.def
index 21f5c7e709b8..63491a5f01d2 100644
--- a/include/llvm/Support/TargetOpcodes.def
+++ b/include/llvm/Support/TargetOpcodes.def
@@ -470,12 +470,15 @@ HANDLE_TARGET_OPCODE(G_BSWAP)
/// Generic AddressSpaceCast.
HANDLE_TARGET_OPCODE(G_ADDRSPACE_CAST)
+/// Generic block address
+HANDLE_TARGET_OPCODE(G_BLOCK_ADDR)
+
// TODO: Add more generic opcodes as we move along.
/// Marker for the end of the generic opcode.
/// This is used to check if an opcode is in the range of the
/// generic opcodes.
-HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPCODE_END, G_ADDRSPACE_CAST)
+HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPCODE_END, G_BLOCK_ADDR)
/// BUILTIN_OP_END - This must be the last enum value in this list.
/// The target-specific post-isel opcode values start here.
diff --git a/include/llvm/Support/xxhash.h b/include/llvm/Support/xxhash.h
index f7ca460188a2..6fd67ff9ce1c 100644
--- a/include/llvm/Support/xxhash.h
+++ b/include/llvm/Support/xxhash.h
@@ -38,10 +38,12 @@
#ifndef LLVM_SUPPORT_XXHASH_H
#define LLVM_SUPPORT_XXHASH_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
namespace llvm {
uint64_t xxHash64(llvm::StringRef Data);
+uint64_t xxHash64(llvm::ArrayRef<uint8_t> Data);
}
#endif
diff --git a/include/llvm/Target/GenericOpcodes.td b/include/llvm/Target/GenericOpcodes.td
index d72746a0838a..79cc1e4d9eee 100644
--- a/include/llvm/Target/GenericOpcodes.td
+++ b/include/llvm/Target/GenericOpcodes.td
@@ -131,6 +131,13 @@ def G_ADDRSPACE_CAST : GenericInstruction {
let InOperandList = (ins type1:$src);
let hasSideEffects = 0;
}
+
+def G_BLOCK_ADDR : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins unknown:$ba);
+ let hasSideEffects = 0;
+}
+
//------------------------------------------------------------------------------
// Binary ops.
//------------------------------------------------------------------------------
diff --git a/include/llvm/Target/TargetCallingConv.td b/include/llvm/Target/TargetCallingConv.td
index 3d8639dfe1da..95d2b4226294 100644
--- a/include/llvm/Target/TargetCallingConv.td
+++ b/include/llvm/Target/TargetCallingConv.td
@@ -1,10 +1,10 @@
//===- TargetCallingConv.td - Target Calling Conventions ---*- tablegen -*-===//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file defines the target-independent interfaces with which targets
diff --git a/include/llvm/Target/TargetInstrPredicate.td b/include/llvm/Target/TargetInstrPredicate.td
index d38279b0d65e..8d57cae02d22 100644
--- a/include/llvm/Target/TargetInstrPredicate.td
+++ b/include/llvm/Target/TargetInstrPredicate.td
@@ -13,7 +13,7 @@
// an instruction. Each MCInstPredicate class has a well-known semantic, and it
// is used by a PredicateExpander to generate code for MachineInstr and/or
// MCInst.
-//
+//
// MCInstPredicate definitions can be used to construct MCSchedPredicate
// definitions. An MCSchedPredicate can be used in place of a SchedPredicate
// when defining SchedReadVariant and SchedWriteVariant used by a processor
@@ -63,7 +63,7 @@
//
// New MCInstPredicate classes must be added to this file. For each new class
// XYZ, an "expandXYZ" method must be added to the PredicateExpander.
-//
+//
//===----------------------------------------------------------------------===//
// Forward declarations.
diff --git a/include/llvm/Transforms/Scalar/SpeculativeExecution.h b/include/llvm/Transforms/Scalar/SpeculativeExecution.h
index 068f81776a03..d00e950222a0 100644
--- a/include/llvm/Transforms/Scalar/SpeculativeExecution.h
+++ b/include/llvm/Transforms/Scalar/SpeculativeExecution.h
@@ -82,7 +82,7 @@ private:
bool considerHoistingFromTo(BasicBlock &FromBlock, BasicBlock &ToBlock);
// If true, this pass is a nop unless the target architecture has branch
- // divergence.
+ // divergence.
const bool OnlyIfDivergentTarget = false;
TargetTransformInfo *TTI = nullptr;
diff --git a/include/llvm/Transforms/Utils/CodeExtractor.h b/include/llvm/Transforms/Utils/CodeExtractor.h
index fab8334d4c66..0e5254acb0d3 100644
--- a/include/llvm/Transforms/Utils/CodeExtractor.h
+++ b/include/llvm/Transforms/Utils/CodeExtractor.h
@@ -74,7 +74,7 @@ class Value;
/// vararg functions can be extracted. This is safe, if all vararg handling
/// code is extracted, including vastart. If AllowAlloca is true, then
/// extraction of blocks containing alloca instructions would be possible,
- /// however code extractor won't validate whether extraction is legal.
+ /// however code extractor won't validate whether extraction is legal.
CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT = nullptr,
bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr,
BranchProbabilityInfo *BPI = nullptr,
diff --git a/include/llvm/Transforms/Utils/FunctionComparator.h b/include/llvm/Transforms/Utils/FunctionComparator.h
index 7698a068717a..35ba0950343c 100644
--- a/include/llvm/Transforms/Utils/FunctionComparator.h
+++ b/include/llvm/Transforms/Utils/FunctionComparator.h
@@ -18,7 +18,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Attributes.h"
-#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/ValueMap.h"
#include "llvm/Support/AtomicOrdering.h"
diff --git a/include/llvm/Transforms/Utils/SymbolRewriter.h b/include/llvm/Transforms/Utils/SymbolRewriter.h
index e0caf7741ff3..5f6488e08b5a 100644
--- a/include/llvm/Transforms/Utils/SymbolRewriter.h
+++ b/include/llvm/Transforms/Utils/SymbolRewriter.h
@@ -134,7 +134,7 @@ public:
private:
void loadAndParseMapFiles();
- SymbolRewriter::RewriteDescriptorList Descriptors;
+ SymbolRewriter::RewriteDescriptorList Descriptors;
};
} // end namespace llvm
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index 8aee81b1f1d8..8f903fa4f1e8 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -142,7 +142,7 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
Alias = SetMayAlias;
AST.TotalMayAliasSetSize += size();
} else {
- // First entry of must alias must have maximum size!
+ // First entry of must alias must have maximum size!
P->updateSizeAndAAInfo(Size, AAInfo);
}
assert(Result != NoAlias && "Cannot be part of must set!");
@@ -251,9 +251,9 @@ void AliasSetTracker::clear() {
for (PointerMapType::iterator I = PointerMap.begin(), E = PointerMap.end();
I != E; ++I)
I->second->eraseFromList();
-
+
PointerMap.clear();
-
+
// The alias sets should all be clear now.
AliasSets.clear();
}
@@ -269,7 +269,7 @@ AliasSet *AliasSetTracker::mergeAliasSetsForPointer(const Value *Ptr,
for (iterator I = begin(), E = end(); I != E;) {
iterator Cur = I++;
if (Cur->Forward || !Cur->aliasesPointer(Ptr, Size, AAInfo, AA)) continue;
-
+
if (!FoundSet) { // If this is the first alias set ptr can go into.
FoundSet = &*Cur; // Remember it.
} else { // Otherwise, we must merge the sets.
@@ -336,13 +336,13 @@ AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer,
// Return the set!
return *Entry.getAliasSet(*this)->getForwardedTarget(*this);
}
-
+
if (AliasSet *AS = mergeAliasSetsForPointer(Pointer, Size, AAInfo)) {
// Add it to the alias set it aliases.
AS->addPointer(*this, Entry, Size, AAInfo);
return *AS;
}
-
+
// Otherwise create a new alias set to hold the loaded pointer.
AliasSets.push_back(new AliasSet());
AliasSets.back().addPointer(*this, Entry, Size, AAInfo);
@@ -526,10 +526,10 @@ void AliasSetTracker::deleteValue(Value *PtrVal) {
AS->SetSize--;
TotalMayAliasSetSize--;
}
-
+
// Stop using the alias set.
AS->dropRef(*this);
-
+
PointerMap.erase(I);
}
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 96326347b712..1a24ae3dba15 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -28,6 +28,7 @@
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/PhiValues.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallSite.h"
@@ -93,7 +94,8 @@ bool BasicAAResult::invalidate(Function &Fn, const PreservedAnalyses &PA,
// depend on them.
if (Inv.invalidate<AssumptionAnalysis>(Fn, PA) ||
(DT && Inv.invalidate<DominatorTreeAnalysis>(Fn, PA)) ||
- (LI && Inv.invalidate<LoopAnalysis>(Fn, PA)))
+ (LI && Inv.invalidate<LoopAnalysis>(Fn, PA)) ||
+ (PV && Inv.invalidate<PhiValuesAnalysis>(Fn, PA)))
return true;
// Otherwise this analysis result remains valid.
@@ -1527,34 +1529,70 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize,
return Alias;
}
- SmallPtrSet<Value *, 4> UniqueSrc;
SmallVector<Value *, 4> V1Srcs;
bool isRecursive = false;
- for (Value *PV1 : PN->incoming_values()) {
- if (isa<PHINode>(PV1))
- // If any of the source itself is a PHI, return MayAlias conservatively
- // to avoid compile time explosion. The worst possible case is if both
- // sides are PHI nodes. In which case, this is O(m x n) time where 'm'
- // and 'n' are the number of PHI sources.
+ if (PV) {
+ // If we have PhiValues then use it to get the underlying phi values.
+ const PhiValues::ValueSet &PhiValueSet = PV->getValuesForPhi(PN);
+ // If we have more phi values than the search depth then return MayAlias
+ // conservatively to avoid compile time explosion. The worst possible case
+ // is if both sides are PHI nodes. In which case, this is O(m x n) time
+ // where 'm' and 'n' are the number of PHI sources.
+ if (PhiValueSet.size() > MaxLookupSearchDepth)
return MayAlias;
-
- if (EnableRecPhiAnalysis)
- if (GEPOperator *PV1GEP = dyn_cast<GEPOperator>(PV1)) {
- // Check whether the incoming value is a GEP that advances the pointer
- // result of this PHI node (e.g. in a loop). If this is the case, we
- // would recurse and always get a MayAlias. Handle this case specially
- // below.
- if (PV1GEP->getPointerOperand() == PN && PV1GEP->getNumIndices() == 1 &&
- isa<ConstantInt>(PV1GEP->idx_begin())) {
- isRecursive = true;
- continue;
+ // Add the values to V1Srcs
+ for (Value *PV1 : PhiValueSet) {
+ if (EnableRecPhiAnalysis) {
+ if (GEPOperator *PV1GEP = dyn_cast<GEPOperator>(PV1)) {
+ // Check whether the incoming value is a GEP that advances the pointer
+ // result of this PHI node (e.g. in a loop). If this is the case, we
+ // would recurse and always get a MayAlias. Handle this case specially
+ // below.
+ if (PV1GEP->getPointerOperand() == PN && PV1GEP->getNumIndices() == 1 &&
+ isa<ConstantInt>(PV1GEP->idx_begin())) {
+ isRecursive = true;
+ continue;
+ }
}
}
-
- if (UniqueSrc.insert(PV1).second)
V1Srcs.push_back(PV1);
+ }
+ } else {
+ // If we don't have PhiInfo then just look at the operands of the phi itself
+ // FIXME: Remove this once we can guarantee that we have PhiInfo always
+ SmallPtrSet<Value *, 4> UniqueSrc;
+ for (Value *PV1 : PN->incoming_values()) {
+ if (isa<PHINode>(PV1))
+ // If any of the source itself is a PHI, return MayAlias conservatively
+ // to avoid compile time explosion. The worst possible case is if both
+ // sides are PHI nodes. In which case, this is O(m x n) time where 'm'
+ // and 'n' are the number of PHI sources.
+ return MayAlias;
+
+ if (EnableRecPhiAnalysis)
+ if (GEPOperator *PV1GEP = dyn_cast<GEPOperator>(PV1)) {
+ // Check whether the incoming value is a GEP that advances the pointer
+ // result of this PHI node (e.g. in a loop). If this is the case, we
+ // would recurse and always get a MayAlias. Handle this case specially
+ // below.
+ if (PV1GEP->getPointerOperand() == PN && PV1GEP->getNumIndices() == 1 &&
+ isa<ConstantInt>(PV1GEP->idx_begin())) {
+ isRecursive = true;
+ continue;
+ }
+ }
+
+ if (UniqueSrc.insert(PV1).second)
+ V1Srcs.push_back(PV1);
+ }
}
+ // If V1Srcs is empty then that means that the phi has no underlying non-phi
+ // value. This should only be possible in blocks unreachable from the entry
+ // block, but return MayAlias just in case.
+ if (V1Srcs.empty())
+ return MayAlias;
+
// If this PHI node is recursive, set the size of the accessed memory to
// unknown to represent all the possible values the GEP could advance the
// pointer to.
@@ -1879,7 +1917,8 @@ BasicAAResult BasicAA::run(Function &F, FunctionAnalysisManager &AM) {
AM.getResult<TargetLibraryAnalysis>(F),
AM.getResult<AssumptionAnalysis>(F),
&AM.getResult<DominatorTreeAnalysis>(F),
- AM.getCachedResult<LoopAnalysis>(F));
+ AM.getCachedResult<LoopAnalysis>(F),
+ AM.getCachedResult<PhiValuesAnalysis>(F));
}
BasicAAWrapperPass::BasicAAWrapperPass() : FunctionPass(ID) {
@@ -1891,12 +1930,12 @@ char BasicAAWrapperPass::ID = 0;
void BasicAAWrapperPass::anchor() {}
INITIALIZE_PASS_BEGIN(BasicAAWrapperPass, "basicaa",
- "Basic Alias Analysis (stateless AA impl)", true, true)
+ "Basic Alias Analysis (stateless AA impl)", false, true)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(BasicAAWrapperPass, "basicaa",
- "Basic Alias Analysis (stateless AA impl)", true, true)
+ "Basic Alias Analysis (stateless AA impl)", false, true)
FunctionPass *llvm::createBasicAAWrapperPass() {
return new BasicAAWrapperPass();
@@ -1907,10 +1946,12 @@ bool BasicAAWrapperPass::runOnFunction(Function &F) {
auto &TLIWP = getAnalysis<TargetLibraryInfoWrapperPass>();
auto &DTWP = getAnalysis<DominatorTreeWrapperPass>();
auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
+ auto *PVWP = getAnalysisIfAvailable<PhiValuesWrapperPass>();
Result.reset(new BasicAAResult(F.getParent()->getDataLayout(), F, TLIWP.getTLI(),
ACT.getAssumptionCache(F), &DTWP.getDomTree(),
- LIWP ? &LIWP->getLoopInfo() : nullptr));
+ LIWP ? &LIWP->getLoopInfo() : nullptr,
+ PVWP ? &PVWP->getResult() : nullptr));
return false;
}
@@ -1920,6 +1961,7 @@ void BasicAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addUsedIfAvailable<PhiValuesWrapperPass>();
}
BasicAAResult llvm::createLegacyPMBasicAAResult(Pass &P, Function &F) {
diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp
index fc25cef8ddca..5b170dfa7903 100644
--- a/lib/Analysis/CFGPrinter.cpp
+++ b/lib/Analysis/CFGPrinter.cpp
@@ -124,7 +124,7 @@ namespace {
}
char CFGPrinterLegacyPass::ID = 0;
-INITIALIZE_PASS(CFGPrinterLegacyPass, "dot-cfg", "Print CFG of function to 'dot' file",
+INITIALIZE_PASS(CFGPrinterLegacyPass, "dot-cfg", "Print CFG of function to 'dot' file",
false, true)
PreservedAnalyses CFGPrinterPass::run(Function &F,
diff --git a/lib/Analysis/CallGraph.cpp b/lib/Analysis/CallGraph.cpp
index 7d5d2d2e4496..cbdf5f63c557 100644
--- a/lib/Analysis/CallGraph.cpp
+++ b/lib/Analysis/CallGraph.cpp
@@ -166,7 +166,7 @@ void CallGraphNode::print(raw_ostream &OS) const {
OS << "Call graph node for function: '" << F->getName() << "'";
else
OS << "Call graph node <<null function>>";
-
+
OS << "<<" << this << ">> #uses=" << getNumReferences() << '\n';
for (const auto &I : *this) {
diff --git a/lib/Analysis/CallGraphSCCPass.cpp b/lib/Analysis/CallGraphSCCPass.cpp
index f2211edba216..4c33c420b65d 100644
--- a/lib/Analysis/CallGraphSCCPass.cpp
+++ b/lib/Analysis/CallGraphSCCPass.cpp
@@ -41,7 +41,7 @@ using namespace llvm;
#define DEBUG_TYPE "cgscc-passmgr"
-static cl::opt<unsigned>
+static cl::opt<unsigned>
MaxIterations("max-cg-scc-iterations", cl::ReallyHidden, cl::init(4));
STATISTIC(MaxSCCIterations, "Maximum CGSCCPassMgr iterations on one SCC");
@@ -97,13 +97,13 @@ public:
}
PassManagerType getPassManagerType() const override {
- return PMT_CallGraphPassManager;
+ return PMT_CallGraphPassManager;
}
-
+
private:
bool RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
bool &DevirtualizedCall);
-
+
bool RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
CallGraph &CG, bool &CallGraphUpToDate,
bool &DevirtualizedCall);
@@ -142,21 +142,21 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
if (EmitICRemark)
emitInstrCountChangedRemark(P, M, InstrCount);
}
-
+
// After the CGSCCPass is done, when assertions are enabled, use
// RefreshCallGraph to verify that the callgraph was correctly updated.
#ifndef NDEBUG
if (Changed)
RefreshCallGraph(CurSCC, CG, true);
#endif
-
+
return Changed;
}
-
+
assert(PM->getPassManagerType() == PMT_FunctionPassManager &&
"Invalid CGPassManager member");
FPPassManager *FPP = (FPPassManager*)P;
-
+
// Run pass P on all functions in the current SCC.
for (CallGraphNode *CGN : CurSCC) {
if (Function *F = CGN->getFunction()) {
@@ -168,7 +168,7 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
F->getContext().yield();
}
}
-
+
// The function pass(es) modified the IR, they may have clobbered the
// callgraph.
if (Changed && CallGraphUpToDate) {
@@ -199,7 +199,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
bool MadeChange = false;
bool DevirtualizedCall = false;
-
+
// Scan all functions in the SCC.
unsigned FunctionNo = 0;
for (CallGraphSCC::iterator SCCIdx = CurSCC.begin(), E = CurSCC.end();
@@ -207,14 +207,14 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
CallGraphNode *CGN = *SCCIdx;
Function *F = CGN->getFunction();
if (!F || F->isDeclaration()) continue;
-
+
// Walk the function body looking for call sites. Sync up the call sites in
// CGN with those actually in the function.
// Keep track of the number of direct and indirect calls that were
// invalidated and removed.
unsigned NumDirectRemoved = 0, NumIndirectRemoved = 0;
-
+
// Get the set of call sites currently in the function.
for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) {
// If this call site is null, then the function pass deleted the call
@@ -226,7 +226,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
CallSites.count(I->first) ||
// If the call edge is not from a call or invoke, or it is a
- // instrinsic call, then the function pass RAUW'd a call with
+ // instrinsic call, then the function pass RAUW'd a call with
// another value. This can happen when constant folding happens
// of well known functions etc.
!CallSite(I->first) ||
@@ -236,18 +236,18 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
CallSite(I->first).getCalledFunction()->getIntrinsicID()))) {
assert(!CheckingMode &&
"CallGraphSCCPass did not update the CallGraph correctly!");
-
+
// If this was an indirect call site, count it.
if (!I->second->getFunction())
++NumIndirectRemoved;
- else
+ else
++NumDirectRemoved;
-
+
// Just remove the edge from the set of callees, keep track of whether
// I points to the last element of the vector.
bool WasLast = I + 1 == E;
CGN->removeCallEdge(I);
-
+
// If I pointed to the last element of the vector, we have to bail out:
// iterator checking rejects comparisons of the resultant pointer with
// end.
@@ -256,10 +256,10 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
E = CGN->end();
continue;
}
-
+
assert(!CallSites.count(I->first) &&
"Call site occurs in node multiple times");
-
+
CallSite CS(I->first);
if (CS) {
Function *Callee = CS.getCalledFunction();
@@ -269,7 +269,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
}
++I;
}
-
+
// Loop over all of the instructions in the function, getting the callsites.
// Keep track of the number of direct/indirect calls added.
unsigned NumDirectAdded = 0, NumIndirectAdded = 0;
@@ -280,7 +280,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
if (!CS) continue;
Function *Callee = CS.getCalledFunction();
if (Callee && Callee->isIntrinsic()) continue;
-
+
// If this call site already existed in the callgraph, just verify it
// matches up to expectations and remove it from CallSites.
DenseMap<Value*, CallGraphNode*>::iterator ExistingIt =
@@ -290,11 +290,11 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
// Remove from CallSites since we have now seen it.
CallSites.erase(ExistingIt);
-
+
// Verify that the callee is right.
if (ExistingNode->getFunction() == CS.getCalledFunction())
continue;
-
+
// If we are in checking mode, we are not allowed to actually mutate
// the callgraph. If this is a case where we can infer that the
// callgraph is less precise than it could be (e.g. an indirect call
@@ -303,10 +303,10 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
if (CheckingMode && CS.getCalledFunction() &&
ExistingNode->getFunction() == nullptr)
continue;
-
+
assert(!CheckingMode &&
"CallGraphSCCPass did not update the CallGraph correctly!");
-
+
// If not, we either went from a direct call to indirect, indirect to
// direct, or direct to different direct.
CallGraphNode *CalleeNode;
@@ -328,7 +328,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
MadeChange = true;
continue;
}
-
+
assert(!CheckingMode &&
"CallGraphSCCPass did not update the CallGraph correctly!");
@@ -341,11 +341,11 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
CalleeNode = CG.getCallsExternalNode();
++NumIndirectAdded;
}
-
+
CGN->addCalledFunction(CS, CalleeNode);
MadeChange = true;
}
-
+
// We scanned the old callgraph node, removing invalidated call sites and
// then added back newly found call sites. One thing that can happen is
// that an old indirect call site was deleted and replaced with a new direct
@@ -359,13 +359,13 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
if (NumIndirectRemoved > NumIndirectAdded &&
NumDirectRemoved < NumDirectAdded)
DevirtualizedCall = true;
-
+
// After scanning this function, if we still have entries in callsites, then
// they are dangling pointers. WeakTrackingVH should save us for this, so
// abort if
// this happens.
assert(CallSites.empty() && "Dangling pointers found in call sites map");
-
+
// Periodically do an explicit clear to remove tombstones when processing
// large scc's.
if ((FunctionNo & 15) == 15)
@@ -392,7 +392,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
bool &DevirtualizedCall) {
bool Changed = false;
-
+
// Keep track of whether the callgraph is known to be up-to-date or not.
// The CGSSC pass manager runs two types of passes:
// CallGraphSCC Passes and other random function passes. Because other
@@ -406,7 +406,7 @@ bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
for (unsigned PassNo = 0, e = getNumContainedPasses();
PassNo != e; ++PassNo) {
Pass *P = getContainedPass(PassNo);
-
+
// If we're in -debug-pass=Executions mode, construct the SCC node list,
// otherwise avoid constructing this string as it is expensive.
if (isPassDebuggingExecutionsOrMore()) {
@@ -423,23 +423,23 @@ bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
dumpPassInfo(P, EXECUTION_MSG, ON_CG_MSG, Functions);
}
dumpRequiredSet(P);
-
+
initializeAnalysisImpl(P);
-
+
// Actually run this pass on the current SCC.
Changed |= RunPassOnSCC(P, CurSCC, CG,
CallGraphUpToDate, DevirtualizedCall);
-
+
if (Changed)
dumpPassInfo(P, MODIFICATION_MSG, ON_CG_MSG, "");
dumpPreservedSet(P);
-
- verifyPreservedAnalysis(P);
+
+ verifyPreservedAnalysis(P);
removeNotPreservedAnalysis(P);
recordAvailableAnalysis(P);
removeDeadPasses(P, "", ON_CG_MSG);
}
-
+
// If the callgraph was left out of date (because the last pass run was a
// functionpass), refresh it before we move on to the next SCC.
if (!CallGraphUpToDate)
@@ -452,7 +452,7 @@ bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
bool CGPassManager::runOnModule(Module &M) {
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
bool Changed = doInitialization(CG);
-
+
// Walk the callgraph in bottom-up SCC order.
scc_iterator<CallGraph*> CGI = scc_begin(&CG);
@@ -485,7 +485,7 @@ bool CGPassManager::runOnModule(Module &M) {
DevirtualizedCall = false;
Changed |= RunAllPassesOnSCC(CurSCC, CG, DevirtualizedCall);
} while (Iteration++ < MaxIterations && DevirtualizedCall);
-
+
if (DevirtualizedCall)
LLVM_DEBUG(dbgs() << " CGSCCPASSMGR: Stopped iteration after "
<< Iteration
@@ -500,7 +500,7 @@ bool CGPassManager::runOnModule(Module &M) {
/// Initialize CG
bool CGPassManager::doInitialization(CallGraph &CG) {
bool Changed = false;
- for (unsigned i = 0, e = getNumContainedPasses(); i != e; ++i) {
+ for (unsigned i = 0, e = getNumContainedPasses(); i != e; ++i) {
if (PMDataManager *PM = getContainedPass(i)->getAsPMDataManager()) {
assert(PM->getPassManagerType() == PMT_FunctionPassManager &&
"Invalid CGPassManager member");
@@ -515,7 +515,7 @@ bool CGPassManager::doInitialization(CallGraph &CG) {
/// Finalize CG
bool CGPassManager::doFinalization(CallGraph &CG) {
bool Changed = false;
- for (unsigned i = 0, e = getNumContainedPasses(); i != e; ++i) {
+ for (unsigned i = 0, e = getNumContainedPasses(); i != e; ++i) {
if (PMDataManager *PM = getContainedPass(i)->getAsPMDataManager()) {
assert(PM->getPassManagerType() == PMT_FunctionPassManager &&
"Invalid CGPassManager member");
@@ -541,7 +541,7 @@ void CallGraphSCC::ReplaceNode(CallGraphNode *Old, CallGraphNode *New) {
Nodes[i] = New;
break;
}
-
+
// Update the active scc_iterator so that it doesn't contain dangling
// pointers to the old CallGraphNode.
scc_iterator<CallGraph*> *CGI = (scc_iterator<CallGraph*>*)Context;
@@ -555,18 +555,18 @@ void CallGraphSCC::ReplaceNode(CallGraphNode *Old, CallGraphNode *New) {
/// Assign pass manager to manage this pass.
void CallGraphSCCPass::assignPassManager(PMStack &PMS,
PassManagerType PreferredType) {
- // Find CGPassManager
+ // Find CGPassManager
while (!PMS.empty() &&
PMS.top()->getPassManagerType() > PMT_CallGraphPassManager)
PMS.pop();
assert(!PMS.empty() && "Unable to handle Call Graph Pass");
CGPassManager *CGP;
-
+
if (PMS.top()->getPassManagerType() == PMT_CallGraphPassManager)
CGP = (CGPassManager*)PMS.top();
else {
- // Create new Call Graph SCC Pass Manager if it does not exist.
+ // Create new Call Graph SCC Pass Manager if it does not exist.
assert(!PMS.empty() && "Unable to create Call Graph Pass Manager");
PMDataManager *PMD = PMS.top();
@@ -608,7 +608,7 @@ namespace {
class PrintCallGraphPass : public CallGraphSCCPass {
std::string Banner;
raw_ostream &OS; // raw_ostream to print on.
-
+
public:
static char ID;
@@ -640,10 +640,10 @@ namespace {
}
return false;
}
-
+
StringRef getPassName() const override { return "Print CallGraph IR"; }
};
-
+
} // end anonymous namespace.
char PrintCallGraphPass::ID = 0;
diff --git a/lib/Analysis/DemandedBits.cpp b/lib/Analysis/DemandedBits.cpp
index 58c5bccff65d..e7637cd88327 100644
--- a/lib/Analysis/DemandedBits.cpp
+++ b/lib/Analysis/DemandedBits.cpp
@@ -272,7 +272,7 @@ void DemandedBits::performAnalysis() {
// Analysis already completed for this function.
return;
Analyzed = true;
-
+
Visited.clear();
AliveBits.clear();
@@ -367,7 +367,7 @@ void DemandedBits::performAnalysis() {
APInt DemandedBits::getDemandedBits(Instruction *I) {
performAnalysis();
-
+
const DataLayout &DL = I->getModule()->getDataLayout();
auto Found = AliveBits.find(I);
if (Found != AliveBits.end())
diff --git a/lib/Analysis/GlobalsModRef.cpp b/lib/Analysis/GlobalsModRef.cpp
index 197aee9dacb7..2c503609d96b 100644
--- a/lib/Analysis/GlobalsModRef.cpp
+++ b/lib/Analysis/GlobalsModRef.cpp
@@ -409,7 +409,7 @@ bool GlobalsAAResult::AnalyzeIndirectGlobalMemory(GlobalVariable *GV) {
if (Constant *C = GV->getInitializer())
if (!C->isNullValue())
return false;
-
+
// Walk the user list of the global. If we find anything other than a direct
// load or store, bail out.
for (User *U : GV->users()) {
@@ -464,7 +464,7 @@ bool GlobalsAAResult::AnalyzeIndirectGlobalMemory(GlobalVariable *GV) {
return true;
}
-void GlobalsAAResult::CollectSCCMembership(CallGraph &CG) {
+void GlobalsAAResult::CollectSCCMembership(CallGraph &CG) {
// We do a bottom-up SCC traversal of the call graph. In other words, we
// visit all callees before callers (leaf-first).
unsigned SCCID = 0;
@@ -633,7 +633,7 @@ static bool isNonEscapingGlobalNoAliasWithLoad(const GlobalValue *GV,
Inputs.push_back(V);
do {
const Value *Input = Inputs.pop_back_val();
-
+
if (isa<GlobalValue>(Input) || isa<Argument>(Input) || isa<CallInst>(Input) ||
isa<InvokeInst>(Input))
// Arguments to functions or returns from functions are inherently
@@ -654,7 +654,7 @@ static bool isNonEscapingGlobalNoAliasWithLoad(const GlobalValue *GV,
if (auto *LI = dyn_cast<LoadInst>(Input)) {
Inputs.push_back(GetUnderlyingObject(LI->getPointerOperand(), DL));
continue;
- }
+ }
if (auto *SI = dyn_cast<SelectInst>(Input)) {
const Value *LHS = GetUnderlyingObject(SI->getTrueValue(), DL);
const Value *RHS = GetUnderlyingObject(SI->getFalseValue(), DL);
@@ -672,7 +672,7 @@ static bool isNonEscapingGlobalNoAliasWithLoad(const GlobalValue *GV,
}
continue;
}
-
+
return false;
} while (!Inputs.empty());
@@ -754,7 +754,7 @@ bool GlobalsAAResult::isNonEscapingGlobalNoAlias(const GlobalValue *GV,
// non-addr-taken globals.
continue;
}
-
+
// Recurse through a limited number of selects, loads and PHIs. This is an
// arbitrary depth of 4, lower numbers could be used to fix compile time
// issues if needed, but this is generally expected to be only be important
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 519d6d67be51..7fc7c15a0c25 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -65,6 +65,48 @@ static Value *SimplifyCastInst(unsigned, Value *, Type *,
static Value *SimplifyGEPInst(Type *, ArrayRef<Value *>, const SimplifyQuery &,
unsigned);
+static Value *foldSelectWithBinaryOp(Value *Cond, Value *TrueVal,
+ Value *FalseVal) {
+ BinaryOperator::BinaryOps BinOpCode;
+ if (auto *BO = dyn_cast<BinaryOperator>(Cond))
+ BinOpCode = BO->getOpcode();
+ else
+ return nullptr;
+
+ CmpInst::Predicate ExpectedPred, Pred1, Pred2;
+ if (BinOpCode == BinaryOperator::Or) {
+ ExpectedPred = ICmpInst::ICMP_NE;
+ } else if (BinOpCode == BinaryOperator::And) {
+ ExpectedPred = ICmpInst::ICMP_EQ;
+ } else
+ return nullptr;
+
+ // %A = icmp eq %TV, %FV
+ // %B = icmp eq %X, %Y (and one of these is a select operand)
+ // %C = and %A, %B
+ // %D = select %C, %TV, %FV
+ // -->
+ // %FV
+
+ // %A = icmp ne %TV, %FV
+ // %B = icmp ne %X, %Y (and one of these is a select operand)
+ // %C = or %A, %B
+ // %D = select %C, %TV, %FV
+ // -->
+ // %TV
+ Value *X, *Y;
+ if (!match(Cond, m_c_BinOp(m_c_ICmp(Pred1, m_Specific(TrueVal),
+ m_Specific(FalseVal)),
+ m_ICmp(Pred2, m_Value(X), m_Value(Y)))) ||
+ Pred1 != Pred2 || Pred1 != ExpectedPred)
+ return nullptr;
+
+ if (X == TrueVal || X == FalseVal || Y == TrueVal || Y == FalseVal)
+ return BinOpCode == BinaryOperator::Or ? TrueVal : FalseVal;
+
+ return nullptr;
+}
+
/// For a boolean type or a vector of boolean type, return false or a vector
/// with every element false.
static Constant *getFalse(Type *Ty) {
@@ -1283,6 +1325,23 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
if (match(Op0, m_NUWShl(m_Value(X), m_Specific(Op1))))
return X;
+ // ((X << A) | Y) >> A -> X if effective width of Y is not larger than A.
+ // We can return X as we do in the above case since OR alters no bits in X.
+ // SimplifyDemandedBits in InstCombine can do more general optimization for
+ // bit manipulation. This pattern aims to provide opportunities for other
+ // optimizers by supporting a simple but common case in InstSimplify.
+ Value *Y;
+ const APInt *ShRAmt, *ShLAmt;
+ if (match(Op1, m_APInt(ShRAmt)) &&
+ match(Op0, m_c_Or(m_NUWShl(m_Value(X), m_APInt(ShLAmt)), m_Value(Y))) &&
+ *ShRAmt == *ShLAmt) {
+ const KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ const unsigned Width = Op0->getType()->getScalarSizeInBits();
+ const unsigned EffWidthY = Width - YKnown.countMinLeadingZeros();
+ if (EffWidthY <= ShRAmt->getZExtValue())
+ return X;
+ }
+
return nullptr;
}
@@ -3752,6 +3811,9 @@ static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
simplifySelectWithICmpCond(Cond, TrueVal, FalseVal, Q, MaxRecurse))
return V;
+ if (Value *V = foldSelectWithBinaryOp(Cond, TrueVal, FalseVal))
+ return V;
+
return nullptr;
}
@@ -4604,149 +4666,131 @@ static bool maskIsAllZeroOrUndef(Value *Mask) {
return true;
}
-template <typename IterTy>
-static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
- const SimplifyQuery &Q, unsigned MaxRecurse) {
+static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0,
+ const SimplifyQuery &Q) {
+ // Idempotent functions return the same result when called repeatedly.
Intrinsic::ID IID = F->getIntrinsicID();
- unsigned NumOperands = std::distance(ArgBegin, ArgEnd);
-
- // Unary Ops
- if (NumOperands == 1) {
- // Perform idempotent optimizations
- if (IsIdempotent(IID)) {
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*ArgBegin)) {
- if (II->getIntrinsicID() == IID)
- return II;
- }
- }
+ if (IsIdempotent(IID))
+ if (auto *II = dyn_cast<IntrinsicInst>(Op0))
+ if (II->getIntrinsicID() == IID)
+ return II;
- Value *IIOperand = *ArgBegin;
- Value *X;
- switch (IID) {
- case Intrinsic::fabs: {
- if (SignBitMustBeZero(IIOperand, Q.TLI))
- return IIOperand;
- return nullptr;
- }
- case Intrinsic::bswap: {
- // bswap(bswap(x)) -> x
- if (match(IIOperand, m_BSwap(m_Value(X))))
- return X;
- return nullptr;
- }
- case Intrinsic::bitreverse: {
- // bitreverse(bitreverse(x)) -> x
- if (match(IIOperand, m_BitReverse(m_Value(X))))
- return X;
- return nullptr;
- }
- case Intrinsic::exp: {
- // exp(log(x)) -> x
- if (Q.CxtI->hasAllowReassoc() &&
- match(IIOperand, m_Intrinsic<Intrinsic::log>(m_Value(X))))
- return X;
- return nullptr;
- }
- case Intrinsic::exp2: {
- // exp2(log2(x)) -> x
- if (Q.CxtI->hasAllowReassoc() &&
- match(IIOperand, m_Intrinsic<Intrinsic::log2>(m_Value(X))))
- return X;
- return nullptr;
- }
- case Intrinsic::log: {
- // log(exp(x)) -> x
- if (Q.CxtI->hasAllowReassoc() &&
- match(IIOperand, m_Intrinsic<Intrinsic::exp>(m_Value(X))))
- return X;
- return nullptr;
- }
- case Intrinsic::log2: {
- // log2(exp2(x)) -> x
- if (Q.CxtI->hasAllowReassoc() &&
- match(IIOperand, m_Intrinsic<Intrinsic::exp2>(m_Value(X)))) {
- return X;
- }
- return nullptr;
- }
- default:
- return nullptr;
- }
+ Value *X;
+ switch (IID) {
+ case Intrinsic::fabs:
+ if (SignBitMustBeZero(Op0, Q.TLI)) return Op0;
+ break;
+ case Intrinsic::bswap:
+ // bswap(bswap(x)) -> x
+ if (match(Op0, m_BSwap(m_Value(X)))) return X;
+ break;
+ case Intrinsic::bitreverse:
+ // bitreverse(bitreverse(x)) -> x
+ if (match(Op0, m_BitReverse(m_Value(X)))) return X;
+ break;
+ case Intrinsic::exp:
+ // exp(log(x)) -> x
+ if (Q.CxtI->hasAllowReassoc() &&
+ match(Op0, m_Intrinsic<Intrinsic::log>(m_Value(X)))) return X;
+ break;
+ case Intrinsic::exp2:
+ // exp2(log2(x)) -> x
+ if (Q.CxtI->hasAllowReassoc() &&
+ match(Op0, m_Intrinsic<Intrinsic::log2>(m_Value(X)))) return X;
+ break;
+ case Intrinsic::log:
+ // log(exp(x)) -> x
+ if (Q.CxtI->hasAllowReassoc() &&
+ match(Op0, m_Intrinsic<Intrinsic::exp>(m_Value(X)))) return X;
+ break;
+ case Intrinsic::log2:
+ // log2(exp2(x)) -> x
+ if (Q.CxtI->hasAllowReassoc() &&
+ match(Op0, m_Intrinsic<Intrinsic::exp2>(m_Value(X)))) return X;
+ break;
+ default:
+ break;
}
- // Binary Ops
- if (NumOperands == 2) {
- Value *LHS = *ArgBegin;
- Value *RHS = *(ArgBegin + 1);
- Type *ReturnType = F->getReturnType();
+ return nullptr;
+}
- switch (IID) {
- case Intrinsic::usub_with_overflow:
- case Intrinsic::ssub_with_overflow: {
- // X - X -> { 0, false }
- if (LHS == RHS)
- return Constant::getNullValue(ReturnType);
+static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
+ const SimplifyQuery &Q) {
+ Intrinsic::ID IID = F->getIntrinsicID();
+ Type *ReturnType = F->getReturnType();
+ switch (IID) {
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ // X - X -> { 0, false }
+ if (Op0 == Op1)
+ return Constant::getNullValue(ReturnType);
+ // X - undef -> undef
+ // undef - X -> undef
+ if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1))
+ return UndefValue::get(ReturnType);
+ break;
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ // X + undef -> undef
+ if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1))
+ return UndefValue::get(ReturnType);
+ break;
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ // 0 * X -> { 0, false }
+ // X * 0 -> { 0, false }
+ if (match(Op0, m_Zero()) || match(Op1, m_Zero()))
+ return Constant::getNullValue(ReturnType);
+ // undef * X -> { 0, false }
+ // X * undef -> { 0, false }
+ if (match(Op0, m_Undef()) || match(Op1, m_Undef()))
+ return Constant::getNullValue(ReturnType);
+ break;
+ case Intrinsic::load_relative:
+ if (auto *C0 = dyn_cast<Constant>(Op0))
+ if (auto *C1 = dyn_cast<Constant>(Op1))
+ return SimplifyRelativeLoad(C0, C1, Q.DL);
+ break;
+ case Intrinsic::powi:
+ if (auto *Power = dyn_cast<ConstantInt>(Op1)) {
+ // powi(x, 0) -> 1.0
+ if (Power->isZero())
+ return ConstantFP::get(Op0->getType(), 1.0);
+ // powi(x, 1) -> x
+ if (Power->isOne())
+ return Op0;
+ }
+ break;
+ case Intrinsic::maxnum:
+ case Intrinsic::minnum:
+ // If one argument is NaN, return the other argument.
+ if (match(Op0, m_NaN())) return Op1;
+ if (match(Op1, m_NaN())) return Op0;
+ break;
+ default:
+ break;
+ }
- // X - undef -> undef
- // undef - X -> undef
- if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS))
- return UndefValue::get(ReturnType);
+ return nullptr;
+}
- return nullptr;
- }
- case Intrinsic::uadd_with_overflow:
- case Intrinsic::sadd_with_overflow: {
- // X + undef -> undef
- if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS))
- return UndefValue::get(ReturnType);
+template <typename IterTy>
+static Value *simplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
+ const SimplifyQuery &Q) {
+ // Intrinsics with no operands have some kind of side effect. Don't simplify.
+ unsigned NumOperands = std::distance(ArgBegin, ArgEnd);
+ if (NumOperands == 0)
+ return nullptr;
- return nullptr;
- }
- case Intrinsic::umul_with_overflow:
- case Intrinsic::smul_with_overflow: {
- // 0 * X -> { 0, false }
- // X * 0 -> { 0, false }
- if (match(LHS, m_Zero()) || match(RHS, m_Zero()))
- return Constant::getNullValue(ReturnType);
-
- // undef * X -> { 0, false }
- // X * undef -> { 0, false }
- if (match(LHS, m_Undef()) || match(RHS, m_Undef()))
- return Constant::getNullValue(ReturnType);
+ Intrinsic::ID IID = F->getIntrinsicID();
+ if (NumOperands == 1)
+ return simplifyUnaryIntrinsic(F, ArgBegin[0], Q);
- return nullptr;
- }
- case Intrinsic::load_relative: {
- Constant *C0 = dyn_cast<Constant>(LHS);
- Constant *C1 = dyn_cast<Constant>(RHS);
- if (C0 && C1)
- return SimplifyRelativeLoad(C0, C1, Q.DL);
- return nullptr;
- }
- case Intrinsic::powi:
- if (ConstantInt *Power = dyn_cast<ConstantInt>(RHS)) {
- // powi(x, 0) -> 1.0
- if (Power->isZero())
- return ConstantFP::get(LHS->getType(), 1.0);
- // powi(x, 1) -> x
- if (Power->isOne())
- return LHS;
- }
- return nullptr;
- case Intrinsic::maxnum:
- case Intrinsic::minnum:
- // If one argument is NaN, return the other argument.
- if (match(LHS, m_NaN()))
- return RHS;
- if (match(RHS, m_NaN()))
- return LHS;
- return nullptr;
- default:
- return nullptr;
- }
- }
+ if (NumOperands == 2)
+ return simplifyBinaryIntrinsic(F, ArgBegin[0], ArgBegin[1], Q);
- // Simplify calls to llvm.masked.load.*
+ // Handle intrinsics with 3 or more arguments.
switch (IID) {
case Intrinsic::masked_load: {
Value *MaskArg = ArgBegin[2];
@@ -4756,6 +4800,19 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
return PassthruArg;
return nullptr;
}
+ case Intrinsic::fshl:
+ case Intrinsic::fshr: {
+ Value *ShAmtArg = ArgBegin[2];
+ const APInt *ShAmtC;
+ if (match(ShAmtArg, m_APInt(ShAmtC))) {
+ // If there's effectively no shift, return the 1st arg or 2nd arg.
+ // TODO: For vectors, we could check each element of a non-splat constant.
+ APInt BitWidth = APInt(ShAmtC->getBitWidth(), ShAmtC->getBitWidth());
+ if (ShAmtC->urem(BitWidth).isNullValue())
+ return ArgBegin[IID == Intrinsic::fshl ? 0 : 1];
+ }
+ return nullptr;
+ }
default:
return nullptr;
}
@@ -4780,7 +4837,7 @@ static Value *SimplifyCall(ImmutableCallSite CS, Value *V, IterTy ArgBegin,
return nullptr;
if (F->isIntrinsic())
- if (Value *Ret = SimplifyIntrinsic(F, ArgBegin, ArgEnd, Q, MaxRecurse))
+ if (Value *Ret = simplifyIntrinsic(F, ArgBegin, ArgEnd, Q))
return Ret;
if (!canConstantFoldCallTo(CS, F))
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index 435b6f205199..ee0148e0d795 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -725,7 +725,7 @@ bool LazyValueInfoImpl::solveBlockValueNonLocal(ValueLatticeElement &BBLV,
// frequently arranged such that dominating ones come first and we quickly
// find a path to function entry. TODO: We should consider explicitly
// canonicalizing to make this true rather than relying on this happy
- // accident.
+ // accident.
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
ValueLatticeElement EdgeResult;
if (!getEdgeValue(Val, *PI, BB, EdgeResult))
diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp
index c6175bf9bee9..a24d66011b8d 100644
--- a/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/lib/Analysis/LoopAccessAnalysis.cpp
@@ -176,8 +176,8 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
/// Calculate Start and End points of memory access.
/// Let's assume A is the first access and B is a memory access on N-th loop
-/// iteration. Then B is calculated as:
-/// B = A + Step*N .
+/// iteration. Then B is calculated as:
+/// B = A + Step*N .
/// Step value may be positive or negative.
/// N is a calculated back-edge taken count:
/// N = (TripCount > 0) ? RoundDown(TripCount -1 , VF) : 0
@@ -1317,7 +1317,7 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
return false;
}
-/// Given a non-constant (unknown) dependence-distance \p Dist between two
+/// Given a non-constant (unknown) dependence-distance \p Dist between two
/// memory accesses, that have the same stride whose absolute value is given
/// in \p Stride, and that have the same type size \p TypeByteSize,
/// in a loop whose takenCount is \p BackedgeTakenCount, check if it is
@@ -1336,19 +1336,19 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
// If we can prove that
// (**) |Dist| > BackedgeTakenCount * Step
- // where Step is the absolute stride of the memory accesses in bytes,
+ // where Step is the absolute stride of the memory accesses in bytes,
// then there is no dependence.
//
- // Ratioanle:
- // We basically want to check if the absolute distance (|Dist/Step|)
- // is >= the loop iteration count (or > BackedgeTakenCount).
- // This is equivalent to the Strong SIV Test (Practical Dependence Testing,
- // Section 4.2.1); Note, that for vectorization it is sufficient to prove
+ // Ratioanle:
+ // We basically want to check if the absolute distance (|Dist/Step|)
+ // is >= the loop iteration count (or > BackedgeTakenCount).
+ // This is equivalent to the Strong SIV Test (Practical Dependence Testing,
+ // Section 4.2.1); Note, that for vectorization it is sufficient to prove
// that the dependence distance is >= VF; This is checked elsewhere.
- // But in some cases we can prune unknown dependence distances early, and
- // even before selecting the VF, and without a runtime test, by comparing
- // the distance against the loop iteration count. Since the vectorized code
- // will be executed only if LoopCount >= VF, proving distance >= LoopCount
+ // But in some cases we can prune unknown dependence distances early, and
+ // even before selecting the VF, and without a runtime test, by comparing
+ // the distance against the loop iteration count. Since the vectorized code
+ // will be executed only if LoopCount >= VF, proving distance >= LoopCount
// also guarantees that distance >= VF.
//
const uint64_t ByteStride = Stride * TypeByteSize;
@@ -1360,8 +1360,8 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
uint64_t DistTypeSize = DL.getTypeAllocSize(Dist.getType());
uint64_t ProductTypeSize = DL.getTypeAllocSize(Product->getType());
- // The dependence distance can be positive/negative, so we sign extend Dist;
- // The multiplication of the absolute stride in bytes and the
+ // The dependence distance can be positive/negative, so we sign extend Dist;
+ // The multiplication of the absolute stride in bytes and the
// backdgeTakenCount is non-negative, so we zero extend Product.
if (DistTypeSize > ProductTypeSize)
CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
@@ -2212,24 +2212,24 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
"versioning:");
LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n");
- // Avoid adding the "Stride == 1" predicate when we know that
+ // Avoid adding the "Stride == 1" predicate when we know that
// Stride >= Trip-Count. Such a predicate will effectively optimize a single
// or zero iteration loop, as Trip-Count <= Stride == 1.
- //
+ //
// TODO: We are currently not making a very informed decision on when it is
// beneficial to apply stride versioning. It might make more sense that the
- // users of this analysis (such as the vectorizer) will trigger it, based on
- // their specific cost considerations; For example, in cases where stride
+ // users of this analysis (such as the vectorizer) will trigger it, based on
+ // their specific cost considerations; For example, in cases where stride
// versioning does not help resolving memory accesses/dependences, the
- // vectorizer should evaluate the cost of the runtime test, and the benefit
- // of various possible stride specializations, considering the alternatives
- // of using gather/scatters (if available).
-
+ // vectorizer should evaluate the cost of the runtime test, and the benefit
+ // of various possible stride specializations, considering the alternatives
+ // of using gather/scatters (if available).
+
const SCEV *StrideExpr = PSE->getSCEV(Stride);
- const SCEV *BETakenCount = PSE->getBackedgeTakenCount();
+ const SCEV *BETakenCount = PSE->getBackedgeTakenCount();
// Match the types so we can compare the stride and the BETakenCount.
- // The Stride can be positive/negative, so we sign extend Stride;
+ // The Stride can be positive/negative, so we sign extend Stride;
// The backdgeTakenCount is non-negative, so we zero extend BETakenCount.
const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
uint64_t StrideTypeSize = DL.getTypeAllocSize(StrideExpr->getType());
@@ -2243,7 +2243,7 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
CastedBECount = SE->getZeroExtendExpr(BETakenCount, StrideExpr->getType());
const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount);
// Since TripCount == BackEdgeTakenCount + 1, checking:
- // "Stride >= TripCount" is equivalent to checking:
+ // "Stride >= TripCount" is equivalent to checking:
// Stride - BETakenCount > 0
if (SE->isKnownPositive(StrideMinusBETaken)) {
LLVM_DEBUG(
diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp
index 5c0cbb26484c..5a6bbd7b2ac6 100644
--- a/lib/Analysis/MemDepPrinter.cpp
+++ b/lib/Analysis/MemDepPrinter.cpp
@@ -118,7 +118,7 @@ bool MemDepPrinter::runOnFunction(Function &F) {
} else {
SmallVector<NonLocalDepResult, 4> NLDI;
assert( (isa<LoadInst>(Inst) || isa<StoreInst>(Inst) ||
- isa<VAArgInst>(Inst)) && "Unknown memory instruction!");
+ isa<VAArgInst>(Inst)) && "Unknown memory instruction!");
MDA.getNonLocalPointerDependency(Inst, NLDI);
DepSet &InstDeps = Deps[Inst];
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 7eeefd54f007..feae53c54ecb 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -26,6 +26,7 @@
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OrderedBasicBlock.h"
#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Analysis/PhiValues.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
@@ -1513,6 +1514,8 @@ void MemoryDependenceResults::invalidateCachedPointerInfo(Value *Ptr) {
RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
// Flush load info for the pointer.
RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
+ // Invalidate phis that use the pointer.
+ PV.invalidateValue(Ptr);
}
void MemoryDependenceResults::invalidateCachedPredecessors() {
@@ -1671,6 +1674,9 @@ void MemoryDependenceResults::removeInstruction(Instruction *RemInst) {
}
}
+ // Invalidate phis that use the removed instruction.
+ PV.invalidateValue(RemInst);
+
assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?");
LLVM_DEBUG(verifyRemoved(RemInst));
}
@@ -1730,7 +1736,8 @@ MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
auto &AC = AM.getResult<AssumptionAnalysis>(F);
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
- return MemoryDependenceResults(AA, AC, TLI, DT);
+ auto &PV = AM.getResult<PhiValuesAnalysis>(F);
+ return MemoryDependenceResults(AA, AC, TLI, DT, PV);
}
char MemoryDependenceWrapperPass::ID = 0;
@@ -1741,6 +1748,7 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PhiValuesWrapperPass)
INITIALIZE_PASS_END(MemoryDependenceWrapperPass, "memdep",
"Memory Dependence Analysis", false, true)
@@ -1758,6 +1766,7 @@ void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<PhiValuesWrapperPass>();
AU.addRequiredTransitive<AAResultsWrapperPass>();
AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
}
@@ -1773,7 +1782,8 @@ bool MemoryDependenceResults::invalidate(Function &F, const PreservedAnalyses &P
// Check whether the analyses we depend on became invalid for any reason.
if (Inv.invalidate<AAManager>(F, PA) ||
Inv.invalidate<AssumptionAnalysis>(F, PA) ||
- Inv.invalidate<DominatorTreeAnalysis>(F, PA))
+ Inv.invalidate<DominatorTreeAnalysis>(F, PA) ||
+ Inv.invalidate<PhiValuesAnalysis>(F, PA))
return true;
// Otherwise this analysis result remains valid.
@@ -1789,6 +1799,7 @@ bool MemoryDependenceWrapperPass::runOnFunction(Function &F) {
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- MemDep.emplace(AA, AC, TLI, DT);
+ auto &PV = getAnalysis<PhiValuesWrapperPass>().getResult();
+ MemDep.emplace(AA, AC, TLI, DT, PV);
return false;
}
diff --git a/lib/Analysis/MustExecute.cpp b/lib/Analysis/MustExecute.cpp
index fc4049874622..8e85366b4618 100644
--- a/lib/Analysis/MustExecute.cpp
+++ b/lib/Analysis/MustExecute.cpp
@@ -235,7 +235,7 @@ public:
}
- void printInfoComment(const Value &V, formatted_raw_ostream &OS) override {
+ void printInfoComment(const Value &V, formatted_raw_ostream &OS) override {
if (!MustExec.count(&V))
return;
@@ -245,7 +245,7 @@ public:
OS << " ; (mustexec in " << NumLoops << " loops: ";
else
OS << " ; (mustexec in: ";
-
+
bool first = true;
for (const Loop *L : Loops) {
if (!first)
@@ -264,6 +264,6 @@ bool MustExecutePrinter::runOnFunction(Function &F) {
MustExecuteAnnotatedWriter Writer(F, DT, LI);
F.print(dbgs(), &Writer);
-
+
return false;
}
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index aa95ace93014..0e715b8814ff 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -4839,7 +4839,7 @@ ScalarEvolution::createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI
// Construct the extended SCEV: (Ext ix (Trunc iy (Expr) to ix) to iy)
// for each of StartVal and Accum
- auto getExtendedExpr = [&](const SCEV *Expr,
+ auto getExtendedExpr = [&](const SCEV *Expr,
bool CreateSignExtend) -> const SCEV * {
assert(isLoopInvariant(Expr, L) && "Expr is expected to be invariant");
const SCEV *TruncatedExpr = getTruncateExpr(Expr, TruncTy);
@@ -4935,11 +4935,11 @@ ScalarEvolution::createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI) {
return Rewrite;
}
-// FIXME: This utility is currently required because the Rewriter currently
-// does not rewrite this expression:
-// {0, +, (sext ix (trunc iy to ix) to iy)}
+// FIXME: This utility is currently required because the Rewriter currently
+// does not rewrite this expression:
+// {0, +, (sext ix (trunc iy to ix) to iy)}
// into {0, +, %step},
-// even when the following Equal predicate exists:
+// even when the following Equal predicate exists:
// "%step == (sext ix (trunc iy to ix) to iy)".
bool PredicatedScalarEvolution::areAddRecsEqualWithPreds(
const SCEVAddRecExpr *AR1, const SCEVAddRecExpr *AR2) const {
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 9de2f789c89c..7233a86e5daf 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -721,7 +721,7 @@ struct ReductionData {
static Optional<ReductionData> getReductionData(Instruction *I) {
Value *L, *R;
if (m_BinOp(m_Value(L), m_Value(R)).match(I))
- return ReductionData(RK_Arithmetic, I->getOpcode(), L, R);
+ return ReductionData(RK_Arithmetic, I->getOpcode(), L, R);
if (auto *SI = dyn_cast<SelectInst>(I)) {
if (m_SMin(m_Value(L), m_Value(R)).match(SI) ||
m_SMax(m_Value(L), m_Value(R)).match(SI) ||
@@ -730,8 +730,8 @@ static Optional<ReductionData> getReductionData(Instruction *I) {
m_UnordFMin(m_Value(L), m_Value(R)).match(SI) ||
m_UnordFMax(m_Value(L), m_Value(R)).match(SI)) {
auto *CI = cast<CmpInst>(SI->getCondition());
- return ReductionData(RK_MinMax, CI->getOpcode(), L, R);
- }
+ return ReductionData(RK_MinMax, CI->getOpcode(), L, R);
+ }
if (m_UMin(m_Value(L), m_Value(R)).match(SI) ||
m_UMax(m_Value(L), m_Value(R)).match(SI)) {
auto *CI = cast<CmpInst>(SI->getCondition());
@@ -851,11 +851,11 @@ static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot,
// We look for a sequence of shuffle,shuffle,add triples like the following
// that builds a pairwise reduction tree.
- //
+ //
// (X0, X1, X2, X3)
// (X0 + X1, X2 + X3, undef, undef)
// ((X0 + X1) + (X2 + X3), undef, undef, undef)
- //
+ //
// %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
// <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
// %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
@@ -916,7 +916,7 @@ matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
// We look for a sequence of shuffles and adds like the following matching one
// fadd, shuffle vector pair at a time.
- //
+ //
// %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef,
// <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
// %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
@@ -927,7 +927,7 @@ matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
unsigned MaskStart = 1;
Instruction *RdxOp = RdxStart;
- SmallVector<int, 32> ShuffleMask(NumVecElems, 0);
+ SmallVector<int, 32> ShuffleMask(NumVecElems, 0);
unsigned NumVecElemsRemain = NumVecElems;
while (NumVecElemsRemain - 1) {
// Check for the right reduction operation.
@@ -1093,7 +1093,7 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
case Instruction::InsertElement: {
const InsertElementInst * IE = cast<InsertElementInst>(I);
ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
- unsigned Idx = -1;
+ unsigned Idx = -1;
if (CI)
Idx = CI->getZExtValue();
return getVectorInstrCost(I->getOpcode(),
@@ -1104,7 +1104,7 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
// TODO: Identify and add costs for insert/extract subvector, etc.
if (Shuffle->changesLength())
return -1;
-
+
if (Shuffle->isIdentity())
return 0;
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 04a7b73c22bf..0ef39163bda3 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -71,7 +71,7 @@
#include <cassert>
#include <cstdint>
#include <iterator>
-#include <utility>
+#include <utility>
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -3828,7 +3828,7 @@ static bool checkRippleForSignedAdd(const KnownBits &LHSKnown,
// If either of the values is known to be non-negative, adding them can only
// overflow if the second is also non-negative, so we can assume that.
- // Two non-negative numbers will only overflow if there is a carry to the
+ // Two non-negative numbers will only overflow if there is a carry to the
// sign bit, so we can check if even when the values are as big as possible
// there is no overflow to the sign bit.
if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative()) {
@@ -3855,7 +3855,7 @@ static bool checkRippleForSignedAdd(const KnownBits &LHSKnown,
}
// If we reached here it means that we know nothing about the sign bits.
- // In this case we can't know if there will be an overflow, since by
+ // In this case we can't know if there will be an overflow, since by
// changing the sign bits any two values can be made to overflow.
return false;
}
@@ -3905,7 +3905,7 @@ static OverflowResult computeOverflowForSignedAdd(const Value *LHS,
// operands.
bool LHSOrRHSKnownNonNegative =
(LHSKnown.isNonNegative() || RHSKnown.isNonNegative());
- bool LHSOrRHSKnownNegative =
+ bool LHSOrRHSKnownNegative =
(LHSKnown.isNegative() || RHSKnown.isNegative());
if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) {
KnownBits AddKnown = computeKnownBits(Add, DL, /*Depth=*/0, AC, CxtI, DT);
@@ -4454,7 +4454,7 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
SPR = matchMinMaxOfMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, Depth);
if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN)
return SPR;
-
+
if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT)
return {SPF_UNKNOWN, SPNB_NA, false};
@@ -4630,7 +4630,7 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
case FCmpInst::FCMP_OLE: return {SPF_FMINNUM, NaNBehavior, Ordered};
}
}
-
+
if (isKnownNegation(TrueVal, FalseVal)) {
// Sign-extending LHS does not change its sign, so TrueVal/FalseVal can
// match against either LHS or sext(LHS).
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 599b59bf61e8..7cf74dd16f5a 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -842,7 +842,7 @@ static void maybeSetDSOLocal(bool DSOLocal, GlobalValue &GV) {
}
/// parseIndirectSymbol:
-/// ::= GlobalVar '=' OptionalLinkage OptionalPreemptionSpecifier
+/// ::= GlobalVar '=' OptionalLinkage OptionalPreemptionSpecifier
/// OptionalVisibility OptionalDLLStorageClass
/// OptionalThreadLocal OptionalUnnamedAddr
// 'alias|ifunc' IndirectSymbol
@@ -3935,7 +3935,7 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name, EmissionKindField &Result
Lex.Lex();
return false;
}
-
+
template <>
bool LLParser::ParseMDField(LocTy Loc, StringRef Name,
DwarfAttEncodingField &Result) {
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index be75df0820d9..87b47dc354b5 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -3809,7 +3809,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
continue;
// The mapping from OriginalId to GUID may return a GUID
// that corresponds to a static variable. Filter it out here.
- // This can happen when
+ // This can happen when
// 1) There is a call to a library function which does not have
// a CallValidId;
// 2) There is a static variable with the OriginalGUID identical
diff --git a/lib/CodeGen/AntiDepBreaker.h b/lib/CodeGen/AntiDepBreaker.h
index 181da83dc88b..d93716287981 100644
--- a/lib/CodeGen/AntiDepBreaker.h
+++ b/lib/CodeGen/AntiDepBreaker.h
@@ -46,7 +46,7 @@ public:
MachineBasicBlock::iterator End,
unsigned InsertPosIndex,
DbgValueVector &DbgValues) = 0;
-
+
/// Update liveness information to account for the current
/// instruction, which will not be scheduled.
virtual void Observe(MachineInstr &MI, unsigned Count,
diff --git a/lib/CodeGen/AsmPrinter/AddressPool.cpp b/lib/CodeGen/AsmPrinter/AddressPool.cpp
index 4a226527cb5b..c8305ad9c547 100644
--- a/lib/CodeGen/AsmPrinter/AddressPool.cpp
+++ b/lib/CodeGen/AsmPrinter/AddressPool.cpp
@@ -24,8 +24,26 @@ unsigned AddressPool::getIndex(const MCSymbol *Sym, bool TLS) {
return IterBool.first->second.Number;
}
+
+void AddressPool::emitHeader(AsmPrinter &Asm, MCSection *Section) {
+ static const uint8_t AddrSize = Asm.getDataLayout().getPointerSize();
+ Asm.OutStreamer->SwitchSection(Section);
+
+ uint64_t Length = sizeof(uint16_t) // version
+ + sizeof(uint8_t) // address_size
+ + sizeof(uint8_t) // segment_selector_size
+ + AddrSize * Pool.size(); // entries
+ Asm.emitInt32(Length); // TODO: Support DWARF64 format.
+ Asm.emitInt16(Asm.getDwarfVersion());
+ Asm.emitInt8(AddrSize);
+ Asm.emitInt8(0); // TODO: Support non-zero segment_selector_size.
+}
+
// Emit addresses into the section given.
void AddressPool::emit(AsmPrinter &Asm, MCSection *AddrSection) {
+ if (Asm.getDwarfVersion() >= 5)
+ emitHeader(Asm, AddrSection);
+
if (Pool.empty())
return;
diff --git a/lib/CodeGen/AsmPrinter/AddressPool.h b/lib/CodeGen/AsmPrinter/AddressPool.h
index 5350006bf744..d5008fab5563 100644
--- a/lib/CodeGen/AsmPrinter/AddressPool.h
+++ b/lib/CodeGen/AsmPrinter/AddressPool.h
@@ -50,6 +50,9 @@ public:
bool hasBeenUsed() const { return HasBeenUsed; }
void resetUsedFlag() { HasBeenUsed = false; }
+
+private:
+ void emitHeader(AsmPrinter &Asm, MCSection *Section);
};
} // end namespace llvm
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 8761fae9dd22..500e7a00196f 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -364,7 +364,9 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
else
UseSectionsAsReferences = DwarfSectionsAsReferences == Enable;
- GenerateTypeUnits = GenerateDwarfTypeUnits;
+ // Don't generate type units for unsupported object file formats.
+ GenerateTypeUnits =
+ A->TM.getTargetTriple().isOSBinFormatELF() && GenerateDwarfTypeUnits;
TheAccelTableKind = computeAccelTableKind(
DwarfVersion, GenerateTypeUnits, DebuggerTuning, A->TM.getTargetTriple());
@@ -886,8 +888,7 @@ void DwarfDebug::endModule() {
emitDebugInfoDWO();
emitDebugAbbrevDWO();
emitDebugLineDWO();
- // Emit DWO addresses.
- AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection());
+ emitDebugAddr();
}
// Emit info into the dwarf accelerator table sections.
@@ -2136,7 +2137,7 @@ void DwarfDebug::emitDebugRanges() {
return;
}
- if (getDwarfVersion() >= 5 && NoRangesPresent())
+ if (NoRangesPresent())
return;
// Start the dwarf ranges section.
@@ -2297,6 +2298,12 @@ void DwarfDebug::emitDebugStrDWO() {
OffSec, /* UseRelativeOffsets = */ false);
}
+// Emit DWO addresses.
+void DwarfDebug::emitDebugAddr() {
+ assert(useSplitDwarf() && "No split dwarf?");
+ AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection());
+}
+
MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) {
if (!useSplitDwarf())
return nullptr;
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 0c7be5d27dfe..abf2e43b1312 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -447,6 +447,9 @@ class DwarfDebug : public DebugHandlerBase {
/// Emit the debug str dwo section.
void emitDebugStrDWO();
+ /// Emit DWO addresses.
+ void emitDebugAddr();
+
/// Flags to let the linker know we have emitted new style pubnames. Only
/// emit it here if we don't have a skeleton CU for split dwarf.
void addGnuPubAttributes(DwarfCompileUnit &U, DIE &D) const;
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.h b/lib/CodeGen/AsmPrinter/DwarfExpression.h
index 952b0d99a95a..0637d952eba4 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -112,7 +112,7 @@ protected:
uint64_t OffsetInBits = 0;
unsigned DwarfVersion;
- /// Sometimes we need to add a DW_OP_bit_piece to describe a subregister.
+ /// Sometimes we need to add a DW_OP_bit_piece to describe a subregister.
unsigned SubRegisterSizeInBits = 0;
unsigned SubRegisterOffsetInBits = 0;
diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/lib/CodeGen/AsmPrinter/DwarfFile.cpp
index c90bd568162d..049f349b009a 100644
--- a/lib/CodeGen/AsmPrinter/DwarfFile.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -95,6 +95,6 @@ bool DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
}
} else {
ScopeVars.Locals.push_back(Var);
- }
+ }
return true;
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 43b835b2c4aa..600f4a78fda0 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -1182,7 +1182,7 @@ DIE *DwarfUnit::getOrCreateModule(const DIModule *M) {
addString(MDie, dwarf::DW_AT_LLVM_include_path, M->getIncludePath());
if (!M->getISysRoot().empty())
addString(MDie, dwarf::DW_AT_LLVM_isysroot, M->getISysRoot());
-
+
return &MDie;
}
@@ -1691,7 +1691,7 @@ void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) {
}
void DwarfTypeUnit::emitHeader(bool UseOffsets) {
- DwarfUnit::emitCommonHeader(UseOffsets,
+ DwarfUnit::emitCommonHeader(UseOffsets,
DD->useSplitDwarf() ? dwarf::DW_UT_split_type
: dwarf::DW_UT_type);
Asm->OutStreamer->AddComment("Type Signature");
diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp
index f2615edaece2..e28fc6fb9d4f 100644
--- a/lib/CodeGen/AtomicExpandPass.cpp
+++ b/lib/CodeGen/AtomicExpandPass.cpp
@@ -362,19 +362,19 @@ IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
/// Convert an atomic load of a non-integral type to an integer load of the
/// equivalent bitwidth. See the function comment on
-/// convertAtomicStoreToIntegerType for background.
+/// convertAtomicStoreToIntegerType for background.
LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
auto *M = LI->getModule();
Type *NewTy = getCorrespondingIntegerType(LI->getType(),
M->getDataLayout());
IRBuilder<> Builder(LI);
-
+
Value *Addr = LI->getPointerOperand();
Type *PT = PointerType::get(NewTy,
Addr->getType()->getPointerAddressSpace());
Value *NewAddr = Builder.CreateBitCast(Addr, PT);
-
+
auto *NewLI = Builder.CreateLoad(NewAddr);
NewLI->setAlignment(LI->getAlignment());
NewLI->setVolatile(LI->isVolatile());
@@ -452,7 +452,7 @@ StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
M->getDataLayout());
Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
-
+
Value *Addr = SI->getPointerOperand();
Type *PT = PointerType::get(NewTy,
Addr->getType()->getPointerAddressSpace());
@@ -920,14 +920,14 @@ Value *AtomicExpand::insertRMWLLSCLoop(
/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
/// IR. As a migration step, we convert back to what use to be the standard
/// way to represent a pointer cmpxchg so that we can update backends one by
-/// one.
+/// one.
AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
auto *M = CI->getModule();
Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
M->getDataLayout());
IRBuilder<> Builder(CI);
-
+
Value *Addr = CI->getPointerOperand();
Type *PT = PointerType::get(NewTy,
Addr->getType()->getPointerAddressSpace());
@@ -935,8 +935,8 @@ AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *
Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
-
-
+
+
auto *NewCI = Builder.CreateAtomicCmpXchg(NewAddr, NewCmp, NewNewVal,
CI->getSuccessOrdering(),
CI->getFailureOrdering(),
diff --git a/lib/CodeGen/BuiltinGCs.cpp b/lib/CodeGen/BuiltinGCs.cpp
index abac555d6602..3a9b20aa661d 100644
--- a/lib/CodeGen/BuiltinGCs.cpp
+++ b/lib/CodeGen/BuiltinGCs.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
//
// This file contains the boilerplate required to define our various built in
-// gc lowering strategies.
+// gc lowering strategies.
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 840e5ede6444..5a5960b16130 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -530,7 +530,7 @@ BreakAntiDependencies(const std::vector<SUnit> &SUnits,
// Kill instructions can define registers but are really nops, and there
// might be a real definition earlier that needs to be paired with uses
// dominated by this kill.
-
+
// FIXME: It may be possible to remove the isKill() restriction once PR18663
// has been properly fixed. There can be value in processing kills as seen
// in the AggressiveAntiDepBreaker class.
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
index 456fa799e8e1..fe3d29657942 100644
--- a/lib/CodeGen/GCMetadata.cpp
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -159,7 +159,7 @@ GCStrategy *GCModuleInfo::getGCStrategy(const StringRef Name) {
auto NMI = GCStrategyMap.find(Name);
if (NMI != GCStrategyMap.end())
return NMI->getValue();
-
+
for (auto& Entry : GCRegistry::entries()) {
if (Name == Entry.getName()) {
std::unique_ptr<GCStrategy> S = Entry.instantiate();
@@ -171,11 +171,11 @@ GCStrategy *GCModuleInfo::getGCStrategy(const StringRef Name) {
}
if (GCRegistry::begin() == GCRegistry::end()) {
- // In normal operation, the registry should not be empty. There should
+ // In normal operation, the registry should not be empty. There should
// be the builtin GCs if nothing else. The most likely scenario here is
- // that we got here without running the initializers used by the Registry
+ // that we got here without running the initializers used by the Registry
// itself and it's registration mechanism.
- const std::string error = ("unsupported GC: " + Name).str() +
+ const std::string error = ("unsupported GC: " + Name).str() +
" (did you remember to link and initialize the CodeGen library?)";
report_fatal_error(error);
} else
diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp
index bafb7a05536d..80da50562d32 100644
--- a/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallSet.h"
@@ -33,6 +34,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -1503,6 +1505,8 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {
Ops.push_back(getOrCreateVReg(*CV->getOperand(i)));
}
EntryBuilder.buildMerge(Reg, Ops);
+ } else if (auto *BA = dyn_cast<BlockAddress>(&C)) {
+ EntryBuilder.buildBlockAddress(Reg, BA);
} else
return false;
@@ -1611,19 +1615,20 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
ArgIt++;
}
- // And translate the function!
- for (const BasicBlock &BB : F) {
- MachineBasicBlock &MBB = getMBB(BB);
+ // Need to visit defs before uses when translating instructions.
+ ReversePostOrderTraversal<const Function *> RPOT(&F);
+ for (const BasicBlock *BB : RPOT) {
+ MachineBasicBlock &MBB = getMBB(*BB);
// Set the insertion point of all the following translations to
// the end of this basic block.
CurBuilder.setMBB(MBB);
- for (const Instruction &Inst : BB) {
+ for (const Instruction &Inst : *BB) {
if (translate(Inst))
continue;
OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
- Inst.getDebugLoc(), &BB);
+ Inst.getDebugLoc(), BB);
R << "unable to translate instruction: " << ore::NV("Opcode", &Inst);
if (ORE->allowExtraAnalysis("gisel-irtranslator")) {
diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 9df931eb81b3..3271b54aa830 100644
--- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -809,6 +809,15 @@ MachineIRBuilderBase::buildAtomicRMWUmin(unsigned OldValRes, unsigned Addr,
MMO);
}
+MachineInstrBuilder
+MachineIRBuilderBase::buildBlockAddress(unsigned Res, const BlockAddress *BA) {
+#ifndef NDEBUG
+ assert(getMRI()->getType(Res).isPointer() && "invalid res type");
+#endif
+
+ return buildInstr(TargetOpcode::G_BLOCK_ADDR).addDef(Res).addBlockAddress(BA);
+}
+
void MachineIRBuilderBase::validateTruncExt(unsigned Dst, unsigned Src,
bool IsExtend) {
#ifndef NDEBUG
diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp
index ca56f4e0c4f1..9f7f5e392a9a 100644
--- a/lib/CodeGen/GlobalMerge.cpp
+++ b/lib/CodeGen/GlobalMerge.cpp
@@ -56,7 +56,7 @@
// - it makes linker optimizations less useful (order files, LOHs, ...)
// - it forces usage of indexed addressing (which isn't necessarily "free")
// - it can increase register pressure when the uses are disparate enough.
-//
+//
// We use heuristics to discover the best global grouping we can (cf cl::opts).
//
// ===---------------------------------------------------------------------===//
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index eb4099964242..707113bd973b 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -113,22 +113,22 @@ void IntrinsicLowering::AddPrototypes(Module &M) {
case Intrinsic::memcpy:
M.getOrInsertFunction("memcpy",
Type::getInt8PtrTy(Context),
- Type::getInt8PtrTy(Context),
- Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
DL.getIntPtrType(Context));
break;
case Intrinsic::memmove:
M.getOrInsertFunction("memmove",
Type::getInt8PtrTy(Context),
- Type::getInt8PtrTy(Context),
- Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
DL.getIntPtrType(Context));
break;
case Intrinsic::memset:
M.getOrInsertFunction("memset",
Type::getInt8PtrTy(Context),
- Type::getInt8PtrTy(Context),
- Type::getInt32Ty(M.getContext()),
+ Type::getInt8PtrTy(Context),
+ Type::getInt32Ty(M.getContext()),
DL.getIntPtrType(Context));
break;
case Intrinsic::sqrt:
@@ -210,13 +210,13 @@ static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) {
"bswap.5");
Value* Tmp4 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
"bswap.4");
- Value* Tmp3 = Builder.CreateLShr(V,
+ Value* Tmp3 = Builder.CreateLShr(V,
ConstantInt::get(V->getType(), 24),
"bswap.3");
- Value* Tmp2 = Builder.CreateLShr(V,
+ Value* Tmp2 = Builder.CreateLShr(V,
ConstantInt::get(V->getType(), 40),
"bswap.2");
- Value* Tmp1 = Builder.CreateLShr(V,
+ Value* Tmp1 = Builder.CreateLShr(V,
ConstantInt::get(V->getType(), 56),
"bswap.1");
Tmp7 = Builder.CreateAnd(Tmp7,
@@ -274,7 +274,7 @@ static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) {
for (unsigned n = 0; n < WordSize; ++n) {
Value *PartValue = V;
- for (unsigned i = 1, ct = 0; i < (BitSize>64 ? 64 : BitSize);
+ for (unsigned i = 1, ct = 0; i < (BitSize>64 ? 64 : BitSize);
i <<= 1, ++ct) {
Value *MaskCst = ConstantInt::get(V->getType(), MaskValues[ct]);
Value *LHS = Builder.CreateAnd(PartValue, MaskCst, "cppop.and1");
@@ -381,7 +381,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
case Intrinsic::siglongjmp: {
// Insert the call to abort
- ReplaceCallWith("abort", CI, CS.arg_end(), CS.arg_end(),
+ ReplaceCallWith("abort", CI, CS.arg_end(), CS.arg_end(),
Type::getVoidTy(Context));
break;
}
@@ -392,7 +392,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
case Intrinsic::bswap:
CI->replaceAllUsesWith(LowerBSWAP(Context, CI->getArgOperand(0), CI));
break;
-
+
case Intrinsic::ctlz:
CI->replaceAllUsesWith(LowerCTLZ(Context, CI->getArgOperand(0), CI));
break;
@@ -420,7 +420,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
break;
}
-
+
case Intrinsic::get_dynamic_area_offset:
errs() << "WARNING: this target does not support the custom llvm.get."
"dynamic.area.offset. It is being lowered to a constant 0\n";
@@ -473,7 +473,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
case Intrinsic::assume:
case Intrinsic::var_annotation:
break; // Strip out these intrinsics
-
+
case Intrinsic::memcpy: {
Type *IntPtr = DL.getIntPtrType(Context);
Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
diff --git a/lib/CodeGen/LiveDebugValues.cpp b/lib/CodeGen/LiveDebugValues.cpp
index fea83e92de8f..417bd9d5aebe 100644
--- a/lib/CodeGen/LiveDebugValues.cpp
+++ b/lib/CodeGen/LiveDebugValues.cpp
@@ -340,7 +340,7 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF,
/// address the spill location in a target independent way.
int LiveDebugValues::extractSpillBaseRegAndOffset(const MachineInstr &MI,
unsigned &Reg) {
- assert(MI.hasOneMemOperand() &&
+ assert(MI.hasOneMemOperand() &&
"Spill instruction does not have exactly one memory operand?");
auto MMOI = MI.memoperands_begin();
const PseudoSourceValue *PVal = (*MMOI)->getPseudoValue();
@@ -472,7 +472,7 @@ bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI,
int FI;
const MachineMemOperand *MMO;
- // TODO: Handle multiple stores folded into one.
+ // TODO: Handle multiple stores folded into one.
if (!MI.hasOneMemOperand())
return false;
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index 054cc97f8374..639cd80768fc 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -314,10 +314,10 @@ public:
MMI.deleteMachineFunctionFor(F);
return true;
}
-
+
StringRef getPassName() const override {
return "Free MachineFunction";
- }
+ }
};
} // end anonymous namespace
diff --git a/lib/CodeGen/MachineOutliner.cpp b/lib/CodeGen/MachineOutliner.cpp
index 28e4e2c6c87a..a712afec0959 100644
--- a/lib/CodeGen/MachineOutliner.cpp
+++ b/lib/CodeGen/MachineOutliner.cpp
@@ -620,10 +620,8 @@ struct InstructionMapper {
/// queried for candidates.
///
/// \param MBB The \p MachineBasicBlock to be translated into integers.
- /// \param TRI \p TargetRegisterInfo for the module.
- /// \param TII \p TargetInstrInfo for the module.
+ /// \param TII \p TargetInstrInfo for the function.
void convertToUnsignedVec(MachineBasicBlock &MBB,
- const TargetRegisterInfo &TRI,
const TargetInstrInfo &TII) {
unsigned Flags = TII.getMachineOutlinerMBBFlags(MBB);
@@ -729,7 +727,6 @@ struct MachineOutliner : public ModulePass {
/// its leaf children to find the locations of its substring.
///
/// \param ST A suffix tree to query.
- /// \param TII TargetInstrInfo for the target.
/// \param Mapper Contains outlining mapping information.
/// \param[out] CandidateList Filled with candidates representing each
/// beneficial substring.
@@ -738,7 +735,7 @@ struct MachineOutliner : public ModulePass {
///
/// \returns The length of the longest candidate found.
unsigned
- findCandidates(SuffixTree &ST, const TargetInstrInfo &TII,
+ findCandidates(SuffixTree &ST,
InstructionMapper &Mapper,
std::vector<std::shared_ptr<Candidate>> &CandidateList,
std::vector<OutlinedFunction> &FunctionList);
@@ -770,14 +767,12 @@ struct MachineOutliner : public ModulePass {
/// \param[out] FunctionList Filled with functions corresponding to each type
/// of \p Candidate.
/// \param ST The suffix tree for the module.
- /// \param TII TargetInstrInfo for the module.
///
/// \returns The length of the longest candidate found. 0 if there are none.
unsigned
buildCandidateList(std::vector<std::shared_ptr<Candidate>> &CandidateList,
std::vector<OutlinedFunction> &FunctionList,
- SuffixTree &ST, InstructionMapper &Mapper,
- const TargetInstrInfo &TII);
+ SuffixTree &ST, InstructionMapper &Mapper);
/// Helper function for pruneOverlaps.
/// Removes \p C from the candidate list, and updates its \p OutlinedFunction.
@@ -795,11 +790,9 @@ struct MachineOutliner : public ModulePass {
/// \param[in,out] FunctionList A list of functions to be outlined.
/// \param Mapper Contains instruction mapping info for outlining.
/// \param MaxCandidateLen The length of the longest candidate.
- /// \param TII TargetInstrInfo for the module.
void pruneOverlaps(std::vector<std::shared_ptr<Candidate>> &CandidateList,
std::vector<OutlinedFunction> &FunctionList,
- InstructionMapper &Mapper, unsigned MaxCandidateLen,
- const TargetInstrInfo &TII);
+ InstructionMapper &Mapper, unsigned MaxCandidateLen);
/// Construct a suffix tree on the instructions in \p M and outline repeated
/// strings from that tree.
@@ -892,7 +885,7 @@ void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) {
}
unsigned MachineOutliner::findCandidates(
- SuffixTree &ST, const TargetInstrInfo &TII, InstructionMapper &Mapper,
+ SuffixTree &ST, InstructionMapper &Mapper,
std::vector<std::shared_ptr<Candidate>> &CandidateList,
std::vector<OutlinedFunction> &FunctionList) {
CandidateList.clear();
@@ -945,7 +938,7 @@ unsigned MachineOutliner::findCandidates(
// AA (where each "A" is an instruction).
//
// We might have some portion of the module that looks like this:
- // AAAAAA (6 A's)
+ // AAAAAA (6 A's)
//
// In this case, there are 5 different copies of "AA" in this range, but
// at most 3 can be outlined. If only outlining 3 of these is going to
@@ -979,8 +972,16 @@ unsigned MachineOutliner::findCandidates(
// We've found something we might want to outline.
// Create an OutlinedFunction to store it and check if it'd be beneficial
// to outline.
+ if (CandidatesForRepeatedSeq.empty())
+ continue;
+
+ // Arbitrarily choose a TII from the first candidate.
+ // FIXME: Should getOutliningCandidateInfo move to TargetMachine?
+ const TargetInstrInfo *TII =
+ CandidatesForRepeatedSeq[0].getMF()->getSubtarget().getInstrInfo();
+
OutlinedFunction OF =
- TII.getOutliningCandidateInfo(CandidatesForRepeatedSeq);
+ TII->getOutliningCandidateInfo(CandidatesForRepeatedSeq);
// If we deleted every candidate, then there's nothing to outline.
if (OF.Candidates.empty())
@@ -1036,7 +1037,7 @@ void MachineOutliner::prune(Candidate &C,
void MachineOutliner::pruneOverlaps(
std::vector<std::shared_ptr<Candidate>> &CandidateList,
std::vector<OutlinedFunction> &FunctionList, InstructionMapper &Mapper,
- unsigned MaxCandidateLen, const TargetInstrInfo &TII) {
+ unsigned MaxCandidateLen) {
// Return true if this candidate became unbeneficial for outlining in a
// previous step.
@@ -1127,13 +1128,13 @@ void MachineOutliner::pruneOverlaps(
unsigned MachineOutliner::buildCandidateList(
std::vector<std::shared_ptr<Candidate>> &CandidateList,
std::vector<OutlinedFunction> &FunctionList, SuffixTree &ST,
- InstructionMapper &Mapper, const TargetInstrInfo &TII) {
+ InstructionMapper &Mapper) {
std::vector<unsigned> CandidateSequence; // Current outlining candidate.
unsigned MaxCandidateLen = 0; // Length of the longest candidate.
MaxCandidateLen =
- findCandidates(ST, TII, Mapper, CandidateList, FunctionList);
+ findCandidates(ST, Mapper, CandidateList, FunctionList);
// Sort the candidates in decending order. This will simplify the outlining
// process when we have to remove the candidates from the mapping by
@@ -1339,10 +1340,6 @@ bool MachineOutliner::runOnModule(Module &M) {
return false;
MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
- const TargetSubtargetInfo &STI =
- MMI.getOrCreateMachineFunction(*M.begin()).getSubtarget();
- const TargetRegisterInfo *TRI = STI.getRegisterInfo();
- const TargetInstrInfo *TII = STI.getInstrInfo();
// If the user passed -enable-machine-outliner=always or
// -enable-machine-outliner, the pass will run on all functions in the module.
@@ -1382,6 +1379,8 @@ bool MachineOutliner::runOnModule(Module &M) {
if (!MF)
continue;
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+
if (!RunOnAllFunctions && !TII->shouldOutlineFromFunctionByDefault(*MF))
continue;
@@ -1405,7 +1404,7 @@ bool MachineOutliner::runOnModule(Module &M) {
continue;
// MBB is suitable for outlining. Map it to a list of unsigneds.
- Mapper.convertToUnsignedVec(MBB, *TRI, *TII);
+ Mapper.convertToUnsignedVec(MBB, *TII);
}
}
@@ -1416,10 +1415,10 @@ bool MachineOutliner::runOnModule(Module &M) {
// Find all of the outlining candidates.
unsigned MaxCandidateLen =
- buildCandidateList(CandidateList, FunctionList, ST, Mapper, *TII);
+ buildCandidateList(CandidateList, FunctionList, ST, Mapper);
// Remove candidates that overlap with other candidates.
- pruneOverlaps(CandidateList, FunctionList, Mapper, MaxCandidateLen, *TII);
+ pruneOverlaps(CandidateList, FunctionList, Mapper, MaxCandidateLen);
// Outline each of the candidates and return true if something was outlined.
bool OutlinedSomething = outline(M, CandidateList, FunctionList, Mapper);
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 6095bdd06b69..f632a9bd457f 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -383,7 +383,7 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) {
assert(FromReg != ToReg && "Cannot replace a reg with itself");
const TargetRegisterInfo *TRI = getTargetRegisterInfo();
-
+
// TODO: This could be more efficient by bulk changing the operands.
for (reg_iterator I = reg_begin(FromReg), E = reg_end(); I != E; ) {
MachineOperand &O = *I;
diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp
index 773661965f18..542491eabbf2 100644
--- a/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/lib/CodeGen/MachineSSAUpdater.cpp
@@ -254,14 +254,14 @@ public:
private:
MachineInstr *PHI;
unsigned idx;
-
+
public:
explicit PHI_iterator(MachineInstr *P) // begin iterator
: PHI(P), idx(1) {}
PHI_iterator(MachineInstr *P, bool) // end iterator
: PHI(P), idx(PHI->getNumOperands()) {}
- PHI_iterator &operator++() { idx += 2; return *this; }
+ PHI_iterator &operator++() { idx += 2; return *this; }
bool operator==(const PHI_iterator& x) const { return idx == x.idx; }
bool operator!=(const PHI_iterator& x) const { return !operator==(x); }
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index 354f46e9e625..1fd40f757351 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -509,7 +509,7 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
}
ToSplit.insert(std::make_pair(FromBB, ToBB));
-
+
return true;
}
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp
index b444cd31eba2..79ca6adf95c4 100644
--- a/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@@ -655,7 +655,7 @@ static bool getDataDeps(const MachineInstr &UseMI,
// Debug values should not be included in any calculations.
if (UseMI.isDebugInstr())
return false;
-
+
bool HasPhysRegs = false;
for (MachineInstr::const_mop_iterator I = UseMI.operands_begin(),
E = UseMI.operands_end(); I != E; ++I) {
@@ -1167,7 +1167,7 @@ MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) {
computeInstrDepths(MBB);
if (!TBI.HasValidInstrHeights)
computeInstrHeights(MBB);
-
+
return Trace(*this, TBI);
}
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index d644e41abc5b..318776136e24 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -1077,8 +1077,8 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
auto VerifyStackMapConstant = [&](unsigned Offset) {
if (!MI->getOperand(Offset).isImm() ||
- MI->getOperand(Offset).getImm() != StackMaps::ConstantOp ||
- !MI->getOperand(Offset + 1).isImm())
+ MI->getOperand(Offset).getImm() != StackMaps::ConstantOp ||
+ !MI->getOperand(Offset + 1).isImm())
report("stack map constant to STATEPOINT not well formed!", MI);
};
const unsigned VarStart = StatepointOpers(MI).getVarIdx();
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index a878c34f9aa4..3660586c1358 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -594,7 +594,8 @@ unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
MachineBasicBlock::iterator ReloadAfter =
RestoreAfter ? std::next(MBBI) : MBBI;
MachineBasicBlock::iterator ReloadBefore = std::next(ReloadAfter);
- LLVM_DEBUG(dbgs() << "Reload before: " << *ReloadBefore << '\n');
+ if (ReloadBefore != MBB.end())
+ LLVM_DEBUG(dbgs() << "Reload before: " << *ReloadBefore << '\n');
ScavengedInfo &Scavenged = spill(Reg, RC, SPAdj, SpillBefore, ReloadBefore);
Scavenged.Restore = &*std::prev(SpillBefore);
LiveUnits.removeReg(Reg);
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 7a99687757f8..a8c4b85df321 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -72,7 +72,6 @@
#include <string>
#include <tuple>
#include <utility>
-#include <vector>
using namespace llvm;
@@ -483,9 +482,6 @@ namespace {
/// returns false.
bool findBetterNeighborChains(StoreSDNode *St);
- /// Match "(X shl/srl V1) & V2" where V2 may not be present.
- bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask);
-
/// Holds a pointer to an LSBaseSDNode as well as information on where it
/// is located in a sequence of memory operations connected by a chain.
struct MemOpLink {
@@ -2671,6 +2667,12 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
N0.getOperand(1).getOperand(0));
+ // fold (A-(B-C)) -> A+(C-B)
+ if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
+ return DAG.getNode(ISD::ADD, DL, VT, N0,
+ DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
+ N1.getOperand(0)));
+
// fold (X - (-Y * Z)) -> (X + (Y * Z))
if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
if (N1.getOperand(0).getOpcode() == ISD::SUB &&
@@ -2740,6 +2742,17 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
}
}
+ // Prefer an add for more folding potential and possibly better codegen:
+ // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
+ if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
+ SDValue ShAmt = N1.getOperand(1);
+ ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
+ if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) {
+ SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
+ return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
+ }
+ }
+
return SDValue();
}
@@ -4205,8 +4218,8 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
// Allow one node which will masked along with any loads found.
if (NodeToMask)
return false;
-
- // Also ensure that the node to be masked only produces one data result.
+
+ // Also ensure that the node to be masked only produces one data result.
NodeToMask = Op.getNode();
if (NodeToMask->getNumValues() > 1) {
bool HasValue = false;
@@ -5148,25 +5161,140 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return SDValue();
}
-/// Match "(X shl/srl V1) & V2" where V2 may not be present.
-bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
- if (Op.getOpcode() == ISD::AND) {
- if (DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
- Mask = Op.getOperand(1);
- Op = Op.getOperand(0);
- } else {
- return false;
- }
+static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
+ if (Op.getOpcode() == ISD::AND &&
+ DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
+ Mask = Op.getOperand(1);
+ return Op.getOperand(0);
}
+ return Op;
+}
+/// Match "(X shl/srl V1) & V2" where V2 may not be present.
+static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
+ SDValue &Mask) {
+ Op = stripConstantMask(DAG, Op, Mask);
if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
Shift = Op;
return true;
}
-
return false;
}
+/// Helper function for visitOR to extract the needed side of a rotate idiom
+/// from a shl/srl/mul/udiv. This is meant to handle cases where
+/// InstCombine merged some outside op with one of the shifts from
+/// the rotate pattern.
+/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
+/// Otherwise, returns an expansion of \p ExtractFrom based on the following
+/// patterns:
+///
+/// (or (mul v c0) (shrl (mul v c1) c2)):
+/// expands (mul v c0) -> (shl (mul v c1) c3)
+///
+/// (or (udiv v c0) (shl (udiv v c1) c2)):
+/// expands (udiv v c0) -> (shrl (udiv v c1) c3)
+///
+/// (or (shl v c0) (shrl (shl v c1) c2)):
+/// expands (shl v c0) -> (shl (shl v c1) c3)
+///
+/// (or (shrl v c0) (shl (shrl v c1) c2)):
+/// expands (shrl v c0) -> (shrl (shrl v c1) c3)
+///
+/// Such that in all cases, c3+c2==bitwidth(op v c1).
+static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
+ SDValue ExtractFrom, SDValue &Mask,
+ const SDLoc &DL) {
+ assert(OppShift && ExtractFrom && "Empty SDValue");
+ assert(
+ (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
+ "Existing shift must be valid as a rotate half");
+
+ ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
+ // Preconditions:
+ // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
+ //
+ // Find opcode of the needed shift to be extracted from (op0 v c0).
+ unsigned Opcode = ISD::DELETED_NODE;
+ bool IsMulOrDiv = false;
+ // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
+ // opcode or its arithmetic (mul or udiv) variant.
+ auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
+ IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
+ if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
+ return false;
+ Opcode = NeededShift;
+ return true;
+ };
+ // op0 must be either the needed shift opcode or the mul/udiv equivalent
+ // that the needed shift can be extracted from.
+ if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
+ (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
+ return SDValue();
+
+ // op0 must be the same opcode on both sides, have the same LHS argument,
+ // and produce the same value type.
+ SDValue OppShiftLHS = OppShift.getOperand(0);
+ EVT ShiftedVT = OppShiftLHS.getValueType();
+ if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
+ OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
+ ShiftedVT != ExtractFrom.getValueType())
+ return SDValue();
+
+ // Amount of the existing shift.
+ ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
+ // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
+ ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
+ // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
+ ConstantSDNode *ExtractFromCst =
+ isConstOrConstSplat(ExtractFrom.getOperand(1));
+ // TODO: We should be able to handle non-uniform constant vectors for these values
+ // Check that we have constant values.
+ if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
+ !OppLHSCst || !OppLHSCst->getAPIntValue() ||
+ !ExtractFromCst || !ExtractFromCst->getAPIntValue())
+ return SDValue();
+
+ // Compute the shift amount we need to extract to complete the rotate.
+ const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
+ APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
+ if (NeededShiftAmt.isNegative())
+ return SDValue();
+ // Normalize the bitwidth of the two mul/udiv/shift constant operands.
+ APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
+ APInt OppLHSAmt = OppLHSCst->getAPIntValue();
+ zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
+
+ // Now try extract the needed shift from the ExtractFrom op and see if the
+ // result matches up with the existing shift's LHS op.
+ if (IsMulOrDiv) {
+ // Op to extract from is a mul or udiv by a constant.
+ // Check:
+ // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
+ // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
+ const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
+ NeededShiftAmt.getZExtValue());
+ APInt ResultAmt;
+ APInt Rem;
+ APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
+ if (Rem != 0 || ResultAmt != OppLHSAmt)
+ return SDValue();
+ } else {
+ // Op to extract from is a shift by a constant.
+ // Check:
+ // c2 - (bitwidth(op0 v c0) - c1) == c0
+ if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
+ ExtractFromAmt.getBitWidth()))
+ return SDValue();
+ }
+
+ // Return the expanded shift op that should allow a rotate to be formed.
+ EVT ShiftVT = OppShift.getOperand(1).getValueType();
+ EVT ResVT = ExtractFrom.getValueType();
+ SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
+ return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
+}
+
// Return true if we can prove that, whenever Neg and Pos are both in the
// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
@@ -5333,13 +5461,40 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
// Match "(X shl/srl V1) & V2" where V2 may not be present.
SDValue LHSShift; // The shift.
SDValue LHSMask; // AND value if any.
- if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
- return nullptr; // Not part of a rotate.
+ matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
SDValue RHSShift; // The shift.
SDValue RHSMask; // AND value if any.
- if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
- return nullptr; // Not part of a rotate.
+ matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
+
+ // If neither side matched a rotate half, bail
+ if (!LHSShift && !RHSShift)
+ return nullptr;
+
+ // InstCombine may have combined a constant shl, srl, mul, or udiv with one
+ // side of the rotate, so try to handle that here. In all cases we need to
+ // pass the matched shift from the opposite side to compute the opcode and
+ // needed shift amount to extract. We still want to do this if both sides
+ // matched a rotate half because one half may be a potential overshift that
+ // can be broken down (ie if InstCombine merged two shl or srl ops into a
+ // single one).
+
+ // Have LHS side of the rotate, try to extract the needed shift from the RHS.
+ if (LHSShift)
+ if (SDValue NewRHSShift =
+ extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
+ RHSShift = NewRHSShift;
+ // Have RHS side of the rotate, try to extract the needed shift from the LHS.
+ if (RHSShift)
+ if (SDValue NewLHSShift =
+ extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
+ LHSShift = NewLHSShift;
+
+ // If a side is still missing, nothing else we can do.
+ if (!RHSShift || !LHSShift)
+ return nullptr;
+
+ // At this point we've matched or extracted a shift op on each side.
if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
return nullptr; // Not shifting the same value.
@@ -10270,7 +10425,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N10.getOperand(0))),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N10.getOperand(1)),
- N0, Flags);
+ N0, Flags);
}
}
@@ -10333,7 +10488,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N0.getOperand(2).getOperand(0),
N0.getOperand(2).getOperand(1),
DAG.getNode(ISD::FNEG, SL, VT,
- N1), Flags), Flags);
+ N1), Flags), Flags);
}
// fold (fsub x, (fma y, z, (fmul u, v)))
@@ -10348,7 +10503,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N1.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, N20),
- N21, N0, Flags), Flags);
+ N21, N0, Flags), Flags);
}
@@ -10368,7 +10523,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N020.getOperand(1)),
DAG.getNode(ISD::FNEG, SL, VT,
- N1), Flags), Flags);
+ N1), Flags), Flags);
}
}
}
@@ -10396,7 +10551,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N002.getOperand(1)),
DAG.getNode(ISD::FNEG, SL, VT,
- N1), Flags), Flags);
+ N1), Flags), Flags);
}
}
}
@@ -10419,7 +10574,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
VT, N1200)),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N1201),
- N0, Flags), Flags);
+ N0, Flags), Flags);
}
}
@@ -10450,7 +10605,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
VT, N1020)),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N1021),
- N0, Flags), Flags);
+ N0, Flags), Flags);
}
}
}
@@ -10506,7 +10661,7 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
Y, Flags);
if (XC1 && XC1->isExactlyValue(-1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
- DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
+ DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
}
return SDValue();
};
@@ -10530,7 +10685,7 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
if (XC0 && XC0->isExactlyValue(-1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
- DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
+ DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
if (XC1 && XC1->isExactlyValue(+1.0))
@@ -10838,12 +10993,12 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
- if (Options.UnsafeFPMath ||
+ if (Options.UnsafeFPMath ||
(Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
// fold (fmul A, 0) -> 0
if (N1CFP && N1CFP->isZero())
return N1;
- }
+ }
if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
// fmul (fmul X, C1), C2 -> fmul X, C1 * C2
@@ -11258,7 +11413,7 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
SDValue DAGCombiner::visitFSQRT(SDNode *N) {
SDNodeFlags Flags = N->getFlags();
- if (!DAG.getTarget().Options.UnsafeFPMath &&
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
!Flags.hasApproximateFuncs())
return SDValue();
@@ -17913,9 +18068,9 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) {
if (C->isNullValue())
return SDValue();
- std::vector<SDNode *> Built;
+ SmallVector<SDNode *, 8> Built;
SDValue S =
- TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
+ TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, Built);
for (SDNode *N : Built)
AddToWorklist(N);
@@ -17933,8 +18088,8 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
if (C->isNullValue())
return SDValue();
- std::vector<SDNode *> Built;
- SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built);
+ SmallVector<SDNode *, 8> Built;
+ SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built);
for (SDNode *N : Built)
AddToWorklist(N);
@@ -17959,9 +18114,9 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
if (C->isNullValue())
return SDValue();
- std::vector<SDNode *> Built;
+ SmallVector<SDNode *, 8> Built;
SDValue S =
- TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
+ TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, Built);
for (SDNode *N : Built)
AddToWorklist(N);
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index e4a9d557d386..795ade588b8f 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1130,7 +1130,7 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
ComputeValueVTs(TLI, DL, CLI.RetTy, RetTys);
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, TLI, DL);
+ GetReturnInfo(CLI.CallConv, CLI.RetTy, getReturnAttrs(CLI), Outs, TLI, DL);
bool CanLowerReturn = TLI.CanLowerReturn(
CLI.CallConv, *FuncInfo.MF, CLI.IsVarArg, Outs, CLI.RetTy->getContext());
@@ -1548,7 +1548,7 @@ void FastISel::removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue)
{
MachineInstr *CurLastLocalValue = getLastLocalValue();
if (CurLastLocalValue != SavedLastLocalValue) {
- // Find the first local value instruction to be deleted.
+ // Find the first local value instruction to be deleted.
// This is the instruction after SavedLastLocalValue if it is non-NULL.
// Otherwise it's the first instruction in the block.
MachineBasicBlock::iterator FirstDeadInst(SavedLastLocalValue);
@@ -1569,7 +1569,7 @@ bool FastISel::selectInstruction(const Instruction *I) {
if (!handlePHINodesInSuccessorBlocks(I->getParent())) {
// PHI node handling may have generated local value instructions,
// even though it failed to handle all PHI nodes.
- // We remove these instructions because SelectionDAGISel will generate
+ // We remove these instructions because SelectionDAGISel will generate
// them again.
removeDeadLocalValueCode(SavedLastLocalValue);
return false;
@@ -1630,7 +1630,7 @@ bool FastISel::selectInstruction(const Instruction *I) {
DbgLoc = DebugLoc();
// Undo phi node updates, because they will be added again by SelectionDAG.
if (isa<TerminatorInst>(I)) {
- // PHI node handling may have generated local value instructions.
+ // PHI node handling may have generated local value instructions.
// We remove them because SelectionDAGISel will generate them again.
removeDeadLocalValueCode(SavedLastLocalValue);
FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate);
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 42c7181dac41..d3c31911d677 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -89,10 +89,12 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
// Check whether the function can return without sret-demotion.
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(Fn->getReturnType(), Fn->getAttributes(), Outs, *TLI,
+ CallingConv::ID CC = Fn->getCallingConv();
+
+ GetReturnInfo(CC, Fn->getReturnType(), Fn->getAttributes(), Outs, *TLI,
mf.getDataLayout());
- CanLowerReturn = TLI->CanLowerReturn(Fn->getCallingConv(), *MF,
- Fn->isVarArg(), Outs, Fn->getContext());
+ CanLowerReturn =
+ TLI->CanLowerReturn(CC, *MF, Fn->isVarArg(), Outs, Fn->getContext());
// If this personality uses funclets, we need to do a bit more work.
DenseMap<const AllocaInst *, TinyPtrVector<int *>> CatchObjects;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index b0ae1e0399fb..9aa0ea15f3b7 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -153,7 +153,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) {
// of Endianness. LLVM's APFloat representation is not Endian sensitive,
// and so always converts into a 128-bit APInt in a non-Endian-sensitive
// way. However, APInt's are serialized in an Endian-sensitive fashion,
- // so on big-Endian targets, the two doubles are output in the wrong
+ // so on big-Endian targets, the two doubles are output in the wrong
// order. Fix this by manually flipping the order of the high 64 bits
// and the low 64 bits here.
if (DAG.getDataLayout().isBigEndian() &&
@@ -815,7 +815,7 @@ bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) {
switch (N->getOpcode()) {
case ISD::ConstantFP: // Leaf node.
- case ISD::CopyFromReg: // Operand is a register that we know to be left
+ case ISD::CopyFromReg: // Operand is a register that we know to be left
// unchanged by SoftenFloatResult().
case ISD::Register: // Leaf node.
return true;
@@ -838,7 +838,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_COPY_TO_REG(SDNode *N) {
if (N->getNumOperands() == 3)
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2), 0);
- return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2,
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2,
N->getOperand(3)),
0);
}
@@ -1898,7 +1898,8 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FROUND:
case ISD::FSIN:
case ISD::FSQRT:
- case ISD::FTRUNC: R = PromoteFloatRes_UnaryOp(N); break;
+ case ISD::FTRUNC:
+ case ISD::FCANONICALIZE: R = PromoteFloatRes_UnaryOp(N); break;
// Binary FP Operations
case ISD::FADD:
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 2c6b1ee7900f..135922d6f267 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -510,7 +510,7 @@ private:
SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N);
// Return true if we can skip softening the given operand or SDNode because
- // either it was soften before by SoftenFloatResult and references to the
+ // either it was soften before by SoftenFloatResult and references to the
// operand were replaced by ReplaceValueWith or it's value type is legal in HW
// registers and the operand can be left unchanged.
bool CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 67928d4bdbd5..3a98a7a904cb 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -131,7 +131,7 @@ class VectorLegalizer {
SDValue ExpandCTLZ(SDValue Op);
SDValue ExpandCTTZ_ZERO_UNDEF(SDValue Op);
SDValue ExpandStrictFPOp(SDValue Op);
-
+
/// Implements vector promotion.
///
/// This is essentially just bitcasting the operands to a different type and
@@ -315,7 +315,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
// equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
// is also legal, but if ISD::FSQRT requires expansion then so does
// ISD::STRICT_FSQRT.
- Action = TLI.getStrictFPOperationAction(Node->getOpcode(),
+ Action = TLI.getStrictFPOperationAction(Node->getOpcode(),
Node->getValueType(0));
break;
case ISD::ADD:
@@ -397,12 +397,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
break;
case ISD::FP_ROUND_INREG:
- Action = TLI.getOperationAction(Node->getOpcode(),
+ Action = TLI.getOperationAction(Node->getOpcode(),
cast<VTSDNode>(Node->getOperand(1))->getVT());
break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
- Action = TLI.getOperationAction(Node->getOpcode(),
+ Action = TLI.getOperationAction(Node->getOpcode(),
Node->getOperand(0).getValueType());
break;
case ISD::MSCATTER:
@@ -736,7 +736,7 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
case ISD::CTTZ_ZERO_UNDEF:
return ExpandCTTZ_ZERO_UNDEF(Op);
case ISD::STRICT_FADD:
- case ISD::STRICT_FSUB:
+ case ISD::STRICT_FSUB:
case ISD::STRICT_FMUL:
case ISD::STRICT_FDIV:
case ISD::STRICT_FSQRT:
@@ -1153,24 +1153,24 @@ SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
SmallVector<SDValue, 32> OpChains;
for (unsigned i = 0; i < NumElems; ++i) {
SmallVector<SDValue, 4> Opers;
- SDValue Idx = DAG.getConstant(i, dl,
+ SDValue Idx = DAG.getConstant(i, dl,
TLI.getVectorIdxTy(DAG.getDataLayout()));
// The Chain is the first operand.
Opers.push_back(Chain);
- // Now process the remaining operands.
+ // Now process the remaining operands.
for (unsigned j = 1; j < NumOpers; ++j) {
SDValue Oper = Op.getOperand(j);
EVT OperVT = Oper.getValueType();
if (OperVT.isVector())
- Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
EltVT, Oper, Idx);
Opers.push_back(Oper);
}
-
+
SDValue ScalarOp = DAG.getNode(Op->getOpcode(), dl, ValueVTs, Opers);
OpValues.push_back(ScalarOp.getValue(0));
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 1cd43ace48f3..f5d9dd234afd 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1068,14 +1068,14 @@ void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo,
OpsLo.push_back(Chain);
OpsHi.push_back(Chain);
- // Now process the remaining operands.
+ // Now process the remaining operands.
for (unsigned i = 1; i < NumOps; ++i) {
- SDValue Op = N->getOperand(i);
- SDValue OpLo = Op;
- SDValue OpHi = Op;
+ SDValue Op = N->getOperand(i);
+ SDValue OpLo = Op;
+ SDValue OpHi = Op;
EVT InVT = Op.getValueType();
- if (InVT.isVector()) {
+ if (InVT.isVector()) {
// If the input also splits, handle it directly for a
// compile time speedup. Otherwise split it by hand.
if (getTypeAction(InVT) == TargetLowering::TypeSplitVector)
@@ -1092,10 +1092,10 @@ void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo,
EVT HiValueVTs[] = {HiVT, MVT::Other};
Lo = DAG.getNode(N->getOpcode(), dl, LoValueVTs, OpsLo);
Hi = DAG.getNode(N->getOpcode(), dl, HiValueVTs, OpsHi);
-
+
// Build a factor node to remember that this Op is independent of the
// other one.
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
Lo.getValue(1), Hi.getValue(1));
// Legalize the chain result - switch anything that used the old chain to
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 1aa8df29af3b..5f6b6010cae2 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -157,31 +157,36 @@ static cl::opt<unsigned> SwitchPeelThreshold(
// store [4096 x i8] %data, [4096 x i8]* %buffer
static const unsigned MaxParallelChains = 64;
-// True if the Value passed requires ABI mangling as it is a parameter to a
-// function or a return value from a function which is not an intrinsic.
-static bool isABIRegCopy(const Value *V) {
- const bool IsRetInst = V && isa<ReturnInst>(V);
- const bool IsCallInst = V && isa<CallInst>(V);
- const bool IsInLineAsm =
- IsCallInst && static_cast<const CallInst *>(V)->isInlineAsm();
- const bool IsIndirectFunctionCall =
- IsCallInst && !IsInLineAsm &&
- !static_cast<const CallInst *>(V)->getCalledFunction();
- // It is possible that the call instruction is an inline asm statement or an
- // indirect function call in which case the return value of
- // getCalledFunction() would be nullptr.
- const bool IsInstrinsicCall =
- IsCallInst && !IsInLineAsm && !IsIndirectFunctionCall &&
- static_cast<const CallInst *>(V)->getCalledFunction()->getIntrinsicID() !=
- Intrinsic::not_intrinsic;
-
- return IsRetInst || (IsCallInst && (!IsInLineAsm && !IsInstrinsicCall));
+// Return the calling convention if the Value passed requires ABI mangling as it
+// is a parameter to a function or a return value from a function which is not
+// an intrinsic.
+static Optional<CallingConv::ID> getABIRegCopyCC(const Value *V) {
+ if (auto *R = dyn_cast<ReturnInst>(V))
+ return R->getParent()->getParent()->getCallingConv();
+
+ if (auto *CI = dyn_cast<CallInst>(V)) {
+ const bool IsInlineAsm = CI->isInlineAsm();
+ const bool IsIndirectFunctionCall =
+ !IsInlineAsm && !CI->getCalledFunction();
+
+ // It is possible that the call instruction is an inline asm statement or an
+ // indirect function call in which case the return value of
+ // getCalledFunction() would be nullptr.
+ const bool IsInstrinsicCall =
+ !IsInlineAsm && !IsIndirectFunctionCall &&
+ CI->getCalledFunction()->getIntrinsicID() != Intrinsic::not_intrinsic;
+
+ if (!IsInlineAsm && !IsInstrinsicCall)
+ return CI->getCallingConv();
+ }
+
+ return None;
}
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, const Value *V,
- bool IsABIRegCopy);
+ Optional<CallingConv::ID> CC);
/// getCopyFromParts - Create a value that contains the specified legal parts
/// combined into the value they represent. If the parts combine to a type
@@ -191,11 +196,11 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, const Value *V,
- Optional<ISD::NodeType> AssertOp = None,
- bool IsABIRegCopy = false) {
+ Optional<CallingConv::ID> CC = None,
+ Optional<ISD::NodeType> AssertOp = None) {
if (ValueVT.isVector())
- return getCopyFromPartsVector(DAG, DL, Parts, NumParts,
- PartVT, ValueVT, V, IsABIRegCopy);
+ return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT, V,
+ CC);
assert(NumParts > 0 && "No parts to assemble!");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -236,8 +241,8 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
// Assemble the trailing non-power-of-2 part.
unsigned OddParts = NumParts - RoundParts;
EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
- Hi = getCopyFromParts(DAG, DL,
- Parts + RoundParts, OddParts, PartVT, OddVT, V);
+ Hi = getCopyFromParts(DAG, DL, Parts + RoundParts, OddParts, PartVT,
+ OddVT, V, CC);
// Combine the round and odd parts.
Lo = Val;
@@ -267,7 +272,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
!PartVT.isVector() && "Unexpected split");
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
- Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V);
+ Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V, CC);
}
}
@@ -340,9 +345,11 @@ static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, const Value *V,
- bool IsABIRegCopy) {
+ Optional<CallingConv::ID> CallConv) {
assert(ValueVT.isVector() && "Not a vector value");
assert(NumParts > 0 && "No parts to assemble!");
+ const bool IsABIRegCopy = CallConv.hasValue();
+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Val = Parts[0];
@@ -355,8 +362,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
if (IsABIRegCopy) {
NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
- *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates,
- RegisterVT);
+ *DAG.getContext(), CallConv.getValue(), ValueVT, IntermediateVT,
+ NumIntermediates, RegisterVT);
} else {
NumRegs =
TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
@@ -470,7 +477,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
SDValue Val, SDValue *Parts, unsigned NumParts,
- MVT PartVT, const Value *V, bool IsABIRegCopy);
+ MVT PartVT, const Value *V,
+ Optional<CallingConv::ID> CallConv);
/// getCopyToParts - Create a series of nodes that contain the specified value
/// split into legal parts. If the parts contain more bits than Val, then, for
@@ -478,14 +486,14 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
SDValue *Parts, unsigned NumParts, MVT PartVT,
const Value *V,
- ISD::NodeType ExtendKind = ISD::ANY_EXTEND,
- bool IsABIRegCopy = false) {
+ Optional<CallingConv::ID> CallConv = None,
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
EVT ValueVT = Val.getValueType();
// Handle the vector case separately.
if (ValueVT.isVector())
return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V,
- IsABIRegCopy);
+ CallConv);
unsigned PartBits = PartVT.getSizeInBits();
unsigned OrigNumParts = NumParts;
@@ -564,7 +572,8 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
unsigned OddParts = NumParts - RoundParts;
SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
DAG.getIntPtrConstant(RoundBits, DL));
- getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V);
+ getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V,
+ CallConv);
if (DAG.getDataLayout().isBigEndian())
// The odd parts were reversed by getCopyToParts - unreverse them.
@@ -605,16 +614,16 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
std::reverse(Parts, Parts + OrigNumParts);
}
-
/// getCopyToPartsVector - Create a series of nodes that contain the specified
/// value split into legal parts.
static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
SDValue Val, SDValue *Parts, unsigned NumParts,
MVT PartVT, const Value *V,
- bool IsABIRegCopy) {
+ Optional<CallingConv::ID> CallConv) {
EVT ValueVT = Val.getValueType();
assert(ValueVT.isVector() && "Not a vector");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const bool IsABIRegCopy = CallConv.hasValue();
if (NumParts == 1) {
EVT PartEVT = PartVT;
@@ -679,8 +688,8 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
unsigned NumRegs;
if (IsABIRegCopy) {
NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
- *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates,
- RegisterVT);
+ *DAG.getContext(), CallConv.getValue(), ValueVT, IntermediateVT,
+ NumIntermediates, RegisterVT);
} else {
NumRegs =
TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
@@ -720,7 +729,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
// If the register was not expanded, promote or copy the value,
// as appropriate.
for (unsigned i = 0; i != NumParts; ++i)
- getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V);
+ getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V, CallConv);
} else if (NumParts > 0) {
// If the intermediate type was expanded, split each the value into
// legal parts.
@@ -729,29 +738,32 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
"Must expand into a divisible number of parts!");
unsigned Factor = NumParts / NumIntermediates;
for (unsigned i = 0; i != NumIntermediates; ++i)
- getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT, V);
+ getCopyToParts(DAG, DL, Ops[i], &Parts[i * Factor], Factor, PartVT, V,
+ CallConv);
}
}
RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
- EVT valuevt, bool IsABIMangledValue)
+ EVT valuevt, Optional<CallingConv::ID> CC)
: ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs),
- RegCount(1, regs.size()), IsABIMangled(IsABIMangledValue) {}
+ RegCount(1, regs.size()), CallConv(CC) {}
RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
const DataLayout &DL, unsigned Reg, Type *Ty,
- bool IsABIMangledValue) {
+ Optional<CallingConv::ID> CC) {
ComputeValueVTs(TLI, DL, Ty, ValueVTs);
- IsABIMangled = IsABIMangledValue;
+ CallConv = CC;
for (EVT ValueVT : ValueVTs) {
- unsigned NumRegs = IsABIMangledValue
- ? TLI.getNumRegistersForCallingConv(Context, ValueVT)
- : TLI.getNumRegisters(Context, ValueVT);
- MVT RegisterVT = IsABIMangledValue
- ? TLI.getRegisterTypeForCallingConv(Context, ValueVT)
- : TLI.getRegisterType(Context, ValueVT);
+ unsigned NumRegs =
+ isABIMangled()
+ ? TLI.getNumRegistersForCallingConv(Context, CC.getValue(), ValueVT)
+ : TLI.getNumRegisters(Context, ValueVT);
+ MVT RegisterVT =
+ isABIMangled()
+ ? TLI.getRegisterTypeForCallingConv(Context, CC.getValue(), ValueVT)
+ : TLI.getRegisterType(Context, ValueVT);
for (unsigned i = 0; i != NumRegs; ++i)
Regs.push_back(Reg + i);
RegVTs.push_back(RegisterVT);
@@ -777,9 +789,10 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
// Copy the legal parts from the registers.
EVT ValueVT = ValueVTs[Value];
unsigned NumRegs = RegCount[Value];
- MVT RegisterVT = IsABIMangled
- ? TLI.getRegisterTypeForCallingConv(*DAG.getContext(), RegVTs[Value])
- : RegVTs[Value];
+ MVT RegisterVT = isABIMangled() ? TLI.getRegisterTypeForCallingConv(
+ *DAG.getContext(),
+ CallConv.getValue(), RegVTs[Value])
+ : RegVTs[Value];
Parts.resize(NumRegs);
for (unsigned i = 0; i != NumRegs; ++i) {
@@ -837,8 +850,8 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
RegisterVT, P, DAG.getValueType(FromVT));
}
- Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
- NumRegs, RegisterVT, ValueVT, V);
+ Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), NumRegs,
+ RegisterVT, ValueVT, V, CallConv);
Part += NumRegs;
Parts.clear();
}
@@ -859,15 +872,16 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
unsigned NumParts = RegCount[Value];
- MVT RegisterVT = IsABIMangled
- ? TLI.getRegisterTypeForCallingConv(*DAG.getContext(), RegVTs[Value])
- : RegVTs[Value];
+ MVT RegisterVT = isABIMangled() ? TLI.getRegisterTypeForCallingConv(
+ *DAG.getContext(),
+ CallConv.getValue(), RegVTs[Value])
+ : RegVTs[Value];
if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
ExtendKind = ISD::ZERO_EXTEND;
- getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
- &Parts[Part], NumParts, RegisterVT, V, ExtendKind);
+ getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), &Parts[Part],
+ NumParts, RegisterVT, V, CallConv, ExtendKind);
Part += NumParts;
}
@@ -1164,7 +1178,7 @@ SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
unsigned InReg = It->second;
RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
- DAG.getDataLayout(), InReg, Ty, isABIRegCopy(V));
+ DAG.getDataLayout(), InReg, Ty, getABIRegCopyCC(V));
SDValue Chain = DAG.getEntryNode();
Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr,
V);
@@ -1355,7 +1369,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg,
- Inst->getType(), isABIRegCopy(V));
+ Inst->getType(), getABIRegCopyCC(V));
SDValue Chain = DAG.getEntryNode();
return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
}
@@ -1589,12 +1603,14 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind);
- unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, VT);
- MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, VT);
+ CallingConv::ID CC = F->getCallingConv();
+
+ unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, CC, VT);
+ MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, CC, VT);
SmallVector<SDValue, 4> Parts(NumParts);
getCopyToParts(DAG, getCurSDLoc(),
SDValue(RetOp.getNode(), RetOp.getResNo() + j),
- &Parts[0], NumParts, PartVT, &I, ExtendKind, true);
+ &Parts[0], NumParts, PartVT, &I, CC, ExtendKind);
// 'inreg' on function refers to return value
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
@@ -4929,7 +4945,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
if (VMI != FuncInfo.ValueMap.end()) {
const auto &TLI = DAG.getTargetLoweringInfo();
RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second,
- V->getType(), isABIRegCopy(V));
+ V->getType(), getABIRegCopyCC(V));
if (RFV.occupiesMultipleRegs()) {
unsigned Offset = 0;
for (auto RegAndSize : RFV.getRegsAndSizes()) {
@@ -4971,7 +4987,7 @@ SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
unsigned DbgSDNodeOrder) {
if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
// Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe
- // stack slot locations.
+ // stack slot locations.
//
// Consider "int x = 0; int *px = &x;". There are two kinds of interesting
// debug values here after optimization:
@@ -5288,7 +5304,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// The PHI node may be split up into several MI PHI nodes (in
// FunctionLoweringInfo::set).
RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
- V->getType(), false);
+ V->getType(), None);
if (RFV.occupiesMultipleRegs()) {
unsigned Offset = 0;
unsigned BitsToDescribe = 0;
@@ -7182,10 +7198,11 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
/// uses features that we can't model on machineinstrs, we have SDISel do the
/// allocation. This produces generally horrible, but correct, code.
///
-/// OpInfo describes the operand.
+/// OpInfo describes the operand
+/// RefOpInfo describes the matching operand if any, the operand otherwise
static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
- const SDLoc &DL,
- SDISelAsmOperandInfo &OpInfo) {
+ const SDLoc &DL, SDISelAsmOperandInfo &OpInfo,
+ SDISelAsmOperandInfo &RefOpInfo) {
LLVMContext &Context = *DAG.getContext();
MachineFunction &MF = DAG.getMachineFunction();
@@ -7195,8 +7212,8 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
// If this is a constraint for a single physreg, or a constraint for a
// register class, find it.
std::pair<unsigned, const TargetRegisterClass *> PhysReg =
- TLI.getRegForInlineAsmConstraint(&TRI, OpInfo.ConstraintCode,
- OpInfo.ConstraintVT);
+ TLI.getRegForInlineAsmConstraint(&TRI, RefOpInfo.ConstraintCode,
+ RefOpInfo.ConstraintVT);
unsigned NumRegs = 1;
if (OpInfo.ConstraintVT != MVT::Other) {
@@ -7238,6 +7255,11 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);
}
+ // No need to allocate a matching input constraint since the constraint it's
+ // matching to has already been allocated.
+ if (OpInfo.isMatchingInputConstraint())
+ return;
+
MVT RegVT;
EVT ValueVT = OpInfo.ConstraintVT;
@@ -7486,19 +7508,27 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// If this constraint is for a specific register, allocate it before
// anything else.
- if (OpInfo.ConstraintType == TargetLowering::C_Register)
- GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo);
+ SDISelAsmOperandInfo &RefOpInfo =
+ OpInfo.isMatchingInputConstraint()
+ ? ConstraintOperands[OpInfo.getMatchedOperand()]
+ : ConstraintOperands[i];
+ if (RefOpInfo.ConstraintType == TargetLowering::C_Register)
+ GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo, RefOpInfo);
}
// Third pass - Loop over all of the operands, assigning virtual or physregs
// to register class operands.
for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+ SDISelAsmOperandInfo &RefOpInfo =
+ OpInfo.isMatchingInputConstraint()
+ ? ConstraintOperands[OpInfo.getMatchedOperand()]
+ : ConstraintOperands[i];
// C_Register operands have already been allocated, Other/Memory don't need
// to be.
- if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
- GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo);
+ if (RefOpInfo.ConstraintType == TargetLowering::C_RegisterClass)
+ GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo, RefOpInfo);
}
// AsmNodeOperands - The operands for the ISD::INLINEASM node.
@@ -8289,7 +8319,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
}
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL);
+ GetReturnInfo(CLI.CallConv, CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL);
bool CanLowerReturn =
this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(),
@@ -8305,7 +8335,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
unsigned Align = DL.getPrefTypeAlignment(CLI.RetTy);
MachineFunction &MF = CLI.DAG.getMachineFunction();
DemoteStackIdx = MF.getFrameInfo().CreateStackObject(TySize, Align, false);
- Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy);
+ Type *StackSlotPtrType = PointerType::get(CLI.RetTy,
+ DL.getAllocaAddrSpace());
DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getFrameIndexTy(DL));
ArgListEntry Entry;
@@ -8331,10 +8362,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
} else {
for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
EVT VT = RetTys[I];
- MVT RegisterVT =
- getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT);
- unsigned NumRegs =
- getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT);
+ MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
+ CLI.CallConv, VT);
+ unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
+ CLI.CallConv, VT);
for (unsigned i = 0; i != NumRegs; ++i) {
ISD::InputArg MyFlags;
MyFlags.VT = RegisterVT;
@@ -8443,9 +8474,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Flags.setInConsecutiveRegs();
Flags.setOrigAlign(OriginalAlignment);
- MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT);
- unsigned NumParts =
- getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT);
+ MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
+ CLI.CallConv, VT);
+ unsigned NumParts = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
+ CLI.CallConv, VT);
SmallVector<SDValue, 4> Parts(NumParts);
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
@@ -8477,7 +8509,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
}
getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT,
- CLI.CS.getInstruction(), ExtendKind, true);
+ CLI.CS.getInstruction(), CLI.CallConv, ExtendKind);
for (unsigned j = 0; j != NumParts; ++j) {
// if it isn't first piece, alignment must be 1
@@ -8577,14 +8609,14 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
unsigned CurReg = 0;
for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
EVT VT = RetTys[I];
- MVT RegisterVT =
- getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT);
- unsigned NumRegs =
- getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT);
+ MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
+ CLI.CallConv, VT);
+ unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
+ CLI.CallConv, VT);
ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
NumRegs, RegisterVT, VT, nullptr,
- AssertOp, true));
+ CLI.CallConv, AssertOp));
CurReg += NumRegs;
}
@@ -8623,8 +8655,8 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
// If this is an InlineAsm we have to match the registers required, not the
// notional registers required by the type.
- RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
- V->getType(), isABIRegCopy(V));
+ RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, V->getType(),
+ getABIRegCopyCC(V));
SDValue Chain = DAG.getEntryNode();
ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) ==
@@ -8937,10 +8969,10 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
if (ArgCopyElisionCandidates.count(&Arg))
Flags.setCopyElisionCandidate();
- MVT RegisterVT =
- TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), VT);
- unsigned NumRegs =
- TLI->getNumRegistersForCallingConv(*CurDAG->getContext(), VT);
+ MVT RegisterVT = TLI->getRegisterTypeForCallingConv(
+ *CurDAG->getContext(), F.getCallingConv(), VT);
+ unsigned NumRegs = TLI->getNumRegistersForCallingConv(
+ *CurDAG->getContext(), F.getCallingConv(), VT);
for (unsigned i = 0; i != NumRegs; ++i) {
ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed,
ArgNo, PartBase+i*RegisterVT.getStoreSize());
@@ -8995,8 +9027,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
MVT VT = ValueVTs[0].getSimpleVT();
MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
Optional<ISD::NodeType> AssertOp = None;
- SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1,
- RegVT, VT, nullptr, AssertOp);
+ SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT,
+ nullptr, F.getCallingConv(), AssertOp);
MachineFunction& MF = SDB->DAG.getMachineFunction();
MachineRegisterInfo& RegInfo = MF.getRegInfo();
@@ -9046,10 +9078,10 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
for (unsigned Val = 0; Val != NumValues; ++Val) {
EVT VT = ValueVTs[Val];
- MVT PartVT =
- TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), VT);
- unsigned NumParts =
- TLI->getNumRegistersForCallingConv(*CurDAG->getContext(), VT);
+ MVT PartVT = TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(),
+ F.getCallingConv(), VT);
+ unsigned NumParts = TLI->getNumRegistersForCallingConv(
+ *CurDAG->getContext(), F.getCallingConv(), VT);
// Even an apparant 'unused' swifterror argument needs to be returned. So
// we do generate a copy for it that can be used on return from the
@@ -9062,8 +9094,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
AssertOp = ISD::AssertZext;
ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
- PartVT, VT, nullptr, AssertOp,
- true));
+ PartVT, VT, nullptr,
+ F.getCallingConv(), AssertOp));
}
i += NumParts;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index e421984b8af2..4b5dda982f1b 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -1015,14 +1015,18 @@ struct RegsForValue {
/// Records if this value needs to be treated in an ABI dependant manner,
/// different to normal type legalization.
- bool IsABIMangled = false;
+ Optional<CallingConv::ID> CallConv;
RegsForValue() = default;
RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt, EVT valuevt,
- bool IsABIMangledValue = false);
+ Optional<CallingConv::ID> CC = None);
RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
const DataLayout &DL, unsigned Reg, Type *Ty,
- bool IsABIMangledValue = false);
+ Optional<CallingConv::ID> CC);
+
+ bool isABIMangled() const {
+ return CallConv.hasValue();
+ }
/// Add the specified values to this one.
void append(const RegsForValue &RHS) {
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 5cf06e62b80c..54cbd6859f70 100644
--- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -419,10 +419,10 @@ static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly,
Builder.getFrameIndexTy()));
} else if (LiveInOnly) {
// If this value is live in (not live-on-return, or live-through), we can
- // treat it the same way patchpoint treats it's "live in" values. We'll
- // end up folding some of these into stack references, but they'll be
+ // treat it the same way patchpoint treats it's "live in" values. We'll
+ // end up folding some of these into stack references, but they'll be
// handled by the register allocator. Note that we do not have the notion
- // of a late use so these values might be placed in registers which are
+ // of a late use so these values might be placed in registers which are
// clobbered by the call. This is fine for live-in.
Ops.push_back(Incoming);
} else {
@@ -498,7 +498,7 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
auto isGCValue =[&](const Value *V) {
return is_contained(SI.Ptrs, V) || is_contained(SI.Bases, V);
};
-
+
// Before we actually start lowering (and allocating spill slots for values),
// reserve any stack slots which we judge to be profitable to reuse for a
// particular value. This is purely an optimization over the code below and
@@ -861,7 +861,8 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP,
// completely and make statepoint call to return a tuple.
unsigned Reg = FuncInfo.CreateRegs(RetTy);
RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
- DAG.getDataLayout(), Reg, RetTy, true);
+ DAG.getDataLayout(), Reg, RetTy,
+ ISP.getCallSite().getCallingConv());
SDValue Chain = DAG.getEntryNode();
RFV.getCopyToRegs(ReturnValue, DAG, getCurSDLoc(), Chain, nullptr);
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index fa867fcec366..e317268fa5f4 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3421,7 +3421,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
/// with the multiplicative inverse of the constant.
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d,
const SDLoc &dl, SelectionDAG &DAG,
- std::vector<SDNode *> &Created) {
+ SmallVectorImpl<SDNode *> &Created) {
assert(d != 0 && "Division by zero!");
// Shift the value upfront if it is even, so the LSB is one.
@@ -3450,8 +3450,8 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d,
}
SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
- SelectionDAG &DAG,
- std::vector<SDNode *> *Created) const {
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const {
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.isIntDivCheap(N->getValueType(0), Attr))
@@ -3465,9 +3465,7 @@ SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG, bool IsAfterLegalization,
- std::vector<SDNode *> *Created) const {
- assert(Created && "No vector to hold sdiv ops.");
-
+ SmallVectorImpl<SDNode *> &Created) const {
EVT VT = N->getValueType(0);
SDLoc dl(N);
@@ -3478,7 +3476,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,
// If the sdiv has an 'exact' bit we can use a simpler lowering.
if (N->getFlags().hasExact())
- return BuildExactSDIV(*this, N->getOperand(0), Divisor, dl, DAG, *Created);
+ return BuildExactSDIV(*this, N->getOperand(0), Divisor, dl, DAG, Created);
APInt::ms magics = Divisor.magic();
@@ -3496,15 +3494,18 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,
DAG.getConstant(magics.m, dl, VT)).getNode(), 1);
else
return SDValue(); // No mulhs or equvialent
+
+ Created.push_back(Q.getNode());
+
// If d > 0 and m < 0, add the numerator
if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {
Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0));
- Created->push_back(Q.getNode());
+ Created.push_back(Q.getNode());
}
// If d < 0 and m > 0, subtract the numerator.
if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {
Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0));
- Created->push_back(Q.getNode());
+ Created.push_back(Q.getNode());
}
auto &DL = DAG.getDataLayout();
// Shift right algebraic if shift value is nonzero
@@ -3512,14 +3513,14 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,
Q = DAG.getNode(
ISD::SRA, dl, VT, Q,
DAG.getConstant(magics.s, dl, getShiftAmountTy(Q.getValueType(), DL)));
- Created->push_back(Q.getNode());
+ Created.push_back(Q.getNode());
}
// Extract the sign bit and add it to the quotient
SDValue T =
DAG.getNode(ISD::SRL, dl, VT, Q,
DAG.getConstant(VT.getScalarSizeInBits() - 1, dl,
getShiftAmountTy(Q.getValueType(), DL)));
- Created->push_back(T.getNode());
+ Created.push_back(T.getNode());
return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
@@ -3529,9 +3530,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG, bool IsAfterLegalization,
- std::vector<SDNode *> *Created) const {
- assert(Created && "No vector to hold udiv ops.");
-
+ SmallVectorImpl<SDNode *> &Created) const {
EVT VT = N->getValueType(0);
SDLoc dl(N);
auto &DL = DAG.getDataLayout();
@@ -3554,7 +3553,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor,
Q = DAG.getNode(
ISD::SRL, dl, VT, Q,
DAG.getConstant(Shift, dl, getShiftAmountTy(Q.getValueType(), DL)));
- Created->push_back(Q.getNode());
+ Created.push_back(Q.getNode());
// Get magic number for the shifted divisor.
magics = Divisor.lshr(Shift).magicu(Shift);
@@ -3573,7 +3572,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor,
else
return SDValue(); // No mulhu or equivalent
- Created->push_back(Q.getNode());
+ Created.push_back(Q.getNode());
if (magics.a == 0) {
assert(magics.s < Divisor.getBitWidth() &&
@@ -3583,13 +3582,13 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor,
DAG.getConstant(magics.s, dl, getShiftAmountTy(Q.getValueType(), DL)));
} else {
SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
- Created->push_back(NPQ.getNode());
+ Created.push_back(NPQ.getNode());
NPQ = DAG.getNode(
ISD::SRL, dl, VT, NPQ,
DAG.getConstant(1, dl, getShiftAmountTy(NPQ.getValueType(), DL)));
- Created->push_back(NPQ.getNode());
+ Created.push_back(NPQ.getNode());
NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
- Created->push_back(NPQ.getNode());
+ Created.push_back(NPQ.getNode());
return DAG.getNode(
ISD::SRL, dl, VT, NPQ,
DAG.getConstant(magics.s - 1, dl,
@@ -3994,7 +3993,7 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
// Scalarize the load and let the individual components be handled.
SDValue Scalarized = scalarizeVectorLoad(LD, DAG);
if (Scalarized->getOpcode() == ISD::MERGE_VALUES)
- return std::make_pair(Scalarized.getOperand(0), Scalarized.getOperand(1));
+ return std::make_pair(Scalarized.getOperand(0), Scalarized.getOperand(1));
return std::make_pair(Scalarized.getValue(0), Scalarized.getValue(1));
}
diff --git a/lib/CodeGen/ShadowStackGCLowering.cpp b/lib/CodeGen/ShadowStackGCLowering.cpp
index 25d405bf63de..3e12b32b12d4 100644
--- a/lib/CodeGen/ShadowStackGCLowering.cpp
+++ b/lib/CodeGen/ShadowStackGCLowering.cpp
@@ -175,7 +175,7 @@ bool ShadowStackGCLowering::doInitialization(Module &M) {
}
if (!Active)
return false;
-
+
// struct FrameMap {
// int32_t NumRoots; // Number of roots in stack frame.
// int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots.
@@ -286,7 +286,7 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) {
if (!F.hasGC() ||
F.getGC() != std::string("shadow-stack"))
return false;
-
+
LLVMContext &Context = F.getContext();
// Find calls to llvm.gcroot.
diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h
index ed664e4f81a3..8fbe724045e6 100644
--- a/lib/CodeGen/SplitKit.h
+++ b/lib/CodeGen/SplitKit.h
@@ -233,7 +233,7 @@ public:
/// - Create a SplitEditor from a SplitAnalysis.
/// - Start a new live interval with openIntv.
/// - Mark the places where the new interval is entered using enterIntv*
-/// - Mark the ranges where the new interval is used with useIntv*
+/// - Mark the ranges where the new interval is used with useIntv*
/// - Mark the places where the interval is exited with exitIntv*.
/// - Finish the current interval with closeIntv and repeat from 2.
/// - Rewrite instructions with finish().
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 43f4bad595e3..7b1b76821daa 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -632,7 +632,7 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::BITREVERSE, VT, Expand);
-
+
// These library functions default to expand.
setOperationAction(ISD::FROUND, VT, Expand);
setOperationAction(ISD::FPOWI, VT, Expand);
@@ -924,7 +924,7 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
// STATEPOINT Deopt Spill - live-through, read only, indirect
// STATEPOINT Deopt Alloca - live-through, read only, direct
// (We're currently conservative and mark the deopt slots read/write in
- // practice.)
+ // practice.)
// STATEPOINT GC Spill - live-through, read/write, indirect
// STATEPOINT GC Alloca - live-through, read/write, direct
// The live-in vs live-through is handled already (the live through ones are
@@ -1337,7 +1337,8 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT
/// type of the given function. This does not require a DAG or a return value,
/// and is suitable for use before any DAGs for the function are constructed.
/// TODO: Move this out of TargetLowering.cpp.
-void llvm::GetReturnInfo(Type *ReturnType, AttributeList attr,
+void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType,
+ AttributeList attr,
SmallVectorImpl<ISD::OutputArg> &Outs,
const TargetLowering &TLI, const DataLayout &DL) {
SmallVector<EVT, 4> ValueVTs;
@@ -1365,9 +1366,9 @@ void llvm::GetReturnInfo(Type *ReturnType, AttributeList attr,
}
unsigned NumParts =
- TLI.getNumRegistersForCallingConv(ReturnType->getContext(), VT);
+ TLI.getNumRegistersForCallingConv(ReturnType->getContext(), CC, VT);
MVT PartVT =
- TLI.getRegisterTypeForCallingConv(ReturnType->getContext(), VT);
+ TLI.getRegisterTypeForCallingConv(ReturnType->getContext(), CC, VT);
// 'inreg' on function refers to return value
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
@@ -1410,7 +1411,7 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
*Fast = true;
return true;
}
-
+
// This is a misaligned access.
return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Fast);
}
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index b5dd2d4cca89..f6b91a2f0231 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -422,32 +422,34 @@ static StringRef getSectionPrefixForGlobal(SectionKind Kind) {
return ".data.rel.ro";
}
+static unsigned getEntrySizeForKind(SectionKind Kind) {
+ if (Kind.isMergeable1ByteCString())
+ return 1;
+ else if (Kind.isMergeable2ByteCString())
+ return 2;
+ else if (Kind.isMergeable4ByteCString())
+ return 4;
+ else if (Kind.isMergeableConst4())
+ return 4;
+ else if (Kind.isMergeableConst8())
+ return 8;
+ else if (Kind.isMergeableConst16())
+ return 16;
+ else if (Kind.isMergeableConst32())
+ return 32;
+ else {
+ // We shouldn't have mergeable C strings or mergeable constants that we
+ // didn't handle above.
+ assert(!Kind.isMergeableCString() && "unknown string width");
+ assert(!Kind.isMergeableConst() && "unknown data width");
+ return 0;
+ }
+}
+
static MCSectionELF *selectELFSectionForGlobal(
MCContext &Ctx, const GlobalObject *GO, SectionKind Kind, Mangler &Mang,
const TargetMachine &TM, bool EmitUniqueSection, unsigned Flags,
unsigned *NextUniqueID, const MCSymbolELF *AssociatedSymbol) {
- unsigned EntrySize = 0;
- if (Kind.isMergeableCString()) {
- if (Kind.isMergeable2ByteCString()) {
- EntrySize = 2;
- } else if (Kind.isMergeable4ByteCString()) {
- EntrySize = 4;
- } else {
- EntrySize = 1;
- assert(Kind.isMergeable1ByteCString() && "unknown string width");
- }
- } else if (Kind.isMergeableConst()) {
- if (Kind.isMergeableConst4()) {
- EntrySize = 4;
- } else if (Kind.isMergeableConst8()) {
- EntrySize = 8;
- } else if (Kind.isMergeableConst16()) {
- EntrySize = 16;
- } else {
- assert(Kind.isMergeableConst32() && "unknown data width");
- EntrySize = 32;
- }
- }
StringRef Group = "";
if (const Comdat *C = getELFComdat(GO)) {
@@ -455,7 +457,9 @@ static MCSectionELF *selectELFSectionForGlobal(
Group = C->getName();
}
- bool UniqueSectionNames = TM.getUniqueSectionNames();
+ // Get the section entry size based on the kind.
+ unsigned EntrySize = getEntrySizeForKind(Kind);
+
SmallString<128> Name;
if (Kind.isMergeableCString()) {
// We also need alignment here.
@@ -479,16 +483,17 @@ static MCSectionELF *selectELFSectionForGlobal(
Name += *OptionalPrefix;
}
- if (EmitUniqueSection && UniqueSectionNames) {
- Name.push_back('.');
- TM.getNameWithPrefix(Name, GO, Mang, true);
- }
unsigned UniqueID = MCContext::GenericSectionID;
- if (EmitUniqueSection && !UniqueSectionNames) {
- UniqueID = *NextUniqueID;
- (*NextUniqueID)++;
+ if (EmitUniqueSection) {
+ if (TM.getUniqueSectionNames()) {
+ Name.push_back('.');
+ TM.getNameWithPrefix(Name, GO, Mang, true /*MayAlwaysUsePrivate*/);
+ } else {
+ UniqueID = *NextUniqueID;
+ (*NextUniqueID)++;
+ }
}
- // Use 0 as the unique ID for execute-only text
+ // Use 0 as the unique ID for execute-only text.
if (Kind.isExecuteOnly())
UniqueID = 0;
return Ctx.getELFSection(Name, getELFSectionType(Name, Kind), Flags,
diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp
index 3fca2f4ee4fe..2db03288f2ac 100644
--- a/lib/CodeGen/TargetPassConfig.cpp
+++ b/lib/CodeGen/TargetPassConfig.cpp
@@ -166,7 +166,7 @@ static cl::opt<CFLAAType> UseCFLAA(
"Enable unification-based CFL-AA"),
clEnumValN(CFLAAType::Andersen, "anders",
"Enable inclusion-based CFL-AA"),
- clEnumValN(CFLAAType::Both, "both",
+ clEnumValN(CFLAAType::Both, "both",
"Enable both variants of CFL-AA")));
/// Option names for limiting the codegen pipeline.
diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp
index e629c13f133f..65d0a7a774fe 100644
--- a/lib/CodeGen/WinEHPrepare.cpp
+++ b/lib/CodeGen/WinEHPrepare.cpp
@@ -54,7 +54,7 @@ static cl::opt<bool> DemoteCatchSwitchPHIOnlyOpt(
cl::desc("Demote catchswitch BBs only (for wasm EH)"), cl::init(false));
namespace {
-
+
class WinEHPrepare : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid.
diff --git a/lib/DebugInfo/CodeView/RecordName.cpp b/lib/DebugInfo/CodeView/RecordName.cpp
index e50c43a1d481..d868ae237a44 100644
--- a/lib/DebugInfo/CodeView/RecordName.cpp
+++ b/lib/DebugInfo/CodeView/RecordName.cpp
@@ -307,6 +307,9 @@ static int getSymbolNameOffset(CVSymbol Sym) {
// See BPRelativeSym
case SymbolKind::S_BPREL32:
return 8;
+ // See UsingNamespaceSym
+ case SymbolKind::S_UNAMESPACE:
+ return 0;
default:
return -1;
}
diff --git a/lib/DebugInfo/CodeView/SymbolDumper.cpp b/lib/DebugInfo/CodeView/SymbolDumper.cpp
index af249adc9774..f8bf961f22a1 100644
--- a/lib/DebugInfo/CodeView/SymbolDumper.cpp
+++ b/lib/DebugInfo/CodeView/SymbolDumper.cpp
@@ -611,6 +611,12 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, UDTSym &UDT) {
return Error::success();
}
+Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
+ UsingNamespaceSym &UN) {
+ W.printString("Namespace", UN.Name);
+ return Error::success();
+}
+
Error CVSymbolDumperImpl::visitUnknownSymbol(CVSymbol &CVR) {
W.printNumber("Length", CVR.length());
return Error::success();
diff --git a/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp b/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
index 923837a45d9f..e77c8e8f02f5 100644
--- a/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
+++ b/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
@@ -463,3 +463,11 @@ Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR, UDTSym &UDT) {
return Error::success();
}
+
+Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR,
+ UsingNamespaceSym &UN) {
+
+ error(IO.mapStringZ(UN.Name));
+
+ return Error::success();
+}
diff --git a/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp b/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
index 95082d4a8e03..839ab6f0a705 100644
--- a/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
+++ b/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
@@ -428,7 +428,7 @@ static bool discoverTypeIndices(ArrayRef<uint8_t> Content, SymbolKind Kind,
case SymbolKind::S_DEFRANGE_SUBFIELD:
break;
- // No type refernces.
+ // No type references.
case SymbolKind::S_LABEL32:
case SymbolKind::S_OBJNAME:
case SymbolKind::S_COMPILE:
@@ -439,6 +439,7 @@ static bool discoverTypeIndices(ArrayRef<uint8_t> Content, SymbolKind Kind,
case SymbolKind::S_FRAMEPROC:
case SymbolKind::S_THUNK32:
case SymbolKind::S_FRAMECOOKIE:
+ case SymbolKind::S_UNAMESPACE:
break;
// Scope ending symbols.
case SymbolKind::S_END:
diff --git a/lib/DebugInfo/CodeView/TypeStreamMerger.cpp b/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
index e4f39dd988e1..2e29c9d7dfa0 100644
--- a/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
+++ b/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
@@ -226,7 +226,10 @@ bool TypeStreamMerger::remapIndexFallback(TypeIndex &Idx,
if (IsSecondPass && MapPos >= Map.size()) {
// FIXME: Print a more useful error. We can give the current record and the
// index that we think its pointing to.
- LastError = joinErrors(std::move(*LastError), errorCorruptRecord());
+ if (LastError)
+ LastError = joinErrors(std::move(*LastError), errorCorruptRecord());
+ else
+ LastError = errorCorruptRecord();
}
++NumBadIndices;
diff --git a/lib/DebugInfo/DWARF/CMakeLists.txt b/lib/DebugInfo/DWARF/CMakeLists.txt
index d88a02721700..b4770e561f71 100644
--- a/lib/DebugInfo/DWARF/CMakeLists.txt
+++ b/lib/DebugInfo/DWARF/CMakeLists.txt
@@ -6,6 +6,7 @@ add_llvm_library(LLVMDebugInfoDWARF
DWARFContext.cpp
DWARFDataExtractor.cpp
DWARFDebugAbbrev.cpp
+ DWARFDebugAddr.cpp
DWARFDebugArangeSet.cpp
DWARFDebugAranges.cpp
DWARFDebugFrame.cpp
diff --git a/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp b/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
index adada672af00..f49ab40fad9a 100644
--- a/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
+++ b/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
@@ -38,7 +38,7 @@ DWARFAbbreviationDeclaration::DWARFAbbreviationDeclaration() {
}
bool
-DWARFAbbreviationDeclaration::extract(DataExtractor Data,
+DWARFAbbreviationDeclaration::extract(DataExtractor Data,
uint32_t* OffsetPtr) {
clear();
const uint32_t Offset = *OffsetPtr;
diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp
index da13c5047f77..9d2554ff9e2e 100644
--- a/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -17,6 +17,7 @@
#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugAddr.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugAranges.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
@@ -249,6 +250,36 @@ static void dumpStringOffsetsSection(
}
}
+// Dump the .debug_addr section.
+static void dumpAddrSection(raw_ostream &OS, DWARFDataExtractor &AddrData,
+ DIDumpOptions DumpOpts, uint16_t Version,
+ uint8_t AddrSize) {
+ // TODO: Make this more general: add callback types to Error.h, create
+ // implementation and make all DWARF classes use them.
+ static auto WarnCallback = [](Error Warn) {
+ handleAllErrors(std::move(Warn), [](ErrorInfoBase &Info) {
+ WithColor::warning() << Info.message() << '\n';
+ });
+ };
+ uint32_t Offset = 0;
+ while (AddrData.isValidOffset(Offset)) {
+ DWARFDebugAddrTable AddrTable;
+ uint32_t TableOffset = Offset;
+ if (Error Err = AddrTable.extract(AddrData, &Offset, Version,
+ AddrSize, WarnCallback)) {
+ WithColor::error() << toString(std::move(Err)) << '\n';
+ // Keep going after an error, if we can, assuming that the length field
+ // could be read. If it couldn't, stop reading the section.
+ if (!AddrTable.hasValidLength())
+ break;
+ uint64_t Length = AddrTable.getLength();
+ Offset = TableOffset + Length;
+ } else {
+ AddrTable.dump(OS, DumpOpts);
+ }
+ }
+}
+
// Dump the .debug_rnglists or .debug_rnglists.dwo section (DWARF v5).
static void dumpRnglistsSection(raw_ostream &OS,
DWARFDataExtractor &rnglistData,
@@ -455,18 +486,16 @@ void DWARFContext::dump(
}
}
+ if (shouldDump(Explicit, ".debug_addr", DIDT_ID_DebugAddr,
+ DObj->getAddrSection().Data)) {
+ DWARFDataExtractor AddrData(*DObj, DObj->getAddrSection(),
+ isLittleEndian(), 0);
+ dumpAddrSection(OS, AddrData, DumpOpts, getMaxVersion(), getCUAddrSize());
+ }
+
if (shouldDump(Explicit, ".debug_ranges", DIDT_ID_DebugRanges,
DObj->getRangeSection().Data)) {
- // In fact, different compile units may have different address byte
- // sizes, but for simplicity we just use the address byte size of the
- // last compile unit (there is no easy and fast way to associate address
- // range list and the compile unit it describes).
- // FIXME: savedAddressByteSize seems sketchy.
- uint8_t savedAddressByteSize = 0;
- for (const auto &CU : compile_units()) {
- savedAddressByteSize = CU->getAddressByteSize();
- break;
- }
+ uint8_t savedAddressByteSize = getCUAddrSize();
DWARFDataExtractor rangesData(*DObj, DObj->getRangeSection(),
isLittleEndian(), savedAddressByteSize);
uint32_t offset = 0;
@@ -474,7 +503,7 @@ void DWARFContext::dump(
while (rangesData.isValidOffset(offset)) {
if (Error E = rangeList.extract(rangesData, &offset)) {
WithColor::error() << toString(std::move(E)) << '\n';
- break;
+ break;
}
rangeList.dump(OS);
}
@@ -1584,3 +1613,17 @@ Error DWARFContext::loadRegisterInfo(const object::ObjectFile &Obj) {
RegInfo.reset(TheTarget->createMCRegInfo(TT.str()));
return Error::success();
}
+
+uint8_t DWARFContext::getCUAddrSize() {
+ // In theory, different compile units may have different address byte
+ // sizes, but for simplicity we just use the address byte size of the
+ // last compile unit. In practice the address size field is repeated across
+ // various DWARF headers (at least in version 5) to make it easier to dump
+ // them independently, not to enable varying the address size.
+ uint8_t Addr = 0;
+ for (const auto &CU : compile_units()) {
+ Addr = CU->getAddressByteSize();
+ break;
+ }
+ return Addr;
+}
diff --git a/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp b/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp
new file mode 100644
index 000000000000..7085ca067ba6
--- /dev/null
+++ b/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp
@@ -0,0 +1,198 @@
+//===- DWARFDebugAddr.cpp -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/DWARF/DWARFDebugAddr.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
+
+using namespace llvm;
+
+void DWARFDebugAddrTable::clear() {
+ HeaderData = {};
+ Addrs.clear();
+ invalidateLength();
+}
+
+Error DWARFDebugAddrTable::extract(DWARFDataExtractor Data,
+ uint32_t *OffsetPtr,
+ uint16_t Version,
+ uint8_t AddrSize,
+ std::function<void(Error)> WarnCallback) {
+ clear();
+ HeaderOffset = *OffsetPtr;
+ // Read and verify the length field.
+ if (!Data.isValidOffsetForDataOfSize(*OffsetPtr, sizeof(uint32_t)))
+ return createStringError(errc::invalid_argument,
+ "section is not large enough to contain a "
+ ".debug_addr table length at offset 0x%"
+ PRIx32, *OffsetPtr);
+ uint16_t UnitVersion;
+ if (Version == 0) {
+ WarnCallback(createStringError(errc::invalid_argument,
+ "DWARF version is not defined in CU,"
+ " assuming version 5"));
+ UnitVersion = 5;
+ } else {
+ UnitVersion = Version;
+ }
+ // TODO: Add support for DWARF64.
+ Format = dwarf::DwarfFormat::DWARF32;
+ if (UnitVersion >= 5) {
+ HeaderData.Length = Data.getU32(OffsetPtr);
+ if (HeaderData.Length == 0xffffffffu) {
+ invalidateLength();
+ return createStringError(errc::not_supported,
+ "DWARF64 is not supported in .debug_addr at offset 0x%" PRIx32,
+ HeaderOffset);
+ }
+ if (HeaderData.Length + sizeof(uint32_t) < sizeof(Header)) {
+ uint32_t TmpLength = getLength();
+ invalidateLength();
+ return createStringError(errc::invalid_argument,
+ ".debug_addr table at offset 0x%" PRIx32
+ " has too small length (0x%" PRIx32
+ ") to contain a complete header",
+ HeaderOffset, TmpLength);
+ }
+ uint32_t End = HeaderOffset + getLength();
+ if (!Data.isValidOffsetForDataOfSize(HeaderOffset, End - HeaderOffset)) {
+ uint32_t TmpLength = getLength();
+ invalidateLength();
+ return createStringError(errc::invalid_argument,
+ "section is not large enough to contain a .debug_addr table "
+ "of length 0x%" PRIx32 " at offset 0x%" PRIx32,
+ TmpLength, HeaderOffset);
+ }
+
+ HeaderData.Version = Data.getU16(OffsetPtr);
+ HeaderData.AddrSize = Data.getU8(OffsetPtr);
+ HeaderData.SegSize = Data.getU8(OffsetPtr);
+ DataSize = getDataSize();
+ } else {
+ HeaderData.Version = UnitVersion;
+ HeaderData.AddrSize = AddrSize;
+ // TODO: Support for non-zero SegSize.
+ HeaderData.SegSize = 0;
+ DataSize = Data.size();
+ }
+
+ // Perform basic validation of the remaining header fields.
+
+ // We support DWARF version 5 for now as well as pre-DWARF5
+ // implementations of .debug_addr table, which doesn't contain a header
+ // and consists only of a series of addresses.
+ if (HeaderData.Version > 5) {
+ return createStringError(errc::not_supported, "version %" PRIu16
+ " of .debug_addr section at offset 0x%" PRIx32 " is not supported",
+ HeaderData.Version, HeaderOffset);
+ }
+ // FIXME: For now we just treat version mismatch as an error,
+ // however the correct way to associate a .debug_addr table
+ // with a .debug_info table is to look at the DW_AT_addr_base
+ // attribute in the info table.
+ if (HeaderData.Version != UnitVersion)
+ return createStringError(errc::invalid_argument,
+ ".debug_addr table at offset 0x%" PRIx32
+ " has version %" PRIu16
+ " which is different from the version suggested"
+ " by the DWARF unit header: %" PRIu16,
+ HeaderOffset, HeaderData.Version, UnitVersion);
+ if (HeaderData.AddrSize != 4 && HeaderData.AddrSize != 8)
+ return createStringError(errc::not_supported,
+ ".debug_addr table at offset 0x%" PRIx32
+ " has unsupported address size %" PRIu8,
+ HeaderOffset, HeaderData.AddrSize);
+ if (HeaderData.AddrSize != AddrSize && AddrSize != 0)
+ return createStringError(errc::invalid_argument,
+ ".debug_addr table at offset 0x%" PRIx32
+ " has address size %" PRIu8
+ " which is different from CU address size %" PRIu8,
+ HeaderOffset, HeaderData.AddrSize, AddrSize);
+
+ // TODO: add support for non-zero segment selector size.
+ if (HeaderData.SegSize != 0)
+ return createStringError(errc::not_supported,
+ ".debug_addr table at offset 0x%" PRIx32
+ " has unsupported segment selector size %" PRIu8,
+ HeaderOffset, HeaderData.SegSize);
+ if (DataSize % HeaderData.AddrSize != 0) {
+ invalidateLength();
+ return createStringError(errc::invalid_argument,
+ ".debug_addr table at offset 0x%" PRIx32
+ " contains data of size %" PRIu32
+ " which is not a multiple of addr size %" PRIu8,
+ HeaderOffset, DataSize, HeaderData.AddrSize);
+ }
+ Data.setAddressSize(HeaderData.AddrSize);
+ uint32_t AddrCount = DataSize / HeaderData.AddrSize;
+ for (uint32_t I = 0; I < AddrCount; ++I)
+ if (HeaderData.AddrSize == 4)
+ Addrs.push_back(Data.getU32(OffsetPtr));
+ else
+ Addrs.push_back(Data.getU64(OffsetPtr));
+ return Error::success();
+}
+
+void DWARFDebugAddrTable::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const {
+ if (DumpOpts.Verbose)
+ OS << format("0x%8.8" PRIx32 ": ", HeaderOffset);
+ OS << format("Addr Section: length = 0x%8.8" PRIx32
+ ", version = 0x%4.4" PRIx16 ", "
+ "addr_size = 0x%2.2" PRIx8 ", seg_size = 0x%2.2" PRIx8 "\n",
+ HeaderData.Length, HeaderData.Version, HeaderData.AddrSize,
+ HeaderData.SegSize);
+
+ static const char *Fmt32 = "0x%8.8" PRIx32;
+ static const char *Fmt64 = "0x%16.16" PRIx64;
+ std::string AddrFmt = "\n";
+ std::string AddrFmtVerbose = " => ";
+ if (HeaderData.AddrSize == 4) {
+ AddrFmt.append(Fmt32);
+ AddrFmtVerbose.append(Fmt32);
+ }
+ else {
+ AddrFmt.append(Fmt64);
+ AddrFmtVerbose.append(Fmt64);
+ }
+
+ if (Addrs.size() > 0) {
+ OS << "Addrs: [";
+ for (uint64_t Addr : Addrs) {
+ OS << format(AddrFmt.c_str(), Addr);
+ if (DumpOpts.Verbose)
+ OS << format(AddrFmtVerbose.c_str(),
+ Addr + HeaderOffset + sizeof(HeaderData));
+ }
+ OS << "\n]\n";
+ }
+}
+
+Expected<uint64_t> DWARFDebugAddrTable::getAddrEntry(uint32_t Index) const {
+ if (Index < Addrs.size())
+ return Addrs[Index];
+ return createStringError(errc::invalid_argument,
+ "Index %" PRIu32 " is out of range of the "
+ ".debug_addr table at offset 0x%" PRIx32,
+ Index, HeaderOffset);
+}
+
+uint32_t DWARFDebugAddrTable::getLength() const {
+ if (HeaderData.Length == 0)
+ return 0;
+ // TODO: DWARF64 support.
+ return HeaderData.Length + sizeof(uint32_t);
+}
+
+uint32_t DWARFDebugAddrTable::getDataSize() const {
+ if (DataSize != 0)
+ return DataSize;
+ if (getLength() == 0)
+ return 0;
+ return getLength() - getHeaderSize();
+}
diff --git a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
index 2a89faff9647..08be524ab464 100644
--- a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
+++ b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
@@ -155,7 +155,7 @@ std::error_code SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol,
// of the function's code, not the descriptor.
uint64_t OpdOffset = SymbolAddress - OpdAddress;
uint32_t OpdOffset32 = OpdOffset;
- if (OpdOffset == OpdOffset32 &&
+ if (OpdOffset == OpdOffset32 &&
OpdExtractor->isValidOffsetForAddress(OpdOffset32))
SymbolAddress = OpdExtractor->getAddress(&OpdOffset32);
}
diff --git a/lib/Demangle/ItaniumDemangle.cpp b/lib/Demangle/ItaniumDemangle.cpp
index 5bfd2e6ff87e..72e4b56c05e3 100644
--- a/lib/Demangle/ItaniumDemangle.cpp
+++ b/lib/Demangle/ItaniumDemangle.cpp
@@ -450,6 +450,8 @@ class ReferenceType : public Node {
const Node *Pointee;
ReferenceKind RK;
+ mutable bool Printing = false;
+
// Dig through any refs to refs, collapsing the ReferenceTypes as we go. The
// rule here is rvalue ref to rvalue ref collapses to a rvalue ref, and any
// other combination collapses to a lvalue ref.
@@ -476,6 +478,9 @@ public:
}
void printLeft(OutputStream &s) const override {
+ if (Printing)
+ return;
+ SwapAndRestore<bool> SavePrinting(Printing, true);
std::pair<ReferenceKind, const Node *> Collapsed = collapse(s);
Collapsed.second->printLeft(s);
if (Collapsed.second->hasArray(s))
@@ -486,6 +491,9 @@ public:
s += (Collapsed.first == ReferenceKind::LValue ? "&" : "&&");
}
void printRight(OutputStream &s) const override {
+ if (Printing)
+ return;
+ SwapAndRestore<bool> SavePrinting(Printing, true);
std::pair<ReferenceKind, const Node *> Collapsed = collapse(s);
if (Collapsed.second->hasArray(s) || Collapsed.second->hasFunction(s))
s += ")";
diff --git a/lib/Demangle/MicrosoftDemangle.cpp b/lib/Demangle/MicrosoftDemangle.cpp
index 596359b7d990..3eac87d61011 100644
--- a/lib/Demangle/MicrosoftDemangle.cpp
+++ b/lib/Demangle/MicrosoftDemangle.cpp
@@ -29,15 +29,27 @@
// the demangler is 3x faster with this allocator compared to one with
// STL containers.
namespace {
+ constexpr size_t AllocUnit = 4096;
+
class ArenaAllocator {
struct AllocatorNode {
uint8_t *Buf = nullptr;
size_t Used = 0;
+ size_t Capacity = 0;
AllocatorNode *Next = nullptr;
};
+ void addNode(size_t Capacity) {
+ AllocatorNode *NewHead = new AllocatorNode;
+ NewHead->Buf = new uint8_t[Capacity];
+ NewHead->Next = Head;
+ NewHead->Capacity = Capacity;
+ Head = NewHead;
+ NewHead->Used = 0;
+ }
+
public:
- ArenaAllocator() : Head(new AllocatorNode) { Head->Buf = new uint8_t[Unit]; }
+ ArenaAllocator() { addNode(AllocUnit); }
~ArenaAllocator() {
while (Head) {
@@ -49,10 +61,25 @@ public:
}
}
+ char *allocUnalignedBuffer(size_t Length) {
+ uint8_t *Buf = Head->Buf + Head->Used;
+
+ Head->Used += Length;
+ if (Head->Used > Head->Capacity) {
+ // It's possible we need a buffer which is larger than our default unit
+ // size, so we need to be careful to add a node with capacity that is at
+ // least as large as what we need.
+ addNode(std::max(AllocUnit, Length));
+ Head->Used = Length;
+ Buf = Head->Buf;
+ }
+
+ return reinterpret_cast<char *>(Buf);
+ }
+
template <typename T, typename... Args> T *alloc(Args &&... ConstructorArgs) {
size_t Size = sizeof(T);
- assert(Size < Unit);
assert(Head && Head->Buf);
size_t P = (size_t)Head->Buf + Head->Used;
@@ -62,20 +89,15 @@ public:
size_t Adjustment = AlignedP - P;
Head->Used += Size + Adjustment;
- if (Head->Used < Unit)
+ if (Head->Used < Head->Capacity)
return new (PP) T(std::forward<Args>(ConstructorArgs)...);
- AllocatorNode *NewHead = new AllocatorNode;
- NewHead->Buf = new uint8_t[ArenaAllocator::Unit];
- NewHead->Next = Head;
- Head = NewHead;
- NewHead->Used = Size;
- return new (NewHead->Buf) T(std::forward<Args>(ConstructorArgs)...);
+ addNode(AllocUnit);
+ Head->Used = Size;
+ return new (Head->Buf) T(std::forward<Args>(ConstructorArgs)...);
}
private:
- static constexpr size_t Unit = 4096;
-
AllocatorNode *Head = nullptr;
};
} // namespace
@@ -117,7 +139,7 @@ enum class StorageClass : uint8_t {
enum class QualifierMangleMode { Drop, Mangle, Result };
-enum class PointerAffinity { Pointer, Reference };
+enum class PointerAffinity { Pointer, Reference, RValueReference };
// Calling conventions
enum class CallingConv : uint8_t {
@@ -141,7 +163,6 @@ enum class PrimTy : uint8_t {
None,
Function,
Ptr,
- Ref,
MemberPtr,
Array,
@@ -155,6 +176,8 @@ enum class PrimTy : uint8_t {
Char,
Schar,
Uchar,
+ Char16,
+ Char32,
Short,
Ushort,
Int,
@@ -167,6 +190,7 @@ enum class PrimTy : uint8_t {
Float,
Double,
Ldouble,
+ Nullptr
};
// Function classes
@@ -183,15 +207,30 @@ enum FuncClass : uint8_t {
namespace {
struct Type;
+struct Name;
-// Represents a list of parameters (template params or function arguments.
-// It's represented as a linked list.
-struct ParamList {
+struct FunctionParams {
bool IsVariadic = false;
Type *Current = nullptr;
- ParamList *Next = nullptr;
+ FunctionParams *Next = nullptr;
+};
+
+struct TemplateParams {
+ bool IsTemplateTemplate = false;
+ bool IsAliasTemplate = false;
+
+ // Type can be null if this is a template template parameter. In that case
+ // only Name will be valid.
+ Type *ParamType = nullptr;
+
+ // Name can be valid if this is a template template parameter (see above) or
+ // this is a function declaration (e.g. foo<&SomeFunc>). In the latter case
+ // Name contains the name of the function and Type contains the signature.
+ Name *ParamName = nullptr;
+
+ TemplateParams *Next = nullptr;
};
// The type class. Mangled symbols are first parsed and converted to
@@ -232,7 +271,7 @@ struct Name {
StringView Operator;
// Template parameters. Null if not a template.
- ParamList TemplateParams;
+ TemplateParams *TParams = nullptr;
// Nested BackReferences (e.g. "A::B::C") are represented as a linked list.
Name *Next = nullptr;
@@ -243,6 +282,8 @@ struct PointerType : public Type {
void outputPre(OutputStream &OS) override;
void outputPost(OutputStream &OS) override;
+ PointerAffinity Affinity;
+
// Represents a type X in "a pointer to X", "a reference to X",
// "an array of X", or "a function returning X".
Type *Pointee = nullptr;
@@ -276,7 +317,7 @@ struct FunctionType : public Type {
CallingConv CallConvention;
FuncClass FunctionClass;
- ParamList Params;
+ FunctionParams Params;
};
struct UdtType : public Type {
@@ -302,9 +343,13 @@ struct ArrayType : public Type {
static bool isMemberPointer(StringView MangledName) {
switch (MangledName.popFront()) {
+ case '$':
+ // This is probably an rvalue reference (e.g. $$Q), and you cannot have an
+ // rvalue reference to a member.
+ return false;
case 'A':
// 'A' indicates a reference, and you cannot have a reference to a member
- // function or member variable.
+ // function or member.
return false;
case 'P':
case 'Q':
@@ -386,14 +431,58 @@ static void outputCallingConvention(OutputStream &OS, CallingConv CC) {
}
}
+static bool startsWithLocalScopePattern(StringView S) {
+ if (!S.consumeFront('?'))
+ return false;
+ if (S.size() < 2)
+ return false;
+
+ size_t End = S.find('?');
+ if (End == StringView::npos)
+ return false;
+ StringView Candidate = S.substr(0, End);
+ if (Candidate.empty())
+ return false;
+
+ // \?[0-9]\?
+ // ?@? is the discriminator 0.
+ if (Candidate.size() == 1)
+ return Candidate[0] == '@' || (Candidate[0] >= '0' && Candidate[0] <= '9');
+
+ // If it's not 0-9, then it's an encoded number terminated with an @
+ if (Candidate.back() != '@')
+ return false;
+ Candidate = Candidate.dropBack();
+
+ // An encoded number starts with B-P and all subsequent digits are in A-P.
+ // Note that the reason the first digit cannot be A is two fold. First, it
+ // would create an ambiguity with ?A which delimits the beginning of an
+ // anonymous namespace. Second, A represents 0, and you don't start a multi
+ // digit number with a leading 0. Presumably the anonymous namespace
+ // ambiguity is also why single digit encoded numbers use 0-9 rather than A-J.
+ if (Candidate[0] < 'B' || Candidate[0] > 'P')
+ return false;
+ Candidate = Candidate.dropFront();
+ while (!Candidate.empty()) {
+ if (Candidate[0] < 'A' || Candidate[0] > 'P')
+ return false;
+ Candidate = Candidate.dropFront();
+ }
+
+ return true;
+}
+
+static void outputName(OutputStream &OS, const Name *TheName);
+
// Write a function or template parameter list.
-static void outputParameterList(OutputStream &OS, const ParamList &Params) {
+static void outputParameterList(OutputStream &OS,
+ const FunctionParams &Params) {
if (!Params.Current) {
OS << "void";
return;
}
- const ParamList *Head = &Params;
+ const FunctionParams *Head = &Params;
while (Head) {
Type::outputPre(OS, *Head->Current);
Type::outputPost(OS, *Head->Current);
@@ -405,12 +494,39 @@ static void outputParameterList(OutputStream &OS, const ParamList &Params) {
}
}
-static void outputTemplateParams(OutputStream &OS, const Name &TheName) {
- if (!TheName.TemplateParams.Current)
+static void outputParameterList(OutputStream &OS,
+ const TemplateParams &Params) {
+ if (!Params.ParamType && !Params.ParamName) {
+ OS << "<>";
return;
+ }
OS << "<";
- outputParameterList(OS, TheName.TemplateParams);
+ const TemplateParams *Head = &Params;
+ while (Head) {
+ // Type can be null if this is a template template parameter,
+ // and Name can be null if this is a simple type.
+
+ if (Head->ParamType && Head->ParamName) {
+ // Function pointer.
+ OS << "&";
+ Type::outputPre(OS, *Head->ParamType);
+ outputName(OS, Head->ParamName);
+ Type::outputPost(OS, *Head->ParamType);
+ } else if (Head->ParamType) {
+ // simple type.
+ Type::outputPre(OS, *Head->ParamType);
+ Type::outputPost(OS, *Head->ParamType);
+ } else {
+ // Template alias.
+ outputName(OS, Head->ParamName);
+ }
+
+ Head = Head->Next;
+
+ if (Head)
+ OS << ", ";
+ }
OS << ">";
}
@@ -420,29 +536,32 @@ static void outputName(OutputStream &OS, const Name *TheName) {
outputSpaceIfNecessary(OS);
+ const Name *Previous = nullptr;
// Print out namespaces or outer class BackReferences.
for (; TheName->Next; TheName = TheName->Next) {
+ Previous = TheName;
OS << TheName->Str;
- outputTemplateParams(OS, *TheName);
+ if (TheName->TParams)
+ outputParameterList(OS, *TheName->TParams);
OS << "::";
}
// Print out a regular name.
if (TheName->Operator.empty()) {
OS << TheName->Str;
- outputTemplateParams(OS, *TheName);
+ if (TheName->TParams)
+ outputParameterList(OS, *TheName->TParams);
return;
}
// Print out ctor or dtor.
+ if (TheName->Operator == "dtor")
+ OS << "~";
+
if (TheName->Operator == "ctor" || TheName->Operator == "dtor") {
- OS << TheName->Str;
- outputTemplateParams(OS, *TheName);
- OS << "::";
- if (TheName->Operator == "dtor")
- OS << "~";
- OS << TheName->Str;
- outputTemplateParams(OS, *TheName);
+ OS << Previous->Str;
+ if (Previous->TParams)
+ outputParameterList(OS, *Previous->TParams);
return;
}
@@ -514,6 +633,12 @@ void Type::outputPre(OutputStream &OS) {
case PrimTy::Uchar:
OS << "unsigned char";
break;
+ case PrimTy::Char16:
+ OS << "char16_t";
+ break;
+ case PrimTy::Char32:
+ OS << "char32_t";
+ break;
case PrimTy::Short:
OS << "short";
break;
@@ -550,6 +675,9 @@ void Type::outputPre(OutputStream &OS) {
case PrimTy::Ldouble:
OS << "long double";
break;
+ case PrimTy::Nullptr:
+ OS << "std::nullptr_t";
+ break;
default:
assert(false && "Invalid primitive type!");
}
@@ -584,8 +712,10 @@ static void outputPointerIndicator(OutputStream &OS, PointerAffinity Affinity,
if (Affinity == PointerAffinity::Pointer)
OS << "*";
- else
+ else if (Affinity == PointerAffinity::Reference)
OS << "&";
+ else
+ OS << "&&";
}
void PointerType::outputPre(OutputStream &OS) {
@@ -596,9 +726,6 @@ void PointerType::outputPre(OutputStream &OS) {
if (Quals & Q_Unaligned)
OS << "__unaligned ";
- PointerAffinity Affinity = (Prim == PrimTy::Ptr) ? PointerAffinity::Pointer
- : PointerAffinity::Reference;
-
outputPointerIndicator(OS, Affinity, nullptr, Pointee);
// FIXME: We should output this, but it requires updating lots of tests.
@@ -668,6 +795,15 @@ void FunctionType::outputPost(OutputStream &OS) {
OS << " const";
if (Quals & Q_Volatile)
OS << " volatile";
+ if (Quals & Q_Restrict)
+ OS << " __restrict";
+ if (Quals & Q_Unaligned)
+ OS << " __unaligned";
+
+ if (RefKind == ReferenceKind::LValueRef)
+ OS << " &";
+ else if (RefKind == ReferenceKind::RValueRef)
+ OS << " &&";
if (ReturnType)
Type::outputPost(OS, *ReturnType);
@@ -716,6 +852,11 @@ void ArrayType::outputPost(OutputStream &OS) {
Type::outputPost(OS, *ElementType);
}
+struct Symbol {
+ Name *SymbolName = nullptr;
+ Type *SymbolType = nullptr;
+};
+
} // namespace
namespace {
@@ -725,63 +866,68 @@ namespace {
// It also has a set of functions to cnovert Type instances to strings.
class Demangler {
public:
- Demangler(OutputStream &OS, StringView s) : OS(OS), MangledName(s) {}
+ Demangler() = default;
// You are supposed to call parse() first and then check if error is true. If
// it is false, call output() to write the formatted name to the given stream.
- void parse();
- void output();
+ Symbol *parse(StringView &MangledName);
+ void output(const Symbol *S, OutputStream &OS);
// True if an error occurred.
bool Error = false;
private:
- Type *demangleVariableEncoding();
- Type *demangleFunctionEncoding();
+ Type *demangleVariableEncoding(StringView &MangledName);
+ Type *demangleFunctionEncoding(StringView &MangledName);
- Qualifiers demanglePointerExtQualifiers();
+ Qualifiers demanglePointerExtQualifiers(StringView &MangledName);
// Parser functions. This is a recursive-descent parser.
- Type *demangleType(QualifierMangleMode QMM);
- Type *demangleBasicType();
- UdtType *demangleClassType();
- PointerType *demanglePointerType();
- MemberPointerType *demangleMemberPointerType();
- FunctionType *demangleFunctionType(bool HasThisQuals, bool IsFunctionPointer);
+ Type *demangleType(StringView &MangledName, QualifierMangleMode QMM);
+ Type *demangleBasicType(StringView &MangledName);
+ UdtType *demangleClassType(StringView &MangledName);
+ PointerType *demanglePointerType(StringView &MangledName);
+ MemberPointerType *demangleMemberPointerType(StringView &MangledName);
+ FunctionType *demangleFunctionType(StringView &MangledName, bool HasThisQuals,
+ bool IsFunctionPointer);
- ArrayType *demangleArrayType();
+ ArrayType *demangleArrayType(StringView &MangledName);
- ParamList demangleTemplateParameterList();
- ParamList demangleFunctionParameterList();
+ TemplateParams *demangleTemplateParameterList(StringView &MangledName);
+ FunctionParams demangleFunctionParameterList(StringView &MangledName);
- int demangleNumber();
- void demangleNamePiece(Name &Node, bool IsHead);
+ int demangleNumber(StringView &MangledName);
- StringView demangleString(bool memorize);
void memorizeString(StringView s);
- Name *demangleName();
- void demangleOperator(Name *);
- StringView demangleOperatorName();
- FuncClass demangleFunctionClass();
- CallingConv demangleCallingConvention();
- StorageClass demangleVariableStorageClass();
- ReferenceKind demangleReferenceKind();
- void demangleThrowSpecification();
- std::pair<Qualifiers, bool> demangleQualifiers();
+ /// Allocate a copy of \p Borrowed into memory that we own.
+ StringView copyString(StringView Borrowed);
- // The result is written to this stream.
- OutputStream OS;
+ Name *demangleFullyQualifiedTypeName(StringView &MangledName);
+ Name *demangleFullyQualifiedSymbolName(StringView &MangledName);
- // Mangled symbol. demangle* functions shorten this string
- // as they parse it.
- StringView MangledName;
+ Name *demangleUnqualifiedTypeName(StringView &MangledName);
+ Name *demangleUnqualifiedSymbolName(StringView &MangledName);
- // A parsed mangled symbol.
- Type *SymbolType = nullptr;
+ Name *demangleNameScopeChain(StringView &MangledName, Name *UnqualifiedName);
+ Name *demangleNameScopePiece(StringView &MangledName);
- // The main symbol name. (e.g. "ns::foo" in "int ns::foo()".)
- Name *SymbolName = nullptr;
+ Name *demangleBackRefName(StringView &MangledName);
+ Name *demangleClassTemplateName(StringView &MangledName);
+ Name *demangleOperatorName(StringView &MangledName);
+ Name *demangleSimpleName(StringView &MangledName, bool Memorize);
+ Name *demangleAnonymousNamespaceName(StringView &MangledName);
+ Name *demangleLocallyScopedNamePiece(StringView &MangledName);
+
+ StringView demangleSimpleString(StringView &MangledName, bool Memorize);
+
+ FuncClass demangleFunctionClass(StringView &MangledName);
+ CallingConv demangleCallingConvention(StringView &MangledName);
+ StorageClass demangleVariableStorageClass(StringView &MangledName);
+ ReferenceKind demangleReferenceKind(StringView &MangledName);
+ void demangleThrowSpecification(StringView &MangledName);
+
+ std::pair<Qualifiers, bool> demangleQualifiers(StringView &MangledName);
// Memory allocator.
ArenaAllocator Arena;
@@ -809,28 +955,36 @@ private:
};
} // namespace
+StringView Demangler::copyString(StringView Borrowed) {
+ char *Stable = Arena.allocUnalignedBuffer(Borrowed.size() + 1);
+ std::strcpy(Stable, Borrowed.begin());
+
+ return {Stable, Borrowed.size()};
+}
+
// Parser entry point.
-void Demangler::parse() {
+Symbol *Demangler::parse(StringView &MangledName) {
+ Symbol *S = Arena.alloc<Symbol>();
+
// MSVC-style mangled symbols must start with '?'.
if (!MangledName.consumeFront("?")) {
- SymbolName = Arena.alloc<Name>();
- SymbolName->Str = MangledName;
- SymbolType = Arena.alloc<Type>();
- SymbolType->Prim = PrimTy::Unknown;
+ S->SymbolName = Arena.alloc<Name>();
+ S->SymbolName->Str = MangledName;
+ S->SymbolType = Arena.alloc<Type>();
+ S->SymbolType->Prim = PrimTy::Unknown;
+ return S;
}
// What follows is a main symbol name. This may include
// namespaces or class BackReferences.
- SymbolName = demangleName();
+ S->SymbolName = demangleFullyQualifiedSymbolName(MangledName);
// Read a variable.
- if (startsWithDigit(MangledName)) {
- SymbolType = demangleVariableEncoding();
- return;
- }
+ S->SymbolType = startsWithDigit(MangledName)
+ ? demangleVariableEncoding(MangledName)
+ : demangleFunctionEncoding(MangledName);
- // Read a function.
- SymbolType = demangleFunctionEncoding();
+ return S;
}
// <type-encoding> ::= <storage-class> <variable-type>
@@ -840,10 +994,10 @@ void Demangler::parse() {
// ::= 3 # global
// ::= 4 # static local
-Type *Demangler::demangleVariableEncoding() {
- StorageClass SC = demangleVariableStorageClass();
+Type *Demangler::demangleVariableEncoding(StringView &MangledName) {
+ StorageClass SC = demangleVariableStorageClass(MangledName);
- Type *Ty = demangleType(QualifierMangleMode::Drop);
+ Type *Ty = demangleType(MangledName, QualifierMangleMode::Drop);
Ty->Storage = SC;
@@ -851,17 +1005,17 @@ Type *Demangler::demangleVariableEncoding() {
// ::= <type> <pointee-cvr-qualifiers> # pointers, references
switch (Ty->Prim) {
case PrimTy::Ptr:
- case PrimTy::Ref:
case PrimTy::MemberPtr: {
Qualifiers ExtraChildQuals = Q_None;
- Ty->Quals = Qualifiers(Ty->Quals | demanglePointerExtQualifiers());
+ Ty->Quals =
+ Qualifiers(Ty->Quals | demanglePointerExtQualifiers(MangledName));
bool IsMember = false;
- std::tie(ExtraChildQuals, IsMember) = demangleQualifiers();
+ std::tie(ExtraChildQuals, IsMember) = demangleQualifiers(MangledName);
if (Ty->Prim == PrimTy::MemberPtr) {
assert(IsMember);
- Name *BackRefName = demangleName();
+ Name *BackRefName = demangleFullyQualifiedTypeName(MangledName);
(void)BackRefName;
MemberPointerType *MPTy = static_cast<MemberPointerType *>(Ty);
MPTy->Pointee->Quals = Qualifiers(MPTy->Pointee->Quals | ExtraChildQuals);
@@ -873,7 +1027,7 @@ Type *Demangler::demangleVariableEncoding() {
break;
}
default:
- Ty->Quals = demangleQualifiers().first;
+ Ty->Quals = demangleQualifiers(MangledName).first;
break;
}
@@ -891,7 +1045,7 @@ Type *Demangler::demangleVariableEncoding() {
// ::= <hex digit>+ @ # when Numbrer == 0 or >= 10
//
// <hex-digit> ::= [A-P] # A = 0, B = 1, ...
-int Demangler::demangleNumber() {
+int Demangler::demangleNumber(StringView &MangledName) {
bool neg = MangledName.consumeFront("?");
if (startsWithDigit(MangledName)) {
@@ -918,23 +1072,6 @@ int Demangler::demangleNumber() {
return 0;
}
-// Read until the next '@'.
-StringView Demangler::demangleString(bool Memorize) {
- for (size_t i = 0; i < MangledName.size(); ++i) {
- if (MangledName[i] != '@')
- continue;
- StringView ret = MangledName.substr(0, i);
- MangledName = MangledName.dropFront(i + 1);
-
- if (Memorize)
- memorizeString(ret);
- return ret;
- }
-
- Error = true;
- return "";
-}
-
// First 10 strings can be referenced by special BackReferences ?0, ?1, ..., ?9.
// Memorize it.
void Demangler::memorizeString(StringView S) {
@@ -946,179 +1083,322 @@ void Demangler::memorizeString(StringView S) {
BackReferences[BackRefCount++] = S;
}
-void Demangler::demangleNamePiece(Name &Node, bool IsHead) {
- if (startsWithDigit(MangledName)) {
- size_t I = MangledName[0] - '0';
- if (I >= BackRefCount) {
- Error = true;
- return;
- }
- MangledName = MangledName.dropFront();
- Node.Str = BackReferences[I];
- } else if (MangledName.consumeFront("?$")) {
- // Class template.
- Node.Str = demangleString(false);
- Node.TemplateParams = demangleTemplateParameterList();
- } else if (!IsHead && MangledName.consumeFront("?A")) {
- // Anonymous namespace starts with ?A. So does overloaded operator[],
- // but the distinguishing factor is that namespace themselves are not
- // mangled, only the variables and functions inside of them are. So
- // an anonymous namespace will never occur as the first item in the
- // name.
- Node.Str = "`anonymous namespace'";
- if (!MangledName.consumeFront('@')) {
- Error = true;
- return;
- }
- } else if (MangledName.consumeFront("?")) {
- // Overloaded operator.
- demangleOperator(&Node);
- } else {
- // Non-template functions or classes.
- Node.Str = demangleString(true);
+Name *Demangler::demangleBackRefName(StringView &MangledName) {
+ assert(startsWithDigit(MangledName));
+
+ size_t I = MangledName[0] - '0';
+ if (I >= BackRefCount) {
+ Error = true;
+ return nullptr;
}
-}
-// Parses a name in the form of A@B@C@@ which represents C::B::A.
-Name *Demangler::demangleName() {
- Name *Head = nullptr;
+ MangledName = MangledName.dropFront();
+ Name *Node = Arena.alloc<Name>();
+ Node->Str = BackReferences[I];
+ return Node;
+}
- while (!MangledName.consumeFront("@")) {
- Name *Elem = Arena.alloc<Name>();
+Name *Demangler::demangleClassTemplateName(StringView &MangledName) {
+ assert(MangledName.startsWith("?$"));
+ MangledName.consumeFront("?$");
- assert(!Error);
- demangleNamePiece(*Elem, Head == nullptr);
- if (Error)
- return nullptr;
+ Name *Node = demangleSimpleName(MangledName, false);
+ Node->TParams = demangleTemplateParameterList(MangledName);
- Elem->Next = Head;
- Head = Elem;
- if (MangledName.empty()) {
- Error = true;
- return nullptr;
- }
- }
+ // Render this class template name into a string buffer so that we can
+ // memorize it for the purpose of back-referencing.
+ OutputStream OS = OutputStream::create(nullptr, nullptr, 1024);
+ outputName(OS, Node);
+ OS << '\0';
+ char *Name = OS.getBuffer();
- return Head;
-}
+ StringView Owned = copyString(Name);
+ memorizeString(Owned);
+ std::free(Name);
-void Demangler::demangleOperator(Name *OpName) {
- OpName->Operator = demangleOperatorName();
- if (!Error && !MangledName.empty() && MangledName.front() != '@')
- demangleNamePiece(*OpName, false);
+ return Node;
}
-StringView Demangler::demangleOperatorName() {
- SwapAndRestore<StringView> RestoreOnError(MangledName, MangledName);
- RestoreOnError.shouldRestore(false);
-
- switch (MangledName.popFront()) {
- case '0':
- return "ctor";
- case '1':
- return "dtor";
- case '2':
- return " new";
- case '3':
- return " delete";
- case '4':
- return "=";
- case '5':
- return ">>";
- case '6':
- return "<<";
- case '7':
- return "!";
- case '8':
- return "==";
- case '9':
- return "!=";
- case 'A':
- return "[]";
- case 'C':
- return "->";
- case 'D':
- return "*";
- case 'E':
- return "++";
- case 'F':
- return "--";
- case 'G':
- return "-";
- case 'H':
- return "+";
- case 'I':
- return "&";
- case 'J':
- return "->*";
- case 'K':
- return "/";
- case 'L':
- return "%";
- case 'M':
- return "<";
- case 'N':
- return "<=";
- case 'O':
- return ">";
- case 'P':
- return ">=";
- case 'Q':
- return ",";
- case 'R':
- return "()";
- case 'S':
- return "~";
- case 'T':
- return "^";
- case 'U':
- return "|";
- case 'V':
- return "&&";
- case 'W':
- return "||";
- case 'X':
- return "*=";
- case 'Y':
- return "+=";
- case 'Z':
- return "-=";
- case '_': {
- if (MangledName.empty())
- break;
+Name *Demangler::demangleOperatorName(StringView &MangledName) {
+ assert(MangledName.startsWith('?'));
+ MangledName.consumeFront('?');
+ auto NameString = [this, &MangledName]() -> StringView {
switch (MangledName.popFront()) {
case '0':
- return "/=";
+ return "ctor";
case '1':
- return "%=";
+ return "dtor";
case '2':
- return ">>=";
+ return " new";
case '3':
- return "<<=";
+ return " delete";
case '4':
- return "&=";
+ return "=";
case '5':
- return "|=";
+ return ">>";
case '6':
- return "^=";
+ return "<<";
+ case '7':
+ return "!";
+ case '8':
+ return "==";
+ case '9':
+ return "!=";
+ case 'A':
+ return "[]";
+ case 'C':
+ return "->";
+ case 'D':
+ return "*";
+ case 'E':
+ return "++";
+ case 'F':
+ return "--";
+ case 'G':
+ return "-";
+ case 'H':
+ return "+";
+ case 'I':
+ return "&";
+ case 'J':
+ return "->*";
+ case 'K':
+ return "/";
+ case 'L':
+ return "%";
+ case 'M':
+ return "<";
+ case 'N':
+ return "<=";
+ case 'O':
+ return ">";
+ case 'P':
+ return ">=";
+ case 'Q':
+ return ",";
+ case 'R':
+ return "()";
+ case 'S':
+ return "~";
+ case 'T':
+ return "^";
case 'U':
- return " new[]";
+ return "|";
case 'V':
- return " delete[]";
- case '_':
- if (MangledName.consumeFront("L"))
- return " co_await";
+ return "&&";
+ case 'W':
+ return "||";
+ case 'X':
+ return "*=";
+ case 'Y':
+ return "+=";
+ case 'Z':
+ return "-=";
+ case '_': {
+ if (MangledName.empty())
+ break;
+
+ switch (MangledName.popFront()) {
+ case '0':
+ return "/=";
+ case '1':
+ return "%=";
+ case '2':
+ return ">>=";
+ case '3':
+ return "<<=";
+ case '4':
+ return "&=";
+ case '5':
+ return "|=";
+ case '6':
+ return "^=";
+ case 'U':
+ return " new[]";
+ case 'V':
+ return " delete[]";
+ case '_':
+ if (MangledName.consumeFront("L"))
+ return " co_await";
+ if (MangledName.consumeFront("K")) {
+ size_t EndPos = MangledName.find('@');
+ if (EndPos == StringView::npos)
+ break;
+ StringView OpName = demangleSimpleString(MangledName, false);
+ size_t FullSize = OpName.size() + 3; // <space>""OpName
+ char *Buffer = Arena.allocUnalignedBuffer(FullSize);
+ Buffer[0] = ' ';
+ Buffer[1] = '"';
+ Buffer[2] = '"';
+ std::memcpy(Buffer + 3, OpName.begin(), OpName.size());
+ return {Buffer, FullSize};
+ }
+ }
}
- }
+ }
+ Error = true;
+ return "";
+ };
+
+ Name *Node = Arena.alloc<Name>();
+ Node->Operator = NameString();
+ return Node;
+}
+
+Name *Demangler::demangleSimpleName(StringView &MangledName, bool Memorize) {
+ StringView S = demangleSimpleString(MangledName, Memorize);
+ if (Error)
+ return nullptr;
+
+ Name *Node = Arena.alloc<Name>();
+ Node->Str = S;
+ return Node;
+}
+
+StringView Demangler::demangleSimpleString(StringView &MangledName,
+ bool Memorize) {
+ StringView S;
+ for (size_t i = 0; i < MangledName.size(); ++i) {
+ if (MangledName[i] != '@')
+ continue;
+ S = MangledName.substr(0, i);
+ MangledName = MangledName.dropFront(i + 1);
+
+ if (Memorize)
+ memorizeString(S);
+ return S;
}
Error = true;
- RestoreOnError.shouldRestore(true);
- return "";
+ return {};
+}
+
+Name *Demangler::demangleAnonymousNamespaceName(StringView &MangledName) {
+ assert(MangledName.startsWith("?A"));
+ MangledName.consumeFront("?A");
+
+ Name *Node = Arena.alloc<Name>();
+ Node->Str = "`anonymous namespace'";
+ if (MangledName.consumeFront('@'))
+ return Node;
+
+ Error = true;
+ return nullptr;
+}
+
+Name *Demangler::demangleLocallyScopedNamePiece(StringView &MangledName) {
+ assert(startsWithLocalScopePattern(MangledName));
+
+ Name *Node = Arena.alloc<Name>();
+ MangledName.consumeFront('?');
+ int ScopeIdentifier = demangleNumber(MangledName);
+
+ // One ? to terminate the number
+ MangledName.consumeFront('?');
+
+ assert(!Error);
+ Symbol *Scope = parse(MangledName);
+ if (Error)
+ return nullptr;
+
+ // Render the parent symbol's name into a buffer.
+ OutputStream OS = OutputStream::create(nullptr, nullptr, 1024);
+ OS << '`';
+ output(Scope, OS);
+ OS << '\'';
+ OS << "::`" << ScopeIdentifier << "'";
+ OS << '\0';
+ char *Result = OS.getBuffer();
+ Node->Str = copyString(Result);
+ std::free(Result);
+ return Node;
+}
+
+// Parses a type name in the form of A@B@C@@ which represents C::B::A.
+Name *Demangler::demangleFullyQualifiedTypeName(StringView &MangledName) {
+ Name *TypeName = demangleUnqualifiedTypeName(MangledName);
+ assert(TypeName);
+
+ Name *QualName = demangleNameScopeChain(MangledName, TypeName);
+ assert(QualName);
+ return QualName;
+}
+
+// Parses a symbol name in the form of A@B@C@@ which represents C::B::A.
+// Symbol names have slightly different rules regarding what can appear
+// so we separate out the implementations for flexibility.
+Name *Demangler::demangleFullyQualifiedSymbolName(StringView &MangledName) {
+ Name *SymbolName = demangleUnqualifiedSymbolName(MangledName);
+ assert(SymbolName);
+
+ Name *QualName = demangleNameScopeChain(MangledName, SymbolName);
+ assert(QualName);
+ return QualName;
+}
+
+Name *Demangler::demangleUnqualifiedTypeName(StringView &MangledName) {
+ // An inner-most name can be a back-reference, because a fully-qualified name
+ // (e.g. Scope + Inner) can contain other fully qualified names inside of
+ // them (for example template parameters), and these nested parameters can
+ // refer to previously mangled types.
+ if (startsWithDigit(MangledName))
+ return demangleBackRefName(MangledName);
+
+ if (MangledName.startsWith("?$"))
+ return demangleClassTemplateName(MangledName);
+
+ return demangleSimpleName(MangledName, true);
+}
+
+Name *Demangler::demangleUnqualifiedSymbolName(StringView &MangledName) {
+ if (startsWithDigit(MangledName))
+ return demangleBackRefName(MangledName);
+ if (MangledName.startsWith("?$"))
+ return demangleClassTemplateName(MangledName);
+ if (MangledName.startsWith('?'))
+ return demangleOperatorName(MangledName);
+ return demangleSimpleName(MangledName, true);
+}
+
+Name *Demangler::demangleNameScopePiece(StringView &MangledName) {
+ if (startsWithDigit(MangledName))
+ return demangleBackRefName(MangledName);
+
+ if (MangledName.startsWith("?$"))
+ return demangleClassTemplateName(MangledName);
+
+ if (MangledName.startsWith("?A"))
+ return demangleAnonymousNamespaceName(MangledName);
+
+ if (startsWithLocalScopePattern(MangledName))
+ return demangleLocallyScopedNamePiece(MangledName);
+
+ return demangleSimpleName(MangledName, true);
+}
+
+Name *Demangler::demangleNameScopeChain(StringView &MangledName,
+ Name *UnqualifiedName) {
+ Name *Head = UnqualifiedName;
+
+ while (!MangledName.consumeFront("@")) {
+ if (MangledName.empty()) {
+ Error = true;
+ return nullptr;
+ }
+
+ assert(!Error);
+ Name *Elem = demangleNameScopePiece(MangledName);
+ if (Error)
+ return nullptr;
+
+ Elem->Next = Head;
+ Head = Elem;
+ }
+ return Head;
}
-FuncClass Demangler::demangleFunctionClass() {
+FuncClass Demangler::demangleFunctionClass(StringView &MangledName) {
SwapAndRestore<StringView> RestoreOnError(MangledName, MangledName);
RestoreOnError.shouldRestore(false);
@@ -1170,7 +1450,7 @@ FuncClass Demangler::demangleFunctionClass() {
return Public;
}
-CallingConv Demangler::demangleCallingConvention() {
+CallingConv Demangler::demangleCallingConvention(StringView &MangledName) {
switch (MangledName.popFront()) {
case 'A':
case 'B':
@@ -1200,7 +1480,7 @@ CallingConv Demangler::demangleCallingConvention() {
return CallingConv::None;
}
-StorageClass Demangler::demangleVariableStorageClass() {
+StorageClass Demangler::demangleVariableStorageClass(StringView &MangledName) {
assert(std::isdigit(MangledName.front()));
switch (MangledName.popFront()) {
@@ -1219,7 +1499,8 @@ StorageClass Demangler::demangleVariableStorageClass() {
return StorageClass::None;
}
-std::pair<Qualifiers, bool> Demangler::demangleQualifiers() {
+std::pair<Qualifiers, bool>
+Demangler::demangleQualifiers(StringView &MangledName) {
switch (MangledName.popFront()) {
// Member qualifiers
@@ -1245,54 +1526,88 @@ std::pair<Qualifiers, bool> Demangler::demangleQualifiers() {
return std::make_pair(Q_None, false);
}
+static bool isTagType(StringView S) {
+ switch (S.front()) {
+ case 'T': // union
+ case 'U': // struct
+ case 'V': // class
+ case 'W': // enum
+ return true;
+ }
+ return false;
+}
+
+static bool isPointerType(StringView S) {
+ if (S.startsWith("$$Q")) // foo &&
+ return true;
+
+ switch (S.front()) {
+ case 'A': // foo &
+ case 'P': // foo *
+ case 'Q': // foo *const
+ case 'R': // foo *volatile
+ case 'S': // foo *const volatile
+ return true;
+ }
+ return false;
+}
+
+static bool isArrayType(StringView S) { return S[0] == 'Y'; }
+
+static bool isFunctionType(StringView S) {
+ return S.startsWith("$$A8@@") || S.startsWith("$$A6");
+}
+
// <variable-type> ::= <type> <cvr-qualifiers>
// ::= <type> <pointee-cvr-qualifiers> # pointers, references
-Type *Demangler::demangleType(QualifierMangleMode QMM) {
+Type *Demangler::demangleType(StringView &MangledName,
+ QualifierMangleMode QMM) {
Qualifiers Quals = Q_None;
bool IsMember = false;
bool IsMemberKnown = false;
if (QMM == QualifierMangleMode::Mangle) {
- std::tie(Quals, IsMember) = demangleQualifiers();
+ std::tie(Quals, IsMember) = demangleQualifiers(MangledName);
IsMemberKnown = true;
} else if (QMM == QualifierMangleMode::Result) {
if (MangledName.consumeFront('?')) {
- std::tie(Quals, IsMember) = demangleQualifiers();
+ std::tie(Quals, IsMember) = demangleQualifiers(MangledName);
IsMemberKnown = true;
}
}
Type *Ty = nullptr;
- switch (MangledName.front()) {
- case 'T': // union
- case 'U': // struct
- case 'V': // class
- case 'W': // enum
- Ty = demangleClassType();
- break;
- case 'A': // foo &
- case 'P': // foo *
- case 'Q': // foo *const
- case 'R': // foo *volatile
- case 'S': // foo *const volatile
+ if (isTagType(MangledName))
+ Ty = demangleClassType(MangledName);
+ else if (isPointerType(MangledName)) {
if (!IsMemberKnown)
IsMember = isMemberPointer(MangledName);
+
if (IsMember)
- Ty = demangleMemberPointerType();
+ Ty = demangleMemberPointerType(MangledName);
else
- Ty = demanglePointerType();
- break;
- case 'Y':
- Ty = demangleArrayType();
- break;
- default:
- Ty = demangleBasicType();
- break;
+ Ty = demanglePointerType(MangledName);
+ } else if (isArrayType(MangledName))
+ Ty = demangleArrayType(MangledName);
+ else if (isFunctionType(MangledName)) {
+ if (MangledName.consumeFront("$$A8@@"))
+ Ty = demangleFunctionType(MangledName, true, false);
+ else {
+ assert(MangledName.startsWith("$$A6"));
+ MangledName.consumeFront("$$A6");
+ Ty = demangleFunctionType(MangledName, false, false);
+ }
+ } else {
+ Ty = demangleBasicType(MangledName);
+ assert(Ty && !Error);
+ if (!Ty || Error)
+ return Ty;
}
+
Ty->Quals = Qualifiers(Ty->Quals | Quals);
return Ty;
}
-ReferenceKind Demangler::demangleReferenceKind() {
+ReferenceKind Demangler::demangleReferenceKind(StringView &MangledName) {
if (MangledName.consumeFront('G'))
return ReferenceKind::LValueRef;
else if (MangledName.consumeFront('H'))
@@ -1300,55 +1615,61 @@ ReferenceKind Demangler::demangleReferenceKind() {
return ReferenceKind::None;
}
-void Demangler::demangleThrowSpecification() {
+void Demangler::demangleThrowSpecification(StringView &MangledName) {
if (MangledName.consumeFront('Z'))
return;
Error = true;
}
-FunctionType *Demangler::demangleFunctionType(bool HasThisQuals,
+FunctionType *Demangler::demangleFunctionType(StringView &MangledName,
+ bool HasThisQuals,
bool IsFunctionPointer) {
FunctionType *FTy = Arena.alloc<FunctionType>();
FTy->Prim = PrimTy::Function;
FTy->IsFunctionPointer = IsFunctionPointer;
if (HasThisQuals) {
- FTy->Quals = demanglePointerExtQualifiers();
- FTy->RefKind = demangleReferenceKind();
- FTy->Quals = Qualifiers(FTy->Quals | demangleQualifiers().first);
+ FTy->Quals = demanglePointerExtQualifiers(MangledName);
+ FTy->RefKind = demangleReferenceKind(MangledName);
+ FTy->Quals = Qualifiers(FTy->Quals | demangleQualifiers(MangledName).first);
}
// Fields that appear on both member and non-member functions.
- FTy->CallConvention = demangleCallingConvention();
+ FTy->CallConvention = demangleCallingConvention(MangledName);
// <return-type> ::= <type>
// ::= @ # structors (they have no declared return type)
bool IsStructor = MangledName.consumeFront('@');
if (!IsStructor)
- FTy->ReturnType = demangleType(QualifierMangleMode::Result);
+ FTy->ReturnType = demangleType(MangledName, QualifierMangleMode::Result);
- FTy->Params = demangleFunctionParameterList();
+ FTy->Params = demangleFunctionParameterList(MangledName);
- demangleThrowSpecification();
+ demangleThrowSpecification(MangledName);
return FTy;
}
-Type *Demangler::demangleFunctionEncoding() {
- FuncClass FC = demangleFunctionClass();
+Type *Demangler::demangleFunctionEncoding(StringView &MangledName) {
+ FuncClass FC = demangleFunctionClass(MangledName);
bool HasThisQuals = !(FC & (Global | Static));
- FunctionType *FTy = demangleFunctionType(HasThisQuals, false);
+ FunctionType *FTy = demangleFunctionType(MangledName, HasThisQuals, false);
FTy->FunctionClass = FC;
return FTy;
}
// Reads a primitive type.
-Type *Demangler::demangleBasicType() {
+Type *Demangler::demangleBasicType(StringView &MangledName) {
Type *Ty = Arena.alloc<Type>();
+ if (MangledName.consumeFront("$$T")) {
+ Ty->Prim = PrimTy::Nullptr;
+ return Ty;
+ }
+
switch (MangledName.popFront()) {
case 'X':
Ty->Prim = PrimTy::Void;
@@ -1407,16 +1728,26 @@ Type *Demangler::demangleBasicType() {
case 'W':
Ty->Prim = PrimTy::Wchar;
break;
+ case 'S':
+ Ty->Prim = PrimTy::Char16;
+ break;
+ case 'U':
+ Ty->Prim = PrimTy::Char32;
+ break;
default:
- assert(false);
+ Error = true;
+ return nullptr;
}
break;
}
+ default:
+ Error = true;
+ return nullptr;
}
return Ty;
}
-UdtType *Demangler::demangleClassType() {
+UdtType *Demangler::demangleClassType(StringView &MangledName) {
UdtType *UTy = Arena.alloc<UdtType>();
switch (MangledName.popFront()) {
@@ -1440,12 +1771,15 @@ UdtType *Demangler::demangleClassType() {
assert(false);
}
- UTy->UdtName = demangleName();
+ UTy->UdtName = demangleFullyQualifiedTypeName(MangledName);
return UTy;
}
static std::pair<Qualifiers, PointerAffinity>
demanglePointerCVQualifiers(StringView &MangledName) {
+ if (MangledName.consumeFront("$$Q"))
+ return std::make_pair(Q_None, PointerAffinity::RValueReference);
+
switch (MangledName.popFront()) {
case 'A':
return std::make_pair(Q_None, PointerAffinity::Reference);
@@ -1466,27 +1800,27 @@ demanglePointerCVQualifiers(StringView &MangledName) {
// <pointer-type> ::= E? <pointer-cvr-qualifiers> <ext-qualifiers> <type>
// # the E is required for 64-bit non-static pointers
-PointerType *Demangler::demanglePointerType() {
+PointerType *Demangler::demanglePointerType(StringView &MangledName) {
PointerType *Pointer = Arena.alloc<PointerType>();
- PointerAffinity Affinity;
- std::tie(Pointer->Quals, Affinity) = demanglePointerCVQualifiers(MangledName);
+ std::tie(Pointer->Quals, Pointer->Affinity) =
+ demanglePointerCVQualifiers(MangledName);
- Pointer->Prim =
- (Affinity == PointerAffinity::Pointer) ? PrimTy::Ptr : PrimTy::Ref;
+ Pointer->Prim = PrimTy::Ptr;
if (MangledName.consumeFront("6")) {
- Pointer->Pointee = demangleFunctionType(false, true);
+ Pointer->Pointee = demangleFunctionType(MangledName, false, true);
return Pointer;
}
- Qualifiers ExtQuals = demanglePointerExtQualifiers();
+ Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName);
Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals);
- Pointer->Pointee = demangleType(QualifierMangleMode::Mangle);
+ Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Mangle);
return Pointer;
}
-MemberPointerType *Demangler::demangleMemberPointerType() {
+MemberPointerType *
+Demangler::demangleMemberPointerType(StringView &MangledName) {
MemberPointerType *Pointer = Arena.alloc<MemberPointerType>();
Pointer->Prim = PrimTy::MemberPtr;
@@ -1494,27 +1828,27 @@ MemberPointerType *Demangler::demangleMemberPointerType() {
std::tie(Pointer->Quals, Affinity) = demanglePointerCVQualifiers(MangledName);
assert(Affinity == PointerAffinity::Pointer);
- Qualifiers ExtQuals = demanglePointerExtQualifiers();
+ Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName);
Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals);
if (MangledName.consumeFront("8")) {
- Pointer->MemberName = demangleName();
- Pointer->Pointee = demangleFunctionType(true, true);
+ Pointer->MemberName = demangleFullyQualifiedSymbolName(MangledName);
+ Pointer->Pointee = demangleFunctionType(MangledName, true, true);
} else {
Qualifiers PointeeQuals = Q_None;
bool IsMember = false;
- std::tie(PointeeQuals, IsMember) = demangleQualifiers();
+ std::tie(PointeeQuals, IsMember) = demangleQualifiers(MangledName);
assert(IsMember);
- Pointer->MemberName = demangleName();
+ Pointer->MemberName = demangleFullyQualifiedSymbolName(MangledName);
- Pointer->Pointee = demangleType(QualifierMangleMode::Drop);
+ Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Drop);
Pointer->Pointee->Quals = PointeeQuals;
}
return Pointer;
}
-Qualifiers Demangler::demanglePointerExtQualifiers() {
+Qualifiers Demangler::demanglePointerExtQualifiers(StringView &MangledName) {
Qualifiers Quals = Q_None;
if (MangledName.consumeFront('E'))
Quals = Qualifiers(Quals | Q_Pointer64);
@@ -1526,11 +1860,11 @@ Qualifiers Demangler::demanglePointerExtQualifiers() {
return Quals;
}
-ArrayType *Demangler::demangleArrayType() {
+ArrayType *Demangler::demangleArrayType(StringView &MangledName) {
assert(MangledName.front() == 'Y');
MangledName.popFront();
- int Dimension = demangleNumber();
+ int Dimension = demangleNumber(MangledName);
if (Dimension <= 0) {
Error = true;
return nullptr;
@@ -1540,7 +1874,7 @@ ArrayType *Demangler::demangleArrayType() {
ArrayType *Dim = ATy;
for (int I = 0; I < Dimension; ++I) {
Dim->Prim = PrimTy::Array;
- Dim->ArrayDimension = demangleNumber();
+ Dim->ArrayDimension = demangleNumber(MangledName);
Dim->NextDimension = Arena.alloc<ArrayType>();
Dim = Dim->NextDimension;
}
@@ -1554,19 +1888,20 @@ ArrayType *Demangler::demangleArrayType() {
Error = true;
}
- ATy->ElementType = demangleType(QualifierMangleMode::Drop);
+ ATy->ElementType = demangleType(MangledName, QualifierMangleMode::Drop);
Dim->ElementType = ATy->ElementType;
return ATy;
}
// Reads a function or a template parameters.
-ParamList Demangler::demangleFunctionParameterList() {
+FunctionParams
+Demangler::demangleFunctionParameterList(StringView &MangledName) {
// Empty parameter list.
if (MangledName.consumeFront('X'))
return {};
- ParamList *Head;
- ParamList **Current = &Head;
+ FunctionParams *Head;
+ FunctionParams **Current = &Head;
while (!Error && !MangledName.startsWith('@') &&
!MangledName.startsWith('Z')) {
@@ -1578,7 +1913,7 @@ ParamList Demangler::demangleFunctionParameterList() {
}
MangledName = MangledName.dropFront();
- *Current = Arena.alloc<ParamList>();
+ *Current = Arena.alloc<FunctionParams>();
(*Current)->Current = FunctionParamBackRefs[N]->clone(Arena);
Current = &(*Current)->Next;
continue;
@@ -1586,8 +1921,8 @@ ParamList Demangler::demangleFunctionParameterList() {
size_t OldSize = MangledName.size();
- *Current = Arena.alloc<ParamList>();
- (*Current)->Current = demangleType(QualifierMangleMode::Drop);
+ *Current = Arena.alloc<FunctionParams>();
+ (*Current)->Current = demangleType(MangledName, QualifierMangleMode::Drop);
size_t CharsConsumed = OldSize - MangledName.size();
assert(CharsConsumed != 0);
@@ -1618,14 +1953,33 @@ ParamList Demangler::demangleFunctionParameterList() {
return {};
}
-ParamList Demangler::demangleTemplateParameterList() {
- ParamList *Head;
- ParamList **Current = &Head;
+TemplateParams *
+Demangler::demangleTemplateParameterList(StringView &MangledName) {
+ TemplateParams *Head;
+ TemplateParams **Current = &Head;
while (!Error && !MangledName.startsWith('@')) {
-
// Template parameter lists don't participate in back-referencing.
- *Current = Arena.alloc<ParamList>();
- (*Current)->Current = demangleType(QualifierMangleMode::Drop);
+ *Current = Arena.alloc<TemplateParams>();
+
+ // Empty parameter pack.
+ if (MangledName.consumeFront("$S") || MangledName.consumeFront("$$V") ||
+ MangledName.consumeFront("$$$V")) {
+ if (!MangledName.startsWith('@'))
+ Error = true;
+ continue;
+ }
+
+ if (MangledName.consumeFront("$$Y")) {
+ (*Current)->IsTemplateTemplate = true;
+ (*Current)->IsAliasTemplate = true;
+ (*Current)->ParamName = demangleFullyQualifiedTypeName(MangledName);
+ } else if (MangledName.consumeFront("$1?")) {
+ (*Current)->ParamName = demangleFullyQualifiedSymbolName(MangledName);
+ (*Current)->ParamType = demangleFunctionEncoding(MangledName);
+ } else {
+ (*Current)->ParamType =
+ demangleType(MangledName, QualifierMangleMode::Drop);
+ }
Current = &(*Current)->Next;
}
@@ -1636,12 +1990,12 @@ ParamList Demangler::demangleTemplateParameterList() {
// Template parameter lists cannot be variadic, so it can only be terminated
// by @.
if (MangledName.consumeFront('@'))
- return *Head;
+ return Head;
Error = true;
return {};
}
-void Demangler::output() {
+void Demangler::output(const Symbol *S, OutputStream &OS) {
// Converts an AST to a string.
//
// Converting an AST representing a C++ type to a string is tricky due
@@ -1659,26 +2013,24 @@ void Demangler::output() {
// the "first half" of type declaration, and outputPost() writes the
// "second half". For example, outputPre() writes a return type for a
// function and outputPost() writes an parameter list.
- Type::outputPre(OS, *SymbolType);
- outputName(OS, SymbolName);
- Type::outputPost(OS, *SymbolType);
-
- // Null terminate the buffer.
- OS << '\0';
+ Type::outputPre(OS, *S->SymbolType);
+ outputName(OS, S->SymbolName);
+ Type::outputPost(OS, *S->SymbolType);
}
char *llvm::microsoftDemangle(const char *MangledName, char *Buf, size_t *N,
int *Status) {
- OutputStream OS = OutputStream::create(Buf, N, 1024);
-
- Demangler D(OS, StringView(MangledName));
- D.parse();
+ Demangler D;
+ StringView Name{MangledName};
+ Symbol *S = D.parse(Name);
if (D.Error)
*Status = llvm::demangle_invalid_mangled_name;
else
*Status = llvm::demangle_success;
- D.output();
+ OutputStream OS = OutputStream::create(Buf, N, 1024);
+ D.output(S, OS);
+ OS << '\0';
return OS.getBuffer();
}
diff --git a/lib/Demangle/StringView.h b/lib/Demangle/StringView.h
index 3416db2c2867..a89deda694c2 100644
--- a/lib/Demangle/StringView.h
+++ b/lib/Demangle/StringView.h
@@ -22,6 +22,8 @@ class StringView {
const char *Last;
public:
+ static const size_t npos = ~size_t(0);
+
template <size_t N>
StringView(const char (&Str)[N]) : First(Str), Last(Str + N - 1) {}
StringView(const char *First_, const char *Last_)
@@ -35,6 +37,17 @@ public:
return StringView(begin() + From, size() - From);
}
+ size_t find(char C, size_t From = 0) const {
+ size_t FindBegin = std::min(From, size());
+ // Avoid calling memchr with nullptr.
+ if (FindBegin < size()) {
+ // Just forward to memchr, which is faster than a hand-rolled loop.
+ if (const void *P = ::memchr(First + FindBegin, C, size() - FindBegin))
+ return static_cast<const char *>(P) - First;
+ }
+ return npos;
+ }
+
StringView substr(size_t From, size_t To) const {
if (To >= size())
To = size() - 1;
@@ -49,11 +62,22 @@ public:
return StringView(First + N, Last);
}
+ StringView dropBack(size_t N = 1) const {
+ if (N >= size())
+ N = size();
+ return StringView(First, Last - N);
+ }
+
char front() const {
assert(!empty());
return *begin();
}
+ char back() const {
+ assert(!empty());
+ return *(end() - 1);
+ }
+
char popFront() {
assert(!empty());
return *First++;
diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
index abcdaeba8eb0..3be4bec566a0 100644
--- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp
+++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -153,7 +153,7 @@ void LLVMInitializeMCJITCompilerOptions(LLVMMCJITCompilerOptions *PassedOptions,
LLVMMCJITCompilerOptions options;
memset(&options, 0, sizeof(options)); // Most fields are zero by default.
options.CodeModel = LLVMCodeModelJITDefault;
-
+
memcpy(PassedOptions, &options,
std::min(sizeof(options), SizeOfPassedOptions));
}
@@ -171,14 +171,14 @@ LLVMBool LLVMCreateMCJITCompilerForModule(
"LLVM library mismatch.");
return 1;
}
-
+
// Defend against the user having an old version of the API by ensuring that
// any fields they didn't see are cleared. We must defend against fields being
// set to the bitwise equivalent of zero, and assume that this means "do the
// default" as if that option hadn't been available.
LLVMInitializeMCJITCompilerOptions(&options, sizeof(options));
memcpy(&options, PassedOptions, SizeOfPassedOptions);
-
+
TargetOptions targetOptions;
targetOptions.EnableFastISel = options.EnableFastISel;
std::unique_ptr<Module> Mod(unwrap(M));
@@ -241,12 +241,12 @@ LLVMGenericValueRef LLVMRunFunction(LLVMExecutionEngineRef EE, LLVMValueRef F,
unsigned NumArgs,
LLVMGenericValueRef *Args) {
unwrap(EE)->finalizeObject();
-
+
std::vector<GenericValue> ArgVec;
ArgVec.reserve(NumArgs);
for (unsigned I = 0; I != NumArgs; ++I)
ArgVec.push_back(*unwrap(Args[I]));
-
+
GenericValue *Result = new GenericValue();
*Result = unwrap(EE)->runFunction(unwrap<Function>(F), ArgVec);
return wrap(Result);
@@ -297,7 +297,7 @@ void LLVMAddGlobalMapping(LLVMExecutionEngineRef EE, LLVMValueRef Global,
void *LLVMGetPointerToGlobal(LLVMExecutionEngineRef EE, LLVMValueRef Global) {
unwrap(EE)->finalizeObject();
-
+
return unwrap(EE)->getPointerToGlobal(unwrap<GlobalValue>(Global));
}
@@ -395,11 +395,11 @@ LLVMMCJITMemoryManagerRef LLVMCreateSimpleMCJITMemoryManager(
LLVMMemoryManagerAllocateDataSectionCallback AllocateDataSection,
LLVMMemoryManagerFinalizeMemoryCallback FinalizeMemory,
LLVMMemoryManagerDestroyCallback Destroy) {
-
+
if (!AllocateCodeSection || !AllocateDataSection || !FinalizeMemory ||
!Destroy)
return nullptr;
-
+
SimpleBindingMMFunctions functions;
functions.AllocateCodeSection = AllocateCodeSection;
functions.AllocateDataSection = AllocateDataSection;
diff --git a/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h b/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h
index 1f029fb1c45b..61d8cc75d9f2 100644
--- a/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h
+++ b/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h
@@ -7,7 +7,7 @@
*
*===----------------------------------------------------------------------===*
*
- * This file provides Intel(R) Performance Analyzer JIT (Just-In-Time)
+ * This file provides Intel(R) Performance Analyzer JIT (Just-In-Time)
* Profiling API internal config.
*
* NOTE: This file comes in a style different from the rest of LLVM
@@ -213,7 +213,7 @@ typedef pthread_mutex_t mutex_t;
#define __itt_thread_id() GetCurrentThreadId()
#define __itt_thread_yield() SwitchToThread()
#ifndef ITT_SIMPLE_INIT
-ITT_INLINE long
+ITT_INLINE long
__itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE;
ITT_INLINE long __itt_interlocked_increment(volatile long* ptr)
{
@@ -273,7 +273,7 @@ ITT_INLINE long __TBB_machine_fetchadd4(volatile void* ptr, long addend)
}
#endif /* ITT_ARCH==ITT_ARCH_IA64 */
#ifndef ITT_SIMPLE_INIT
-ITT_INLINE long
+ITT_INLINE long
__itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE;
ITT_INLINE long __itt_interlocked_increment(volatile long* ptr)
{
diff --git a/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h b/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h
index 8d16ee85d141..efd2b1a33f75 100644
--- a/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h
+++ b/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h
@@ -7,7 +7,7 @@
*
*===----------------------------------------------------------------------===*
*
- * This file provides Intel(R) Performance Analyzer JIT (Just-In-Time)
+ * This file provides Intel(R) Performance Analyzer JIT (Just-In-Time)
* Profiling API declaration.
*
* NOTE: This file comes in a style different from the rest of LLVM
@@ -28,54 +28,54 @@ typedef enum iJIT_jvm_event
{
/* shutdown */
-
- /*
+
+ /*
* Program exiting EventSpecificData NA
*/
- iJVM_EVENT_TYPE_SHUTDOWN = 2,
+ iJVM_EVENT_TYPE_SHUTDOWN = 2,
/* JIT profiling */
-
- /*
+
+ /*
* issued after method code jitted into memory but before code is executed
* EventSpecificData is an iJIT_Method_Load
*/
- iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED=13,
+ iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED=13,
- /* issued before unload. Method code will no longer be executed, but code
- * and info are still in memory. The VTune profiler may capture method
+ /* issued before unload. Method code will no longer be executed, but code
+ * and info are still in memory. The VTune profiler may capture method
* code only at this point EventSpecificData is iJIT_Method_Id
*/
- iJVM_EVENT_TYPE_METHOD_UNLOAD_START,
+ iJVM_EVENT_TYPE_METHOD_UNLOAD_START,
/* Method Profiling */
- /* method name, Id and stack is supplied
- * issued when a method is about to be entered EventSpecificData is
+ /* method name, Id and stack is supplied
+ * issued when a method is about to be entered EventSpecificData is
* iJIT_Method_NIDS
*/
- iJVM_EVENT_TYPE_ENTER_NIDS = 19,
+ iJVM_EVENT_TYPE_ENTER_NIDS = 19,
- /* method name, Id and stack is supplied
- * issued when a method is about to be left EventSpecificData is
+ /* method name, Id and stack is supplied
+ * issued when a method is about to be left EventSpecificData is
* iJIT_Method_NIDS
*/
- iJVM_EVENT_TYPE_LEAVE_NIDS
+ iJVM_EVENT_TYPE_LEAVE_NIDS
} iJIT_JVM_EVENT;
typedef enum _iJIT_ModeFlags
{
/* No need to Notify VTune, since VTune is not running */
- iJIT_NO_NOTIFICATIONS = 0x0000,
+ iJIT_NO_NOTIFICATIONS = 0x0000,
- /* when turned on the jit must call
+ /* when turned on the jit must call
* iJIT_NotifyEvent
* (
* iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED,
* )
* for all the method already jitted
*/
- iJIT_BE_NOTIFY_ON_LOAD = 0x0001,
+ iJIT_BE_NOTIFY_ON_LOAD = 0x0001,
/* when turned on the jit must call
* iJIT_NotifyEvent
@@ -83,19 +83,19 @@ typedef enum _iJIT_ModeFlags
* iJVM_EVENT_TYPE_METHOD_UNLOAD_FINISHED,
* ) for all the method that are unloaded
*/
- iJIT_BE_NOTIFY_ON_UNLOAD = 0x0002,
+ iJIT_BE_NOTIFY_ON_UNLOAD = 0x0002,
/* when turned on the jit must instrument all
* the currently jited code with calls on
* method entries
*/
- iJIT_BE_NOTIFY_ON_METHOD_ENTRY = 0x0004,
+ iJIT_BE_NOTIFY_ON_METHOD_ENTRY = 0x0004,
/* when turned on the jit must instrument all
* the currently jited code with calls
* on method exit
*/
- iJIT_BE_NOTIFY_ON_METHOD_EXIT = 0x0008
+ iJIT_BE_NOTIFY_ON_METHOD_EXIT = 0x0008
} iJIT_ModeFlags;
@@ -104,13 +104,13 @@ typedef enum _iJIT_ModeFlags
typedef enum _iJIT_IsProfilingActiveFlags
{
/* No profiler is running. Currently not used */
- iJIT_NOTHING_RUNNING = 0x0000,
+ iJIT_NOTHING_RUNNING = 0x0000,
/* Sampling is running. This is the default value
* returned by iJIT_IsProfilingActive()
*/
- iJIT_SAMPLING_ON = 0x0001,
-
+ iJIT_SAMPLING_ON = 0x0001,
+
/* Call Graph is running */
iJIT_CALLGRAPH_ON = 0x0002
@@ -135,7 +135,7 @@ typedef struct _iJIT_Method_Id
/* Id of the method (same as the one passed in
* the iJIT_Method_Load struct
*/
- unsigned int method_id;
+ unsigned int method_id;
} *piJIT_Method_Id, iJIT_Method_Id;
@@ -149,13 +149,13 @@ typedef struct _iJIT_Method_Id
typedef struct _iJIT_Method_NIDS
{
/* unique method ID */
- unsigned int method_id;
+ unsigned int method_id;
/* NOTE: no need to fill this field, it's filled by VTune */
- unsigned int stack_id;
+ unsigned int stack_id;
/* method name (just the method, without the class) */
- char* method_name;
+ char* method_name;
} *piJIT_Method_NIDS, iJIT_Method_NIDS;
/* structures for the events:
@@ -168,51 +168,51 @@ typedef struct _LineNumberInfo
unsigned int Offset;
/* source line number from the beginning of the source file */
- unsigned int LineNumber;
+ unsigned int LineNumber;
} *pLineNumberInfo, LineNumberInfo;
typedef struct _iJIT_Method_Load
{
/* unique method ID - can be any unique value, (except 0 - 999) */
- unsigned int method_id;
+ unsigned int method_id;
/* method name (can be with or without the class and signature, in any case
* the class name will be added to it)
*/
- char* method_name;
+ char* method_name;
/* virtual address of that method - This determines the method range for the
* iJVM_EVENT_TYPE_ENTER/LEAVE_METHOD_ADDR events
*/
- void* method_load_address;
+ void* method_load_address;
/* Size in memory - Must be exact */
- unsigned int method_size;
+ unsigned int method_size;
/* Line Table size in number of entries - Zero if none */
unsigned int line_number_size;
/* Pointer to the beginning of the line numbers info array */
- pLineNumberInfo line_number_table;
+ pLineNumberInfo line_number_table;
/* unique class ID */
- unsigned int class_id;
-
+ unsigned int class_id;
+
/* class file name */
- char* class_file_name;
+ char* class_file_name;
/* source file name */
- char* source_file_name;
+ char* source_file_name;
/* bits supplied by the user for saving in the JIT file */
- void* user_data;
+ void* user_data;
/* the size of the user data buffer */
- unsigned int user_data_size;
+ unsigned int user_data_size;
/* NOTE: no need to fill this field, it's filled by VTune */
- iJDEnvironmentType env;
+ iJDEnvironmentType env;
} *piJIT_Method_Load, iJIT_Method_Load;
@@ -241,7 +241,7 @@ typedef void (*iJIT_ModeChangedEx)(void *UserData, iJIT_ModeFlags Flags);
int JITAPI iJIT_NotifyEvent(iJIT_JVM_EVENT event_type, void *EventSpecificData);
/* The new mode call back routine */
-void JITAPI iJIT_RegisterCallbackEx(void *userdata,
+void JITAPI iJIT_RegisterCallbackEx(void *userdata,
iJIT_ModeChangedEx NewModeCallBackFuncEx);
iJIT_IsProfilingActiveFlags JITAPI iJIT_IsProfilingActive(void);
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index 9e77d160c30b..39cf6d4a32a3 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -85,7 +85,7 @@ static void executeFMulInst(GenericValue &Dest, GenericValue Src1,
}
}
-static void executeFDivInst(GenericValue &Dest, GenericValue Src1,
+static void executeFDivInst(GenericValue &Dest, GenericValue Src1,
GenericValue Src2, Type *Ty) {
switch (Ty->getTypeID()) {
IMPLEMENT_BINARY_OPERATOR(/, Float);
@@ -96,7 +96,7 @@ static void executeFDivInst(GenericValue &Dest, GenericValue Src1,
}
}
-static void executeFRemInst(GenericValue &Dest, GenericValue Src1,
+static void executeFRemInst(GenericValue &Dest, GenericValue Src1,
GenericValue Src2, Type *Ty) {
switch (Ty->getTypeID()) {
case Type::FloatTyID:
@@ -281,7 +281,7 @@ void Interpreter::visitICmpInst(ICmpInst &I) {
GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
GenericValue R; // Result
-
+
switch (I.getPredicate()) {
case ICmpInst::ICMP_EQ: R = executeICMP_EQ(Src1, Src2, Ty); break;
case ICmpInst::ICMP_NE: R = executeICMP_NE(Src1, Src2, Ty); break;
@@ -297,7 +297,7 @@ void Interpreter::visitICmpInst(ICmpInst &I) {
dbgs() << "Don't know how to handle this ICmp predicate!\n-->" << I;
llvm_unreachable(nullptr);
}
-
+
SetValue(&I, R, SF);
}
@@ -552,10 +552,10 @@ static GenericValue executeFCMP_ORD(GenericValue Src1, GenericValue Src2,
Src2.AggregateVal[_i].DoubleVal)));
}
} else if (Ty->isFloatTy())
- Dest.IntVal = APInt(1,(Src1.FloatVal == Src1.FloatVal &&
+ Dest.IntVal = APInt(1,(Src1.FloatVal == Src1.FloatVal &&
Src2.FloatVal == Src2.FloatVal));
else {
- Dest.IntVal = APInt(1,(Src1.DoubleVal == Src1.DoubleVal &&
+ Dest.IntVal = APInt(1,(Src1.DoubleVal == Src1.DoubleVal &&
Src2.DoubleVal == Src2.DoubleVal));
}
return Dest;
@@ -583,10 +583,10 @@ static GenericValue executeFCMP_UNO(GenericValue Src1, GenericValue Src2,
Src2.AggregateVal[_i].DoubleVal)));
}
} else if (Ty->isFloatTy())
- Dest.IntVal = APInt(1,(Src1.FloatVal != Src1.FloatVal ||
+ Dest.IntVal = APInt(1,(Src1.FloatVal != Src1.FloatVal ||
Src2.FloatVal != Src2.FloatVal));
else {
- Dest.IntVal = APInt(1,(Src1.DoubleVal != Src1.DoubleVal ||
+ Dest.IntVal = APInt(1,(Src1.DoubleVal != Src1.DoubleVal ||
Src2.DoubleVal != Src2.DoubleVal));
}
return Dest;
@@ -613,15 +613,15 @@ void Interpreter::visitFCmpInst(FCmpInst &I) {
GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
GenericValue R; // Result
-
+
switch (I.getPredicate()) {
default:
dbgs() << "Don't know how to handle this FCmp predicate!\n-->" << I;
llvm_unreachable(nullptr);
break;
- case FCmpInst::FCMP_FALSE: R = executeFCMP_BOOL(Src1, Src2, Ty, false);
+ case FCmpInst::FCMP_FALSE: R = executeFCMP_BOOL(Src1, Src2, Ty, false);
break;
- case FCmpInst::FCMP_TRUE: R = executeFCMP_BOOL(Src1, Src2, Ty, true);
+ case FCmpInst::FCMP_TRUE: R = executeFCMP_BOOL(Src1, Src2, Ty, true);
break;
case FCmpInst::FCMP_ORD: R = executeFCMP_ORD(Src1, Src2, Ty); break;
case FCmpInst::FCMP_UNO: R = executeFCMP_UNO(Src1, Src2, Ty); break;
@@ -638,11 +638,11 @@ void Interpreter::visitFCmpInst(FCmpInst &I) {
case FCmpInst::FCMP_UGE: R = executeFCMP_UGE(Src1, Src2, Ty); break;
case FCmpInst::FCMP_OGE: R = executeFCMP_OGE(Src1, Src2, Ty); break;
}
-
+
SetValue(&I, R, SF);
}
-static GenericValue executeCmpInst(unsigned predicate, GenericValue Src1,
+static GenericValue executeCmpInst(unsigned predicate, GenericValue Src1,
GenericValue Src2, Type *Ty) {
GenericValue Result;
switch (predicate) {
@@ -747,12 +747,12 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) {
case Instruction::FRem:
if (cast<VectorType>(Ty)->getElementType()->isFloatTy())
for (unsigned i = 0; i < R.AggregateVal.size(); ++i)
- R.AggregateVal[i].FloatVal =
+ R.AggregateVal[i].FloatVal =
fmod(Src1.AggregateVal[i].FloatVal, Src2.AggregateVal[i].FloatVal);
else {
if (cast<VectorType>(Ty)->getElementType()->isDoubleTy())
for (unsigned i = 0; i < R.AggregateVal.size(); ++i)
- R.AggregateVal[i].DoubleVal =
+ R.AggregateVal[i].DoubleVal =
fmod(Src1.AggregateVal[i].DoubleVal, Src2.AggregateVal[i].DoubleVal);
else {
dbgs() << "Unhandled type for Rem instruction: " << *Ty << "\n";
@@ -965,7 +965,7 @@ void Interpreter::visitAllocaInst(AllocaInst &I) {
Type *Ty = I.getType()->getElementType(); // Type to be allocated
// Get the number of elements being allocated by the array...
- unsigned NumElements =
+ unsigned NumElements =
getOperandValue(I.getOperand(0), SF).IntVal.getZExtValue();
unsigned TypeSize = (size_t)getDataLayout().getTypeAllocSize(Ty);
@@ -1011,7 +1011,7 @@ GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I,
GenericValue IdxGV = getOperandValue(I.getOperand(), SF);
int64_t Idx;
- unsigned BitWidth =
+ unsigned BitWidth =
cast<IntegerType>(I.getOperand()->getType())->getBitWidth();
if (BitWidth == 32)
Idx = (int64_t)(int32_t)IdxGV.IntVal.getZExtValue();
@@ -2037,13 +2037,13 @@ GenericValue Interpreter::getConstantExprValue (ConstantExpr *CE,
case Instruction::And: Dest.IntVal = Op0.IntVal & Op1.IntVal; break;
case Instruction::Or: Dest.IntVal = Op0.IntVal | Op1.IntVal; break;
case Instruction::Xor: Dest.IntVal = Op0.IntVal ^ Op1.IntVal; break;
- case Instruction::Shl:
+ case Instruction::Shl:
Dest.IntVal = Op0.IntVal.shl(Op1.IntVal.getZExtValue());
break;
- case Instruction::LShr:
+ case Instruction::LShr:
Dest.IntVal = Op0.IntVal.lshr(Op1.IntVal.getZExtValue());
break;
- case Instruction::AShr:
+ case Instruction::AShr:
Dest.IntVal = Op0.IntVal.ashr(Op1.IntVal.getZExtValue());
break;
default:
@@ -2100,7 +2100,7 @@ void Interpreter::callFunction(Function *F, ArrayRef<GenericValue> ArgVals) {
// Handle non-varargs arguments...
unsigned i = 0;
- for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
AI != E; ++AI, ++i)
SetValue(&*AI, ArgVals[i], StackFrame);
diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h
index 5c16448404bb..33542e7e43ad 100644
--- a/lib/ExecutionEngine/Interpreter/Interpreter.h
+++ b/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -132,8 +132,8 @@ public:
void visitLoadInst(LoadInst &I);
void visitStoreInst(StoreInst &I);
void visitGetElementPtrInst(GetElementPtrInst &I);
- void visitPHINode(PHINode &PN) {
- llvm_unreachable("PHI nodes already handled!");
+ void visitPHINode(PHINode &PN) {
+ llvm_unreachable("PHI nodes already handled!");
}
void visitTruncInst(TruncInst &I);
void visitZExtInst(ZExtInst &I);
@@ -224,7 +224,7 @@ private: // Helper functions
ExecutionContext &SF);
GenericValue executeBitCastInst(Value *SrcVal, Type *DstTy,
ExecutionContext &SF);
- GenericValue executeCastOperation(Instruction::CastOps opcode, Value *SrcVal,
+ GenericValue executeCastOperation(Instruction::CastOps opcode, Value *SrcVal,
Type *Ty, ExecutionContext &SF);
void popStackAndReturnValueToCaller(Type *RetTy, GenericValue Result);
diff --git a/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp b/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp
index e774af05ebdd..75d4c2b5134e 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp
@@ -119,10 +119,10 @@ void RTDyldMemoryManager::deregisterEHFramesInProcess(uint8_t *Addr,
void RTDyldMemoryManager::registerEHFramesInProcess(uint8_t *Addr,
size_t Size) {
- // On Linux __register_frame takes a single argument:
+ // On Linux __register_frame takes a single argument:
// a pointer to the start of the .eh_frame section.
- // How can it find the end? Because crtendS.o is linked
+ // How can it find the end? Because crtendS.o is linked
// in and it has an .eh_frame section with four zero chars.
__register_frame(Addr);
}
@@ -255,7 +255,7 @@ RTDyldMemoryManager::getSymbolAddressInProcess(const std::string &Name) {
return (uint64_t)&__morestack;
#endif
#endif // __linux__ && __GLIBC__
-
+
// See ARM_MATH_IMPORTS definition for explanation
#if defined(__BIONIC__) && defined(__arm__)
if (Name.compare(0, 8, "__aeabi_") == 0) {
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index cc6729d21320..f9a81c7bd1b0 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -1430,7 +1430,7 @@ RuntimeDyldELF::processRelocationRef(
} else {
processSimpleRelocation(SectionID, Offset, RelType, Value);
}
-
+
} else if (Arch == Triple::ppc64 || Arch == Triple::ppc64le) {
if (RelType == ELF::R_PPC64_REL24) {
// Determine ABI variant in use for this object.
diff --git a/lib/FuzzMutate/FuzzerCLI.cpp b/lib/FuzzMutate/FuzzerCLI.cpp
index 6f5a5c067a97..a70dad37dfcf 100644
--- a/lib/FuzzMutate/FuzzerCLI.cpp
+++ b/lib/FuzzMutate/FuzzerCLI.cpp
@@ -93,7 +93,7 @@ void llvm::handleExecNameEncodedOptimizerOpts(StringRef ExecName) {
Args.push_back("-passes=gvn");
} else if (Opt == "sccp") {
Args.push_back("-passes=sccp");
-
+
} else if (Opt == "loop_predication") {
Args.push_back("-passes=loop-predication");
} else if (Opt == "guard_widening") {
@@ -114,7 +114,7 @@ void llvm::handleExecNameEncodedOptimizerOpts(StringRef ExecName) {
Args.push_back("-passes=strength-reduce");
} else if (Opt == "irce") {
Args.push_back("-passes=irce");
-
+
} else if (Triple(Opt).getArch()) {
Args.push_back("-mtriple=" + Opt.str());
} else {
@@ -204,6 +204,6 @@ std::unique_ptr<Module> llvm::parseAndVerify(const uint8_t *Data, size_t Size,
auto M = parseModule(Data, Size, Context);
if (!M || verifyModule(*M, &errs()))
return nullptr;
-
+
return M;
}
diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp
index 9e5f55d49756..d87187481be0 100644
--- a/lib/IR/Attributes.cpp
+++ b/lib/IR/Attributes.cpp
@@ -1709,6 +1709,15 @@ adjustMinLegalVectorWidth(Function &Caller, const Function &Callee) {
}
}
+/// If the inlined function has "null-pointer-is-valid=true" attribute,
+/// set this attribute in the caller post inlining.
+static void
+adjustNullPointerValidAttr(Function &Caller, const Function &Callee) {
+ if (Callee.nullPointerIsDefined() && !Caller.nullPointerIsDefined()) {
+ Caller.addFnAttr(Callee.getFnAttribute("null-pointer-is-valid"));
+ }
+}
+
#define GET_ATTR_COMPAT_FUNC
#include "AttributesCompatFunc.inc"
diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp
index ef62a23b5358..f098ad9725b6 100644
--- a/lib/IR/AutoUpgrade.cpp
+++ b/lib/IR/AutoUpgrade.cpp
@@ -94,7 +94,7 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
- Name.startswith("avx512.kunpck") || //added in 6.0
+ Name.startswith("avx512.kunpck") || //added in 6.0
Name.startswith("avx2.pabs.") || // Added in 6.0
Name.startswith("avx512.mask.pabs.") || // Added in 6.0
Name.startswith("avx512.broadcastm") || // Added in 6.0
diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp
index aba329b80508..72090f5bac3e 100644
--- a/lib/IR/Function.cpp
+++ b/lib/IR/Function.cpp
@@ -586,7 +586,7 @@ static std::string getMangledTypeStr(Type* Ty) {
if (FT->isVarArg())
Result += "vararg";
// Ensure nested function types are distinguishable.
- Result += "f";
+ Result += "f";
} else if (isa<VectorType>(Ty)) {
Result += "v" + utostr(Ty->getVectorNumElements()) +
getMangledTypeStr(Ty->getVectorElementType());
diff --git a/lib/IR/InlineAsm.cpp b/lib/IR/InlineAsm.cpp
index 8667d7aab583..4623f69bd9a3 100644
--- a/lib/IR/InlineAsm.cpp
+++ b/lib/IR/InlineAsm.cpp
@@ -57,7 +57,7 @@ void InlineAsm::destroyConstant() {
FunctionType *InlineAsm::getFunctionType() const {
return FTy;
}
-
+
/// Parse - Analyze the specified string (e.g. "==&{eax}") and fill in the
/// fields in this structure. If the constraint string is not understood,
/// return true, otherwise return false.
@@ -80,7 +80,7 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str,
isCommutative = false;
isIndirect = false;
currentAlternativeIndex = 0;
-
+
// Parse prefixes.
if (*I == '~') {
Type = isClobber;
@@ -100,7 +100,7 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str,
}
if (I == E) return true; // Just a prefix, like "==" or "~".
-
+
// Parse the modifiers.
bool DoneWithModifiers = false;
while (!DoneWithModifiers) {
@@ -124,13 +124,13 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str,
case '*': // Register preferencing.
return true; // Not supported.
}
-
+
if (!DoneWithModifiers) {
++I;
if (I == E) return true; // Just prefixes and modifiers!
}
}
-
+
// Parse the various constraints.
while (I != E) {
if (*I == '{') { // Physical register reference.
@@ -150,7 +150,7 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str,
if (N >= ConstraintsSoFar.size() || ConstraintsSoFar[N].Type != isOutput||
Type != isInput)
return true; // Invalid constraint number.
-
+
// If Operand N already has a matching input, reject this. An output
// can't be constrained to the same value as multiple inputs.
if (isMultipleAlternative) {
@@ -207,7 +207,7 @@ void InlineAsm::ConstraintInfo::selectAlternative(unsigned index) {
InlineAsm::ConstraintInfoVector
InlineAsm::ParseConstraints(StringRef Constraints) {
ConstraintInfoVector Result;
-
+
// Scan the constraints string.
for (StringRef::iterator I = Constraints.begin(),
E = Constraints.end(); I != E; ) {
@@ -223,7 +223,7 @@ InlineAsm::ParseConstraints(StringRef Constraints) {
}
Result.push_back(Info);
-
+
// ConstraintEnd may be either the next comma or the end of the string. In
// the former case, we skip the comma.
I = ConstraintEnd;
@@ -235,7 +235,7 @@ InlineAsm::ParseConstraints(StringRef Constraints) {
} // don't allow "xyz,"
}
}
-
+
return Result;
}
@@ -243,15 +243,15 @@ InlineAsm::ParseConstraints(StringRef Constraints) {
/// specified function type, and otherwise validate the constraint string.
bool InlineAsm::Verify(FunctionType *Ty, StringRef ConstStr) {
if (Ty->isVarArg()) return false;
-
+
ConstraintInfoVector Constraints = ParseConstraints(ConstStr);
-
+
// Error parsing constraints.
if (Constraints.empty() && !ConstStr.empty()) return false;
-
+
unsigned NumOutputs = 0, NumInputs = 0, NumClobbers = 0;
unsigned NumIndirect = 0;
-
+
for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
switch (Constraints[i].Type) {
case InlineAsm::isOutput:
@@ -272,7 +272,7 @@ bool InlineAsm::Verify(FunctionType *Ty, StringRef ConstStr) {
break;
}
}
-
+
switch (NumOutputs) {
case 0:
if (!Ty->getReturnType()->isVoidTy()) return false;
@@ -285,8 +285,8 @@ bool InlineAsm::Verify(FunctionType *Ty, StringRef ConstStr) {
if (!STy || STy->getNumElements() != NumOutputs)
return false;
break;
- }
-
+ }
+
if (Ty->getNumParams() != NumInputs) return false;
return true;
}
diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp
index e0ad0d1ea1f1..32db918dab97 100644
--- a/lib/IR/Instructions.cpp
+++ b/lib/IR/Instructions.cpp
@@ -310,7 +310,7 @@ void CallInst::init(FunctionType *FTy, Value *Func, ArrayRef<Value *> Args,
"Calling a function with bad signature!");
for (unsigned i = 0; i != Args.size(); ++i)
- assert((i >= FTy->getNumParams() ||
+ assert((i >= FTy->getNumParams() ||
FTy->getParamType(i) == Args[i]->getType()) &&
"Calling a function with a bad signature!");
#endif
@@ -409,7 +409,7 @@ static Instruction *createMalloc(Instruction *InsertBefore,
assert(((!InsertBefore && InsertAtEnd) || (InsertBefore && !InsertAtEnd)) &&
"createMalloc needs either InsertBefore or InsertAtEnd");
- // malloc(type) becomes:
+ // malloc(type) becomes:
// bitcast (i8* malloc(typeSize)) to type*
// malloc(type, arraySize) becomes:
// bitcast (i8* malloc(typeSize*arraySize)) to type*
@@ -516,7 +516,7 @@ Instruction *CallInst::CreateMalloc(Instruction *InsertBefore,
/// responsibility of the caller.
Instruction *CallInst::CreateMalloc(BasicBlock *InsertAtEnd,
Type *IntPtrTy, Type *AllocTy,
- Value *AllocSize, Value *ArraySize,
+ Value *AllocSize, Value *ArraySize,
Function *MallocF, const Twine &Name) {
return createMalloc(nullptr, InsertAtEnd, IntPtrTy, AllocTy, AllocSize,
ArraySize, None, MallocF, Name);
@@ -612,7 +612,7 @@ void InvokeInst::init(FunctionType *FTy, Value *Fn, BasicBlock *IfNormal,
"Invoking a function with bad signature");
for (unsigned i = 0, e = Args.size(); i != e; i++)
- assert((i >= FTy->getNumParams() ||
+ assert((i >= FTy->getNumParams() ||
FTy->getParamType(i) == Args[i]->getType()) &&
"Invoking a function with a bad signature!");
#endif
@@ -912,7 +912,7 @@ FuncletPadInst::FuncletPadInst(Instruction::FuncletPadOps Op, Value *ParentPad,
// UnreachableInst Implementation
//===----------------------------------------------------------------------===//
-UnreachableInst::UnreachableInst(LLVMContext &Context,
+UnreachableInst::UnreachableInst(LLVMContext &Context,
Instruction *InsertBefore)
: TerminatorInst(Type::getVoidTy(Context), Instruction::Unreachable,
nullptr, 0, InsertBefore) {
@@ -1072,7 +1072,7 @@ bool AllocaInst::isArrayAllocation() const {
bool AllocaInst::isStaticAlloca() const {
// Must be constant size.
if (!isa<ConstantInt>(getArraySize())) return false;
-
+
// Must be in the entry block.
const BasicBlock *Parent = getParent();
return Parent == &Parent->getParent()->front() && !isUsedWithInAlloca();
@@ -1125,7 +1125,7 @@ LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile,
setName(Name);
}
-LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
unsigned Align, AtomicOrdering Order,
SyncScope::ID SSID,
BasicBlock *InsertAE)
@@ -1380,7 +1380,7 @@ AtomicRMWInst::AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val,
// FenceInst Implementation
//===----------------------------------------------------------------------===//
-FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering,
+FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering,
SyncScope::ID SSID,
Instruction *InsertBefore)
: Instruction(Type::getVoidTy(C), Fence, nullptr, 0, InsertBefore) {
@@ -1388,7 +1388,7 @@ FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering,
setSyncScopeID(SSID);
}
-FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering,
+FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering,
SyncScope::ID SSID,
BasicBlock *InsertAtEnd)
: Instruction(Type::getVoidTy(C), Fence, nullptr, 0, InsertAtEnd) {
@@ -1575,14 +1575,14 @@ InsertElementInst::InsertElementInst(Value *Vec, Value *Elt, Value *Index,
setName(Name);
}
-bool InsertElementInst::isValidOperands(const Value *Vec, const Value *Elt,
+bool InsertElementInst::isValidOperands(const Value *Vec, const Value *Elt,
const Value *Index) {
if (!Vec->getType()->isVectorTy())
return false; // First operand of insertelement must be vector type.
-
+
if (Elt->getType() != cast<VectorType>(Vec->getType())->getElementType())
return false;// Second operand of insertelement must be vector element type.
-
+
if (!Index->getType()->isIntegerTy())
return false; // Third operand of insertelement must be i32.
return true;
@@ -1632,7 +1632,7 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
// V1 and V2 must be vectors of the same type.
if (!V1->getType()->isVectorTy() || V1->getType() != V2->getType())
return false;
-
+
// Mask must be vector of i32.
auto *MaskTy = dyn_cast<VectorType>(Mask->getType());
if (!MaskTy || !MaskTy->getElementType()->isIntegerTy(32))
@@ -1654,7 +1654,7 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
}
return true;
}
-
+
if (const auto *CDS = dyn_cast<ConstantDataSequential>(Mask)) {
unsigned V1Size = cast<VectorType>(V1->getType())->getNumElements();
for (unsigned i = 0, e = MaskTy->getNumElements(); i != e; ++i)
@@ -1662,7 +1662,7 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
return false;
return true;
}
-
+
// The bitcode reader can create a place holder for a forward reference
// used as the shuffle mask. When this occurs, the shuffle mask will
// fall into this case and fail. To avoid this error, do this bit of
@@ -1687,12 +1687,12 @@ int ShuffleVectorInst::getMaskValue(const Constant *Mask, unsigned i) {
void ShuffleVectorInst::getShuffleMask(const Constant *Mask,
SmallVectorImpl<int> &Result) {
unsigned NumElts = Mask->getType()->getVectorNumElements();
-
+
if (auto *CDS = dyn_cast<ConstantDataSequential>(Mask)) {
for (unsigned i = 0; i != NumElts; ++i)
Result.push_back(CDS->getElementAsInteger(i));
return;
- }
+ }
for (unsigned i = 0; i != NumElts; ++i) {
Constant *C = Mask->getAggregateElement(i);
Result.push_back(isa<UndefValue>(C) ? -1 :
@@ -1806,7 +1806,7 @@ bool ShuffleVectorInst::isTransposeMask(ArrayRef<int> Mask) {
// InsertValueInst Class
//===----------------------------------------------------------------------===//
-void InsertValueInst::init(Value *Agg, Value *Val, ArrayRef<unsigned> Idxs,
+void InsertValueInst::init(Value *Agg, Value *Val, ArrayRef<unsigned> Idxs,
const Twine &Name) {
assert(getNumOperands() == 2 && "NumOperands not initialized?");
@@ -1903,7 +1903,7 @@ BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2,
AssertOK();
}
-BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2,
+BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2,
Type *Ty, const Twine &Name,
BasicBlock *InsertAtEnd)
: Instruction(Ty, iType,
@@ -1938,8 +1938,8 @@ void BinaryOperator::AssertOK() {
"Tried to create a floating-point operation on a "
"non-floating-point type!");
break;
- case UDiv:
- case SDiv:
+ case UDiv:
+ case SDiv:
assert(getType() == LHS->getType() &&
"Arithmetic operation should return same type as operands!");
assert(getType()->isIntOrIntVectorTy() &&
@@ -1951,8 +1951,8 @@ void BinaryOperator::AssertOK() {
assert(getType()->isFPOrFPVectorTy() &&
"Incorrect operand type (not floating point) for FDIV");
break;
- case URem:
- case SRem:
+ case URem:
+ case SRem:
assert(getType() == LHS->getType() &&
"Arithmetic operation should return same type as operands!");
assert(getType()->isIntOrIntVectorTy() &&
@@ -2185,7 +2185,7 @@ bool CastInst::isLosslessCast() const {
Type *DstTy = getType();
if (SrcTy == DstTy)
return true;
-
+
// Pointer to pointer is always lossless.
if (SrcTy->isPointerTy())
return DstTy->isPointerTy();
@@ -2194,10 +2194,10 @@ bool CastInst::isLosslessCast() const {
/// This function determines if the CastInst does not require any bits to be
/// changed in order to effect the cast. Essentially, it identifies cases where
-/// no code gen is necessary for the cast, hence the name no-op cast. For
+/// no code gen is necessary for the cast, hence the name no-op cast. For
/// example, the following are all no-op casts:
/// # bitcast i32* %x to i8*
-/// # bitcast <2 x i32> %x to <4 x i16>
+/// # bitcast <2 x i32> %x to <4 x i16>
/// # ptrtoint i32* %x to i32 ; on 32-bit plaforms only
/// Determine if the described cast is a no-op.
bool CastInst::isNoopCast(Instruction::CastOps Opcode,
@@ -2208,7 +2208,7 @@ bool CastInst::isNoopCast(Instruction::CastOps Opcode,
default: llvm_unreachable("Invalid CastOp");
case Instruction::Trunc:
case Instruction::ZExt:
- case Instruction::SExt:
+ case Instruction::SExt:
case Instruction::FPTrunc:
case Instruction::FPExt:
case Instruction::UIToFP:
@@ -2247,7 +2247,7 @@ unsigned CastInst::isEliminableCastPair(
Type *DstIntPtrTy) {
// Define the 144 possibilities for these two cast instructions. The values
// in this matrix determine what to do in a given situation and select the
- // case in the switch below. The rows correspond to firstOp, the columns
+ // case in the switch below. The rows correspond to firstOp, the columns
// correspond to secondOp. In looking at the table below, keep in mind
// the following cast properties:
//
@@ -2315,16 +2315,16 @@ unsigned CastInst::isEliminableCastPair(
int ElimCase = CastResults[firstOp-Instruction::CastOpsBegin]
[secondOp-Instruction::CastOpsBegin];
switch (ElimCase) {
- case 0:
+ case 0:
// Categorically disallowed.
return 0;
- case 1:
+ case 1:
// Allowed, use first cast's opcode.
return firstOp;
- case 2:
+ case 2:
// Allowed, use second cast's opcode.
return secondOp;
- case 3:
+ case 3:
// No-op cast in second op implies firstOp as long as the DestTy
// is integer and we are not converting between a vector and a
// non-vector type.
@@ -2337,7 +2337,7 @@ unsigned CastInst::isEliminableCastPair(
if (DstTy->isFloatingPointTy())
return firstOp;
return 0;
- case 5:
+ case 5:
// No-op cast in first op implies secondOp as long as the SrcTy
// is an integer.
if (SrcTy->isIntegerTy())
@@ -2449,7 +2449,7 @@ unsigned CastInst::isEliminableCastPair(
case 17:
// (sitofp (zext x)) -> (uitofp x)
return Instruction::UIToFP;
- case 99:
+ case 99:
// Cast combination can't happen (error in input). This is for all cases
// where the MidTy is not the same for the two cast instructions.
llvm_unreachable("Invalid Cast Combination");
@@ -2458,7 +2458,7 @@ unsigned CastInst::isEliminableCastPair(
}
}
-CastInst *CastInst::Create(Instruction::CastOps op, Value *S, Type *Ty,
+CastInst *CastInst::Create(Instruction::CastOps op, Value *S, Type *Ty,
const Twine &Name, Instruction *InsertBefore) {
assert(castIsValid(op, S, Ty) && "Invalid cast!");
// Construct and return the appropriate CastInst subclass
@@ -2502,7 +2502,7 @@ CastInst *CastInst::Create(Instruction::CastOps op, Value *S, Type *Ty,
}
}
-CastInst *CastInst::CreateZExtOrBitCast(Value *S, Type *Ty,
+CastInst *CastInst::CreateZExtOrBitCast(Value *S, Type *Ty,
const Twine &Name,
Instruction *InsertBefore) {
if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
@@ -2510,7 +2510,7 @@ CastInst *CastInst::CreateZExtOrBitCast(Value *S, Type *Ty,
return Create(Instruction::ZExt, S, Ty, Name, InsertBefore);
}
-CastInst *CastInst::CreateZExtOrBitCast(Value *S, Type *Ty,
+CastInst *CastInst::CreateZExtOrBitCast(Value *S, Type *Ty,
const Twine &Name,
BasicBlock *InsertAtEnd) {
if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
@@ -2518,7 +2518,7 @@ CastInst *CastInst::CreateZExtOrBitCast(Value *S, Type *Ty,
return Create(Instruction::ZExt, S, Ty, Name, InsertAtEnd);
}
-CastInst *CastInst::CreateSExtOrBitCast(Value *S, Type *Ty,
+CastInst *CastInst::CreateSExtOrBitCast(Value *S, Type *Ty,
const Twine &Name,
Instruction *InsertBefore) {
if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
@@ -2526,7 +2526,7 @@ CastInst *CastInst::CreateSExtOrBitCast(Value *S, Type *Ty,
return Create(Instruction::SExt, S, Ty, Name, InsertBefore);
}
-CastInst *CastInst::CreateSExtOrBitCast(Value *S, Type *Ty,
+CastInst *CastInst::CreateSExtOrBitCast(Value *S, Type *Ty,
const Twine &Name,
BasicBlock *InsertAtEnd) {
if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
@@ -2543,7 +2543,7 @@ CastInst *CastInst::CreateTruncOrBitCast(Value *S, Type *Ty,
}
CastInst *CastInst::CreateTruncOrBitCast(Value *S, Type *Ty,
- const Twine &Name,
+ const Twine &Name,
BasicBlock *InsertAtEnd) {
if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
@@ -2636,7 +2636,7 @@ CastInst *CastInst::CreateIntegerCast(Value *C, Type *Ty,
return Create(opcode, C, Ty, Name, InsertBefore);
}
-CastInst *CastInst::CreateIntegerCast(Value *C, Type *Ty,
+CastInst *CastInst::CreateIntegerCast(Value *C, Type *Ty,
bool isSigned, const Twine &Name,
BasicBlock *InsertAtEnd) {
assert(C->getType()->isIntOrIntVectorTy() && Ty->isIntOrIntVectorTy() &&
@@ -2650,8 +2650,8 @@ CastInst *CastInst::CreateIntegerCast(Value *C, Type *Ty,
return Create(opcode, C, Ty, Name, InsertAtEnd);
}
-CastInst *CastInst::CreateFPCast(Value *C, Type *Ty,
- const Twine &Name,
+CastInst *CastInst::CreateFPCast(Value *C, Type *Ty,
+ const Twine &Name,
Instruction *InsertBefore) {
assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
"Invalid cast");
@@ -2663,8 +2663,8 @@ CastInst *CastInst::CreateFPCast(Value *C, Type *Ty,
return Create(opcode, C, Ty, Name, InsertBefore);
}
-CastInst *CastInst::CreateFPCast(Value *C, Type *Ty,
- const Twine &Name,
+CastInst *CastInst::CreateFPCast(Value *C, Type *Ty,
+ const Twine &Name,
BasicBlock *InsertAtEnd) {
assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
"Invalid cast");
@@ -2707,7 +2707,7 @@ bool CastInst::isCastable(Type *SrcTy, Type *DestTy) {
return DestBits == SrcBits;
// Casting from something else
return SrcTy->isPointerTy();
- }
+ }
if (DestTy->isFloatingPointTy()) { // Casting to floating pt
if (SrcTy->isIntegerTy()) // Casting from integral
return true;
@@ -2724,7 +2724,7 @@ bool CastInst::isCastable(Type *SrcTy, Type *DestTy) {
if (SrcTy->isPointerTy()) // Casting from pointer
return true;
return SrcTy->isIntegerTy(); // Casting from integral
- }
+ }
if (DestTy->isX86_MMXTy()) {
if (SrcTy->isVectorTy())
return DestBits == SrcBits; // 64-bit vector to MMX
@@ -2834,10 +2834,10 @@ CastInst::getCastOpcode(
return BitCast; // Same size, No-op cast
}
} else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt
- if (DestIsSigned)
+ if (DestIsSigned)
return FPToSI; // FP -> sint
else
- return FPToUI; // FP -> uint
+ return FPToUI; // FP -> uint
} else if (SrcTy->isVectorTy()) {
assert(DestBits == SrcBits &&
"Casting vector to integer of different width");
@@ -2898,7 +2898,7 @@ CastInst::getCastOpcode(
/// could be broken out into the separate constructors but it is useful to have
/// it in one place and to eliminate the redundant code for getting the sizes
/// of the types involved.
-bool
+bool
CastInst::castIsValid(Instruction::CastOps op, Value *S, Type *DstTy) {
// Check for type sanity on the arguments
Type *SrcTy = S->getType();
@@ -2928,7 +2928,7 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, Type *DstTy) {
case Instruction::ZExt:
return SrcTy->isIntOrIntVectorTy() && DstTy->isIntOrIntVectorTy() &&
SrcLength == DstLength && SrcBitSize < DstBitSize;
- case Instruction::SExt:
+ case Instruction::SExt:
return SrcTy->isIntOrIntVectorTy() && DstTy->isIntOrIntVectorTy() &&
SrcLength == DstLength && SrcBitSize < DstBitSize;
case Instruction::FPTrunc:
@@ -3019,138 +3019,138 @@ TruncInst::TruncInst(
TruncInst::TruncInst(
Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
-) : CastInst(Ty, Trunc, S, Name, InsertAtEnd) {
+) : CastInst(Ty, Trunc, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal Trunc");
}
ZExtInst::ZExtInst(
Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
-) : CastInst(Ty, ZExt, S, Name, InsertBefore) {
+) : CastInst(Ty, ZExt, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal ZExt");
}
ZExtInst::ZExtInst(
Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
-) : CastInst(Ty, ZExt, S, Name, InsertAtEnd) {
+) : CastInst(Ty, ZExt, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal ZExt");
}
SExtInst::SExtInst(
Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
-) : CastInst(Ty, SExt, S, Name, InsertBefore) {
+) : CastInst(Ty, SExt, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal SExt");
}
SExtInst::SExtInst(
Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
-) : CastInst(Ty, SExt, S, Name, InsertAtEnd) {
+) : CastInst(Ty, SExt, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal SExt");
}
FPTruncInst::FPTruncInst(
Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
-) : CastInst(Ty, FPTrunc, S, Name, InsertBefore) {
+) : CastInst(Ty, FPTrunc, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPTrunc");
}
FPTruncInst::FPTruncInst(
Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
-) : CastInst(Ty, FPTrunc, S, Name, InsertAtEnd) {
+) : CastInst(Ty, FPTrunc, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPTrunc");
}
FPExtInst::FPExtInst(
Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
-) : CastInst(Ty, FPExt, S, Name, InsertBefore) {
+) : CastInst(Ty, FPExt, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPExt");
}
FPExtInst::FPExtInst(
Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
-) : CastInst(Ty, FPExt, S, Name, InsertAtEnd) {
+) : CastInst(Ty, FPExt, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPExt");
}
UIToFPInst::UIToFPInst(
Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
-) : CastInst(Ty, UIToFP, S, Name, InsertBefore) {
+) : CastInst(Ty, UIToFP, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal UIToFP");
}
UIToFPInst::UIToFPInst(
Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
-) : CastInst(Ty, UIToFP, S, Name, InsertAtEnd) {
+) : CastInst(Ty, UIToFP, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal UIToFP");
}
SIToFPInst::SIToFPInst(
Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
-) : CastInst(Ty, SIToFP, S, Name, InsertBefore) {
+) : CastInst(Ty, SIToFP, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal SIToFP");
}
SIToFPInst::SIToFPInst(
Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
-) : CastInst(Ty, SIToFP, S, Name, InsertAtEnd) {
+) : CastInst(Ty, SIToFP, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal SIToFP");
}
FPToUIInst::FPToUIInst(
Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
-) : CastInst(Ty, FPToUI, S, Name, InsertBefore) {
+) : CastInst(Ty, FPToUI, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToUI");
}
FPToUIInst::FPToUIInst(
Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
-) : CastInst(Ty, FPToUI, S, Name, InsertAtEnd) {
+) : CastInst(Ty, FPToUI, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToUI");
}
FPToSIInst::FPToSIInst(
Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
-) : CastInst(Ty, FPToSI, S, Name, InsertBefore) {
+) : CastInst(Ty, FPToSI, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToSI");
}
FPToSIInst::FPToSIInst(
Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
-) : CastInst(Ty, FPToSI, S, Name, InsertAtEnd) {
+) : CastInst(Ty, FPToSI, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToSI");
}
PtrToIntInst::PtrToIntInst(
Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
-) : CastInst(Ty, PtrToInt, S, Name, InsertBefore) {
+) : CastInst(Ty, PtrToInt, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal PtrToInt");
}
PtrToIntInst::PtrToIntInst(
Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
-) : CastInst(Ty, PtrToInt, S, Name, InsertAtEnd) {
+) : CastInst(Ty, PtrToInt, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal PtrToInt");
}
IntToPtrInst::IntToPtrInst(
Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
-) : CastInst(Ty, IntToPtr, S, Name, InsertBefore) {
+) : CastInst(Ty, IntToPtr, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal IntToPtr");
}
IntToPtrInst::IntToPtrInst(
Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
-) : CastInst(Ty, IntToPtr, S, Name, InsertAtEnd) {
+) : CastInst(Ty, IntToPtr, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal IntToPtr");
}
BitCastInst::BitCastInst(
Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
-) : CastInst(Ty, BitCast, S, Name, InsertBefore) {
+) : CastInst(Ty, BitCast, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal BitCast");
}
BitCastInst::BitCastInst(
Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
-) : CastInst(Ty, BitCast, S, Name, InsertAtEnd) {
+) : CastInst(Ty, BitCast, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal BitCast");
}
@@ -3205,7 +3205,7 @@ CmpInst::Create(OtherOps Op, Predicate predicate, Value *S1, Value *S2,
return new ICmpInst(CmpInst::Predicate(predicate),
S1, S2, Name);
}
-
+
if (InsertBefore)
return new FCmpInst(InsertBefore, CmpInst::Predicate(predicate),
S1, S2, Name);
@@ -3312,8 +3312,8 @@ StringRef CmpInst::getPredicateName(Predicate Pred) {
ICmpInst::Predicate ICmpInst::getSignedPredicate(Predicate pred) {
switch (pred) {
default: llvm_unreachable("Unknown icmp predicate!");
- case ICMP_EQ: case ICMP_NE:
- case ICMP_SGT: case ICMP_SLT: case ICMP_SGE: case ICMP_SLE:
+ case ICMP_EQ: case ICMP_NE:
+ case ICMP_SGT: case ICMP_SLT: case ICMP_SGE: case ICMP_SLE:
return pred;
case ICMP_UGT: return ICMP_SGT;
case ICMP_ULT: return ICMP_SLT;
@@ -3325,8 +3325,8 @@ ICmpInst::Predicate ICmpInst::getSignedPredicate(Predicate pred) {
ICmpInst::Predicate ICmpInst::getUnsignedPredicate(Predicate pred) {
switch (pred) {
default: llvm_unreachable("Unknown icmp predicate!");
- case ICMP_EQ: case ICMP_NE:
- case ICMP_UGT: case ICMP_ULT: case ICMP_UGE: case ICMP_ULE:
+ case ICMP_EQ: case ICMP_NE:
+ case ICMP_UGT: case ICMP_ULT: case ICMP_UGE: case ICMP_ULE:
return pred;
case ICMP_SGT: return ICMP_UGT;
case ICMP_SLT: return ICMP_ULT;
@@ -3371,7 +3371,7 @@ CmpInst::Predicate CmpInst::getSwappedPredicate(Predicate pred) {
case ICMP_ULT: return ICMP_UGT;
case ICMP_UGE: return ICMP_ULE;
case ICMP_ULE: return ICMP_UGE;
-
+
case FCMP_FALSE: case FCMP_TRUE:
case FCMP_OEQ: case FCMP_ONE:
case FCMP_UEQ: case FCMP_UNE:
@@ -3422,7 +3422,7 @@ CmpInst::Predicate CmpInst::getSignedPredicate(Predicate pred) {
bool CmpInst::isUnsigned(Predicate predicate) {
switch (predicate) {
default: return false;
- case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE: return true;
}
}
@@ -3430,7 +3430,7 @@ bool CmpInst::isUnsigned(Predicate predicate) {
bool CmpInst::isSigned(Predicate predicate) {
switch (predicate) {
default: return false;
- case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE: return true;
}
}
@@ -3438,17 +3438,17 @@ bool CmpInst::isSigned(Predicate predicate) {
bool CmpInst::isOrdered(Predicate predicate) {
switch (predicate) {
default: return false;
- case FCmpInst::FCMP_OEQ: case FCmpInst::FCMP_ONE: case FCmpInst::FCMP_OGT:
- case FCmpInst::FCMP_OLT: case FCmpInst::FCMP_OGE: case FCmpInst::FCMP_OLE:
+ case FCmpInst::FCMP_OEQ: case FCmpInst::FCMP_ONE: case FCmpInst::FCMP_OGT:
+ case FCmpInst::FCMP_OLT: case FCmpInst::FCMP_OGE: case FCmpInst::FCMP_OLE:
case FCmpInst::FCMP_ORD: return true;
}
}
-
+
bool CmpInst::isUnordered(Predicate predicate) {
switch (predicate) {
default: return false;
- case FCmpInst::FCMP_UEQ: case FCmpInst::FCMP_UNE: case FCmpInst::FCMP_UGT:
- case FCmpInst::FCMP_ULT: case FCmpInst::FCMP_UGE: case FCmpInst::FCMP_ULE:
+ case FCmpInst::FCMP_UEQ: case FCmpInst::FCMP_UNE: case FCmpInst::FCMP_UGT:
+ case FCmpInst::FCMP_ULT: case FCmpInst::FCMP_UGE: case FCmpInst::FCMP_ULE:
case FCmpInst::FCMP_UNO: return true;
}
}
@@ -3619,7 +3619,7 @@ void IndirectBrInst::init(Value *Address, unsigned NumDests) {
void IndirectBrInst::growOperands() {
unsigned e = getNumOperands();
unsigned NumOps = e*2;
-
+
ReservedSpace = NumOps;
growHungoffUses(ReservedSpace);
}
@@ -3665,13 +3665,13 @@ void IndirectBrInst::addDestination(BasicBlock *DestBB) {
/// indirectbr instruction.
void IndirectBrInst::removeDestination(unsigned idx) {
assert(idx < getNumOperands()-1 && "Successor index out of range!");
-
+
unsigned NumOps = getNumOperands();
Use *OL = getOperandList();
// Replace this value with the last one.
OL[idx+1] = OL[NumOps-1];
-
+
// Nuke the last value.
OL[NumOps-1].set(nullptr);
setNumHungOffUseOperands(NumOps-1);
@@ -3725,7 +3725,7 @@ LoadInst *LoadInst::cloneImpl() const {
StoreInst *StoreInst::cloneImpl() const {
return new StoreInst(getOperand(0), getOperand(1), isVolatile(),
getAlignment(), getOrdering(), getSyncScopeID());
-
+
}
AtomicCmpXchgInst *AtomicCmpXchgInst::cloneImpl() const {
diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h
index d5046d644187..3b2e1e81b1c1 100644
--- a/lib/IR/LLVMContextImpl.h
+++ b/lib/IR/LLVMContextImpl.h
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file declares LLVMContextImpl, the opaque implementation
+// This file declares LLVMContextImpl, the opaque implementation
// of LLVMContext.
//
//===----------------------------------------------------------------------===//
@@ -1217,7 +1217,7 @@ public:
/// OwnedModules - The set of modules instantiated in this context, and which
/// will be automatically deleted if this context is deleted.
SmallPtrSet<Module*, 4> OwnedModules;
-
+
LLVMContext::InlineAsmDiagHandlerTy InlineAsmDiagHandler = nullptr;
void *InlineAsmDiagContext = nullptr;
@@ -1265,10 +1265,10 @@ public:
using ArrayConstantsTy = ConstantUniqueMap<ConstantArray>;
ArrayConstantsTy ArrayConstants;
-
+
using StructConstantsTy = ConstantUniqueMap<ConstantStruct>;
StructConstantsTy StructConstants;
-
+
using VectorConstantsTy = ConstantUniqueMap<ConstantVector>;
VectorConstantsTy VectorConstants;
@@ -1293,11 +1293,11 @@ public:
Type VoidTy, LabelTy, HalfTy, FloatTy, DoubleTy, MetadataTy, TokenTy;
Type X86_FP80Ty, FP128Ty, PPC_FP128Ty, X86_MMXTy;
IntegerType Int1Ty, Int8Ty, Int16Ty, Int32Ty, Int64Ty, Int128Ty;
-
+
/// TypeAllocator - All dynamically allocated types are allocated from this.
/// They live forever until the context is torn down.
BumpPtrAllocator TypeAllocator;
-
+
DenseMap<unsigned, IntegerType*> IntegerTypes;
using FunctionTypeSet = DenseSet<FunctionType *, FunctionTypeKeyInfo>;
@@ -1306,7 +1306,7 @@ public:
StructTypeSet AnonStructTypes;
StringMap<StructType*> NamedStructTypes;
unsigned NamedStructTypesUniqueID = 0;
-
+
DenseMap<std::pair<Type *, uint64_t>, ArrayType*> ArrayTypes;
DenseMap<std::pair<Type *, unsigned>, VectorType*> VectorTypes;
DenseMap<Type*, PointerType*> PointerTypes; // Pointers in AddrSpace = 0
@@ -1317,7 +1317,7 @@ public:
/// whether or not a value has an entry in this map.
using ValueHandlesTy = DenseMap<Value *, ValueHandleBase *>;
ValueHandlesTy ValueHandles;
-
+
/// CustomMDKindNames - Map to hold the metadata string to ID mapping.
StringMap<unsigned> CustomMDKindNames;
diff --git a/lib/IR/SymbolTableListTraitsImpl.h b/lib/IR/SymbolTableListTraitsImpl.h
index 6ddab6b4c69d..d4ad1eba33c6 100644
--- a/lib/IR/SymbolTableListTraitsImpl.h
+++ b/lib/IR/SymbolTableListTraitsImpl.h
@@ -33,17 +33,17 @@ void SymbolTableListTraits<ValueSubClass>::setSymTabObject(TPtr *Dest,
// Do it.
*Dest = Src;
-
+
// Get the new SymTab object.
ValueSymbolTable *NewST = getSymTab(getListOwner());
-
+
// If there is nothing to do, quick exit.
if (OldST == NewST) return;
-
+
// Move all the elements from the old symtab to the new one.
ListTy &ItemList = getList(getListOwner());
if (ItemList.empty()) return;
-
+
if (OldST) {
// Remove all entries from the previous symtab.
for (auto I = ItemList.begin(); I != ItemList.end(); ++I)
@@ -57,7 +57,7 @@ void SymbolTableListTraits<ValueSubClass>::setSymTabObject(TPtr *Dest,
if (I->hasName())
NewST->reinsertValue(&*I);
}
-
+
}
template <typename ValueSubClass>
diff --git a/lib/IR/ValueSymbolTable.cpp b/lib/IR/ValueSymbolTable.cpp
index 0a7f2803cd4c..f4bea5604043 100644
--- a/lib/IR/ValueSymbolTable.cpp
+++ b/lib/IR/ValueSymbolTable.cpp
@@ -79,7 +79,7 @@ void ValueSymbolTable::reinsertValue(Value* V) {
// *V << "\n");
return;
}
-
+
// Otherwise, there is a naming conflict. Rename this value.
SmallString<256> UniqueName(V->getName().begin(), V->getName().end());
@@ -107,7 +107,7 @@ ValueName *ValueSymbolTable::createValueName(StringRef Name, Value *V) {
// << *V << "\n");
return &*IterBool.first;
}
-
+
// Otherwise, there is a naming conflict. Rename this value.
SmallString<256> UniqueName(Name.begin(), Name.end());
return makeUniqueName(V, UniqueName);
diff --git a/lib/LTO/ThinLTOCodeGenerator.cpp b/lib/LTO/ThinLTOCodeGenerator.cpp
index 90d0f9bdb885..642e538ecf92 100644
--- a/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -422,7 +422,7 @@ public:
int TempFD;
llvm::sys::path::remove_filename(CachePath);
sys::path::append(TempFilename, CachePath, "Thin-%%%%%%.tmp.o");
- std::error_code EC =
+ std::error_code EC =
sys::fs::createUniqueFile(TempFilename, TempFD, TempFilename);
if (EC) {
errs() << "Error: " << EC.message() << "\n";
@@ -432,7 +432,7 @@ public:
raw_fd_ostream OS(TempFD, /* ShouldClose */ true);
OS << OutputBuffer.getBuffer();
}
- // Rename temp file to final destination; rename is atomic
+ // Rename temp file to final destination; rename is atomic
EC = sys::fs::rename(TempFilename, EntryPath);
if (EC)
sys::fs::remove(TempFilename);
@@ -1048,10 +1048,10 @@ void ThinLTOCodeGenerator::run() {
if (SavedObjectsDirectoryPath.empty()) {
// We need to generated a memory buffer for the linker.
if (!CacheEntryPath.empty()) {
- // When cache is enabled, reload from the cache if possible.
+ // When cache is enabled, reload from the cache if possible.
// Releasing the buffer from the heap and reloading it from the
- // cache file with mmap helps us to lower memory pressure.
- // The freed memory can be used for the next input file.
+ // cache file with mmap helps us to lower memory pressure.
+ // The freed memory can be used for the next input file.
// The final binary link will read from the VFS cache (hopefully!)
// or from disk (if the memory pressure was too high).
auto ReloadedBufferOrErr = CacheEntry.tryLoadingBuffer();
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index 92f615180561..ae02f50bf8bd 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -337,7 +337,7 @@ void MCAsmStreamer::AddComment(const Twine &T, bool EOL) {
if (!IsVerboseAsm) return;
T.toVector(CommentToEmit);
-
+
if (EOL)
CommentToEmit.push_back('\n'); // Place comment in a new line.
}
@@ -655,7 +655,7 @@ void MCAsmStreamer::EmitSyntaxDirective() {
EmitEOL();
}
// FIXME: Currently emit unprefix'ed registers.
- // The intel_syntax directive has one optional argument
+ // The intel_syntax directive has one optional argument
// with may have a value of prefix or noprefix.
}
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 1470e026d985..1e23b6d816e8 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -550,7 +550,7 @@ static void writeFragment(raw_ostream &OS, const MCAssembler &Asm,
break;
}
- case MCFragment::FT_Data:
+ case MCFragment::FT_Data:
++stats::EmittedDataFragments;
OS << cast<MCDataFragment>(F).getContents();
break;
@@ -822,6 +822,9 @@ void MCAssembler::layout(MCAsmLayout &Layout) {
} else if (auto *FragWithFixups = dyn_cast<MCCVDefRangeFragment>(&Frag)) {
Fixups = FragWithFixups->getFixups();
Contents = FragWithFixups->getContents();
+ } else if (auto *FragWithFixups = dyn_cast<MCDwarfLineAddrFragment>(&Frag)) {
+ Fixups = FragWithFixups->getFixups();
+ Contents = FragWithFixups->getContents();
} else
llvm_unreachable("Unknown fragment with fixups!");
for (const MCFixup &Fixup : Fixups) {
@@ -951,16 +954,43 @@ bool MCAssembler::relaxDwarfLineAddr(MCAsmLayout &Layout,
MCContext &Context = Layout.getAssembler().getContext();
uint64_t OldSize = DF.getContents().size();
int64_t AddrDelta;
- bool Abs = DF.getAddrDelta().evaluateKnownAbsolute(AddrDelta, Layout);
- assert(Abs && "We created a line delta with an invalid expression");
- (void) Abs;
+ bool Abs;
+ if (getBackend().requiresDiffExpressionRelocations())
+ Abs = DF.getAddrDelta().evaluateAsAbsolute(AddrDelta, Layout);
+ else {
+ Abs = DF.getAddrDelta().evaluateKnownAbsolute(AddrDelta, Layout);
+ assert(Abs && "We created a line delta with an invalid expression");
+ }
int64_t LineDelta;
LineDelta = DF.getLineDelta();
- SmallString<8> &Data = DF.getContents();
+ SmallVectorImpl<char> &Data = DF.getContents();
Data.clear();
raw_svector_ostream OSE(Data);
- MCDwarfLineAddr::Encode(Context, getDWARFLinetableParams(), LineDelta,
- AddrDelta, OSE);
+ DF.getFixups().clear();
+
+ if (Abs) {
+ MCDwarfLineAddr::Encode(Context, getDWARFLinetableParams(), LineDelta,
+ AddrDelta, OSE);
+ } else {
+ uint32_t Offset;
+ uint32_t Size;
+ bool SetDelta = MCDwarfLineAddr::FixedEncode(Context,
+ getDWARFLinetableParams(),
+ LineDelta, AddrDelta,
+ OSE, &Offset, &Size);
+ // Add Fixups for address delta or new address.
+ const MCExpr *FixupExpr;
+ if (SetDelta) {
+ FixupExpr = &DF.getAddrDelta();
+ } else {
+ const MCBinaryExpr *ABE = cast<MCBinaryExpr>(&DF.getAddrDelta());
+ FixupExpr = ABE->getLHS();
+ }
+ DF.getFixups().push_back(
+ MCFixup::create(Offset, FixupExpr,
+ MCFixup::getKindForSize(Size, false /*isPCRel*/)));
+ }
+
return OldSize != Data.size();
}
diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp
index 30e0bb562644..ad0a39991c53 100644
--- a/lib/MC/MCDisassembler/Disassembler.cpp
+++ b/lib/MC/MCDisassembler/Disassembler.cpp
@@ -38,7 +38,7 @@ using namespace llvm;
// LLVMCreateDisasm() creates a disassembler for the TripleName. Symbolic
// disassembly is supported by passing a block of information in the DisInfo
// parameter and specifying the TagType and callback functions as described in
-// the header llvm-c/Disassembler.h . The pointer to the block and the
+// the header llvm-c/Disassembler.h . The pointer to the block and the
// functions can all be passed as NULL. If successful, this returns a
// disassembler context. If not, it returns NULL.
//
diff --git a/lib/MC/MCDisassembler/Disassembler.h b/lib/MC/MCDisassembler/Disassembler.h
index 25d17dafb576..f638fdc781d7 100644
--- a/lib/MC/MCDisassembler/Disassembler.h
+++ b/lib/MC/MCDisassembler/Disassembler.h
@@ -4,10 +4,10 @@
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
-// This file defines the interface for the Disassembly library's disassembler
+// This file defines the interface for the Disassembly library's disassembler
// context. The disassembler is responsible for producing strings for
// individual instructions according to a given architecture and disassembly
// syntax.
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index 6131fcd658b2..0461c2564ccf 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -492,7 +492,7 @@ MCDwarfLineTableHeader::Emit(MCStreamer *MCOS, MCDwarfLineTableParams Params,
// Parameters of the state machine, are next.
MCOS->EmitIntValue(context.getAsmInfo()->getMinInstAlignment(), 1);
- // maximum_operations_per_instruction
+ // maximum_operations_per_instruction
// For non-VLIW architectures this field is always 1.
// FIXME: VLIW architectures need to update this field accordingly.
if (LineTableVersion >= 4)
@@ -731,6 +731,57 @@ void MCDwarfLineAddr::Encode(MCContext &Context, MCDwarfLineTableParams Params,
}
}
+bool MCDwarfLineAddr::FixedEncode(MCContext &Context,
+ MCDwarfLineTableParams Params,
+ int64_t LineDelta, uint64_t AddrDelta,
+ raw_ostream &OS,
+ uint32_t *Offset, uint32_t *Size) {
+ if (LineDelta != INT64_MAX) {
+ OS << char(dwarf::DW_LNS_advance_line);
+ encodeSLEB128(LineDelta, OS);
+ }
+
+ // Use address delta to adjust address or use absolute address to adjust
+ // address.
+ bool SetDelta;
+ // According to DWARF spec., the DW_LNS_fixed_advance_pc opcode takes a
+ // single uhalf (unencoded) operand. So, the maximum value of AddrDelta
+ // is 65535. We set a conservative upper bound for it for relaxation.
+ if (AddrDelta > 60000) {
+ const MCAsmInfo *asmInfo = Context.getAsmInfo();
+ unsigned AddrSize = asmInfo->getCodePointerSize();
+
+ OS << char(dwarf::DW_LNS_extended_op);
+ encodeULEB128(1 + AddrSize, OS);
+ OS << char(dwarf::DW_LNE_set_address);
+ // Generate fixup for the address.
+ *Offset = OS.tell();
+ *Size = AddrSize;
+ SetDelta = false;
+ std::vector<uint8_t> FillData;
+ FillData.insert(FillData.begin(), AddrSize, 0);
+ OS.write(reinterpret_cast<char *>(FillData.data()), AddrSize);
+ } else {
+ OS << char(dwarf::DW_LNS_fixed_advance_pc);
+ // Generate fixup for 2-bytes address delta.
+ *Offset = OS.tell();
+ *Size = 2;
+ SetDelta = true;
+ OS << char(0);
+ OS << char(0);
+ }
+
+ if (LineDelta == INT64_MAX) {
+ OS << char(dwarf::DW_LNS_extended_op);
+ OS << char(1);
+ OS << char(dwarf::DW_LNE_end_sequence);
+ } else {
+ OS << char(dwarf::DW_LNS_copy);
+ }
+
+ return SetDelta;
+}
+
// Utility function to write a tuple for .debug_abbrev.
static void EmitAbbrev(MCStreamer *MCOS, uint64_t Name, uint64_t Form) {
MCOS->EmitULEB128IntValue(Name);
diff --git a/lib/MC/MCInstrAnalysis.cpp b/lib/MC/MCInstrAnalysis.cpp
index 8223f3a5c66f..4d7c89116893 100644
--- a/lib/MC/MCInstrAnalysis.cpp
+++ b/lib/MC/MCInstrAnalysis.cpp
@@ -24,6 +24,11 @@ bool MCInstrAnalysis::clearsSuperRegisters(const MCRegisterInfo &MRI,
return false;
}
+bool MCInstrAnalysis::isDependencyBreaking(const MCSubtargetInfo &STI,
+ const MCInst &Inst) const {
+ return false;
+}
+
bool MCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr,
uint64_t Size, uint64_t &Target) const {
if (Inst.getNumOperands() == 0 ||
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
index 29d34a8c1e3e..b88d2d801822 100644
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -950,8 +950,19 @@ void MCObjectFileInfo::InitMCObjectFileInfo(const Triple &TheTriple, bool PIC,
}
MCSection *MCObjectFileInfo::getDwarfTypesSection(uint64_t Hash) const {
- return Ctx->getELFSection(".debug_types", ELF::SHT_PROGBITS, ELF::SHF_GROUP,
- 0, utostr(Hash));
+ switch (TT.getObjectFormat()) {
+ case Triple::ELF:
+ return Ctx->getELFSection(".debug_types", ELF::SHT_PROGBITS, ELF::SHF_GROUP,
+ 0, utostr(Hash));
+ case Triple::MachO:
+ case Triple::COFF:
+ case Triple::Wasm:
+ case Triple::UnknownObjectFormat:
+ report_fatal_error("Cannot get DWARF types section for this object file "
+ "format: not implemented.");
+ break;
+ }
+ llvm_unreachable("Unknown ObjectFormatType");
}
MCSection *
diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp
index 67e3512cc5bd..7bf14968c973 100644
--- a/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/lib/MC/MCParser/ELFAsmParser.cpp
@@ -254,7 +254,7 @@ bool ELFAsmParser::ParseSectionName(StringRef &SectionName) {
if (getLexer().is(AsmToken::Comma) ||
getLexer().is(AsmToken::EndOfStatement))
break;
-
+
unsigned CurSize;
if (getLexer().is(AsmToken::String)) {
CurSize = getTok().getIdentifier().size() + 2;
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index 8dd4b61be68f..21a9c3604cfc 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -514,7 +514,7 @@ void MCStreamer::EmitCFIEscape(StringRef Values) {
void MCStreamer::EmitCFIGnuArgsSize(int64_t Size) {
MCSymbol *Label = EmitCFILabel();
- MCCFIInstruction Instruction =
+ MCCFIInstruction Instruction =
MCCFIInstruction::createGnuArgsSize(Label, Size);
MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
if (!CurFrame)
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
index a464af1d42a7..2664528909af 100644
--- a/lib/MC/MachObjectWriter.cpp
+++ b/lib/MC/MachObjectWriter.cpp
@@ -952,7 +952,7 @@ uint64_t MachObjectWriter::writeObject(MCAssembler &Asm,
const DataRegionData *Data = &(*it);
uint64_t Start = getSymbolAddress(*Data->Start, Layout);
uint64_t End;
- if (Data->End)
+ if (Data->End)
End = getSymbolAddress(*Data->End, Layout);
else
report_fatal_error("Data region not terminated");
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index d72da3187e07..85b1913cb23b 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -339,7 +339,7 @@ unsigned COFFObjectFile::getSectionID(SectionRef Sec) const {
bool COFFObjectFile::isSectionVirtual(DataRefImpl Ref) const {
const coff_section *Sec = toSec(Ref);
- // In COFF, a virtual section won't have any in-file
+ // In COFF, a virtual section won't have any in-file
// content, so the file pointer to the content will be zero.
return Sec->PointerToRawData == 0;
}
diff --git a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
index f67a0db690eb..745f79cd77f3 100644
--- a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
+++ b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
@@ -550,6 +550,10 @@ template <> void SymbolRecordImpl<ThreadLocalDataSym>::map(IO &IO) {
IO.mapRequired("DisplayName", Symbol.Name);
}
+template <> void SymbolRecordImpl<UsingNamespaceSym>::map(IO &IO) {
+ IO.mapRequired("Namespace", Symbol.Name);
+}
+
} // end namespace detail
} // end namespace CodeViewYAML
} // end namespace llvm
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index 24005c1890c9..e9e429c8031b 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -1752,7 +1752,7 @@ IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
if (compareAbsoluteValue(V) == cmpLessThan)
V = scalbn(V, -1, rmNearestTiesToEven);
V.sign = sign;
-
+
fs = subtract(V, rmNearestTiesToEven);
assert(fs==opOK);
}
diff --git a/lib/Support/ConvertUTF.cpp b/lib/Support/ConvertUTF.cpp
index e56854a3ae42..8f02fae4f558 100644
--- a/lib/Support/ConvertUTF.cpp
+++ b/lib/Support/ConvertUTF.cpp
@@ -8,9 +8,9 @@
*===------------------------------------------------------------------------=*/
/*
* Copyright 2001-2004 Unicode, Inc.
- *
+ *
* Disclaimer
- *
+ *
* This source code is provided as is by Unicode, Inc. No claims are
* made as to fitness for any particular purpose. No warranties of any
* kind are expressed or implied. The recipient agrees to determine
@@ -18,9 +18,9 @@
* purchased on magnetic or optical media from Unicode, Inc., the
* sole remedy for any claim will be exchange of defective media
* within 90 days of receipt.
- *
+ *
* Limitations on Rights to Redistribute This Code
- *
+ *
* Unicode, Inc. hereby grants the right to freely use the information
* supplied in this file in the creation of products supporting the
* Unicode Standard, and to make copies of this file in any form
@@ -117,7 +117,7 @@ static const char trailingBytesForUTF8[256] = {
* This table contains as many values as there might be trailing bytes
* in a UTF-8 sequence.
*/
-static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
+static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
0x03C82080UL, 0xFA082080UL, 0x82082080UL };
/*
@@ -143,7 +143,7 @@ static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC
/* --------------------------------------------------------------------- */
ConversionResult ConvertUTF32toUTF16 (
- const UTF32** sourceStart, const UTF32* sourceEnd,
+ const UTF32** sourceStart, const UTF32* sourceEnd,
UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
ConversionResult result = conversionOK;
const UTF32* source = *sourceStart;
@@ -192,7 +192,7 @@ ConversionResult ConvertUTF32toUTF16 (
/* --------------------------------------------------------------------- */
ConversionResult ConvertUTF16toUTF32 (
- const UTF16** sourceStart, const UTF16* sourceEnd,
+ const UTF16** sourceStart, const UTF16* sourceEnd,
UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
ConversionResult result = conversionOK;
const UTF16* source = *sourceStart;
@@ -246,7 +246,7 @@ if (result == sourceIllegal) {
return result;
}
ConversionResult ConvertUTF16toUTF8 (
- const UTF16** sourceStart, const UTF16* sourceEnd,
+ const UTF16** sourceStart, const UTF16* sourceEnd,
UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
ConversionResult result = conversionOK;
const UTF16* source = *sourceStart;
@@ -255,7 +255,7 @@ ConversionResult ConvertUTF16toUTF8 (
UTF32 ch;
unsigned short bytesToWrite = 0;
const UTF32 byteMask = 0xBF;
- const UTF32 byteMark = 0x80;
+ const UTF32 byteMark = 0x80;
const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */
ch = *source++;
/* If we have a surrogate pair, convert to UTF32 first. */
@@ -316,7 +316,7 @@ ConversionResult ConvertUTF16toUTF8 (
/* --------------------------------------------------------------------- */
ConversionResult ConvertUTF32toUTF8 (
- const UTF32** sourceStart, const UTF32* sourceEnd,
+ const UTF32** sourceStart, const UTF32* sourceEnd,
UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
ConversionResult result = conversionOK;
const UTF32* source = *sourceStart;
@@ -325,7 +325,7 @@ ConversionResult ConvertUTF32toUTF8 (
UTF32 ch;
unsigned short bytesToWrite = 0;
const UTF32 byteMask = 0xBF;
- const UTF32 byteMark = 0x80;
+ const UTF32 byteMark = 0x80;
ch = *source++;
if (flags == strictConversion ) {
/* UTF-16 surrogate values are illegal in UTF-32 */
@@ -347,7 +347,7 @@ ConversionResult ConvertUTF32toUTF8 (
ch = UNI_REPLACEMENT_CHAR;
result = sourceIllegal;
}
-
+
target += bytesToWrite;
if (target > targetEnd) {
--source; /* Back up source pointer! */
@@ -540,7 +540,7 @@ Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd) {
/* --------------------------------------------------------------------- */
ConversionResult ConvertUTF8toUTF16 (
- const UTF8** sourceStart, const UTF8* sourceEnd,
+ const UTF8** sourceStart, const UTF8* sourceEnd,
UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
ConversionResult result = conversionOK;
const UTF8* source = *sourceStart;
@@ -613,7 +613,7 @@ ConversionResult ConvertUTF8toUTF16 (
/* --------------------------------------------------------------------- */
static ConversionResult ConvertUTF8toUTF32Impl(
- const UTF8** sourceStart, const UTF8* sourceEnd,
+ const UTF8** sourceStart, const UTF8* sourceEnd,
UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags,
Boolean InputIsPartial) {
ConversionResult result = conversionOK;
diff --git a/lib/Support/CrashRecoveryContext.cpp b/lib/Support/CrashRecoveryContext.cpp
index fd5d097d2b7e..be4b5c3e01c3 100644
--- a/lib/Support/CrashRecoveryContext.cpp
+++ b/lib/Support/CrashRecoveryContext.cpp
@@ -49,7 +49,7 @@ public:
/// Called when the separate crash-recovery thread was finished, to
/// indicate that we don't need to clear the thread-local CurrentContext.
- void setSwitchedThread() {
+ void setSwitchedThread() {
#if defined(LLVM_ENABLE_THREADS) && LLVM_ENABLE_THREADS != 0
SwitchedThread = true;
#endif
@@ -96,7 +96,7 @@ CrashRecoveryContext::~CrashRecoveryContext() {
delete tmp;
}
tlIsRecoveringFromCrash->set(PC);
-
+
CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl;
delete CRCI;
}
diff --git a/lib/Support/DAGDeltaAlgorithm.cpp b/lib/Support/DAGDeltaAlgorithm.cpp
index b82aec1423f5..bd9f98b0b82d 100644
--- a/lib/Support/DAGDeltaAlgorithm.cpp
+++ b/lib/Support/DAGDeltaAlgorithm.cpp
@@ -96,7 +96,7 @@ private:
assert(PredClosure.count(Node) && "Invalid node!");
return PredClosure[Node].end();
}
-
+
succ_iterator_ty succ_begin(change_ty Node) {
assert(Successors.count(Node) && "Invalid node!");
return Successors[Node].begin();
@@ -205,7 +205,7 @@ DAGDeltaAlgorithmImpl::DAGDeltaAlgorithmImpl(
Worklist.pop_back();
std::set<change_ty> &ChangeSuccs = SuccClosure[Change];
- for (pred_iterator_ty it = pred_begin(Change),
+ for (pred_iterator_ty it = pred_begin(Change),
ie = pred_end(Change); it != ie; ++it) {
SuccClosure[*it].insert(Change);
SuccClosure[*it].insert(ChangeSuccs.begin(), ChangeSuccs.end());
@@ -222,7 +222,7 @@ DAGDeltaAlgorithmImpl::DAGDeltaAlgorithmImpl(
for (succ_closure_iterator_ty it2 = succ_closure_begin(*it),
ie2 = succ_closure_end(*it); it2 != ie2; ++it2)
PredClosure[*it2].insert(*it);
-
+
// Dump useful debug info.
LLVM_DEBUG({
llvm::errs() << "-- DAGDeltaAlgorithmImpl --\n";
diff --git a/lib/Support/Errno.cpp b/lib/Support/Errno.cpp
index 10be9b391b49..2149f21281d3 100644
--- a/lib/Support/Errno.cpp
+++ b/lib/Support/Errno.cpp
@@ -42,7 +42,7 @@ std::string StrError(int errnum) {
const int MaxErrStrLen = 2000;
char buffer[MaxErrStrLen];
buffer[0] = '\0';
-#endif
+#endif
#ifdef HAVE_STRERROR_R
// strerror_r is thread-safe.
diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp
index ec7d57586e8b..cf9847faccd1 100644
--- a/lib/Support/FoldingSet.cpp
+++ b/lib/Support/FoldingSet.cpp
@@ -92,7 +92,7 @@ void FoldingSetNodeID::AddString(StringRef String) {
unsigned Units = Size / 4;
unsigned Pos = 0;
const unsigned *Base = (const unsigned*) String.data();
-
+
// If the string is aligned do a bulk transfer.
if (!((intptr_t)Base & 3)) {
Bits.append(Base, Base + Units);
@@ -121,7 +121,7 @@ void FoldingSetNodeID::AddString(StringRef String) {
}
}
}
-
+
// With the leftover bits.
unsigned V = 0;
// Pos will have overshot size by 4 - #bytes left over.
@@ -141,7 +141,7 @@ void FoldingSetNodeID::AddNodeID(const FoldingSetNodeID &ID) {
Bits.append(ID.Bits.begin(), ID.Bits.end());
}
-/// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to
+/// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to
/// lookup the node in the FoldingSetBase.
unsigned FoldingSetNodeID::ComputeHash() const {
return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash();
@@ -192,7 +192,7 @@ static FoldingSetBase::Node *GetNextPtr(void *NextInBucketPtr) {
// The low bit is set if this is the pointer back to the bucket.
if (reinterpret_cast<intptr_t>(NextInBucketPtr) & 1)
return nullptr;
-
+
return static_cast<FoldingSetBase::Node*>(NextInBucketPtr);
}
@@ -272,11 +272,11 @@ void FoldingSetBase::GrowBucketCount(unsigned NewBucketCount) {
assert(isPowerOf2_32(NewBucketCount) && "Bad bucket count!");
void **OldBuckets = Buckets;
unsigned OldNumBuckets = NumBuckets;
-
+
// Clear out new buckets.
Buckets = AllocateBuckets(NewBucketCount);
// Set NumBuckets only if allocation of new buckets was succesful
- NumBuckets = NewBucketCount;
+ NumBuckets = NewBucketCount;
NumNodes = 0;
// Walk the old buckets, rehashing nodes into their new place.
@@ -296,7 +296,7 @@ void FoldingSetBase::GrowBucketCount(unsigned NewBucketCount) {
TempID.clear();
}
}
-
+
free(OldBuckets);
}
@@ -324,9 +324,9 @@ FoldingSetBase::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
unsigned IDHash = ID.ComputeHash();
void **Bucket = GetBucketFor(IDHash, Buckets, NumBuckets);
void *Probe = *Bucket;
-
+
InsertPos = nullptr;
-
+
FoldingSetNodeID TempID;
while (Node *NodeInBucket = GetNextPtr(Probe)) {
if (NodeEquals(NodeInBucket, ID, IDHash, TempID))
@@ -335,14 +335,14 @@ FoldingSetBase::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
Probe = NodeInBucket->getNextInBucket();
}
-
+
// Didn't find the node, return null with the bucket as the InsertPos.
InsertPos = Bucket;
return nullptr;
}
/// InsertNode - Insert the specified node into the folding set, knowing that it
-/// is not already in the map. InsertPos must be obtained from
+/// is not already in the map. InsertPos must be obtained from
/// FindNodeOrInsertPos.
void FoldingSetBase::InsertNode(Node *N, void *InsertPos) {
assert(!N->getNextInBucket());
@@ -354,12 +354,12 @@ void FoldingSetBase::InsertNode(Node *N, void *InsertPos) {
}
++NumNodes;
-
+
/// The insert position is actually a bucket pointer.
void **Bucket = static_cast<void**>(InsertPos);
-
+
void *Next = *Bucket;
-
+
// If this is the first insertion into this bucket, its next pointer will be
// null. Pretend as if it pointed to itself, setting the low bit to indicate
// that it is a pointer to the bucket.
@@ -384,13 +384,13 @@ bool FoldingSetBase::RemoveNode(Node *N) {
// Remember what N originally pointed to, either a bucket or another node.
void *NodeNextPtr = Ptr;
-
+
// Chase around the list until we find the node (or bucket) which points to N.
while (true) {
if (Node *NodeInBucket = GetNextPtr(Ptr)) {
// Advance pointer.
Ptr = NodeInBucket->getNextInBucket();
-
+
// We found a node that points to N, change it to point to N's next node,
// removing N from the list.
if (Ptr == N) {
@@ -400,7 +400,7 @@ bool FoldingSetBase::RemoveNode(Node *N) {
} else {
void **Bucket = GetBucketPtr(Ptr);
Ptr = *Bucket;
-
+
// If we found that the bucket points to N, update the bucket to point to
// whatever is next.
if (Ptr == N) {
@@ -432,7 +432,7 @@ FoldingSetIteratorImpl::FoldingSetIteratorImpl(void **Bucket) {
while (*Bucket != reinterpret_cast<void*>(-1) &&
(!*Bucket || !GetNextPtr(*Bucket)))
++Bucket;
-
+
NodePtr = static_cast<FoldingSetNode*>(*Bucket);
}
@@ -443,7 +443,7 @@ void FoldingSetIteratorImpl::advance() {
if (FoldingSetNode *NextNodeInBucket = GetNextPtr(Probe))
NodePtr = NextNodeInBucket;
else {
- // Otherwise, this is the last link in this bucket.
+ // Otherwise, this is the last link in this bucket.
void **Bucket = GetBucketPtr(Probe);
// Skip to the next non-null non-self-cycle bucket.
@@ -451,7 +451,7 @@ void FoldingSetIteratorImpl::advance() {
++Bucket;
} while (*Bucket != reinterpret_cast<void*>(-1) &&
(!*Bucket || !GetNextPtr(*Bucket)));
-
+
NodePtr = static_cast<FoldingSetNode*>(*Bucket);
}
}
diff --git a/lib/Support/FormattedStream.cpp b/lib/Support/FormattedStream.cpp
index a9f4409f5dde..b0cb06c1daa2 100644
--- a/lib/Support/FormattedStream.cpp
+++ b/lib/Support/FormattedStream.cpp
@@ -65,7 +65,7 @@ void formatted_raw_ostream::ComputePosition(const char *Ptr, size_t Size) {
///
/// \param NewCol - The column to move to.
///
-formatted_raw_ostream &formatted_raw_ostream::PadToColumn(unsigned NewCol) {
+formatted_raw_ostream &formatted_raw_ostream::PadToColumn(unsigned NewCol) {
// Figure out what's in the buffer and add it to the column count.
ComputePosition(getBufferStart(), GetNumBytesInBuffer());
diff --git a/lib/Support/ManagedStatic.cpp b/lib/Support/ManagedStatic.cpp
index 1c884dc70fc9..74f71a385027 100644
--- a/lib/Support/ManagedStatic.cpp
+++ b/lib/Support/ManagedStatic.cpp
@@ -43,7 +43,7 @@ void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(),
Ptr.store(Tmp, std::memory_order_release);
DeleterFn = Deleter;
-
+
// Add to list of managed statics.
Next = StaticList;
StaticList = this;
@@ -53,7 +53,7 @@ void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(),
"Partially initialized ManagedStatic!?");
Ptr = Creator();
DeleterFn = Deleter;
-
+
// Add to list of managed statics.
Next = StaticList;
StaticList = this;
@@ -70,7 +70,7 @@ void ManagedStaticBase::destroy() const {
// Destroy memory.
DeleterFn(Ptr);
-
+
// Cleanup.
Ptr = nullptr;
DeleterFn = nullptr;
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index 4428c2f24e32..ef9159bac284 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -152,7 +152,7 @@ MemoryBuffer::getFileOrSTDIN(const Twine &Filename, int64_t FileSize,
}
ErrorOr<std::unique_ptr<MemoryBuffer>>
-MemoryBuffer::getFileSlice(const Twine &FilePath, uint64_t MapSize,
+MemoryBuffer::getFileSlice(const Twine &FilePath, uint64_t MapSize,
uint64_t Offset, bool IsVolatile) {
return getFileAux<MemoryBuffer>(FilePath, -1, MapSize, Offset, false,
IsVolatile);
@@ -533,5 +533,4 @@ MemoryBufferRef MemoryBuffer::getMemBufferRef() const {
return MemoryBufferRef(Data, Identifier);
}
-void MemoryBuffer::anchor() {}
-void SmallVectorMemoryBuffer::anchor() {}
+SmallVectorMemoryBuffer::~SmallVectorMemoryBuffer() {}
diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp
index a806da23ec50..098230290ed2 100644
--- a/lib/Support/Path.cpp
+++ b/lib/Support/Path.cpp
@@ -1157,9 +1157,13 @@ Error TempFile::keep(const Twine &Name) {
setDeleteDisposition(H, true);
#else
std::error_code RenameEC = fs::rename(TmpName, Name);
- // If we can't rename, discard the temporary file.
- if (RenameEC)
- remove(TmpName);
+ if (RenameEC) {
+ // If we can't rename, try to copy to work around cross-device link issues.
+ RenameEC = sys::fs::copy_file(TmpName, Name);
+ // If we can't rename or copy, discard the temporary file.
+ if (RenameEC)
+ remove(TmpName);
+ }
sys::DontRemoveFileOnSignal(TmpName);
#endif
diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp
index f5b6e6f3652d..206de91ae239 100644
--- a/lib/Support/PrettyStackTrace.cpp
+++ b/lib/Support/PrettyStackTrace.cpp
@@ -1,10 +1,10 @@
//===- PrettyStackTrace.cpp - Pretty Crash Handling -----------------------===//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file defines some helpful functions for dealing with the possibility of
@@ -72,10 +72,10 @@ static void PrintStack(raw_ostream &OS) {
static void PrintCurStackTrace(raw_ostream &OS) {
// Don't print an empty trace.
if (!PrettyStackTraceHead) return;
-
+
// If there are pretty stack frames registered, walk and emit them.
OS << "Stack dump:\n";
-
+
PrintStack(OS);
OS.flush();
}
@@ -85,9 +85,9 @@ static void PrintCurStackTrace(raw_ostream &OS) {
// If any clients of llvm try to link to libCrashReporterClient.a themselves,
// only one crash info struct will be used.
extern "C" {
-CRASH_REPORTER_CLIENT_HIDDEN
-struct crashreporter_annotations_t gCRAnnotations
- __attribute__((section("__DATA," CRASHREPORTER_ANNOTATIONS_SECTION)))
+CRASH_REPORTER_CLIENT_HIDDEN
+struct crashreporter_annotations_t gCRAnnotations
+ __attribute__((section("__DATA," CRASHREPORTER_ANNOTATIONS_SECTION)))
#if CRASHREPORTER_ANNOTATIONS_VERSION < 5
= { CRASHREPORTER_ANNOTATIONS_VERSION, 0, 0, 0, 0, 0, 0 };
#else
@@ -114,17 +114,17 @@ static void CrashHandler(void *) {
raw_svector_ostream Stream(TmpStr);
PrintCurStackTrace(Stream);
}
-
+
if (!TmpStr.empty()) {
#ifdef HAVE_CRASHREPORTERCLIENT_H
// Cast to void to avoid warning.
(void)CRSetCrashLogMessage(TmpStr.c_str());
-#elif HAVE_CRASHREPORTER_INFO
+#elif HAVE_CRASHREPORTER_INFO
__crashreporter_info__ = strdup(TmpStr.c_str());
#endif
errs() << TmpStr.str();
}
-
+
#endif
}
diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp
index bc15fd4e4014..d8fde7fa8990 100644
--- a/lib/Support/SourceMgr.cpp
+++ b/lib/Support/SourceMgr.cpp
@@ -175,14 +175,14 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
std::pair<unsigned, unsigned> LineAndCol;
StringRef BufferID = "<unknown>";
std::string LineStr;
-
+
if (Loc.isValid()) {
unsigned CurBuf = FindBufferContainingLoc(Loc);
assert(CurBuf && "Invalid or unspecified location!");
const MemoryBuffer *CurMB = getMemoryBuffer(CurBuf);
BufferID = CurMB->getBufferIdentifier();
-
+
// Scan backward to find the start of the line.
const char *LineStart = Loc.getPointer();
const char *BufStart = CurMB->getBufferStart();
@@ -202,17 +202,17 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
SMRange R = Ranges[i];
if (!R.isValid()) continue;
-
+
// If the line doesn't contain any part of the range, then ignore it.
if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart)
continue;
-
+
// Ignore pieces of the range that go onto other lines.
if (R.Start.getPointer() < LineStart)
R.Start = SMLoc::getFromPointer(LineStart);
if (R.End.getPointer() > LineEnd)
R.End = SMLoc::getFromPointer(LineEnd);
-
+
// Translate from SMLoc ranges to column ranges.
// FIXME: Handle multibyte characters.
ColRanges.push_back(std::make_pair(R.Start.getPointer()-LineStart,
@@ -221,7 +221,7 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
LineAndCol = getLineAndColumn(Loc, CurBuf);
}
-
+
return SMDiagnostic(*this, Loc, BufferID, LineAndCol.first,
LineAndCol.second-1, Kind, Msg.str(),
LineStr, ColRanges, FixIts);
@@ -440,7 +440,7 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S, bool ShowColors,
// Build the line with the caret and ranges.
std::string CaretLine(NumColumns+1, ' ');
-
+
// Expand any ranges.
for (unsigned r = 0, e = Ranges.size(); r != e; ++r) {
std::pair<unsigned, unsigned> R = Ranges[r];
@@ -459,14 +459,14 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S, bool ShowColors,
// Finally, plop on the caret.
if (unsigned(ColumnNo) <= NumColumns)
CaretLine[ColumnNo] = '^';
- else
+ else
CaretLine[NumColumns] = '^';
-
+
// ... and remove trailing whitespace so the output doesn't wrap for it. We
// know that the line isn't completely empty because it has the caret in it at
// least.
CaretLine.erase(CaretLine.find_last_not_of(' ')+1);
-
+
printSourceLine(S, LineContents);
if (ShowColors)
@@ -479,7 +479,7 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S, bool ShowColors,
++OutCol;
continue;
}
-
+
// Okay, we have a tab. Insert the appropriate number of characters.
do {
S << CaretLine[i];
@@ -494,7 +494,7 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S, bool ShowColors,
// Print out the replacement line, matching tabs in the source line.
if (FixItInsertionLine.empty())
return;
-
+
for (size_t i = 0, e = FixItInsertionLine.size(), OutCol = 0; i < e; ++i) {
if (i >= LineContents.size() || LineContents[i] != '\t') {
S << FixItInsertionLine[i];
diff --git a/lib/Support/StringPool.cpp b/lib/Support/StringPool.cpp
index 76faabc92bb5..c591857c415d 100644
--- a/lib/Support/StringPool.cpp
+++ b/lib/Support/StringPool.cpp
@@ -26,10 +26,10 @@ PooledStringPtr StringPool::intern(StringRef Key) {
table_t::iterator I = InternTable.find(Key);
if (I != InternTable.end())
return PooledStringPtr(&*I);
-
+
entry_t *S = entry_t::Create(Key);
S->getValue().Pool = this;
InternTable.insert(S);
-
+
return PooledStringPtr(S);
}
diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp
index 9ba7a09f9962..f0349260e22f 100644
--- a/lib/Support/StringRef.cpp
+++ b/lib/Support/StringRef.cpp
@@ -389,7 +389,7 @@ static unsigned GetAutoSenseRadix(StringRef &Str) {
Str = Str.substr(2);
return 16;
}
-
+
if (Str.startswith("0b") || Str.startswith("0B")) {
Str = Str.substr(2);
return 2;
diff --git a/lib/Support/TargetRegistry.cpp b/lib/Support/TargetRegistry.cpp
index ed999fce5dad..c5eba5714766 100644
--- a/lib/Support/TargetRegistry.cpp
+++ b/lib/Support/TargetRegistry.cpp
@@ -98,7 +98,7 @@ void TargetRegistry::RegisterTarget(Target &T, const char *Name,
// convenience to some clients.
if (T.Name)
return;
-
+
// Add to the list of targets.
T.Next = FirstTarget;
FirstTarget = &T;
diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc
index f425d607af47..b64b013d7407 100644
--- a/lib/Support/Windows/Path.inc
+++ b/lib/Support/Windows/Path.inc
@@ -450,7 +450,7 @@ static std::error_code rename_handle(HANDLE FromHandle, const Twine &To) {
if (std::error_code EC2 = realPathFromHandle(FromHandle, WideFrom))
return EC2;
if (::MoveFileExW(WideFrom.begin(), WideTo.begin(),
- MOVEFILE_REPLACE_EXISTING))
+ MOVEFILE_REPLACE_EXISTING | MOVEFILE_COPY_ALLOWED))
return std::error_code();
return mapWindowsError(GetLastError());
}
diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp
index 354b7d0740de..9ef1410b99a5 100644
--- a/lib/Support/YAMLParser.cpp
+++ b/lib/Support/YAMLParser.cpp
@@ -1113,7 +1113,7 @@ bool Scanner::scanDirective() {
Current = skip_while(&Scanner::skip_ns_char, Current);
StringRef Name(NameStart, Current - NameStart);
Current = skip_while(&Scanner::skip_s_white, Current);
-
+
Token T;
if (Name == "YAML") {
Current = skip_while(&Scanner::skip_ns_char, Current);
diff --git a/lib/Support/regex_impl.h b/lib/Support/regex_impl.h
index f8296c9ff75e..8ddac7dcf998 100644
--- a/lib/Support/regex_impl.h
+++ b/lib/Support/regex_impl.h
@@ -96,7 +96,7 @@ extern "C" {
int llvm_regcomp(llvm_regex_t *, const char *, int);
size_t llvm_regerror(int, const llvm_regex_t *, char *, size_t);
-int llvm_regexec(const llvm_regex_t *, const char *, size_t,
+int llvm_regexec(const llvm_regex_t *, const char *, size_t,
llvm_regmatch_t [], int);
void llvm_regfree(llvm_regex_t *);
size_t llvm_strlcpy(char *dst, const char *src, size_t siz);
diff --git a/lib/Support/xxhash.cpp b/lib/Support/xxhash.cpp
index df643f9bd639..e9dceed2c4ae 100644
--- a/lib/Support/xxhash.cpp
+++ b/lib/Support/xxhash.cpp
@@ -132,3 +132,7 @@ uint64_t llvm::xxHash64(StringRef Data) {
return H64;
}
+
+uint64_t llvm::xxHash64(ArrayRef<uint8_t> Data) {
+ return xxHash64({(const char *)Data.data(), Data.size()});
+}
diff --git a/lib/TableGen/StringMatcher.cpp b/lib/TableGen/StringMatcher.cpp
index 32599104f6a2..2c4d1f33997d 100644
--- a/lib/TableGen/StringMatcher.cpp
+++ b/lib/TableGen/StringMatcher.cpp
@@ -25,19 +25,19 @@ using namespace llvm;
/// FindFirstNonCommonLetter - Find the first character in the keys of the
/// string pairs that is not shared across the whole set of strings. All
/// strings are assumed to have the same length.
-static unsigned
+static unsigned
FindFirstNonCommonLetter(const std::vector<const
StringMatcher::StringPair*> &Matches) {
assert(!Matches.empty());
for (unsigned i = 0, e = Matches[0]->first.size(); i != e; ++i) {
// Check to see if letter i is the same across the set.
char Letter = Matches[0]->first[i];
-
+
for (unsigned str = 0, e = Matches.size(); str != e; ++str)
if (Matches[str]->first[i] != Letter)
return i;
}
-
+
return Matches[0]->first.size();
}
@@ -51,7 +51,7 @@ bool StringMatcher::EmitStringMatcherForChar(
unsigned IndentCount, bool IgnoreDuplicates) const {
assert(!Matches.empty() && "Must have at least one string to match!");
std::string Indent(IndentCount * 2 + 4, ' ');
-
+
// If we have verified that the entire string matches, we're done: output the
// matching code.
if (CharNo == Matches[0]->first.size()) {
@@ -60,7 +60,7 @@ bool StringMatcher::EmitStringMatcherForChar(
// If the to-execute code has \n's in it, indent each subsequent line.
StringRef Code = Matches[0]->second;
-
+
std::pair<StringRef, StringRef> Split = Code.split('\n');
OS << Indent << Split.first << "\t // \"" << Matches[0]->first << "\"\n";
@@ -72,20 +72,20 @@ bool StringMatcher::EmitStringMatcherForChar(
}
return false;
}
-
+
// Bucket the matches by the character we are comparing.
std::map<char, std::vector<const StringPair*>> MatchesByLetter;
-
+
for (unsigned i = 0, e = Matches.size(); i != e; ++i)
MatchesByLetter[Matches[i]->first[CharNo]].push_back(Matches[i]);
-
-
+
+
// If we have exactly one bucket to match, see how many characters are common
// across the whole set and match all of them at once.
if (MatchesByLetter.size() == 1) {
unsigned FirstNonCommonLetter = FindFirstNonCommonLetter(Matches);
unsigned NumChars = FirstNonCommonLetter-CharNo;
-
+
// Emit code to break out if the prefix doesn't match.
if (NumChars == 1) {
// Do the comparison with if (Str[1] != 'f')
@@ -105,13 +105,13 @@ bool StringMatcher::EmitStringMatcherForChar(
return EmitStringMatcherForChar(Matches, FirstNonCommonLetter, IndentCount,
IgnoreDuplicates);
}
-
+
// Otherwise, we have multiple possible things, emit a switch on the
// character.
OS << Indent << "switch (" << StrVariableName << "[" << CharNo << "]) {\n";
OS << Indent << "default: break;\n";
-
- for (std::map<char, std::vector<const StringPair*>>::iterator LI =
+
+ for (std::map<char, std::vector<const StringPair*>>::iterator LI =
MatchesByLetter.begin(), E = MatchesByLetter.end(); LI != E; ++LI) {
// TODO: escape hard stuff (like \n) if we ever care about it.
OS << Indent << "case '" << LI->first << "':\t // "
@@ -122,7 +122,7 @@ bool StringMatcher::EmitStringMatcherForChar(
IgnoreDuplicates))
OS << Indent << " break;\n";
}
-
+
OS << Indent << "}\n";
return true;
}
@@ -132,18 +132,18 @@ bool StringMatcher::EmitStringMatcherForChar(
void StringMatcher::Emit(unsigned Indent, bool IgnoreDuplicates) const {
// If nothing to match, just fall through.
if (Matches.empty()) return;
-
+
// First level categorization: group strings by length.
std::map<unsigned, std::vector<const StringPair*>> MatchesByLength;
-
+
for (unsigned i = 0, e = Matches.size(); i != e; ++i)
MatchesByLength[Matches[i].first.size()].push_back(&Matches[i]);
-
+
// Output a switch statement on length and categorize the elements within each
// bin.
OS.indent(Indent*2+2) << "switch (" << StrVariableName << ".size()) {\n";
OS.indent(Indent*2+2) << "default: break;\n";
-
+
for (std::map<unsigned, std::vector<const StringPair*>>::iterator LI =
MatchesByLength.begin(), E = MatchesByLength.end(); LI != E; ++LI) {
OS.indent(Indent*2+2) << "case " << LI->first << ":\t // "
@@ -152,6 +152,6 @@ void StringMatcher::Emit(unsigned Indent, bool IgnoreDuplicates) const {
if (EmitStringMatcherForChar(LI->second, 0, Indent, IgnoreDuplicates))
OS.indent(Indent*2+4) << "break;\n";
}
-
+
OS.indent(Indent*2+2) << "}\n";
}
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index 43a3ae77a170..572d1c22feea 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -3774,7 +3774,7 @@ bool AArch64FastISel::selectRet(const Instruction *I) {
if (Ret->getNumOperands() > 0) {
CallingConv::ID CC = F.getCallingConv();
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
+ GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 0c72f2ebee18..de762a7bb1d4 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8580,7 +8580,7 @@ static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG,
SDValue
AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG,
- std::vector<SDNode *> *Created) const {
+ SmallVectorImpl<SDNode *> &Created) const {
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
if (isIntDivCheap(N->getValueType(0), Attr))
return SDValue(N,0); // Lower SDIV as SDIV
@@ -8603,11 +8603,9 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
SDValue CSel = DAG.getNode(AArch64ISD::CSEL, DL, VT, Add, N0, CCVal, Cmp);
- if (Created) {
- Created->push_back(Cmp.getNode());
- Created->push_back(Add.getNode());
- Created->push_back(CSel.getNode());
- }
+ Created.push_back(Cmp.getNode());
+ Created.push_back(Add.getNode());
+ Created.push_back(CSel.getNode());
// Divide by pow2.
SDValue SRA =
@@ -8618,8 +8616,7 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
if (Divisor.isNonNegative())
return SRA;
- if (Created)
- Created->push_back(SRA.getNode());
+ Created.push_back(SRA.getNode());
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
}
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index 592845640a44..d783c8a6048c 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -644,7 +644,7 @@ private:
SelectionDAG &DAG) const;
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
- std::vector<SDNode *> *Created) const override;
+ SmallVectorImpl<SDNode *> &Created) const override;
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
int &ExtraSteps, bool &UseOneConst,
bool Reciprocal) const override;
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index 1060c64f7b5d..15d61cd1ad26 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -57,6 +57,14 @@ class EncodedI<string cstr, list<dag> pattern> : AArch64Inst<NormalFrm, cstr> {
let Size = 4;
}
+// Enum describing whether an instruction is
+// destructive in its first source operand.
+class DestructiveInstTypeEnum<bits<1> val> {
+ bits<1> Value = val;
+}
+def NotDestructive : DestructiveInstTypeEnum<0>;
+def Destructive : DestructiveInstTypeEnum<1>;
+
// Normal instructions
class I<dag oops, dag iops, string asm, string operands, string cstr,
list<dag> pattern>
@@ -64,6 +72,13 @@ class I<dag oops, dag iops, string asm, string operands, string cstr,
dag OutOperandList = oops;
dag InOperandList = iops;
let AsmString = !strconcat(asm, operands);
+
+ // Destructive operations (SVE)
+ DestructiveInstTypeEnum DestructiveInstType = NotDestructive;
+ ElementSizeEnum ElementSize = ElementSizeB;
+
+ let TSFlags{3} = DestructiveInstType.Value;
+ let TSFlags{2-0} = ElementSize.Value;
}
class TriOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$MHS, node:$RHS), res>;
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index 230480cf1cea..032d53d19620 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -4851,75 +4851,92 @@ AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
return makeArrayRef(TargetFlags);
}
- /// Constants defining how certain sequences should be outlined.
- /// This encompasses how an outlined function should be called, and what kind of
- /// frame should be emitted for that outlined function.
- ///
- /// \p MachineOutlinerDefault implies that the function should be called with
- /// a save and restore of LR to the stack.
- ///
- /// That is,
- ///
- /// I1 Save LR OUTLINED_FUNCTION:
- /// I2 --> BL OUTLINED_FUNCTION I1
- /// I3 Restore LR I2
- /// I3
- /// RET
- ///
- /// * Call construction overhead: 3 (save + BL + restore)
- /// * Frame construction overhead: 1 (ret)
- /// * Requires stack fixups? Yes
- ///
- /// \p MachineOutlinerTailCall implies that the function is being created from
- /// a sequence of instructions ending in a return.
- ///
- /// That is,
- ///
- /// I1 OUTLINED_FUNCTION:
- /// I2 --> B OUTLINED_FUNCTION I1
- /// RET I2
- /// RET
- ///
- /// * Call construction overhead: 1 (B)
- /// * Frame construction overhead: 0 (Return included in sequence)
- /// * Requires stack fixups? No
- ///
- /// \p MachineOutlinerNoLRSave implies that the function should be called using
- /// a BL instruction, but doesn't require LR to be saved and restored. This
- /// happens when LR is known to be dead.
- ///
- /// That is,
- ///
- /// I1 OUTLINED_FUNCTION:
- /// I2 --> BL OUTLINED_FUNCTION I1
- /// I3 I2
- /// I3
- /// RET
- ///
- /// * Call construction overhead: 1 (BL)
- /// * Frame construction overhead: 1 (RET)
- /// * Requires stack fixups? No
- ///
- /// \p MachineOutlinerThunk implies that the function is being created from
- /// a sequence of instructions ending in a call. The outlined function is
- /// called with a BL instruction, and the outlined function tail-calls the
- /// original call destination.
- ///
- /// That is,
- ///
- /// I1 OUTLINED_FUNCTION:
- /// I2 --> BL OUTLINED_FUNCTION I1
- /// BL f I2
- /// B f
- /// * Call construction overhead: 1 (BL)
- /// * Frame construction overhead: 0
- /// * Requires stack fixups? No
- ///
+/// Constants defining how certain sequences should be outlined.
+/// This encompasses how an outlined function should be called, and what kind of
+/// frame should be emitted for that outlined function.
+///
+/// \p MachineOutlinerDefault implies that the function should be called with
+/// a save and restore of LR to the stack.
+///
+/// That is,
+///
+/// I1 Save LR OUTLINED_FUNCTION:
+/// I2 --> BL OUTLINED_FUNCTION I1
+/// I3 Restore LR I2
+/// I3
+/// RET
+///
+/// * Call construction overhead: 3 (save + BL + restore)
+/// * Frame construction overhead: 1 (ret)
+/// * Requires stack fixups? Yes
+///
+/// \p MachineOutlinerTailCall implies that the function is being created from
+/// a sequence of instructions ending in a return.
+///
+/// That is,
+///
+/// I1 OUTLINED_FUNCTION:
+/// I2 --> B OUTLINED_FUNCTION I1
+/// RET I2
+/// RET
+///
+/// * Call construction overhead: 1 (B)
+/// * Frame construction overhead: 0 (Return included in sequence)
+/// * Requires stack fixups? No
+///
+/// \p MachineOutlinerNoLRSave implies that the function should be called using
+/// a BL instruction, but doesn't require LR to be saved and restored. This
+/// happens when LR is known to be dead.
+///
+/// That is,
+///
+/// I1 OUTLINED_FUNCTION:
+/// I2 --> BL OUTLINED_FUNCTION I1
+/// I3 I2
+/// I3
+/// RET
+///
+/// * Call construction overhead: 1 (BL)
+/// * Frame construction overhead: 1 (RET)
+/// * Requires stack fixups? No
+///
+/// \p MachineOutlinerThunk implies that the function is being created from
+/// a sequence of instructions ending in a call. The outlined function is
+/// called with a BL instruction, and the outlined function tail-calls the
+/// original call destination.
+///
+/// That is,
+///
+/// I1 OUTLINED_FUNCTION:
+/// I2 --> BL OUTLINED_FUNCTION I1
+/// BL f I2
+/// B f
+/// * Call construction overhead: 1 (BL)
+/// * Frame construction overhead: 0
+/// * Requires stack fixups? No
+///
+/// \p MachineOutlinerRegSave implies that the function should be called with a
+/// save and restore of LR to an available register. This allows us to avoid
+/// stack fixups. Note that this outlining variant is compatible with the
+/// NoLRSave case.
+///
+/// That is,
+///
+/// I1 Save LR OUTLINED_FUNCTION:
+/// I2 --> BL OUTLINED_FUNCTION I1
+/// I3 Restore LR I2
+/// I3
+/// RET
+///
+/// * Call construction overhead: 3 (save + BL + restore)
+/// * Frame construction overhead: 1 (ret)
+/// * Requires stack fixups? No
enum MachineOutlinerClass {
MachineOutlinerDefault, /// Emit a save, restore, call, and return.
MachineOutlinerTailCall, /// Only emit a branch.
MachineOutlinerNoLRSave, /// Emit a call and return.
MachineOutlinerThunk, /// Emit a call and tail-call.
+ MachineOutlinerRegSave /// Same as default, but save to a register.
};
enum MachineOutlinerMBBFlags {
@@ -4927,6 +4944,27 @@ enum MachineOutlinerMBBFlags {
HasCalls = 0x4
};
+unsigned
+AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
+ MachineFunction *MF = C.getMF();
+ const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
+ MF->getSubtarget().getRegisterInfo());
+
+ // Check if there is an available register across the sequence that we can
+ // use.
+ for (unsigned Reg : AArch64::GPR64RegClass) {
+ if (!ARI->isReservedReg(*MF, Reg) &&
+ Reg != AArch64::LR && // LR is not reserved, but don't use it.
+ Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
+ Reg != AArch64::X17 && // Ditto for X17.
+ C.LRU.available(Reg) && C.UsedInSequence.available(Reg))
+ return Reg;
+ }
+
+ // No suitable register. Return 0.
+ return 0u;
+}
+
outliner::OutlinedFunction
AArch64InstrInfo::getOutliningCandidateInfo(
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
@@ -5015,11 +5053,27 @@ AArch64InstrInfo::getOutliningCandidateInfo(
SetCandidateCallInfo(MachineOutlinerNoLRSave, 4);
}
- // LR is live, so we need to save it to the stack.
+ // LR is live, so we need to save it. Decide whether it should be saved to
+ // the stack, or if it can be saved to a register.
else {
- FrameID = MachineOutlinerDefault;
- NumBytesToCreateFrame = 4;
- SetCandidateCallInfo(MachineOutlinerDefault, 12);
+ if (std::all_of(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
+ [this](outliner::Candidate &C) {
+ return findRegisterToSaveLRTo(C);
+ })) {
+ // Every candidate has an available callee-saved register for the save.
+ // We can save LR to a register.
+ FrameID = MachineOutlinerRegSave;
+ NumBytesToCreateFrame = 4;
+ SetCandidateCallInfo(MachineOutlinerRegSave, 12);
+ }
+
+ else {
+ // At least one candidate does not have an available callee-saved
+ // register. We must save LR to the stack.
+ FrameID = MachineOutlinerDefault;
+ NumBytesToCreateFrame = 4;
+ SetCandidateCallInfo(MachineOutlinerDefault, 12);
+ }
}
// Check if the range contains a call. These require a save + restore of the
@@ -5088,7 +5142,7 @@ AArch64InstrInfo::getMachineOutlinerMBBFlags(MachineBasicBlock &MBB) const {
MBB.rend(),
[&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
- if (!LRU.available(AArch64::LR))
+ if (!LRU.available(AArch64::LR))
Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
return Flags;
@@ -5114,14 +5168,14 @@ AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
// ahead and skip over them.
if (MI.isKill())
return outliner::InstrType::Invisible;
-
+
// Is this a terminator for a basic block?
if (MI.isTerminator()) {
// Is this the end of a function?
if (MI.getParent()->succ_empty())
return outliner::InstrType::Legal;
-
+
// It's not, so don't outline it.
return outliner::InstrType::Illegal;
}
@@ -5424,7 +5478,7 @@ void AArch64InstrInfo::buildOutlinedFrame(
MBB.insert(MBB.end(), ret);
// Did we have to modify the stack by saving the link register?
- if (OF.FrameConstructionID == MachineOutlinerNoLRSave)
+ if (OF.FrameConstructionID != MachineOutlinerDefault)
return;
// We modified the stack.
@@ -5457,13 +5511,41 @@ MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
// We want to return the spot where we inserted the call.
MachineBasicBlock::iterator CallPt;
- // We have a default call. Save the link register.
- MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
- .addReg(AArch64::SP, RegState::Define)
- .addReg(AArch64::LR)
- .addReg(AArch64::SP)
- .addImm(-16);
- It = MBB.insert(It, STRXpre);
+ // Instructions for saving and restoring LR around the call instruction we're
+ // going to insert.
+ MachineInstr *Save;
+ MachineInstr *Restore;
+ // Can we save to a register?
+ if (C.CallConstructionID == MachineOutlinerRegSave) {
+ // FIXME: This logic should be sunk into a target-specific interface so that
+ // we don't have to recompute the register.
+ unsigned Reg = findRegisterToSaveLRTo(C);
+ assert(Reg != 0 && "No callee-saved register available?");
+
+ // Save and restore LR from that register.
+ Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::LR)
+ .addImm(0);
+ Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
+ .addReg(AArch64::XZR)
+ .addReg(Reg)
+ .addImm(0);
+ } else {
+ // We have the default case. Save and restore from SP.
+ Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
+ .addReg(AArch64::SP, RegState::Define)
+ .addReg(AArch64::LR)
+ .addReg(AArch64::SP)
+ .addImm(-16);
+ Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
+ .addReg(AArch64::SP, RegState::Define)
+ .addReg(AArch64::LR, RegState::Define)
+ .addReg(AArch64::SP)
+ .addImm(16);
+ }
+
+ It = MBB.insert(It, Save);
It++;
// Insert the call.
@@ -5472,13 +5554,11 @@ MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
CallPt = It;
It++;
- // Restore the link register.
- MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
- .addReg(AArch64::SP, RegState::Define)
- .addReg(AArch64::LR, RegState::Define)
- .addReg(AArch64::SP)
- .addImm(16);
- It = MBB.insert(It, LDRXpost);
-
+ It = MBB.insert(It, Restore);
return CallPt;
}
+
+bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
+ MachineFunction &MF) const {
+ return MF.getFunction().optForMinSize();
+}
diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h
index 0e5953f6216d..11882e238b70 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/lib/Target/AArch64/AArch64InstrInfo.h
@@ -249,6 +249,7 @@ public:
insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
MachineBasicBlock::iterator &It, MachineFunction &MF,
const outliner::Candidate &C) const override;
+ bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;
/// Returns true if the instruction sets to an immediate value that can be
/// executed more efficiently.
bool isExynosResetFast(const MachineInstr &MI) const;
@@ -271,6 +272,10 @@ private:
ArrayRef<MachineOperand> Cond) const;
bool substituteCmpToZero(MachineInstr &CmpInstr, unsigned SrcReg,
const MachineRegisterInfo *MRI) const;
+
+ /// Returns an unused general-purpose register which can be used for
+ /// constructing an outlined call if one exists. Returns 0 otherwise.
+ unsigned findRegisterToSaveLRTo(const outliner::Candidate &C) const;
};
/// emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg
@@ -339,6 +344,32 @@ static inline bool isIndirectBranchOpcode(int Opc) {
return Opc == AArch64::BR;
}
+// struct TSFlags {
+#define TSFLAG_ELEMENT_SIZE_TYPE(X) (X) // 3-bits
+#define TSFLAG_DESTRUCTIVE_INST_TYPE(X) ((X) << 3) // 1-bit
+// }
+
+namespace AArch64 {
+
+enum ElementSizeType {
+ ElementSizeMask = TSFLAG_ELEMENT_SIZE_TYPE(0x7),
+ ElementSizeNone = TSFLAG_ELEMENT_SIZE_TYPE(0x0),
+ ElementSizeB = TSFLAG_ELEMENT_SIZE_TYPE(0x1),
+ ElementSizeH = TSFLAG_ELEMENT_SIZE_TYPE(0x2),
+ ElementSizeS = TSFLAG_ELEMENT_SIZE_TYPE(0x3),
+ ElementSizeD = TSFLAG_ELEMENT_SIZE_TYPE(0x4),
+};
+
+enum DestructiveInstType {
+ DestructiveInstTypeMask = TSFLAG_DESTRUCTIVE_INST_TYPE(0x1),
+ NotDestructive = TSFLAG_DESTRUCTIVE_INST_TYPE(0x0),
+ Destructive = TSFLAG_DESTRUCTIVE_INST_TYPE(0x1),
+};
+
+#undef TSFLAG_ELEMENT_SIZE_TYPE
+#undef TSFLAG_DESTRUCTIVE_INST_TYPE
+}
+
} // end namespace llvm
#endif
diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 4d7ca2349ed1..b2b500320b5c 100644
--- a/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -21,6 +21,7 @@
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -94,6 +95,10 @@ private:
void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const;
+ // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
+ void materializeLargeCMVal(MachineInstr &I, const Value *V,
+ unsigned char OpFlags) const;
+
const AArch64TargetMachine &TM;
const AArch64Subtarget &STI;
const AArch64InstrInfo &TII;
@@ -655,6 +660,45 @@ bool AArch64InstructionSelector::selectVaStartDarwin(
return true;
}
+void AArch64InstructionSelector::materializeLargeCMVal(
+ MachineInstr &I, const Value *V, unsigned char OpFlags) const {
+ MachineBasicBlock &MBB = *I.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MachineIRBuilder MIB(I);
+
+ auto MovZ = MIB.buildInstr(AArch64::MOVZXi, &AArch64::GPR64RegClass);
+ MovZ->addOperand(MF, I.getOperand(1));
+ MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
+ AArch64II::MO_NC);
+ MovZ->addOperand(MF, MachineOperand::CreateImm(0));
+ constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
+
+ auto BuildMovK = [&](unsigned SrcReg, unsigned char Flags, unsigned Offset,
+ unsigned ForceDstReg) {
+ unsigned DstReg = ForceDstReg
+ ? ForceDstReg
+ : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
+ auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
+ if (auto *GV = dyn_cast<GlobalValue>(V)) {
+ MovI->addOperand(MF, MachineOperand::CreateGA(
+ GV, MovZ->getOperand(1).getOffset(), Flags));
+ } else {
+ MovI->addOperand(
+ MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
+ MovZ->getOperand(1).getOffset(), Flags));
+ }
+ MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
+ constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
+ return DstReg;
+ };
+ unsigned DstReg = BuildMovK(MovZ->getOperand(0).getReg(),
+ AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
+ DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
+ BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
+ return;
+}
+
bool AArch64InstructionSelector::select(MachineInstr &I,
CodeGenCoverage &CoverageInfo) const {
assert(I.getParent() && "Instruction should be in a basic block!");
@@ -936,36 +980,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
I.getOperand(1).setTargetFlags(OpFlags);
} else if (TM.getCodeModel() == CodeModel::Large) {
// Materialize the global using movz/movk instructions.
- unsigned MovZDstReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
- auto InsertPt = std::next(I.getIterator());
- auto MovZ =
- BuildMI(MBB, InsertPt, I.getDebugLoc(), TII.get(AArch64::MOVZXi))
- .addDef(MovZDstReg);
- MovZ->addOperand(MF, I.getOperand(1));
- MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
- AArch64II::MO_NC);
- MovZ->addOperand(MF, MachineOperand::CreateImm(0));
- constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
-
- auto BuildMovK = [&](unsigned SrcReg, unsigned char Flags,
- unsigned Offset, unsigned ForceDstReg) {
- unsigned DstReg =
- ForceDstReg ? ForceDstReg
- : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
- auto MovI = BuildMI(MBB, InsertPt, MovZ->getDebugLoc(),
- TII.get(AArch64::MOVKXi))
- .addDef(DstReg)
- .addReg(SrcReg);
- MovI->addOperand(MF, MachineOperand::CreateGA(
- GV, MovZ->getOperand(1).getOffset(), Flags));
- MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
- constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
- return DstReg;
- };
- unsigned DstReg = BuildMovK(MovZ->getOperand(0).getReg(),
- AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
- DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
- BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
+ materializeLargeCMVal(I, GV, OpFlags);
I.eraseFromParent();
return true;
} else {
@@ -1482,7 +1497,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
.addImm(1);
I.eraseFromParent();
return true;
- case TargetOpcode::G_IMPLICIT_DEF:
+ case TargetOpcode::G_IMPLICIT_DEF: {
I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
const unsigned DstReg = I.getOperand(0).getReg();
@@ -1492,6 +1507,25 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
return true;
}
+ case TargetOpcode::G_BLOCK_ADDR: {
+ if (TM.getCodeModel() == CodeModel::Large) {
+ materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
+ I.eraseFromParent();
+ return true;
+ } else {
+ I.setDesc(TII.get(AArch64::MOVaddrBA));
+ auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
+ I.getOperand(0).getReg())
+ .addBlockAddress(I.getOperand(1).getBlockAddress(),
+ /* Offset */ 0, AArch64II::MO_PAGE)
+ .addBlockAddress(
+ I.getOperand(1).getBlockAddress(), /* Offset */ 0,
+ AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
+ }
+ }
+ }
return false;
}
diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
index 9b8c0a34efba..327c758a7f8e 100644
--- a/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -293,6 +293,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));
}
+ getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
+
// Merge/Unmerge
for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index 798340f8fed8..e42214d15699 100644
--- a/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -146,7 +146,7 @@ public:
Optional<bool> hasRedZone() const { return HasRedZone; }
void setHasRedZone(bool s) { HasRedZone = s; }
-
+
int getVarArgsStackIndex() const { return VarArgsStackIndex; }
void setVarArgsStackIndex(int Index) { VarArgsStackIndex = Index; }
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td
index 7a653e117fd1..bbf401b474ca 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -764,18 +764,35 @@ def Z30 : AArch64Reg<30, "z30", [Q30, Z30_HI]>, DwarfRegNum<[126]>;
def Z31 : AArch64Reg<31, "z31", [Q31, Z31_HI]>, DwarfRegNum<[127]>;
}
+// Enum describing the element size for destructive
+// operations.
+class ElementSizeEnum<bits<3> val> {
+ bits<3> Value = val;
+}
+
+def ElementSizeNone : ElementSizeEnum<0>;
+def ElementSizeB : ElementSizeEnum<1>;
+def ElementSizeH : ElementSizeEnum<2>;
+def ElementSizeS : ElementSizeEnum<3>;
+def ElementSizeD : ElementSizeEnum<4>;
+def ElementSizeQ : ElementSizeEnum<5>; // Unused
+
class SVERegOp <string Suffix, AsmOperandClass C,
+ ElementSizeEnum Size,
RegisterClass RC> : RegisterOperand<RC> {
+ ElementSizeEnum ElementSize;
+
+ let ElementSize = Size;
let PrintMethod = !if(!eq(Suffix, ""),
"printSVERegOp<>",
"printSVERegOp<'" # Suffix # "'>");
let ParserMatchClass = C;
}
-class PPRRegOp <string Suffix, AsmOperandClass C,
- RegisterClass RC> : SVERegOp<Suffix, C, RC> {}
-class ZPRRegOp <string Suffix, AsmOperandClass C,
- RegisterClass RC> : SVERegOp<Suffix, C, RC> {}
+class PPRRegOp <string Suffix, AsmOperandClass C, ElementSizeEnum Size,
+ RegisterClass RC> : SVERegOp<Suffix, C, Size, RC> {}
+class ZPRRegOp <string Suffix, AsmOperandClass C, ElementSizeEnum Size,
+ RegisterClass RC> : SVERegOp<Suffix, C, Size, RC> {}
//******************************************************************************
@@ -805,11 +822,11 @@ def PPRAsmOp16 : PPRAsmOperand<"PredicateH", "PPR", 16>;
def PPRAsmOp32 : PPRAsmOperand<"PredicateS", "PPR", 32>;
def PPRAsmOp64 : PPRAsmOperand<"PredicateD", "PPR", 64>;
-def PPRAny : PPRRegOp<"", PPRAsmOpAny, PPR>;
-def PPR8 : PPRRegOp<"b", PPRAsmOp8, PPR>;
-def PPR16 : PPRRegOp<"h", PPRAsmOp16, PPR>;
-def PPR32 : PPRRegOp<"s", PPRAsmOp32, PPR>;
-def PPR64 : PPRRegOp<"d", PPRAsmOp64, PPR>;
+def PPRAny : PPRRegOp<"", PPRAsmOpAny, ElementSizeNone, PPR>;
+def PPR8 : PPRRegOp<"b", PPRAsmOp8, ElementSizeB, PPR>;
+def PPR16 : PPRRegOp<"h", PPRAsmOp16, ElementSizeH, PPR>;
+def PPR32 : PPRRegOp<"s", PPRAsmOp32, ElementSizeS, PPR>;
+def PPR64 : PPRRegOp<"d", PPRAsmOp64, ElementSizeD, PPR>;
def PPRAsmOp3bAny : PPRAsmOperand<"Predicate3bAny", "PPR_3b", 0>;
def PPRAsmOp3b8 : PPRAsmOperand<"Predicate3bB", "PPR_3b", 8>;
@@ -817,11 +834,11 @@ def PPRAsmOp3b16 : PPRAsmOperand<"Predicate3bH", "PPR_3b", 16>;
def PPRAsmOp3b32 : PPRAsmOperand<"Predicate3bS", "PPR_3b", 32>;
def PPRAsmOp3b64 : PPRAsmOperand<"Predicate3bD", "PPR_3b", 64>;
-def PPR3bAny : PPRRegOp<"", PPRAsmOp3bAny, PPR_3b>;
-def PPR3b8 : PPRRegOp<"b", PPRAsmOp3b8, PPR_3b>;
-def PPR3b16 : PPRRegOp<"h", PPRAsmOp3b16, PPR_3b>;
-def PPR3b32 : PPRRegOp<"s", PPRAsmOp3b32, PPR_3b>;
-def PPR3b64 : PPRRegOp<"d", PPRAsmOp3b64, PPR_3b>;
+def PPR3bAny : PPRRegOp<"", PPRAsmOp3bAny, ElementSizeNone, PPR_3b>;
+def PPR3b8 : PPRRegOp<"b", PPRAsmOp3b8, ElementSizeB, PPR_3b>;
+def PPR3b16 : PPRRegOp<"h", PPRAsmOp3b16, ElementSizeH, PPR_3b>;
+def PPR3b32 : PPRRegOp<"s", PPRAsmOp3b32, ElementSizeS, PPR_3b>;
+def PPR3b64 : PPRRegOp<"d", PPRAsmOp3b64, ElementSizeD, PPR_3b>;
//******************************************************************************
@@ -874,28 +891,28 @@ def ZPRAsmOp32 : ZPRAsmOperand<"VectorS", 32>;
def ZPRAsmOp64 : ZPRAsmOperand<"VectorD", 64>;
def ZPRAsmOp128 : ZPRAsmOperand<"VectorQ", 128>;
-def ZPRAny : ZPRRegOp<"", ZPRAsmOpAny, ZPR>;
-def ZPR8 : ZPRRegOp<"b", ZPRAsmOp8, ZPR>;
-def ZPR16 : ZPRRegOp<"h", ZPRAsmOp16, ZPR>;
-def ZPR32 : ZPRRegOp<"s", ZPRAsmOp32, ZPR>;
-def ZPR64 : ZPRRegOp<"d", ZPRAsmOp64, ZPR>;
-def ZPR128 : ZPRRegOp<"q", ZPRAsmOp128, ZPR>;
+def ZPRAny : ZPRRegOp<"", ZPRAsmOpAny, ElementSizeNone, ZPR>;
+def ZPR8 : ZPRRegOp<"b", ZPRAsmOp8, ElementSizeB, ZPR>;
+def ZPR16 : ZPRRegOp<"h", ZPRAsmOp16, ElementSizeH, ZPR>;
+def ZPR32 : ZPRRegOp<"s", ZPRAsmOp32, ElementSizeS, ZPR>;
+def ZPR64 : ZPRRegOp<"d", ZPRAsmOp64, ElementSizeD, ZPR>;
+def ZPR128 : ZPRRegOp<"q", ZPRAsmOp128, ElementSizeQ, ZPR>;
def ZPRAsmOp3b8 : ZPRAsmOperand<"Vector3bB", 8, "_3b">;
def ZPRAsmOp3b16 : ZPRAsmOperand<"Vector3bH", 16, "_3b">;
def ZPRAsmOp3b32 : ZPRAsmOperand<"Vector3bS", 32, "_3b">;
-def ZPR3b8 : ZPRRegOp<"b", ZPRAsmOp3b8, ZPR_3b>;
-def ZPR3b16 : ZPRRegOp<"h", ZPRAsmOp3b16, ZPR_3b>;
-def ZPR3b32 : ZPRRegOp<"s", ZPRAsmOp3b32, ZPR_3b>;
+def ZPR3b8 : ZPRRegOp<"b", ZPRAsmOp3b8, ElementSizeB, ZPR_3b>;
+def ZPR3b16 : ZPRRegOp<"h", ZPRAsmOp3b16, ElementSizeH, ZPR_3b>;
+def ZPR3b32 : ZPRRegOp<"s", ZPRAsmOp3b32, ElementSizeS, ZPR_3b>;
def ZPRAsmOp4b16 : ZPRAsmOperand<"Vector4bH", 16, "_4b">;
def ZPRAsmOp4b32 : ZPRAsmOperand<"Vector4bS", 32, "_4b">;
def ZPRAsmOp4b64 : ZPRAsmOperand<"Vector4bD", 64, "_4b">;
-def ZPR4b16 : ZPRRegOp<"h", ZPRAsmOp4b16, ZPR_4b>;
-def ZPR4b32 : ZPRRegOp<"s", ZPRAsmOp4b32, ZPR_4b>;
-def ZPR4b64 : ZPRRegOp<"d", ZPRAsmOp4b64, ZPR_4b>;
+def ZPR4b16 : ZPRRegOp<"h", ZPRAsmOp4b16, ElementSizeH, ZPR_4b>;
+def ZPR4b32 : ZPRRegOp<"s", ZPRAsmOp4b32, ElementSizeS, ZPR_4b>;
+def ZPR4b64 : ZPRRegOp<"d", ZPRAsmOp4b64, ElementSizeD, ZPR_4b>;
class FPRasZPR<int Width> : AsmOperandClass{
let Name = "FPR" # Width # "asZPR";
diff --git a/lib/Target/AArch64/AArch64SVEInstrInfo.td b/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 16e6ddda6398..0fde68011e86 100644
--- a/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -220,10 +220,33 @@ let Predicates = [HasSVE] in {
def PUNPKLO_PP : sve_int_perm_punpk<0b0, "punpklo">;
def PUNPKHI_PP : sve_int_perm_punpk<0b1, "punpkhi">;
+ defm MOVPRFX_ZPzZ : sve_int_movprfx_pred_zero<0b000, "movprfx">;
+ defm MOVPRFX_ZPmZ : sve_int_movprfx_pred_merge<0b001, "movprfx">;
+ def MOVPRFX_ZZ : sve_int_bin_cons_misc_0_c<0b00000001, "movprfx", ZPRAny>;
def FEXPA_ZZ_H : sve_int_bin_cons_misc_0_c<0b01000000, "fexpa", ZPR16>;
def FEXPA_ZZ_S : sve_int_bin_cons_misc_0_c<0b10000000, "fexpa", ZPR32>;
def FEXPA_ZZ_D : sve_int_bin_cons_misc_0_c<0b11000000, "fexpa", ZPR64>;
+ def BRKPA_PPzPP : sve_int_brkp<0b00, "brkpa">;
+ def BRKPAS_PPzPP : sve_int_brkp<0b10, "brkpas">;
+ def BRKPB_PPzPP : sve_int_brkp<0b01, "brkpb">;
+ def BRKPBS_PPzPP : sve_int_brkp<0b11, "brkpbs">;
+
+ def BRKN_PPzP : sve_int_brkn<0b0, "brkn">;
+ def BRKNS_PPzP : sve_int_brkn<0b1, "brkns">;
+
+ defm BRKA_PPzP : sve_int_break_z<0b000, "brka">;
+ defm BRKA_PPmP : sve_int_break_m<0b001, "brka">;
+ defm BRKAS_PPzP : sve_int_break_z<0b010, "brkas">;
+ defm BRKB_PPzP : sve_int_break_z<0b100, "brkb">;
+ defm BRKB_PPmP : sve_int_break_m<0b101, "brkb">;
+ defm BRKBS_PPzP : sve_int_break_z<0b110, "brkbs">;
+
+ def PTEST_PP : sve_int_ptest<0b010000, "ptest">;
+ def PFALSE : sve_int_pfalse<0b000000, "pfalse">;
+ defm PFIRST : sve_int_pfirst<0b00000, "pfirst">;
+ defm PNEXT : sve_int_pnext<0b00110, "pnext">;
+
def AND_PPzPP : sve_int_pred_log<0b0000, "and">;
def BIC_PPzPP : sve_int_pred_log<0b0001, "bic">;
def EOR_PPzPP : sve_int_pred_log<0b0010, "eor">;
@@ -731,6 +754,21 @@ let Predicates = [HasSVE] in {
defm FCMEQ_PPzZ0 : sve_fp_2op_p_pd<0b100, "fcmeq">;
defm FCMNE_PPzZ0 : sve_fp_2op_p_pd<0b110, "fcmne">;
+ defm WHILELT_PWW : sve_int_while4_rr<0b010, "whilelt">;
+ defm WHILELE_PWW : sve_int_while4_rr<0b011, "whilele">;
+ defm WHILELO_PWW : sve_int_while4_rr<0b110, "whilelo">;
+ defm WHILELS_PWW : sve_int_while4_rr<0b111, "whilels">;
+
+ defm WHILELT_PXX : sve_int_while8_rr<0b010, "whilelt">;
+ defm WHILELE_PXX : sve_int_while8_rr<0b011, "whilele">;
+ defm WHILELO_PXX : sve_int_while8_rr<0b110, "whilelo">;
+ defm WHILELS_PXX : sve_int_while8_rr<0b111, "whilels">;
+
+ def CTERMEQ_WW : sve_int_cterm<0b0, 0b0, "ctermeq", GPR32>;
+ def CTERMNE_WW : sve_int_cterm<0b0, 0b1, "ctermne", GPR32>;
+ def CTERMEQ_XX : sve_int_cterm<0b1, 0b0, "ctermeq", GPR64>;
+ def CTERMNE_XX : sve_int_cterm<0b1, 0b1, "ctermne", GPR64>;
+
def RDVLI_XI : sve_int_read_vl_a<0b0, 0b11111, "rdvl">;
def ADDVL_XXI : sve_int_arith_vl<0b0, "addvl">;
def ADDPL_XXI : sve_int_arith_vl<0b1, "addpl">;
@@ -854,40 +892,40 @@ let Predicates = [HasSVE] in {
defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr">;
defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl">;
- def FCVT_ZPmZ_StoH : sve_fp_2op_p_zd<0b1001000, "fcvt", ZPR32, ZPR16>;
- def FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd<0b1001001, "fcvt", ZPR16, ZPR32>;
- def SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110010, "scvtf", ZPR16, ZPR16>;
- def SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010100, "scvtf", ZPR32, ZPR32>;
- def UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010101, "ucvtf", ZPR32, ZPR32>;
- def UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110011, "ucvtf", ZPR16, ZPR16>;
- def FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111010, "fcvtzs", ZPR16, ZPR16>;
- def FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011100, "fcvtzs", ZPR32, ZPR32>;
- def FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111011, "fcvtzu", ZPR16, ZPR16>;
- def FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011101, "fcvtzu", ZPR32, ZPR32>;
- def FCVT_ZPmZ_DtoH : sve_fp_2op_p_zd<0b1101000, "fcvt", ZPR64, ZPR16>;
- def FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd<0b1101001, "fcvt", ZPR16, ZPR64>;
- def FCVT_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1101010, "fcvt", ZPR64, ZPR32>;
- def FCVT_ZPmZ_StoD : sve_fp_2op_p_zd<0b1101011, "fcvt", ZPR32, ZPR64>;
- def SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110000, "scvtf", ZPR32, ZPR64>;
- def UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110001, "ucvtf", ZPR32, ZPR64>;
- def UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110101, "ucvtf", ZPR32, ZPR16>;
- def SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110100, "scvtf", ZPR64, ZPR32>;
- def SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110100, "scvtf", ZPR32, ZPR16>;
- def SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110110, "scvtf", ZPR64, ZPR16>;
- def UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110101, "ucvtf", ZPR64, ZPR32>;
- def UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110111, "ucvtf", ZPR64, ZPR16>;
- def SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110110, "scvtf", ZPR64, ZPR64>;
- def UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110111, "ucvtf", ZPR64, ZPR64>;
- def FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111000, "fcvtzs", ZPR64, ZPR32>;
- def FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111001, "fcvtzu", ZPR64, ZPR32>;
- def FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111100, "fcvtzs", ZPR32, ZPR64>;
- def FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111100, "fcvtzs", ZPR16, ZPR32>;
- def FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64>;
- def FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32>;
- def FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64>;
- def FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64>;
- def FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64>;
- def FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64>;
+ def FCVT_ZPmZ_StoH : sve_fp_2op_p_zd<0b1001000, "fcvt", ZPR32, ZPR16, ElementSizeS>;
+ def FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd<0b1001001, "fcvt", ZPR16, ZPR32, ElementSizeS>;
+ def SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110010, "scvtf", ZPR16, ZPR16, ElementSizeH>;
+ def SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010100, "scvtf", ZPR32, ZPR32, ElementSizeS>;
+ def UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010101, "ucvtf", ZPR32, ZPR32, ElementSizeS>;
+ def UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110011, "ucvtf", ZPR16, ZPR16, ElementSizeH>;
+ def FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111010, "fcvtzs", ZPR16, ZPR16, ElementSizeH>;
+ def FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011100, "fcvtzs", ZPR32, ZPR32, ElementSizeS>;
+ def FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111011, "fcvtzu", ZPR16, ZPR16, ElementSizeH>;
+ def FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011101, "fcvtzu", ZPR32, ZPR32, ElementSizeS>;
+ def FCVT_ZPmZ_DtoH : sve_fp_2op_p_zd<0b1101000, "fcvt", ZPR64, ZPR16, ElementSizeD>;
+ def FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd<0b1101001, "fcvt", ZPR16, ZPR64, ElementSizeD>;
+ def FCVT_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1101010, "fcvt", ZPR64, ZPR32, ElementSizeD>;
+ def FCVT_ZPmZ_StoD : sve_fp_2op_p_zd<0b1101011, "fcvt", ZPR32, ZPR64, ElementSizeD>;
+ def SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110000, "scvtf", ZPR32, ZPR64, ElementSizeD>;
+ def UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110001, "ucvtf", ZPR32, ZPR64, ElementSizeD>;
+ def UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110101, "ucvtf", ZPR32, ZPR16, ElementSizeS>;
+ def SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110100, "scvtf", ZPR64, ZPR32, ElementSizeD>;
+ def SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110100, "scvtf", ZPR32, ZPR16, ElementSizeS>;
+ def SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110110, "scvtf", ZPR64, ZPR16, ElementSizeD>;
+ def UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110101, "ucvtf", ZPR64, ZPR32, ElementSizeD>;
+ def UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110111, "ucvtf", ZPR64, ZPR16, ElementSizeD>;
+ def SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110110, "scvtf", ZPR64, ZPR64, ElementSizeD>;
+ def UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110111, "ucvtf", ZPR64, ZPR64, ElementSizeD>;
+ def FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111000, "fcvtzs", ZPR64, ZPR32, ElementSizeD>;
+ def FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111001, "fcvtzu", ZPR64, ZPR32, ElementSizeD>;
+ def FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111100, "fcvtzs", ZPR32, ZPR64, ElementSizeD>;
+ def FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111100, "fcvtzs", ZPR16, ZPR32, ElementSizeS>;
+ def FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64, ElementSizeD>;
+ def FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32, ElementSizeS>;
+ def FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64, ElementSizeD>;
+ def FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64, ElementSizeD>;
+ def FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, ElementSizeD>;
+ def FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, ElementSizeD>;
defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn">;
defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp">;
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index 01a997e5aed7..120d71381c67 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -255,6 +255,9 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT,
// AArch64 supports the MachineOutliner.
setMachineOutliner(true);
+
+ // AArch64 supports default outlining behaviour.
+ setSupportsDefaultOutlining(true);
}
AArch64TargetMachine::~AArch64TargetMachine() = default;
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index d75fef7b0171..96e751e86971 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -577,7 +577,7 @@ int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
unsigned NumVectorInstToHideOverhead = 10;
int MaxMergeDistance = 64;
- if (Ty->isVectorTy() && SE &&
+ if (Ty->isVectorTy() && SE &&
!BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
return NumVectorInstToHideOverhead;
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index a51c41d70915..30a9a08f2346 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -11,6 +11,7 @@
#include "MCTargetDesc/AArch64MCExpr.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "MCTargetDesc/AArch64TargetStreamer.h"
+#include "AArch64InstrInfo.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
@@ -79,6 +80,67 @@ private:
// Map of register aliases registers via the .req directive.
StringMap<std::pair<RegKind, unsigned>> RegisterReqs;
+ class PrefixInfo {
+ public:
+ static PrefixInfo CreateFromInst(const MCInst &Inst, uint64_t TSFlags) {
+ PrefixInfo Prefix;
+ switch (Inst.getOpcode()) {
+ case AArch64::MOVPRFX_ZZ:
+ Prefix.Active = true;
+ Prefix.Dst = Inst.getOperand(0).getReg();
+ break;
+ case AArch64::MOVPRFX_ZPmZ_B:
+ case AArch64::MOVPRFX_ZPmZ_H:
+ case AArch64::MOVPRFX_ZPmZ_S:
+ case AArch64::MOVPRFX_ZPmZ_D:
+ Prefix.Active = true;
+ Prefix.Predicated = true;
+ Prefix.ElementSize = TSFlags & AArch64::ElementSizeMask;
+ assert(Prefix.ElementSize != AArch64::ElementSizeNone &&
+ "No destructive element size set for movprfx");
+ Prefix.Dst = Inst.getOperand(0).getReg();
+ Prefix.Pg = Inst.getOperand(2).getReg();
+ break;
+ case AArch64::MOVPRFX_ZPzZ_B:
+ case AArch64::MOVPRFX_ZPzZ_H:
+ case AArch64::MOVPRFX_ZPzZ_S:
+ case AArch64::MOVPRFX_ZPzZ_D:
+ Prefix.Active = true;
+ Prefix.Predicated = true;
+ Prefix.ElementSize = TSFlags & AArch64::ElementSizeMask;
+ assert(Prefix.ElementSize != AArch64::ElementSizeNone &&
+ "No destructive element size set for movprfx");
+ Prefix.Dst = Inst.getOperand(0).getReg();
+ Prefix.Pg = Inst.getOperand(1).getReg();
+ break;
+ default:
+ break;
+ }
+
+ return Prefix;
+ }
+
+ PrefixInfo() : Active(false), Predicated(false) {}
+ bool isActive() const { return Active; }
+ bool isPredicated() const { return Predicated; }
+ unsigned getElementSize() const {
+ assert(Predicated);
+ return ElementSize;
+ }
+ unsigned getDstReg() const { return Dst; }
+ unsigned getPgReg() const {
+ assert(Predicated);
+ return Pg;
+ }
+
+ private:
+ bool Active;
+ bool Predicated;
+ unsigned ElementSize;
+ unsigned Dst;
+ unsigned Pg;
+ } NextPrefix;
+
AArch64TargetStreamer &getTargetStreamer() {
MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
return static_cast<AArch64TargetStreamer &>(TS);
@@ -113,7 +175,8 @@ private:
bool parseDirectiveReq(StringRef Name, SMLoc L);
bool parseDirectiveUnreq(SMLoc L);
- bool validateInstruction(MCInst &Inst, SmallVectorImpl<SMLoc> &Loc);
+ bool validateInstruction(MCInst &Inst, SMLoc &IDLoc,
+ SmallVectorImpl<SMLoc> &Loc);
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
@@ -3665,12 +3728,89 @@ bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
return false;
}
+static inline bool isMatchingOrAlias(unsigned ZReg, unsigned Reg) {
+ assert((ZReg >= AArch64::Z0) && (ZReg <= AArch64::Z31));
+ return (ZReg == ((Reg - AArch64::B0) + AArch64::Z0)) ||
+ (ZReg == ((Reg - AArch64::H0) + AArch64::Z0)) ||
+ (ZReg == ((Reg - AArch64::S0) + AArch64::Z0)) ||
+ (ZReg == ((Reg - AArch64::D0) + AArch64::Z0)) ||
+ (ZReg == ((Reg - AArch64::Q0) + AArch64::Z0)) ||
+ (ZReg == ((Reg - AArch64::Z0) + AArch64::Z0));
+}
+
// FIXME: This entire function is a giant hack to provide us with decent
// operand range validation/diagnostics until TableGen/MC can be extended
// to support autogeneration of this kind of validation.
-bool AArch64AsmParser::validateInstruction(MCInst &Inst,
- SmallVectorImpl<SMLoc> &Loc) {
+bool AArch64AsmParser::validateInstruction(MCInst &Inst, SMLoc &IDLoc,
+ SmallVectorImpl<SMLoc> &Loc) {
const MCRegisterInfo *RI = getContext().getRegisterInfo();
+ const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
+
+ // A prefix only applies to the instruction following it. Here we extract
+ // prefix information for the next instruction before validating the current
+ // one so that in the case of failure we don't erronously continue using the
+ // current prefix.
+ PrefixInfo Prefix = NextPrefix;
+ NextPrefix = PrefixInfo::CreateFromInst(Inst, MCID.TSFlags);
+
+ // Before validating the instruction in isolation we run through the rules
+ // applicable when it follows a prefix instruction.
+ // NOTE: brk & hlt can be prefixed but require no additional validation.
+ if (Prefix.isActive() &&
+ (Inst.getOpcode() != AArch64::BRK) &&
+ (Inst.getOpcode() != AArch64::HLT)) {
+
+ // Prefixed intructions must have a destructive operand.
+ if ((MCID.TSFlags & AArch64::DestructiveInstTypeMask) ==
+ AArch64::NotDestructive)
+ return Error(IDLoc, "instruction is unpredictable when following a"
+ " movprfx, suggest replacing movprfx with mov");
+
+ // Destination operands must match.
+ if (Inst.getOperand(0).getReg() != Prefix.getDstReg())
+ return Error(Loc[0], "instruction is unpredictable when following a"
+ " movprfx writing to a different destination");
+
+ // Destination operand must not be used in any other location.
+ for (unsigned i = 1; i < Inst.getNumOperands(); ++i) {
+ if (Inst.getOperand(i).isReg() &&
+ (MCID.getOperandConstraint(i, MCOI::TIED_TO) == -1) &&
+ isMatchingOrAlias(Prefix.getDstReg(), Inst.getOperand(i).getReg()))
+ return Error(Loc[0], "instruction is unpredictable when following a"
+ " movprfx and destination also used as non-destructive"
+ " source");
+ }
+
+ auto PPRRegClass = AArch64MCRegisterClasses[AArch64::PPRRegClassID];
+ if (Prefix.isPredicated()) {
+ int PgIdx = -1;
+
+ // Find the instructions general predicate.
+ for (unsigned i = 1; i < Inst.getNumOperands(); ++i)
+ if (Inst.getOperand(i).isReg() &&
+ PPRRegClass.contains(Inst.getOperand(i).getReg())) {
+ PgIdx = i;
+ break;
+ }
+
+ // Instruction must be predicated if the movprfx is predicated.
+ if (PgIdx == -1 ||
+ (MCID.TSFlags & AArch64::ElementSizeMask) == AArch64::ElementSizeNone)
+ return Error(IDLoc, "instruction is unpredictable when following a"
+ " predicated movprfx, suggest using unpredicated movprfx");
+
+ // Instruction must use same general predicate as the movprfx.
+ if (Inst.getOperand(PgIdx).getReg() != Prefix.getPgReg())
+ return Error(IDLoc, "instruction is unpredictable when following a"
+ " predicated movprfx using a different general predicate");
+
+ // Instruction element type must match the movprfx.
+ if ((MCID.TSFlags & AArch64::ElementSizeMask) != Prefix.getElementSize())
+ return Error(IDLoc, "instruction is unpredictable when following a"
+ " predicated movprfx with a different element size");
+ }
+ }
+
// Check for indexed addressing modes w/ the base register being the
// same as a destination/source register or pair load where
// the Rt == Rt2. All of those are undefined behaviour.
@@ -4516,7 +4656,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
NumOperands = Operands.size();
for (unsigned i = 1; i < NumOperands; ++i)
OperandLocs.push_back(Operands[i]->getStartLoc());
- if (validateInstruction(Inst, OperandLocs))
+ if (validateInstruction(Inst, IDLoc, OperandLocs))
return true;
Inst.setLoc(IDLoc);
@@ -4719,7 +4859,6 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
const MCObjectFileInfo::Environment Format =
getContext().getObjectFileInfo()->getObjectFileType();
bool IsMachO = Format == MCObjectFileInfo::IsMachO;
- bool IsCOFF = Format == MCObjectFileInfo::IsCOFF;
StringRef IDVal = DirectiveID.getIdentifier();
SMLoc Loc = DirectiveID.getLoc();
@@ -4733,14 +4872,14 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
parseDirectiveLtorg(Loc);
else if (IDVal == ".unreq")
parseDirectiveUnreq(Loc);
- else if (!IsMachO && !IsCOFF) {
- if (IDVal == ".inst")
- parseDirectiveInst(Loc);
+ else if (IDVal == ".inst")
+ parseDirectiveInst(Loc);
+ else if (IsMachO) {
+ if (IDVal == MCLOHDirectiveName())
+ parseDirectiveLOH(IDVal, Loc);
else
return true;
- } else if (IDVal == MCLOHDirectiveName())
- parseDirectiveLOH(IDVal, Loc);
- else
+ } else
return true;
return false;
}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
index 1b949b54590c..dee964df2635 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
@@ -39,4 +39,16 @@ void AArch64TargetStreamer::emitCurrentConstantPool() {
// finish() - write out any non-empty assembler constant pools.
void AArch64TargetStreamer::finish() { ConstantPools->emitAll(Streamer); }
-void AArch64TargetStreamer::emitInst(uint32_t Inst) {}
+void AArch64TargetStreamer::emitInst(uint32_t Inst) {
+ char Buffer[4];
+
+ // We can't just use EmitIntValue here, as that will swap the
+ // endianness on big-endian systems (instructions are always
+ // little-endian).
+ for (unsigned I = 0; I < 4; ++I) {
+ Buffer[I] = uint8_t(Inst);
+ Inst >>= 8;
+ }
+
+ getStreamer().EmitBytes(StringRef(Buffer, 4));
+}
diff --git a/lib/Target/AArch64/SVEInstrFormats.td b/lib/Target/AArch64/SVEInstrFormats.td
index 17b3f6041279..7a8dd8bc5aee 100644
--- a/lib/Target/AArch64/SVEInstrFormats.td
+++ b/lib/Target/AArch64/SVEInstrFormats.td
@@ -282,6 +282,79 @@ let Predicates = [HasSVE] in {
//===----------------------------------------------------------------------===//
+// SVE Predicate Misc Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_pfalse<bits<6> opc, string asm>
+: I<(outs PPR8:$Pd), (ins),
+ asm, "\t$Pd",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pd;
+ let Inst{31-24} = 0b00100101;
+ let Inst{23-22} = opc{5-4};
+ let Inst{21-19} = 0b011;
+ let Inst{18-16} = opc{3-1};
+ let Inst{15-10} = 0b111001;
+ let Inst{9} = opc{0};
+ let Inst{8-4} = 0b00000;
+ let Inst{3-0} = Pd;
+}
+
+class sve_int_ptest<bits<6> opc, string asm>
+: I<(outs), (ins PPRAny:$Pg, PPR8:$Pn),
+ asm, "\t$Pg, $Pn",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pg;
+ bits<4> Pn;
+ let Inst{31-24} = 0b00100101;
+ let Inst{23-22} = opc{5-4};
+ let Inst{21-19} = 0b010;
+ let Inst{18-16} = opc{3-1};
+ let Inst{15-14} = 0b11;
+ let Inst{13-10} = Pg;
+ let Inst{9} = opc{0};
+ let Inst{8-5} = Pn;
+ let Inst{4-0} = 0b00000;
+
+ let Defs = [NZCV];
+}
+
+class sve_int_pfirst_next<bits<2> sz8_64, bits<5> opc, string asm,
+ PPRRegOp pprty>
+: I<(outs pprty:$Pdn), (ins PPRAny:$Pg, pprty:$_Pdn),
+ asm, "\t$Pdn, $Pg, $_Pdn",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pdn;
+ bits<4> Pg;
+ let Inst{31-24} = 0b00100101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21-19} = 0b011;
+ let Inst{18-16} = opc{4-2};
+ let Inst{15-11} = 0b11000;
+ let Inst{10-9} = opc{1-0};
+ let Inst{8-5} = Pg;
+ let Inst{4} = 0;
+ let Inst{3-0} = Pdn;
+
+ let Constraints = "$Pdn = $_Pdn";
+ let Defs = [NZCV];
+}
+
+multiclass sve_int_pfirst<bits<5> opc, string asm> {
+ def : sve_int_pfirst_next<0b01, opc, asm, PPR8>;
+}
+
+multiclass sve_int_pnext<bits<5> opc, string asm> {
+ def _B : sve_int_pfirst_next<0b00, opc, asm, PPR8>;
+ def _H : sve_int_pfirst_next<0b01, opc, asm, PPR16>;
+ def _S : sve_int_pfirst_next<0b10, opc, asm, PPR32>;
+ def _D : sve_int_pfirst_next<0b11, opc, asm, PPR64>;
+}
+
+//===----------------------------------------------------------------------===//
// SVE Predicate Count Group
//===----------------------------------------------------------------------===//
@@ -348,6 +421,8 @@ class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_int_count_v<bits<5> opc, string asm> {
@@ -433,6 +508,8 @@ class sve_int_countvlv<bits<5> opc, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_int_countvlv<bits<5> opc, string asm, ZPRRegOp zprty> {
@@ -738,6 +815,8 @@ class sve_int_perm_insrs<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_int_perm_insrs<string asm> {
@@ -762,6 +841,8 @@ class sve_int_perm_insrv<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_int_perm_insrv<string asm> {
@@ -790,6 +871,8 @@ class sve_int_perm_extract_i<string asm>
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
//===----------------------------------------------------------------------===//
@@ -883,6 +966,8 @@ class sve_int_log_imm<bits<2> opc, string asm>
let Constraints = "$Zdn = $_Zdn";
let DecoderMethod = "DecodeSVELogicalImmInstruction";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_int_log_imm<bits<2> opc, string asm, string alias> {
@@ -993,6 +1078,8 @@ class sve_fp_2op_i_p_zds<bits<2> sz, bits<3> opc, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_fp_2op_i_p_zds<bits<3> opc, string asm, Operand imm_ty> {
@@ -1020,6 +1107,8 @@ class sve_fp_2op_p_zds<bits<2> sz, bits<4> opc, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_fp_2op_p_zds<bits<4> opc, string asm> {
@@ -1045,6 +1134,8 @@ class sve_fp_ftmad<bits<2> sz, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_fp_ftmad<string asm> {
@@ -1106,6 +1197,8 @@ class sve_fp_3op_p_zds_a<bits<2> sz, bits<2> opc, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_fp_3op_p_zds_a<bits<2> opc, string asm> {
@@ -1135,6 +1228,8 @@ class sve_fp_3op_p_zds_b<bits<2> sz, bits<2> opc, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_fp_3op_p_zds_b<bits<2> opc, string asm> {
@@ -1163,6 +1258,8 @@ class sve_fp_fma_by_indexed_elem<bits<2> sz, bit opc, string asm,
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_fp_fma_by_indexed_elem<bit opc, string asm> {
@@ -1253,6 +1350,8 @@ class sve_fp_fcmla<bits<2> sz, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_fp_fcmla<string asm> {
@@ -1284,6 +1383,8 @@ class sve_fp_fcmla_by_indexed_elem<bits<2> sz, string asm,
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_fp_fcmla_by_indexed_elem<string asm> {
@@ -1325,6 +1426,8 @@ class sve_fp_fcadd<bits<2> sz, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_fp_fcadd<string asm> {
@@ -1405,7 +1508,7 @@ multiclass sve_int_perm_bin_perm_zz<bits<3> opc, string asm> {
//===----------------------------------------------------------------------===//
class sve_fp_2op_p_zd<bits<7> opc, string asm, RegisterOperand i_zprtype,
- RegisterOperand o_zprtype>
+ RegisterOperand o_zprtype, ElementSizeEnum size>
: I<(outs o_zprtype:$Zd), (ins i_zprtype:$_Zd, PPR3bAny:$Pg, i_zprtype:$Zn),
asm, "\t$Zd, $Pg/m, $Zn",
"",
@@ -1423,12 +1526,14 @@ class sve_fp_2op_p_zd<bits<7> opc, string asm, RegisterOperand i_zprtype,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
+ let DestructiveInstType = Destructive;
+ let ElementSize = size;
}
multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm> {
- def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16>;
- def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32>;
- def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64>;
+ def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16, ElementSizeH>;
+ def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>;
+ def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64, ElementSizeD>;
}
//===----------------------------------------------------------------------===//
@@ -1480,6 +1585,8 @@ class sve_int_bin_pred_arit_log<bits<2> sz8_64, bits<2> fmt, bits<3> opc,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_int_bin_pred_log<bits<3> opc, string asm> {
@@ -1541,6 +1648,8 @@ class sve_int_mladdsub_vvv_pred<bits<2> sz8_64, bits<1> opc, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_int_mladdsub_vvv_pred<bits<1> opc, string asm> {
@@ -1571,6 +1680,8 @@ class sve_int_mlas_vvv_pred<bits<2> sz8_64, bits<1> opc, string asm,
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_int_mlas_vvv_pred<bits<1> opc, string asm> {
@@ -1601,6 +1712,8 @@ class sve_intx_dot<bit sz, bit U, string asm, ZPRRegOp zprty1,
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty1.ElementSize;
}
multiclass sve_intx_dot<bit opc, string asm> {
@@ -1629,6 +1742,8 @@ class sve_intx_dot_by_indexed_elem<bit sz, bit U, string asm,
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_intx_dot_by_indexed_elem<bit opc, string asm> {
@@ -1670,6 +1785,8 @@ class sve_int_un_pred_arit<bits<2> sz8_64, bits<4> opc,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_int_un_pred_arit_0<bits<3> opc, string asm> {
@@ -1800,6 +1917,8 @@ class sve_int_arith_imm0<bits<2> sz8_64, bits<3> opc, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_int_arith_imm0<bits<3> opc, string asm> {
@@ -1825,6 +1944,8 @@ class sve_int_arith_imm<bits<2> sz8_64, bits<6> opc, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_int_arith_imm1<bits<2> opc, string asm, Operand immtype> {
@@ -1885,6 +2006,8 @@ class sve_int_dup_fpimm_pred<bits<2> sz, Operand fpimmtype,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_int_dup_fpimm_pred<string asm> {
@@ -1917,6 +2040,9 @@ class sve_int_dup_imm_pred<bits<2> sz8_64, bit m, string asm,
let Inst{13} = imm{8}; // sh
let Inst{12-5} = imm{7-0}; // imm8
let Inst{4-0} = Zd;
+
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_int_dup_imm_pred_merge<string asm> {
@@ -2083,6 +2209,65 @@ multiclass sve_int_ucmp_vi<bits<2> opc, string asm> {
//===----------------------------------------------------------------------===//
+// SVE Integer Compare - Scalars Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_cterm<bit sz, bit opc, string asm, RegisterClass rt>
+: I<(outs), (ins rt:$Rn, rt:$Rm),
+ asm, "\t$Rn, $Rm",
+ "",
+ []>, Sched<[]> {
+ bits<5> Rm;
+ bits<5> Rn;
+ let Inst{31-23} = 0b001001011;
+ let Inst{22} = sz;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rm;
+ let Inst{15-10} = 0b001000;
+ let Inst{9-5} = Rn;
+ let Inst{4} = opc;
+ let Inst{3-0} = 0b0000;
+
+ let Defs = [NZCV];
+}
+
+class sve_int_while_rr<bits<2> sz8_64, bits<4> opc, string asm,
+ RegisterClass gprty, PPRRegOp pprty>
+: I<(outs pprty:$Pd), (ins gprty:$Rn, gprty:$Rm),
+ asm, "\t$Pd, $Rn, $Rm",
+ "", []>, Sched<[]> {
+ bits<4> Pd;
+ bits<5> Rm;
+ bits<5> Rn;
+ let Inst{31-24} = 0b00100101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rm;
+ let Inst{15-13} = 0b000;
+ let Inst{12-10} = opc{3-1};
+ let Inst{9-5} = Rn;
+ let Inst{4} = opc{0};
+ let Inst{3-0} = Pd;
+
+ let Defs = [NZCV];
+}
+
+multiclass sve_int_while4_rr<bits<3> opc, string asm> {
+ def _B : sve_int_while_rr<0b00, { 0, opc }, asm, GPR32, PPR8>;
+ def _H : sve_int_while_rr<0b01, { 0, opc }, asm, GPR32, PPR16>;
+ def _S : sve_int_while_rr<0b10, { 0, opc }, asm, GPR32, PPR32>;
+ def _D : sve_int_while_rr<0b11, { 0, opc }, asm, GPR32, PPR64>;
+}
+
+multiclass sve_int_while8_rr<bits<3> opc, string asm> {
+ def _B : sve_int_while_rr<0b00, { 1, opc }, asm, GPR64, PPR8>;
+ def _H : sve_int_while_rr<0b01, { 1, opc }, asm, GPR64, PPR16>;
+ def _S : sve_int_while_rr<0b10, { 1, opc }, asm, GPR64, PPR32>;
+ def _D : sve_int_while_rr<0b11, { 1, opc }, asm, GPR64, PPR64>;
+}
+
+
+//===----------------------------------------------------------------------===//
// SVE Floating Point Fast Reduction Group
//===----------------------------------------------------------------------===//
@@ -2312,9 +2497,9 @@ multiclass sve_int_index_rr<string asm> {
//===----------------------------------------------------------------------===//
// SVE Bitwise Shift - Predicated Group
//===----------------------------------------------------------------------===//
-
class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<3> opc, string asm,
- ZPRRegOp zprty, Operand immtype>
+ ZPRRegOp zprty, Operand immtype,
+ ElementSizeEnum size>
: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, immtype:$imm),
asm, "\t$Zdn, $Pg/m, $_Zdn, $imm",
"",
@@ -2333,31 +2518,41 @@ class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<3> opc, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = size;
}
multiclass sve_int_bin_pred_shift_imm_left<bits<3> opc, string asm> {
- def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>;
- def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> {
+ def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8,
+ ElementSizeB>;
+ def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16,
+ ElementSizeH> {
let Inst{8} = imm{3};
}
- def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> {
+ def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32,
+ ElementSizeS> {
let Inst{9-8} = imm{4-3};
}
- def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> {
+ def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64,
+ ElementSizeD> {
let Inst{22} = imm{5};
let Inst{9-8} = imm{4-3};
}
}
multiclass sve_int_bin_pred_shift_imm_right<bits<3> opc, string asm> {
- def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
- def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
+ def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8,
+ ElementSizeB>;
+ def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16,
+ ElementSizeH> {
let Inst{8} = imm{3};
}
- def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> {
+ def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32,
+ ElementSizeS> {
let Inst{9-8} = imm{4-3};
}
- def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> {
+ def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64,
+ ElementSizeD> {
let Inst{22} = imm{5};
let Inst{9-8} = imm{4-3};
}
@@ -2383,6 +2578,8 @@ class sve_int_bin_pred_shift<bits<2> sz8_64, bit wide, bits<3> opc,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_int_bin_pred_shift<bits<3> opc, string asm> {
@@ -3017,6 +3214,8 @@ class sve_int_perm_clast_zz<bits<2> sz8_64, bit ab, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_int_perm_clast_zz<bit ab, string asm> {
@@ -3094,6 +3293,8 @@ class sve_int_perm_splice<bits<2> sz8_64, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
}
multiclass sve_int_perm_splice<string asm> {
@@ -3122,6 +3323,8 @@ class sve_int_perm_rev<bits<2> sz8_64, bits<2> opc, string asm,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_int_perm_rev_rbit<string asm> {
@@ -3163,6 +3366,8 @@ class sve_int_perm_cpy_r<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_int_perm_cpy_r<string asm> {
@@ -3198,6 +3403,8 @@ class sve_int_perm_cpy_v<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_int_perm_cpy_v<string asm> {
@@ -4117,3 +4324,133 @@ multiclass sve_int_reduce_2<bits<3> opc, string asm> {
def _S : sve_int_reduce<0b10, 0b11, opc, asm, ZPR32, FPR32>;
def _D : sve_int_reduce<0b11, 0b11, opc, asm, ZPR64, FPR64>;
}
+
+class sve_int_movprfx_pred<bits<2> sz8_32, bits<3> opc, string asm,
+ ZPRRegOp zprty, string pg_suffix, dag iops>
+: I<(outs zprty:$Zd), iops,
+ asm, "\t$Zd, $Pg"#pg_suffix#", $Zn",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Zd;
+ bits<5> Zn;
+ let Inst{31-24} = 0b00000100;
+ let Inst{23-22} = sz8_32;
+ let Inst{21-19} = 0b010;
+ let Inst{18-16} = opc;
+ let Inst{15-13} = 0b001;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+
+ let ElementSize = zprty.ElementSize;
+}
+
+multiclass sve_int_movprfx_pred_merge<bits<3> opc, string asm> {
+let Constraints = "$Zd = $_Zd" in {
+ def _B : sve_int_movprfx_pred<0b00, opc, asm, ZPR8, "/m",
+ (ins ZPR8:$_Zd, PPR3bAny:$Pg, ZPR8:$Zn)>;
+ def _H : sve_int_movprfx_pred<0b01, opc, asm, ZPR16, "/m",
+ (ins ZPR16:$_Zd, PPR3bAny:$Pg, ZPR16:$Zn)>;
+ def _S : sve_int_movprfx_pred<0b10, opc, asm, ZPR32, "/m",
+ (ins ZPR32:$_Zd, PPR3bAny:$Pg, ZPR32:$Zn)>;
+ def _D : sve_int_movprfx_pred<0b11, opc, asm, ZPR64, "/m",
+ (ins ZPR64:$_Zd, PPR3bAny:$Pg, ZPR64:$Zn)>;
+}
+}
+
+multiclass sve_int_movprfx_pred_zero<bits<3> opc, string asm> {
+ def _B : sve_int_movprfx_pred<0b00, opc, asm, ZPR8, "/z",
+ (ins PPR3bAny:$Pg, ZPR8:$Zn)>;
+ def _H : sve_int_movprfx_pred<0b01, opc, asm, ZPR16, "/z",
+ (ins PPR3bAny:$Pg, ZPR16:$Zn)>;
+ def _S : sve_int_movprfx_pred<0b10, opc, asm, ZPR32, "/z",
+ (ins PPR3bAny:$Pg, ZPR32:$Zn)>;
+ def _D : sve_int_movprfx_pred<0b11, opc, asm, ZPR64, "/z",
+ (ins PPR3bAny:$Pg, ZPR64:$Zn)>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Propagate Break Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_brkp<bits<2> opc, string asm>
+: I<(outs PPR8:$Pd), (ins PPRAny:$Pg, PPR8:$Pn, PPR8:$Pm),
+ asm, "\t$Pd, $Pg/z, $Pn, $Pm",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pd;
+ bits<4> Pg;
+ bits<4> Pm;
+ bits<4> Pn;
+ let Inst{31-24} = 0b00100101;
+ let Inst{23} = 0b0;
+ let Inst{22} = opc{1};
+ let Inst{21-20} = 0b00;
+ let Inst{19-16} = Pm;
+ let Inst{15-14} = 0b11;
+ let Inst{13-10} = Pg;
+ let Inst{9} = 0b0;
+ let Inst{8-5} = Pn;
+ let Inst{4} = opc{0};
+ let Inst{3-0} = Pd;
+
+ let Defs = !if(!eq (opc{1}, 1), [NZCV], []);
+}
+
+
+//===----------------------------------------------------------------------===//
+// SVE Partition Break Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_brkn<bit S, string asm>
+: I<(outs PPR8:$Pdm), (ins PPRAny:$Pg, PPR8:$Pn, PPR8:$_Pdm),
+ asm, "\t$Pdm, $Pg/z, $Pn, $_Pdm",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pdm;
+ bits<4> Pg;
+ bits<4> Pn;
+ let Inst{31-23} = 0b001001010;
+ let Inst{22} = S;
+ let Inst{21-14} = 0b01100001;
+ let Inst{13-10} = Pg;
+ let Inst{9} = 0b0;
+ let Inst{8-5} = Pn;
+ let Inst{4} = 0b0;
+ let Inst{3-0} = Pdm;
+
+ let Constraints = "$Pdm = $_Pdm";
+ let Defs = !if(!eq (S, 0b1), [NZCV], []);
+}
+
+class sve_int_break<bits<3> opc, string asm, string suffix, dag iops>
+: I<(outs PPR8:$Pd), iops,
+ asm, "\t$Pd, $Pg"#suffix#", $Pn",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pd;
+ bits<4> Pg;
+ bits<4> Pn;
+ let Inst{31-24} = 0b00100101;
+ let Inst{23-22} = opc{2-1};
+ let Inst{21-14} = 0b01000001;
+ let Inst{13-10} = Pg;
+ let Inst{9} = 0b0;
+ let Inst{8-5} = Pn;
+ let Inst{4} = opc{0};
+ let Inst{3-0} = Pd;
+
+ let Constraints = !if(!eq (opc{0}, 1), "$Pd = $_Pd", "");
+ let Defs = !if(!eq (opc{1}, 1), [NZCV], []);
+
+}
+
+multiclass sve_int_break_m<bits<3> opc, string asm> {
+ def NAME : sve_int_break<opc, asm, "/m", (ins PPR8:$_Pd, PPRAny:$Pg, PPR8:$Pn)>;
+}
+
+multiclass sve_int_break_z<bits<3> opc, string asm> {
+ def NAME : sve_int_break<opc, asm, "/z", (ins PPRAny:$Pg, PPR8:$Pn)>;
+}
+
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index b201126c593b..21e44e9589d3 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -554,6 +554,7 @@ static bool fnegFoldsIntoOp(unsigned Opc) {
case ISD::FTRUNC:
case ISD::FRINT:
case ISD::FNEARBYINT:
+ case ISD::FCANONICALIZE:
case AMDGPUISD::RCP:
case AMDGPUISD::RCP_LEGACY:
case AMDGPUISD::RCP_IFLAG:
@@ -907,6 +908,7 @@ void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(
LLVMContext &Ctx = Fn.getParent()->getContext();
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(MF);
const unsigned ExplicitOffset = ST.getExplicitKernelArgOffset(Fn);
+ CallingConv::ID CC = Fn.getCallingConv();
unsigned MaxAlign = 1;
uint64_t ExplicitArgOffset = 0;
@@ -940,16 +942,10 @@ void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(
EVT ArgVT = ValueVTs[Value];
EVT MemVT = ArgVT;
- MVT RegisterVT =
- getRegisterTypeForCallingConv(Ctx, ArgVT);
- unsigned NumRegs =
- getNumRegistersForCallingConv(Ctx, ArgVT);
-
- if (!Subtarget->isAmdHsaOS() &&
- (ArgVT == MVT::i16 || ArgVT == MVT::i8 || ArgVT == MVT::f16)) {
- // The ABI says the caller will extend these values to 32-bits.
- MemVT = ArgVT.isInteger() ? MVT::i32 : MVT::f32;
- } else if (NumRegs == 1) {
+ MVT RegisterVT = getRegisterTypeForCallingConv(Ctx, CC, ArgVT);
+ unsigned NumRegs = getNumRegistersForCallingConv(Ctx, CC, ArgVT);
+
+ if (NumRegs == 1) {
// This argument is not split, so the IR type is the memory type.
if (ArgVT.isExtended()) {
// We have an extended type, like i24, so we should just use the
@@ -3600,6 +3596,7 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
case ISD::FRINT:
case ISD::FNEARBYINT: // XXX - Should fround be handled?
case ISD::FSIN:
+ case ISD::FCANONICALIZE:
case AMDGPUISD::RCP:
case AMDGPUISD::RCP_LEGACY:
case AMDGPUISD::RCP_IFLAG:
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index 96b7568eec1f..7442a59e594f 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -342,8 +342,9 @@ def AMDGPUumed3 : SDNode<"AMDGPUISD::UMED3", AMDGPUDTIntTernaryOp,
def AMDGPUfmed3 : SDNode<"AMDGPUISD::FMED3", SDTFPTernaryOp, []>;
def AMDGPUfdot2 : SDNode<"AMDGPUISD::FDOT2",
- SDTypeProfile<1, 3, [SDTCisSameAs<0, 3>, SDTCisSameAs<1, 2>,
- SDTCisFP<0>, SDTCisVec<1>]>,
+ SDTypeProfile<1, 4, [SDTCisSameAs<0, 3>, SDTCisSameAs<1, 2>,
+ SDTCisFP<0>, SDTCisVec<1>,
+ SDTCisInt<4>]>,
[]>;
def AMDGPUperm : SDNode<"AMDGPUISD::PERM", AMDGPUDTIntTernaryOp, []>;
diff --git a/lib/Target/AMDGPU/AMDGPUInstructions.td b/lib/Target/AMDGPU/AMDGPUInstructions.td
index 9426df399597..c9c932ef2f5f 100644
--- a/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -567,6 +567,7 @@ int PI = 0x40490fdb;
int TWO_PI_INV = 0x3e22f983;
int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding
int FP16_ONE = 0x3C00;
+int FP16_NEG_ONE = 0xBC00;
int V2FP16_ONE = 0x3C003C00;
int FP32_ONE = 0x3f800000;
int FP32_NEG_ONE = 0xbf800000;
diff --git a/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
index 8cc7e38f7b29..c147830e12ed 100644
--- a/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ b/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -100,16 +100,6 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
unsigned Size = DL.getTypeSizeInBits(ArgTy);
unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
-
- // Clover seems to always pad i8/i16 to i32, but doesn't properly align
- // them?
- // Make sure the struct elements have correct size and alignment for ext
- // args. These seem to be padded up to 4-bytes but not correctly aligned.
- bool IsExtArg = AllocSize < 32 && (Arg.hasZExtAttr() || Arg.hasSExtAttr()) &&
- !ST.isAmdHsaOS();
- if (IsExtArg)
- AllocSize = 4;
-
uint64_t EltOffset = alignTo(ExplicitArgOffset, Align) + BaseOffset;
ExplicitArgOffset = alignTo(ExplicitArgOffset, Align) + AllocSize;
@@ -164,8 +154,6 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
ArgPtr->getName() + ".cast");
}
- assert((!IsExtArg || !IsV3) && "incompatible situation");
-
if (IsV3 && Size >= 32) {
V4Ty = VectorType::get(VT->getVectorElementType(), 4);
// Use the hack that clang uses to avoid SelectionDAG ruining v3 loads
@@ -212,20 +200,6 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
// TODO: Convert noalias arg to !noalias
if (Size < 32 && !ArgTy->isAggregateType()) {
- if (IsExtArg && OffsetDiff == 0) {
- Type *I32Ty = Builder.getInt32Ty();
- bool IsSext = Arg.hasSExtAttr();
- Metadata *LowAndHigh[] = {
- ConstantAsMetadata::get(
- ConstantInt::get(I32Ty, IsSext ? minIntN(Size) : 0)),
- ConstantAsMetadata::get(
- ConstantInt::get(I32Ty,
- IsSext ? maxIntN(Size) + 1 : maxUIntN(Size) + 1))
- };
-
- Load->setMetadata(LLVMContext::MD_range, MDNode::get(Ctx, LowAndHigh));
- }
-
Value *ExtractBits = OffsetDiff == 0 ?
Load : Builder.CreateLShr(Load, OffsetDiff * 8);
diff --git a/lib/Target/AMDGPU/MIMGInstructions.td b/lib/Target/AMDGPU/MIMGInstructions.td
index 1e0bc62c45a6..44c2d366e461 100644
--- a/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/lib/Target/AMDGPU/MIMGInstructions.td
@@ -66,6 +66,22 @@ def MIMGDimInfoTable : GenericTable {
let PrimaryKeyName = "getMIMGDimInfo";
}
+class MIMGLZMapping<MIMGBaseOpcode l, MIMGBaseOpcode lz> {
+ MIMGBaseOpcode L = l;
+ MIMGBaseOpcode LZ = lz;
+}
+
+def MIMGLZMappingTable : GenericTable {
+ let FilterClass = "MIMGLZMapping";
+ let CppTypeName = "MIMGLZMappingInfo";
+ let Fields = ["L", "LZ"];
+ GenericEnum TypeOf_L = MIMGBaseOpcode;
+ GenericEnum TypeOf_LZ = MIMGBaseOpcode;
+
+ let PrimaryKey = ["L"];
+ let PrimaryKeyName = "getMIMGLZMappingInfo";
+}
+
class mimg <bits<7> si, bits<7> vi = si> {
field bits<7> SI = si;
field bits<7> VI = vi;
@@ -547,3 +563,13 @@ foreach intr = !listconcat(AMDGPUImageDimIntrinsics,
AMDGPUImageDimAtomicIntrinsics) in {
def : ImageDimIntrinsicInfo<intr>;
}
+
+// L to LZ Optimization Mapping
+def : MIMGLZMapping<IMAGE_SAMPLE_L, IMAGE_SAMPLE_LZ>;
+def : MIMGLZMapping<IMAGE_SAMPLE_C_L, IMAGE_SAMPLE_C_LZ>;
+def : MIMGLZMapping<IMAGE_SAMPLE_L_O, IMAGE_SAMPLE_LZ_O>;
+def : MIMGLZMapping<IMAGE_SAMPLE_C_L_O, IMAGE_SAMPLE_C_LZ_O>;
+def : MIMGLZMapping<IMAGE_GATHER4_L, IMAGE_GATHER4_LZ>;
+def : MIMGLZMapping<IMAGE_GATHER4_C_L, IMAGE_GATHER4_C_LZ>;
+def : MIMGLZMapping<IMAGE_GATHER4_L_O, IMAGE_GATHER4_LZ_O>;
+def : MIMGLZMapping<IMAGE_GATHER4_C_L_O, IMAGE_GATHER4_C_LZ_O>;
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 5b7fc2656a20..25007861fd15 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -694,6 +694,87 @@ bool SITargetLowering::isShuffleMaskLegal(ArrayRef<int>, EVT) const {
return false;
}
+MVT SITargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
+ EVT VT) const {
+ // TODO: Consider splitting all arguments into 32-bit pieces.
+ if (CC != CallingConv::AMDGPU_KERNEL && VT.isVector()) {
+ EVT ScalarVT = VT.getScalarType();
+ unsigned Size = ScalarVT.getSizeInBits();
+ if (Size == 32)
+ return ScalarVT.getSimpleVT();
+
+ if (Size == 64)
+ return MVT::i32;
+
+ if (Size == 16 &&
+ Subtarget->has16BitInsts() &&
+ isPowerOf2_32(VT.getVectorNumElements()))
+ return VT.isInteger() ? MVT::v2i16 : MVT::v2f16;
+ }
+
+ return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
+}
+
+unsigned SITargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
+ EVT VT) const {
+ if (CC != CallingConv::AMDGPU_KERNEL && VT.isVector()) {
+ unsigned NumElts = VT.getVectorNumElements();
+ EVT ScalarVT = VT.getScalarType();
+ unsigned Size = ScalarVT.getSizeInBits();
+
+ if (Size == 32)
+ return NumElts;
+
+ if (Size == 64)
+ return 2 * NumElts;
+
+ // FIXME: Fails to break down as we want with v3.
+ if (Size == 16 && Subtarget->has16BitInsts() && isPowerOf2_32(NumElts))
+ return VT.getVectorNumElements() / 2;
+ }
+
+ return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
+}
+
+unsigned SITargetLowering::getVectorTypeBreakdownForCallingConv(
+ LLVMContext &Context, CallingConv::ID CC,
+ EVT VT, EVT &IntermediateVT,
+ unsigned &NumIntermediates, MVT &RegisterVT) const {
+ if (CC != CallingConv::AMDGPU_KERNEL && VT.isVector()) {
+ unsigned NumElts = VT.getVectorNumElements();
+ EVT ScalarVT = VT.getScalarType();
+ unsigned Size = ScalarVT.getSizeInBits();
+ if (Size == 32) {
+ RegisterVT = ScalarVT.getSimpleVT();
+ IntermediateVT = RegisterVT;
+ NumIntermediates = NumElts;
+ return NumIntermediates;
+ }
+
+ if (Size == 64) {
+ RegisterVT = MVT::i32;
+ IntermediateVT = RegisterVT;
+ NumIntermediates = 2 * NumElts;
+ return NumIntermediates;
+ }
+
+ // FIXME: We should fix the ABI to be the same on targets without 16-bit
+ // support, but unless we can properly handle 3-vectors, it will be still be
+ // inconsistent.
+ if (Size == 16 && Subtarget->has16BitInsts() && isPowerOf2_32(NumElts)) {
+ RegisterVT = VT.isInteger() ? MVT::v2i16 : MVT::v2f16;
+ IntermediateVT = RegisterVT;
+ NumIntermediates = NumElts / 2;
+ return NumIntermediates;
+ }
+ }
+
+ return TargetLowering::getVectorTypeBreakdownForCallingConv(
+ Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
+}
+
bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &CI,
MachineFunction &MF,
@@ -1268,6 +1349,8 @@ static void processShaderInputArgs(SmallVectorImpl<ISD::InputArg> &Splits,
for (unsigned I = 0, E = Ins.size(), PSInputNum = 0; I != E; ++I) {
const ISD::InputArg *Arg = &Ins[I];
+ assert(!Arg->VT.isVector() && "vector type argument should have been split");
+
// First check if it's a PS input addr.
if (CallConv == CallingConv::AMDGPU_PS &&
!Arg->Flags.isInReg() && !Arg->Flags.isByVal() && PSInputNum <= 15) {
@@ -1301,25 +1384,7 @@ static void processShaderInputArgs(SmallVectorImpl<ISD::InputArg> &Splits,
++PSInputNum;
}
- // Second split vertices into their elements.
- if (Arg->VT.isVector()) {
- ISD::InputArg NewArg = *Arg;
- NewArg.Flags.setSplit();
- NewArg.VT = Arg->VT.getVectorElementType();
-
- // We REALLY want the ORIGINAL number of vertex elements here, e.g. a
- // three or five element vertex only needs three or five registers,
- // NOT four or eight.
- Type *ParamType = FType->getParamType(Arg->getOrigArgIndex());
- unsigned NumElements = ParamType->getVectorNumElements();
-
- for (unsigned J = 0; J != NumElements; ++J) {
- Splits.push_back(NewArg);
- NewArg.PartOffset += NewArg.VT.getStoreSize();
- }
- } else {
- Splits.push_back(*Arg);
- }
+ Splits.push_back(*Arg);
}
}
@@ -4490,6 +4555,9 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
+ const AMDGPU::MIMGLZMappingInfo *LZMappingInfo =
+ AMDGPU::getMIMGLZMappingInfo(Intr->BaseOpcode);
+ unsigned IntrOpcode = Intr->BaseOpcode;
SmallVector<EVT, 2> ResultTypes(Op->value_begin(), Op->value_end());
bool IsD16 = false;
@@ -4575,6 +4643,18 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
SmallVector<SDValue, 4> VAddrs;
for (unsigned i = 0; i < NumVAddrs; ++i)
VAddrs.push_back(Op.getOperand(AddrIdx + i));
+
+ // Optimize _L to _LZ when _L is zero
+ if (LZMappingInfo) {
+ if (auto ConstantLod =
+ dyn_cast<ConstantFPSDNode>(VAddrs[NumVAddrs-1].getNode())) {
+ if (ConstantLod->isZero() || ConstantLod->isNegative()) {
+ IntrOpcode = LZMappingInfo->LZ; // set new opcode to _lz variant of _l
+ VAddrs.pop_back(); // remove 'lod'
+ }
+ }
+ }
+
SDValue VAddr = getBuildDwordsVector(DAG, DL, VAddrs);
SDValue True = DAG.getTargetConstant(1, DL, MVT::i1);
@@ -4634,10 +4714,10 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
int Opcode = -1;
if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
- Opcode = AMDGPU::getMIMGOpcode(Intr->BaseOpcode, AMDGPU::MIMGEncGfx8,
+ Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx8,
NumVDataDwords, NumVAddrDwords);
if (Opcode == -1)
- Opcode = AMDGPU::getMIMGOpcode(Intr->BaseOpcode, AMDGPU::MIMGEncGfx6,
+ Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx6,
NumVDataDwords, NumVAddrDwords);
assert(Opcode != -1);
@@ -4945,7 +5025,8 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::amdgcn_fdot2:
return DAG.getNode(AMDGPUISD::FDOT2, DL, VT,
- Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
+ Op.getOperand(4));
case Intrinsic::amdgcn_fmul_legacy:
return DAG.getNode(AMDGPUISD::FMUL_LEGACY, DL, VT,
Op.getOperand(1), Op.getOperand(2));
@@ -6754,10 +6835,6 @@ static bool isCanonicalized(SelectionDAG &DAG, SDValue Op,
return Op.getOperand(0).getValueType().getScalarType() != MVT::f16 ||
ST->hasFP16Denormals();
- case ISD::FP16_TO_FP:
- case ISD::FP_TO_FP16:
- return ST->hasFP16Denormals();
-
// It can/will be lowered or combined as a bit operation.
// Need to check their input recursively to handle.
case ISD::FNEG:
@@ -6799,8 +6876,16 @@ SDValue SITargetLowering::performFCanonicalizeCombine(
SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
- ConstantFPSDNode *CFP = isConstOrConstSplatFP(N->getOperand(0));
+ SDValue N0 = N->getOperand(0);
+ // fcanonicalize undef -> qnan
+ if (N0.isUndef()) {
+ EVT VT = N->getValueType(0);
+ APFloat QNaN = APFloat::getQNaN(SelectionDAG::EVTToAPFloatSemantics(VT));
+ return DAG.getConstantFP(QNaN, SDLoc(N), VT);
+ }
+
+ ConstantFPSDNode *CFP = isConstOrConstSplatFP(N0);
if (!CFP) {
SDValue N0 = N->getOperand(0);
EVT VT = N0.getValueType().getScalarType();
@@ -6853,7 +6938,7 @@ SDValue SITargetLowering::performFCanonicalizeCombine(
return DAG.getConstantFP(CanonicalQNaN, SDLoc(N), VT);
}
- return N->getOperand(0);
+ return N0;
}
static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) {
@@ -7544,8 +7629,10 @@ SDValue SITargetLowering::performFMACombine(SDNode *N,
return SDValue();
if ((Vec1 == Vec3 && Vec2 == Vec4) ||
- (Vec1 == Vec4 && Vec2 == Vec3))
- return DAG.getNode(AMDGPUISD::FDOT2, SL, MVT::f32, Vec1, Vec2, FMAAcc);
+ (Vec1 == Vec4 && Vec2 == Vec3)) {
+ return DAG.getNode(AMDGPUISD::FDOT2, SL, MVT::f32, Vec1, Vec2, FMAAcc,
+ DAG.getTargetConstant(0, SL, MVT::i1));
+ }
}
return SDValue();
}
diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h
index ad049f2a71c3..5b3d49b3d8e3 100644
--- a/lib/Target/AMDGPU/SIISelLowering.h
+++ b/lib/Target/AMDGPU/SIISelLowering.h
@@ -25,6 +25,19 @@ class SITargetLowering final : public AMDGPUTargetLowering {
private:
const GCNSubtarget *Subtarget;
+public:
+ MVT getRegisterTypeForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
+ EVT VT) const override;
+ unsigned getNumRegistersForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
+ EVT VT) const override;
+
+ unsigned getVectorTypeBreakdownForCallingConv(
+ LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
+ unsigned &NumIntermediates, MVT &RegisterVT) const override;
+
+private:
SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL,
SDValue Chain, uint64_t Offset) const;
SDValue getImplicitArgPtr(SelectionDAG &DAG, const SDLoc &SL) const;
diff --git a/lib/Target/AMDGPU/SIInsertSkips.cpp b/lib/Target/AMDGPU/SIInsertSkips.cpp
index 61c8f359e168..dc9397cf7b85 100644
--- a/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ b/lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -133,28 +133,10 @@ bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From,
I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
return true;
- // V_READFIRSTLANE/V_READLANE destination register may be used as operand
- // by some SALU instruction. If exec mask is zero vector instruction
- // defining the register that is used by the scalar one is not executed
- // and scalar instruction will operate on undefined data. For
- // V_READFIRSTLANE/V_READLANE we should avoid predicated execution.
- if ((I->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) ||
- (I->getOpcode() == AMDGPU::V_READLANE_B32)) {
+ if (TII->hasUnwantedEffectsWhenEXECEmpty(*I))
return true;
- }
-
- if (I->isInlineAsm()) {
- const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
- const char *AsmStr = I->getOperand(0).getSymbolName();
-
- // inlineasm length estimate is number of bytes assuming the longest
- // instruction.
- uint64_t MaxAsmSize = TII->getInlineAsmLength(AsmStr, *MAI);
- NumInstr += MaxAsmSize / MAI->getMaxInstLength();
- } else {
- ++NumInstr;
- }
+ ++NumInstr;
if (NumInstr >= SkipThreshold)
return true;
}
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index 6c85c92454c3..f3745382a6f4 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2332,6 +2332,36 @@ bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
changesVGPRIndexingMode(MI);
}
+bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const {
+ unsigned Opcode = MI.getOpcode();
+
+ if (MI.mayStore() && isSMRD(MI))
+ return true; // scalar store or atomic
+
+ // These instructions cause shader I/O that may cause hardware lockups
+ // when executed with an empty EXEC mask.
+ //
+ // Note: exp with VM = DONE = 0 is automatically skipped by hardware when
+ // EXEC = 0, but checking for that case here seems not worth it
+ // given the typical code patterns.
+ if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
+ Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE)
+ return true;
+
+ if (MI.isInlineAsm())
+ return true; // conservative assumption
+
+ // These are like SALU instructions in terms of effects, so it's questionable
+ // whether we should return true for those.
+ //
+ // However, executing them with EXEC = 0 causes them to operate on undefined
+ // data, which we avoid by returning true here.
+ if (Opcode == AMDGPU::V_READFIRSTLANE_B32 || Opcode == AMDGPU::V_READLANE_B32)
+ return true;
+
+ return false;
+}
+
bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
switch (Imm.getBitWidth()) {
case 32:
diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h
index 0a735257d34e..d681b926504e 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/lib/Target/AMDGPU/SIInstrInfo.h
@@ -597,6 +597,9 @@ public:
return !RI.isSGPRReg(MRI, Dest);
}
+ /// Whether we must prevent this instruction from executing with EXEC = 0.
+ bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;
+
bool isInlineConstant(const APInt &Imm) const;
bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const;
diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
index c3f8bfb53ef4..5c10646161b3 100644
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -1387,6 +1387,11 @@ def : GCNPat<
>;
def : GCNPat<
+ (fcanonicalize (f16 (fneg (VOP3Mods f16:$src, i32:$src_mods)))),
+ (V_MUL_F16_e64 0, (i32 CONST.FP16_NEG_ONE), $src_mods, $src, 0, 0)
+>;
+
+def : GCNPat<
(fcanonicalize (v2f16 (VOP3PMods v2f16:$src, i32:$src_mods))),
(V_PK_MUL_F16 0, (i32 CONST.V2FP16_ONE), $src_mods, $src, DSTCLAMP.NONE)
>;
@@ -1411,6 +1416,11 @@ def : GCNPat<
(fcanonicalize (f32 (VOP3Mods f32:$src, i32:$src_mods))),
(V_MUL_F32_e64 0, (i32 CONST.FP32_ONE), $src_mods, $src, 0, 0)
>;
+
+def : GCNPat<
+ (fcanonicalize (f32 (fneg (VOP3Mods f32:$src, i32:$src_mods)))),
+ (V_MUL_F32_e64 0, (i32 CONST.FP32_NEG_ONE), $src_mods, $src, 0, 0)
+>;
}
let OtherPredicates = [FP32Denormals] in {
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 3fd3c75874a3..4eba19382315 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -110,6 +110,7 @@ struct MIMGInfo {
#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
+#define GET_MIMGLZMappingTable_IMPL
#include "AMDGPUGenSearchableTables.inc"
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 70681c271697..5b7af8268cda 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -42,6 +42,7 @@ namespace AMDGPU {
#define GET_MIMGBaseOpcode_DECL
#define GET_MIMGDim_DECL
#define GET_MIMGEncoding_DECL
+#define GET_MIMGLZMapping_DECL
#include "AMDGPUGenSearchableTables.inc"
namespace IsaInfo {
@@ -211,6 +212,14 @@ struct MIMGDimInfo {
LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfo(unsigned Dim);
+struct MIMGLZMappingInfo {
+ MIMGBaseOpcode L;
+ MIMGBaseOpcode LZ;
+};
+
+LLVM_READONLY
+const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
+
LLVM_READONLY
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
unsigned VDataDwords, unsigned VAddrDwords);
diff --git a/lib/Target/AMDGPU/VOP3PInstructions.td b/lib/Target/AMDGPU/VOP3PInstructions.td
index 5c78ada3211e..b51828b54679 100644
--- a/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -167,13 +167,30 @@ defm : MadFmaMixPats<fma, V_FMA_MIX_F32, V_FMA_MIXLO_F16, V_FMA_MIXHI_F16>;
let SubtargetPredicate = HasDLInsts in {
-def V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16", VOP3_Profile<VOP_F32_V2F16_V2F16_F32>, AMDGPUfdot2>;
-def V_DOT2_I32_I16 : VOP3PInst<"v_dot2_i32_i16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>, int_amdgcn_sdot2>;
-def V_DOT2_U32_U16 : VOP3PInst<"v_dot2_u32_u16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>, int_amdgcn_udot2>;
-def V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot4>;
-def V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot4>;
-def V_DOT8_I32_I4 : VOP3PInst<"v_dot8_i32_i4", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot8>;
-def V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot8>;
+def V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16", VOP3_Profile<VOP_F32_V2F16_V2F16_F32>>;
+def V_DOT2_I32_I16 : VOP3PInst<"v_dot2_i32_i16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>>;
+def V_DOT2_U32_U16 : VOP3PInst<"v_dot2_u32_u16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>>;
+def V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
+def V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
+def V_DOT8_I32_I4 : VOP3PInst<"v_dot8_i32_i4", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
+def V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
+
+multiclass DotPats<SDPatternOperator dot_op,
+ VOP3PInst dot_inst> {
+ def : GCNPat <
+ (dot_op (dot_inst.Pfl.Src0VT (VOP3PMods0 dot_inst.Pfl.Src0VT:$src0, i32:$src0_modifiers)),
+ (dot_inst.Pfl.Src1VT (VOP3PMods dot_inst.Pfl.Src1VT:$src1, i32:$src1_modifiers)),
+ (dot_inst.Pfl.Src2VT (VOP3PMods dot_inst.Pfl.Src2VT:$src2, i32:$src2_modifiers)), i1:$clamp),
+ (dot_inst $src0_modifiers, $src0, $src1_modifiers, $src1, $src2_modifiers, $src2, (as_i1imm $clamp))>;
+}
+
+defm : DotPats<AMDGPUfdot2, V_DOT2_F32_F16>;
+defm : DotPats<int_amdgcn_sdot2, V_DOT2_I32_I16>;
+defm : DotPats<int_amdgcn_udot2, V_DOT2_U32_U16>;
+defm : DotPats<int_amdgcn_sdot4, V_DOT4_I32_I8>;
+defm : DotPats<int_amdgcn_udot4, V_DOT4_U32_U8>;
+defm : DotPats<int_amdgcn_sdot8, V_DOT8_I32_I4>;
+defm : DotPats<int_amdgcn_udot8, V_DOT8_U32_U4>;
} // End SubtargetPredicate = HasDLInsts
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 2196f9b47f3b..b227eaed8d61 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -117,7 +117,7 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// globals from all functions in PromotedGlobals.
for (auto *GV : AFI->getGlobalsPromotedToConstantPool())
PromotedGlobals.insert(GV);
-
+
// Calculate this function's optimization goal.
unsigned OptimizationGoal;
if (F.hasFnAttribute(Attribute::OptimizeNone))
@@ -367,8 +367,9 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
unsigned RC;
- InlineAsm::hasRegClassConstraint(Flags, RC);
- if (RC == ARM::GPRPairRegClassID) {
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ if (InlineAsm::hasRegClassConstraint(Flags, RC) &&
+ ARM::GPRPairRegClass.hasSubClassEq(TRI->getRegClass(RC))) {
if (NumVals != 1)
return true;
const MachineOperand &MO = MI->getOperand(OpNum);
@@ -990,7 +991,7 @@ void ARMAsmPrinter::EmitJumpTableTBInst(const MachineInstr *MI,
if (Subtarget->isThumb1Only())
EmitAlignment(2);
-
+
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI);
OutStreamer->EmitLabel(JTISymbol);
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 43e8b7d66c62..5342e6e2cd13 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -584,7 +584,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
// don't know for sure yet whether we'll need that, so we guess based
// on whether there are any local variables that would trigger it.
unsigned StackAlign = TFI->getStackAlignment();
- if (TFI->hasFP(MF) &&
+ if (TFI->hasFP(MF) &&
!((MFI.getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) {
if (isFrameOffsetLegal(MI, getFrameRegister(MF), FPOffset))
return false;
diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h
index 63bf48abb7ac..543165de38d0 100644
--- a/lib/Target/ARM/ARMCallingConv.h
+++ b/lib/Target/ARM/ARMCallingConv.h
@@ -269,14 +269,15 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT,
for (auto Reg : RegList)
State.AllocateReg(Reg);
+ // After the first item has been allocated, the rest are packed as tightly as
+ // possible. (E.g. an incoming i64 would have starting Align of 8, but we'll
+ // be allocating a bunch of i32 slots).
+ unsigned RestAlign = std::min(Align, Size);
+
for (auto &It : PendingMembers) {
It.convertToMem(State.AllocateStack(Size, Align));
State.addLoc(It);
-
- // After the first item has been allocated, the rest are packed as tightly
- // as possible. (E.g. an incoming i64 would have starting Align of 8, but
- // we'll be allocating a bunch of i32 slots).
- Align = Size;
+ Align = RestAlign;
}
// All pending members have now been allocated
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index de08eb8c6985..2c4738d3cb74 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -2128,7 +2128,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
unsigned DeadSize = 0;
bool CanDeleteLEA = false;
bool BaseRegKill = false;
-
+
unsigned IdxReg = ~0U;
bool IdxRegKill = true;
if (isThumb2) {
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index 5139a18f9263..55194ed94532 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -113,7 +113,7 @@ public:
bool isLSDA() const { return Kind == ARMCP::CPLSDA; }
bool isMachineBasicBlock() const{ return Kind == ARMCP::CPMachineBasicBlock; }
bool isPromotedGlobal() const{ return Kind == ARMCP::CPPromotedGlobal; }
-
+
int getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment) override;
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 26d4aaa12acf..a66cd7053c0a 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -2116,7 +2116,7 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
CallingConv::ID CC = F.getCallingConv();
if (Ret->getNumOperands() > 0) {
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
+ GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index af983ce2606a..a8c75702d7b5 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -372,7 +372,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
DebugLoc dl;
-
+
unsigned FramePtr = RegInfo->getFrameRegister(MF);
// Determine the sizes of each callee-save spill areas and record which frame
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 081d4ff033bd..9592dd53c347 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -2539,7 +2539,7 @@ void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
}
};
-
+
if (Range->second == 0) {
// 1. Mask includes the LSB -> Simply shift the top N bits off
NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
@@ -2633,7 +2633,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
MachineMemOperand::MOLoad, 4, 4);
cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp+1);
-
+
ReplaceNode(N, ResNode);
return;
}
@@ -2920,7 +2920,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
assert(N3.getOpcode() == ISD::Register);
unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
-
+
if (InFlag.getOpcode() == ARMISD::CMPZ) {
bool SwitchEQNEToPLMI;
SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
@@ -3023,7 +3023,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
// Other cases are autogenerated.
break;
}
-
+
case ARMISD::VZIP: {
unsigned Opc = 0;
EVT VT = N->getValueType(0);
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 47222a66f798..ede276dd91bb 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -3096,7 +3096,7 @@ static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
// need to be duplicated) or duplicating the constant wouldn't increase code
// size (implying the constant is no larger than 4 bytes).
const Function &F = DAG.getMachineFunction().getFunction();
-
+
// We rely on this decision to inline being idemopotent and unrelated to the
// use-site. We know that if we inline a variable at one use site, we'll
// inline it elsewhere too (and reuse the constant pool entry). Fast-isel
@@ -5162,7 +5162,7 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
return SDValue();
// SoftFP: read half-precision arguments:
//
- // t2: i32,ch = ...
+ // t2: i32,ch = ...
// t7: i16 = truncate t2 <~~~~ Op
// t8: f16 = bitcast t7 <~~~~ N
//
@@ -5173,7 +5173,7 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
- // Half-precision return values
+ // Half-precision return values
if (SrcVT == MVT::f16 && DstVT == MVT::i16) {
if (!HasFullFP16)
return SDValue();
@@ -13461,13 +13461,13 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
auto *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1));
if (!RHS || RHS->getZExtValue() != 4)
return false;
-
+
Offset = Op->getOperand(1);
Base = Op->getOperand(0);
AM = ISD::POST_INC;
return true;
}
-
+
bool isInc;
bool isLegal = false;
if (Subtarget->isThumb2())
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 901138dbdfd5..db5f28480e90 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -1275,7 +1275,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
// we're minimizing code size.
if (!MBB.getParent()->getFunction().optForMinSize() || !BaseKill)
return false;
-
+
bool HighRegsUsed = false;
for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
if (MI->getOperand(i).getReg() >= ARM::R8) {
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index 816116772995..91310e81e398 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -126,7 +126,7 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// The amount the literal pool has been increasedby due to promoted globals.
int PromotedGlobalsIncrease = 0;
-
+
public:
ARMFunctionInfo() = default;
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index d4fbf76f299f..4d685158e258 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -49,7 +49,7 @@ SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
case RTLIB::MEMMOVE:
AEABILibcall = AEABI_MEMMOVE;
break;
- case RTLIB::MEMSET:
+ case RTLIB::MEMSET:
AEABILibcall = AEABI_MEMSET;
if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src))
if (ConstantSrc->getZExtValue() == 0)
@@ -93,14 +93,14 @@ SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
else if (Src.getValueType().bitsLT(MVT::i32))
Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
- Entry.Node = Src;
+ Entry.Node = Src;
Entry.Ty = Type::getInt32Ty(*DAG.getContext());
Entry.IsSExt = false;
Args.push_back(Entry);
} else {
Entry.Node = Src;
Args.push_back(Entry);
-
+
Entry.Node = Size;
Args.push_back(Entry);
}
@@ -121,7 +121,7 @@ SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
std::move(Args))
.setDiscardResult();
std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
-
+
return CallResult.second;
}
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index f8cae31641ff..94f9cefe429c 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -389,7 +389,7 @@ int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
unsigned NumVectorInstToHideOverhead = 10;
int MaxMergeDistance = 64;
- if (Ty->isVectorTy() && SE &&
+ if (Ty->isVectorTy() && SE &&
!BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
return NumVectorInstToHideOverhead;
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h
index cd9fa0709020..e0cd2d8e26a6 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -153,7 +153,7 @@ public:
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
- int getAddressComputationCost(Type *Val, ScalarEvolution *SE,
+ int getAddressComputationCost(Type *Val, ScalarEvolution *SE,
const SCEV *Ptr);
int getArithmeticInstrCost(
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 807d62547337..a5fbbbf26be9 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -969,7 +969,7 @@ public:
// checks whether this operand is a memory operand computed as an offset
// applied to PC. the offset may have 8 bits of magnitude and is represented
- // with two bits of shift. textually it may be either [pc, #imm], #imm or
+ // with two bits of shift. textually it may be either [pc, #imm], #imm or
// relocable expression...
bool isThumbMemPC() const {
int64_t Val = 0;
@@ -2284,7 +2284,7 @@ public:
}
const MCSymbolRefExpr *SR = dyn_cast<MCSymbolRefExpr>(Imm.Val);
-
+
assert(SR && "Unknown value type!");
Inst.addOperand(MCOperand::createExpr(SR));
return;
@@ -2326,7 +2326,7 @@ public:
assert(isImm() && "Not an immediate!");
// If we have an immediate that's not a constant, treat it as a label
- // reference needing a fixup.
+ // reference needing a fixup.
if (!isa<MCConstantExpr>(getImm())) {
Inst.addOperand(MCOperand::createExpr(getImm()));
return;
@@ -3419,7 +3419,7 @@ int ARMAsmParser::tryParseShiftRegister(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier))
- return -1;
+ return -1;
std::string lowerCase = Tok.getString().lower();
ARM_AM::ShiftOpc ShiftTy = StringSwitch<ARM_AM::ShiftOpc>(lowerCase)
@@ -4311,7 +4311,7 @@ ARMAsmParser::parseProcIFlagsOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
- if (!Tok.is(AsmToken::Identifier))
+ if (!Tok.is(AsmToken::Identifier))
return MatchOperand_NoMatch;
StringRef IFlagsStr = Tok.getString();
@@ -4353,7 +4353,7 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) {
return MatchOperand_NoMatch;
}
unsigned SYSmvalue = Val & 0xFF;
- Parser.Lex();
+ Parser.Lex();
Operands.push_back(ARMOperand::CreateMSRMask(SYSmvalue, S));
return MatchOperand_Success;
}
@@ -4996,7 +4996,7 @@ void ARMAsmParser::cvtThumbBranches(MCInst &Inst,
// first decide whether or not the branch should be conditional
// by looking at it's location relative to an IT block
if(inITBlock()) {
- // inside an IT block we cannot have any conditional branches. any
+ // inside an IT block we cannot have any conditional branches. any
// such instructions needs to be converted to unconditional form
switch(Inst.getOpcode()) {
case ARM::tBcc: Inst.setOpcode(ARM::tB); break;
@@ -5008,11 +5008,11 @@ void ARMAsmParser::cvtThumbBranches(MCInst &Inst,
unsigned Cond = static_cast<ARMOperand &>(*Operands[CondOp]).getCondCode();
switch(Inst.getOpcode()) {
case ARM::tB:
- case ARM::tBcc:
- Inst.setOpcode(Cond == ARMCC::AL ? ARM::tB : ARM::tBcc);
+ case ARM::tBcc:
+ Inst.setOpcode(Cond == ARMCC::AL ? ARM::tB : ARM::tBcc);
break;
case ARM::t2B:
- case ARM::t2Bcc:
+ case ARM::t2Bcc:
Inst.setOpcode(Cond == ARMCC::AL ? ARM::t2B : ARM::t2Bcc);
break;
}
@@ -8882,7 +8882,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
case ARM::MOVsi: {
ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(Inst.getOperand(2).getImm());
// rrx shifts and asr/lsr of #32 is encoded as 0
- if (SOpc == ARM_AM::rrx || SOpc == ARM_AM::asr || SOpc == ARM_AM::lsr)
+ if (SOpc == ARM_AM::rrx || SOpc == ARM_AM::asr || SOpc == ARM_AM::lsr)
return false;
if (ARM_AM::getSORegOffset(Inst.getOperand(2).getImm()) == 0) {
// Shifting by zero is accepted as a vanilla 'MOVr'
@@ -9371,6 +9371,12 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
return parseDirectiveAlign(DirectiveID.getLoc()); // Use Generic on failure.
else if (IDVal == ".thumb_set")
parseDirectiveThumbSet(DirectiveID.getLoc());
+ else if (IDVal == ".inst")
+ parseDirectiveInst(DirectiveID.getLoc());
+ else if (IDVal == ".inst.n")
+ parseDirectiveInst(DirectiveID.getLoc(), 'n');
+ else if (IDVal == ".inst.w")
+ parseDirectiveInst(DirectiveID.getLoc(), 'w');
else if (!IsMachO && !IsCOFF) {
if (IDVal == ".arch")
parseDirectiveArch(DirectiveID.getLoc());
@@ -9382,12 +9388,6 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
parseDirectiveFPU(DirectiveID.getLoc());
else if (IDVal == ".fnstart")
parseDirectiveFnStart(DirectiveID.getLoc());
- else if (IDVal == ".inst")
- parseDirectiveInst(DirectiveID.getLoc());
- else if (IDVal == ".inst.n")
- parseDirectiveInst(DirectiveID.getLoc(), 'n');
- else if (IDVal == ".inst.w")
- parseDirectiveInst(DirectiveID.getLoc(), 'w');
else if (IDVal == ".object_arch")
parseDirectiveObjectArch(DirectiveID.getLoc());
else if (IDVal == ".tlsdescseq")
@@ -10012,8 +10012,8 @@ bool ARMAsmParser::parseDirectiveInst(SMLoc Loc, char Suffix) {
case 'w':
break;
default:
- return Error(Loc, "cannot determine Thumb instruction size, "
- "use inst.n/inst.w instead");
+ Width = 0;
+ break;
}
} else {
if (Suffix)
@@ -10029,6 +10029,7 @@ bool ARMAsmParser::parseDirectiveInst(SMLoc Loc, char Suffix) {
return Error(Loc, "expected constant expression");
}
+ char CurSuffix = Suffix;
switch (Width) {
case 2:
if (Value->getValue() > 0xffff)
@@ -10039,11 +10040,21 @@ bool ARMAsmParser::parseDirectiveInst(SMLoc Loc, char Suffix) {
return Error(Loc, StringRef(Suffix ? "inst.w" : "inst") +
" operand is too big");
break;
+ case 0:
+ // Thumb mode, no width indicated. Guess from the opcode, if possible.
+ if (Value->getValue() < 0xe800)
+ CurSuffix = 'n';
+ else if (Value->getValue() >= 0xe8000000)
+ CurSuffix = 'w';
+ else
+ return Error(Loc, "cannot determine Thumb instruction size, "
+ "use inst.n/inst.w instead");
+ break;
default:
llvm_unreachable("only supported widths are 2 and 4");
}
- getTargetStreamer().emitInst(Value->getValue(), Suffix);
+ getTargetStreamer().emitInst(Value->getValue(), CurSuffix);
return false;
};
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 4733cf49827e..61bec04678dd 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -620,7 +620,7 @@ ThumbDisassembler::AddThumbPredicate(MCInst &MI) const {
// assume a predicate of AL.
unsigned CC;
CC = ITBlock.getITCC();
- if (CC == 0xF)
+ if (CC == 0xF)
CC = ARMCC::AL;
if (ITBlock.instrInITBlock())
ITBlock.advanceITState();
@@ -888,7 +888,7 @@ DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- if (RegNo == 15)
+ if (RegNo == 15)
S = MCDisassembler::SoftFail;
Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder));
@@ -2171,7 +2171,7 @@ static DecodeStatus DecodeSETPANInstruction(MCInst &Inst, unsigned Insn,
const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
const FeatureBitset &FeatureBits = Dis->getSubtargetInfo().getFeatureBits();
- if (!FeatureBits[ARM::HasV8_1aOps] ||
+ if (!FeatureBits[ARM::HasV8_1aOps] ||
!FeatureBits[ARM::HasV8Ops])
return MCDisassembler::Fail;
@@ -4467,7 +4467,7 @@ static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn,
index = fieldFromInstruction(Insn, 7, 1);
switch (fieldFromInstruction(Insn, 4, 2)) {
- case 0:
+ case 0:
align = 0; break;
case 3:
align = 4; break;
@@ -5279,7 +5279,7 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
return MCDisassembler::Fail;
if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
- if (!Check(S, DecodeAddrMode7Operand(Inst, Rn, Address, Decoder)))
+ if (!Check(S, DecodeAddrMode7Operand(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
if (!Check(S, DecodePostIdxReg(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 75ed40c18fa2..bfc32073ba18 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -834,7 +834,7 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
return;
}
- O << SYSm;
+ O << SYSm;
return;
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index dfa339091a7b..7d04c73fb3f2 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -64,7 +64,7 @@ bool ARMELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
}
}
-// Need to examine the Fixup when determining whether to
+// Need to examine the Fixup when determining whether to
// emit the relocation as an explicit symbol or as a section relative
// offset
unsigned ARMELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 0dab789505d5..b37b8073548f 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -740,7 +740,7 @@ getARMBLTargetOpValue(const MCInst &MI, unsigned OpIdx,
const MCOperand MO = MI.getOperand(OpIdx);
if (MO.isExpr()) {
if (HasConditionalBranch(MI))
- return ::getBranchTargetOpValue(MI, OpIdx,
+ return ::getBranchTargetOpValue(MI, OpIdx,
ARM::fixup_arm_condbl, Fixups, STI);
return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_uncondbl, Fixups, STI);
}
@@ -766,10 +766,10 @@ uint32_t ARMMCCodeEmitter::getThumbBranchTargetOpValue(
const MCSubtargetInfo &STI) const {
unsigned Val = 0;
const MCOperand MO = MI.getOperand(OpIdx);
-
+
if(MO.isExpr())
return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_uncondbranch, Fixups, STI);
- else
+ else
Val = MO.getImm() >> 1;
bool I = (Val & 0x800000);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index 42371736fef4..63aa9735e8a4 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -13,6 +13,8 @@
#include "ARMTargetMachine.h"
#include "llvm/MC/ConstantPools.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -47,6 +49,41 @@ void ARMTargetStreamer::finish() { ConstantPools->emitAll(Streamer); }
// reset() - Reset any state
void ARMTargetStreamer::reset() {}
+void ARMTargetStreamer::emitInst(uint32_t Inst, char Suffix) {
+ unsigned Size;
+ char Buffer[4];
+ const bool LittleEndian = getStreamer().getContext().getAsmInfo()->isLittleEndian();
+
+ switch (Suffix) {
+ case '\0':
+ Size = 4;
+
+ for (unsigned II = 0, IE = Size; II != IE; II++) {
+ const unsigned I = LittleEndian ? (Size - II - 1) : II;
+ Buffer[Size - II - 1] = uint8_t(Inst >> I * CHAR_BIT);
+ }
+
+ break;
+ case 'n':
+ case 'w':
+ Size = (Suffix == 'n' ? 2 : 4);
+
+ // Thumb wide instructions are emitted as a pair of 16-bit words of the
+ // appropriate endianness.
+ for (unsigned II = 0, IE = Size; II != IE; II = II + 2) {
+ const unsigned I0 = LittleEndian ? II + 0 : II + 1;
+ const unsigned I1 = LittleEndian ? II + 1 : II + 0;
+ Buffer[Size - II - 2] = uint8_t(Inst >> I0 * CHAR_BIT);
+ Buffer[Size - II - 1] = uint8_t(Inst >> I1 * CHAR_BIT);
+ }
+
+ break;
+ default:
+ llvm_unreachable("Invalid Suffix");
+ }
+ getStreamer().EmitBytes(StringRef(Buffer, Size));
+}
+
// The remaining callbacks should be handled separately by each
// streamer.
void ARMTargetStreamer::emitFnStart() {}
@@ -76,7 +113,6 @@ void ARMTargetStreamer::emitArchExtension(unsigned ArchExt) {}
void ARMTargetStreamer::emitObjectArch(ARM::ArchKind Arch) {}
void ARMTargetStreamer::emitFPU(unsigned FPU) {}
void ARMTargetStreamer::finishAttributeSection() {}
-void ARMTargetStreamer::emitInst(uint32_t Inst, char Suffix) {}
void
ARMTargetStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) {}
void ARMTargetStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {}
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index 637e4a44c428..7f03e1463c1d 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -233,7 +233,7 @@ bool MLxExpansion::FindMLxHazard(MachineInstr *MI) {
// On Swift, we mostly care about hazards from multiplication instructions
// writing the accumulator and the pipelining of loop iterations by out-of-
- // order execution.
+ // order execution.
if (isSwift)
return isFpMulInstruction(DefMI->getOpcode()) || hasLoopHazard(MI);
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index a65e22fd86e8..5c745e112b2e 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -127,7 +127,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
DebugLoc dl;
-
+
unsigned FramePtr = RegInfo->getFrameRegister(MF);
unsigned BasePtr = RegInfo->getBaseRegister();
int CFAOffset = 0;
diff --git a/lib/Target/AVR/AVRISelLowering.cpp b/lib/Target/AVR/AVRISelLowering.cpp
index c1515571aae5..1b412a9c6813 100644
--- a/lib/Target/AVR/AVRISelLowering.cpp
+++ b/lib/Target/AVR/AVRISelLowering.cpp
@@ -63,6 +63,13 @@ AVRTargetLowering::AVRTargetLowering(AVRTargetMachine &tm)
setTruncStoreAction(MVT::i16, MVT::i8, Expand);
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setOperationAction(ISD::ADDC, VT, Legal);
+ setOperationAction(ISD::SUBC, VT, Legal);
+ setOperationAction(ISD::ADDE, VT, Legal);
+ setOperationAction(ISD::SUBE, VT, Legal);
+ }
+
// sub (x, imm) gets canonicalized to add (x, -imm), so for illegal types
// revert into a sub since we don't have an add with immediate instruction.
setOperationAction(ISD::ADD, MVT::i32, Custom);
diff --git a/lib/Target/Hexagon/HexagonBitSimplify.cpp b/lib/Target/Hexagon/HexagonBitSimplify.cpp
index 4791b067aa8d..ba255d30fede 100644
--- a/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -1777,6 +1777,7 @@ namespace {
const BitTracker::RegisterCell &RC);
bool simplifyExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD,
const BitTracker::RegisterCell &RC, const RegisterSet &AVs);
+ bool simplifyRCmp0(MachineInstr *MI, BitTracker::RegisterRef RD);
// Cache of created instructions to avoid creating duplicates.
// XXX Currently only used by genBitSplit.
@@ -2567,6 +2568,127 @@ bool BitSimplification::simplifyExtractLow(MachineInstr *MI,
return Changed;
}
+bool BitSimplification::simplifyRCmp0(MachineInstr *MI,
+ BitTracker::RegisterRef RD) {
+ unsigned Opc = MI->getOpcode();
+ if (Opc != Hexagon::A4_rcmpeqi && Opc != Hexagon::A4_rcmpneqi)
+ return false;
+ MachineOperand &CmpOp = MI->getOperand(2);
+ if (!CmpOp.isImm() || CmpOp.getImm() != 0)
+ return false;
+
+ const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
+ if (FRC != &Hexagon::IntRegsRegClass && FRC != &Hexagon::DoubleRegsRegClass)
+ return false;
+ assert(RD.Sub == 0);
+
+ MachineBasicBlock &B = *MI->getParent();
+ const DebugLoc &DL = MI->getDebugLoc();
+ auto At = MI->isPHI() ? B.getFirstNonPHI()
+ : MachineBasicBlock::iterator(MI);
+ bool KnownZ = true;
+ bool KnownNZ = false;
+
+ BitTracker::RegisterRef SR = MI->getOperand(1);
+ if (!BT.has(SR.Reg))
+ return false;
+ const BitTracker::RegisterCell &SC = BT.lookup(SR.Reg);
+ unsigned F, W;
+ if (!HBS::getSubregMask(SR, F, W, MRI))
+ return false;
+
+ for (uint16_t I = F; I != F+W; ++I) {
+ const BitTracker::BitValue &V = SC[I];
+ if (!V.is(0))
+ KnownZ = false;
+ if (V.is(1))
+ KnownNZ = true;
+ }
+
+ auto ReplaceWithConst = [&] (int C) {
+ unsigned NewR = MRI.createVirtualRegister(FRC);
+ BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), NewR)
+ .addImm(C);
+ HBS::replaceReg(RD.Reg, NewR, MRI);
+ BitTracker::RegisterCell NewRC(W);
+ for (uint16_t I = 0; I != W; ++I) {
+ NewRC[I] = BitTracker::BitValue(C & 1);
+ C = unsigned(C) >> 1;
+ }
+ BT.put(BitTracker::RegisterRef(NewR), NewRC);
+ return true;
+ };
+
+ auto IsNonZero = [] (const MachineOperand &Op) {
+ if (Op.isGlobal() || Op.isBlockAddress())
+ return true;
+ if (Op.isImm())
+ return Op.getImm() != 0;
+ if (Op.isCImm())
+ return !Op.getCImm()->isZero();
+ if (Op.isFPImm())
+ return !Op.getFPImm()->isZero();
+ return false;
+ };
+
+ auto IsZero = [] (const MachineOperand &Op) {
+ if (Op.isGlobal() || Op.isBlockAddress())
+ return false;
+ if (Op.isImm())
+ return Op.getImm() == 0;
+ if (Op.isCImm())
+ return Op.getCImm()->isZero();
+ if (Op.isFPImm())
+ return Op.getFPImm()->isZero();
+ return false;
+ };
+
+ // If the source register is known to be 0 or non-0, the comparison can
+ // be folded to a load of a constant.
+ if (KnownZ || KnownNZ) {
+ assert(KnownZ != KnownNZ && "Register cannot be both 0 and non-0");
+ return ReplaceWithConst(KnownZ == (Opc == Hexagon::A4_rcmpeqi));
+ }
+
+ // Special case: if the compare comes from a C2_muxii, then we know the
+ // two possible constants that can be the source value.
+ MachineInstr *InpDef = MRI.getVRegDef(SR.Reg);
+ if (!InpDef)
+ return false;
+ if (SR.Sub == 0 && InpDef->getOpcode() == Hexagon::C2_muxii) {
+ MachineOperand &Src1 = InpDef->getOperand(2);
+ MachineOperand &Src2 = InpDef->getOperand(3);
+ // Check if both are non-zero.
+ bool KnownNZ1 = IsNonZero(Src1), KnownNZ2 = IsNonZero(Src2);
+ if (KnownNZ1 && KnownNZ2)
+ return ReplaceWithConst(Opc == Hexagon::A4_rcmpneqi);
+ // Check if both are zero.
+ bool KnownZ1 = IsZero(Src1), KnownZ2 = IsZero(Src2);
+ if (KnownZ1 && KnownZ2)
+ return ReplaceWithConst(Opc == Hexagon::A4_rcmpeqi);
+
+ // If for both operands we know that they are either 0 or non-0,
+ // replace the comparison with a C2_muxii, using the same predicate
+ // register, but with operands substituted with 0/1 accordingly.
+ if ((KnownZ1 || KnownNZ1) && (KnownZ2 || KnownNZ2)) {
+ unsigned NewR = MRI.createVirtualRegister(FRC);
+ BuildMI(B, At, DL, HII.get(Hexagon::C2_muxii), NewR)
+ .addReg(InpDef->getOperand(1).getReg())
+ .addImm(KnownZ1 == (Opc == Hexagon::A4_rcmpeqi))
+ .addImm(KnownZ2 == (Opc == Hexagon::A4_rcmpeqi));
+ HBS::replaceReg(RD.Reg, NewR, MRI);
+ // Create a new cell with only the least significant bit unknown.
+ BitTracker::RegisterCell NewRC(W);
+ NewRC[0] = BitTracker::BitValue::self();
+ NewRC.fill(1, W, BitTracker::BitValue::Zero);
+ BT.put(BitTracker::RegisterRef(NewR), NewRC);
+ return true;
+ }
+ }
+
+ return false;
+}
+
bool BitSimplification::processBlock(MachineBasicBlock &B,
const RegisterSet &AVs) {
if (!BT.reached(&B))
@@ -2615,6 +2737,7 @@ bool BitSimplification::processBlock(MachineBasicBlock &B,
T = T || genExtractHalf(MI, RD, RC);
T = T || genCombineHalf(MI, RD, RC);
T = T || genExtractLow(MI, RD, RC);
+ T = T || simplifyRCmp0(MI, RD);
Changed |= T;
continue;
}
diff --git a/lib/Target/Hexagon/HexagonBitTracker.cpp b/lib/Target/Hexagon/HexagonBitTracker.cpp
index e13cfd3f655a..94aacbed6af6 100644
--- a/lib/Target/Hexagon/HexagonBitTracker.cpp
+++ b/lib/Target/Hexagon/HexagonBitTracker.cpp
@@ -347,9 +347,11 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
return rr0(RC, Outputs);
}
case C2_tfrrp: {
- RegisterCell RC = RegisterCell::self(Reg[0].Reg, W0);
- W0 = 8; // XXX Pred size
- return rr0(eINS(RC, eXTR(rc(1), 0, W0), 0), Outputs);
+ uint16_t RW = W0;
+ uint16_t PW = 8; // XXX Pred size: getRegBitWidth(Reg[1]);
+ RegisterCell RC = RegisterCell::self(Reg[0].Reg, RW);
+ RC.fill(PW, RW, BT::BitValue::Zero);
+ return rr0(eINS(RC, eXTR(rc(1), 0, PW), 0), Outputs);
}
// Arithmetic:
@@ -950,6 +952,19 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
}
default:
+ // For instructions that define a single predicate registers, store
+ // the low 8 bits of the register only.
+ if (unsigned DefR = getUniqueDefVReg(MI)) {
+ if (MRI.getRegClass(DefR) == &Hexagon::PredRegsRegClass) {
+ BT::RegisterRef PD(DefR, 0);
+ uint16_t RW = getRegBitWidth(PD);
+ uint16_t PW = 8; // XXX Pred size: getRegBitWidth(Reg[1]);
+ RegisterCell RC = RegisterCell::self(DefR, RW);
+ RC.fill(PW, RW, BT::BitValue::Zero);
+ putCell(PD, RC, Outputs);
+ return true;
+ }
+ }
return MachineEvaluator::evaluate(MI, Inputs, Outputs);
}
#undef im
@@ -1016,6 +1031,21 @@ bool HexagonEvaluator::evaluate(const MachineInstr &BI,
return true;
}
+unsigned HexagonEvaluator::getUniqueDefVReg(const MachineInstr &MI) const {
+ unsigned DefReg = 0;
+ for (const MachineOperand &Op : MI.operands()) {
+ if (!Op.isReg() || !Op.isDef())
+ continue;
+ unsigned R = Op.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ continue;
+ if (DefReg != 0)
+ return 0;
+ DefReg = R;
+ }
+ return DefReg;
+}
+
bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI,
const CellMapType &Inputs,
CellMapType &Outputs) const {
diff --git a/lib/Target/Hexagon/HexagonBitTracker.h b/lib/Target/Hexagon/HexagonBitTracker.h
index d9dd04e1b088..f0b7c9d91950 100644
--- a/lib/Target/Hexagon/HexagonBitTracker.h
+++ b/lib/Target/Hexagon/HexagonBitTracker.h
@@ -49,6 +49,7 @@ struct HexagonEvaluator : public BitTracker::MachineEvaluator {
const HexagonInstrInfo &TII;
private:
+ unsigned getUniqueDefVReg(const MachineInstr &MI) const;
bool evaluateLoad(const MachineInstr &MI, const CellMapType &Inputs,
CellMapType &Outputs) const;
bool evaluateFormalCopy(const MachineInstr &MI, const CellMapType &Inputs,
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
index 183dee36a047..de486ec4b7bd 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
@@ -2,7 +2,7 @@
//
// The LLVM Compiler Infrastructure
//
-// This file is distributed under the University of Illinois Open Source
+// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 2acf701b43cb..ce7db657f5e9 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -7371,7 +7371,7 @@ bool MipsAsmParser::parseDirectiveGpWord() {
getParser().getStreamer().EmitGPRel32Value(Value);
if (getLexer().isNot(AsmToken::EndOfStatement))
- return Error(getLexer().getLoc(),
+ return Error(getLexer().getLoc(),
"unexpected token, expected end of statement");
Parser.Lex(); // Eat EndOfStatement token.
return false;
@@ -7506,7 +7506,7 @@ bool MipsAsmParser::parseDirectiveOption() {
}
// Unknown option.
- Warning(Parser.getTok().getLoc(),
+ Warning(Parser.getTok().getLoc(),
"unknown option, expected 'pic0' or 'pic2'");
Parser.eatToEndOfStatement();
return false;
@@ -8193,7 +8193,7 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".abicalls") {
getTargetStreamer().emitDirectiveAbiCalls();
if (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
- Error(Parser.getTok().getLoc(),
+ Error(Parser.getTok().getLoc(),
"unexpected token, expected end of statement");
}
return false;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
index fdb560f3c72f..d7f6cf91db73 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -114,7 +114,7 @@ namespace Mips {
// resulting in - R_MIPS_GOT_DISP
fixup_Mips_GOT_DISP,
- // resulting in - R_MIPS_HIGHER/R_MICROMIPS_HIGHER
+ // resulting in - R_MIPS_HIGHER/R_MICROMIPS_HIGHER
fixup_Mips_HIGHER,
fixup_MICROMIPS_HIGHER,
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 8ffc0731abcb..2e0c25de2bc8 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -1094,7 +1094,7 @@ void MipsAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) {
// ALIGN
// B .tmpN
// 11 NOP instructions (44 bytes)
- // ADDIU T9, T9, 52
+ // ADDIU T9, T9, 52
// .tmpN
//
// We need the 44 bytes (11 instructions) because at runtime, we'd
diff --git a/lib/Target/Mips/MipsCallLowering.cpp b/lib/Target/Mips/MipsCallLowering.cpp
index e82f62260b3f..a705ebb6b193 100644
--- a/lib/Target/Mips/MipsCallLowering.cpp
+++ b/lib/Target/Mips/MipsCallLowering.cpp
@@ -418,7 +418,8 @@ void MipsCallLowering::subTargetRegTypeForCallingConv(
for (auto &Arg : Args) {
EVT VT = TLI.getValueType(DL, Arg.Ty);
- MVT RegisterVT = TLI.getRegisterTypeForCallingConv(F.getContext(), VT);
+ MVT RegisterVT = TLI.getRegisterTypeForCallingConv(F.getContext(),
+ F.getCallingConv(), VT);
ISD::ArgFlagsTy Flags = Arg.Flags;
Flags.setOrigAlign(TLI.getABIAlignmentForCallingConv(Arg.Ty, DL));
diff --git a/lib/Target/Mips/MipsConstantIslandPass.cpp b/lib/Target/Mips/MipsConstantIslandPass.cpp
index 9eb13a68e561..744523cc6cb9 100644
--- a/lib/Target/Mips/MipsConstantIslandPass.cpp
+++ b/lib/Target/Mips/MipsConstantIslandPass.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
//
// This pass is used to make Pc relative loads of constants.
-// For now, only Mips16 will use this.
+// For now, only Mips16 will use this.
//
// Loading constants inline is expensive on Mips16 and it's in general better
// to place the constant nearby in code space and then it can be loaded with a
@@ -1171,7 +1171,7 @@ static inline unsigned getUnconditionalBrDisp(int Opc) {
/// findAvailableWater - Look for an existing entry in the WaterList in which
/// we can place the CPE referenced from U so it's within range of U's MI.
/// Returns true if found, false if not. If it returns true, WaterIter
-/// is set to the WaterList entry.
+/// is set to the WaterList entry.
/// To ensure that this pass
/// terminates, the CPE location for a particular CPUser is only allowed to
/// move to a lower address, so search backward from the end of the list and
@@ -1231,7 +1231,7 @@ void MipsConstantIslands::createNewWater(unsigned CPUserIndex,
const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()];
// If the block does not end in an unconditional branch already, and if the
- // end of the block is within range, make new water there.
+ // end of the block is within range, make new water there.
if (BBHasFallthrough(UserMBB)) {
// Size of branch to insert.
unsigned Delta = 2;
@@ -1258,7 +1258,7 @@ void MipsConstantIslands::createNewWater(unsigned CPUserIndex,
}
}
- // What a big block. Find a place within the block to split it.
+ // What a big block. Find a place within the block to split it.
// Try to split the block so it's fully aligned. Compute the latest split
// point where we can add a 4-byte branch instruction, and then align to
@@ -1582,7 +1582,7 @@ MipsConstantIslands::fixupConditionalBr(ImmBranch &Br) {
MachineInstr *BMI = &MBB->back();
bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
unsigned OppositeBranchOpcode = TII->getOppositeBranchOpc(Opcode);
-
+
++NumCBrFixed;
if (BMI != MI) {
if (std::next(MachineBasicBlock::iterator(MI)) == std::prev(MBB->end()) &&
@@ -1595,7 +1595,7 @@ MipsConstantIslands::fixupConditionalBr(ImmBranch &Br) {
// bnez L2
// b L1
unsigned BMITargetOperand = branchTargetOperand(BMI);
- MachineBasicBlock *NewDest =
+ MachineBasicBlock *NewDest =
BMI->getOperand(BMITargetOperand).getMBB();
if (isBBInRange(MI, NewDest, Br.MaxDisp)) {
LLVM_DEBUG(
diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp
index 7b39507812ed..19b30a44e86a 100644
--- a/lib/Target/Mips/MipsFastISel.cpp
+++ b/lib/Target/Mips/MipsFastISel.cpp
@@ -1662,7 +1662,7 @@ bool MipsFastISel::selectRet(const Instruction *I) {
return false;
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
+ GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 9ffc38356b76..0677d378a115 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -111,6 +111,7 @@ static bool isShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
// The MIPS MSA ABI passes vector arguments in the integer register set.
// The number of integer registers used is dependant on the ABI used.
MVT MipsTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
EVT VT) const {
if (VT.isVector()) {
if (Subtarget.isABI_O32()) {
@@ -123,6 +124,7 @@ MVT MipsTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
}
unsigned MipsTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
EVT VT) const {
if (VT.isVector())
return std::max((VT.getSizeInBits() / (Subtarget.isABI_O32() ? 32 : 64)),
@@ -131,10 +133,10 @@ unsigned MipsTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
}
unsigned MipsTargetLowering::getVectorTypeBreakdownForCallingConv(
- LLVMContext &Context, EVT VT, EVT &IntermediateVT,
+ LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
unsigned &NumIntermediates, MVT &RegisterVT) const {
// Break down vector types to either 2 i64s or 4 i32s.
- RegisterVT = getRegisterTypeForCallingConv(Context, VT) ;
+ RegisterVT = getRegisterTypeForCallingConv(Context, CC, VT);
IntermediateVT = RegisterVT;
NumIntermediates = VT.getSizeInBits() < RegisterVT.getSizeInBits()
? VT.getVectorNumElements()
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index b58d92c370d8..5a0de45c44f3 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -288,17 +288,18 @@ class TargetRegisterClass;
/// Return the register type for a given MVT, ensuring vectors are treated
/// as a series of gpr sized integers.
- MVT getRegisterTypeForCallingConv(LLVMContext &Context,
+ MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
EVT VT) const override;
/// Return the number of registers for a given MVT, ensuring vectors are
/// treated as a series of gpr sized integers.
unsigned getNumRegistersForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
EVT VT) const override;
/// Break down vectors to the correct number of gpr sized integers.
unsigned getVectorTypeBreakdownForCallingConv(
- LLVMContext &Context, EVT VT, EVT &IntermediateVT,
+ LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
unsigned &NumIntermediates, MVT &RegisterVT) const override;
/// Return the correct alignment for the current calling convention.
diff --git a/lib/Target/Mips/MipsInstructionSelector.cpp b/lib/Target/Mips/MipsInstructionSelector.cpp
index af0ac006bc9e..6c5b83021f74 100644
--- a/lib/Target/Mips/MipsInstructionSelector.cpp
+++ b/lib/Target/Mips/MipsInstructionSelector.cpp
@@ -166,6 +166,33 @@ bool MipsInstructionSelector::select(MachineInstr &I,
I.eraseFromParent();
return true;
}
+ case G_GLOBAL_VALUE: {
+ if (MF.getTarget().isPositionIndependent())
+ return false;
+
+ const llvm::GlobalValue *GVal = I.getOperand(1).getGlobal();
+ unsigned LUiReg = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ MachineInstr *LUi, *ADDiu;
+
+ LUi = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::LUi))
+ .addDef(LUiReg)
+ .addGlobalAddress(GVal);
+ LUi->getOperand(1).setTargetFlags(MipsII::MO_ABS_HI);
+
+ ADDiu = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::ADDiu))
+ .addDef(I.getOperand(0).getReg())
+ .addUse(LUiReg)
+ .addGlobalAddress(GVal);
+ ADDiu->getOperand(2).setTargetFlags(MipsII::MO_ABS_LO);
+
+ if (!constrainSelectedInstRegOperands(*LUi, TII, TRI, RBI))
+ return false;
+ if (!constrainSelectedInstRegOperands(*ADDiu, TII, TRI, RBI))
+ return false;
+
+ I.eraseFromParent();
+ return true;
+ }
default:
return false;
diff --git a/lib/Target/Mips/MipsLegalizerInfo.cpp b/lib/Target/Mips/MipsLegalizerInfo.cpp
index da6f9dabdaaf..fb259516be09 100644
--- a/lib/Target/Mips/MipsLegalizerInfo.cpp
+++ b/lib/Target/Mips/MipsLegalizerInfo.cpp
@@ -36,6 +36,9 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
getActionDefinitionsBuilder(G_FRAME_INDEX)
.legalFor({p0});
+ getActionDefinitionsBuilder(G_GLOBAL_VALUE)
+ .legalFor({p0});
+
computeTables();
verify(*ST.getInstrInfo());
}
diff --git a/lib/Target/Mips/MipsRegisterBankInfo.cpp b/lib/Target/Mips/MipsRegisterBankInfo.cpp
index cef21f447205..351135079217 100644
--- a/lib/Target/Mips/MipsRegisterBankInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterBankInfo.cpp
@@ -88,6 +88,7 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
case G_CONSTANT:
case G_FRAME_INDEX:
+ case G_GLOBAL_VALUE:
OperandsMapping =
getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx], nullptr});
break;
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index 676d702ba63e..896dd0eb0a5e 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -163,7 +163,7 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
// HasEVA -- supports EVA ASE.
bool HasEVA;
-
+
// nomadd4 - disables generation of 4-operand madd.s, madd.d and
// related instructions.
bool DisableMadd4;
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 3b042c74b26c..efe98003b1c8 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -248,7 +248,7 @@ protected:
private:
bool GlobalsEmitted;
-
+
// This is specific per MachineFunction.
const MachineRegisterInfo *MRI;
// The contents are specific for each
diff --git a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
index f12ed81b6d9f..ad1d7cbb52fc 100644
--- a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
+++ b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
@@ -2,7 +2,7 @@
//
// The LLVM Compiler Infrastructure
//
-// This file is distributed under the University of Illinois Open Source
+// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h b/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
index 10f1135ad841..5a9115f6f7f1 100644
--- a/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
+++ b/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
@@ -2,7 +2,7 @@
//
// The LLVM Compiler Infrastructure
//
-// This file is distributed under the University of Illinois Open Source
+// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index ea709a73ebf2..fd7f81591426 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -175,7 +175,7 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O,
+ raw_ostream &O,
const char *Modifier) {
unsigned Code = MI->getOperand(OpNo).getImm();
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index f000fbb98110..351ccefa2da2 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -35,11 +35,11 @@ public:
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
const MCSubtargetInfo &STI) override;
-
+
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
-
+
bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
unsigned PrintMethodIdx,
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index 8ac461b96b88..fb7bf23509c7 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -61,7 +61,7 @@ PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) {
CommentString = "#";
// Uses '.section' before '.bss' directive
- UsesELFSectionDirectiveForBSS = true;
+ UsesELFSectionDirectiveForBSS = true;
// Debug Information
SupportsDebugInformation = true;
@@ -73,7 +73,7 @@ PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) {
// Exceptions handling
ExceptionsType = ExceptionHandling::DwarfCFI;
-
+
ZeroDirective = "\t.space\t";
Data64bitsDirective = is64Bit ? "\t.quad\t" : nullptr;
AssemblerDialect = 1; // New-Style mnemonics.
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 2b948ca60028..57bda1403c62 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -102,7 +102,7 @@ public:
unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
-
+
// getBinaryCodeForInstr - TableGen'erated function for getting the
// binary encoding for an instruction.
uint64_t getBinaryCodeForInstr(const MCInst &MI,
@@ -138,7 +138,7 @@ public:
default:
llvm_unreachable("Invalid instruction size");
}
-
+
++MCNumEmitted; // Keep track of the # of mi's emitted.
}
@@ -147,7 +147,7 @@ private:
void verifyInstructionPredicates(const MCInst &MI,
uint64_t AvailableFeatures) const;
};
-
+
} // end anonymous namespace
MCCodeEmitter *llvm::createPPCMCCodeEmitter(const MCInstrInfo &MCII,
@@ -162,7 +162,7 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
-
+
// Add a fixup for the branch target.
Fixups.push_back(MCFixup::create(0, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_br24));
@@ -212,7 +212,7 @@ unsigned PPCMCCodeEmitter::getImm16Encoding(const MCInst &MI, unsigned OpNo,
const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
-
+
// Add a fixup for the immediate field.
Fixups.push_back(MCFixup::create(IsLittleEndian? 0 : 2, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_half16));
@@ -226,11 +226,11 @@ unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo,
// displacement and the next 5 bits as the register #.
assert(MI.getOperand(OpNo+1).isReg());
unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 16;
-
+
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isImm())
return (getMachineOpValue(MI, MO, Fixups, STI) & 0xFFFF) | RegBits;
-
+
// Add a fixup for the displacement field.
Fixups.push_back(MCFixup::create(IsLittleEndian? 0 : 2, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_half16));
@@ -244,11 +244,11 @@ unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
// displacement and the next 5 bits as the register #.
assert(MI.getOperand(OpNo+1).isReg());
unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 14;
-
+
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isImm())
return ((getMachineOpValue(MI, MO, Fixups, STI) >> 2) & 0x3FFF) | RegBits;
-
+
// Add a fixup for the displacement field.
Fixups.push_back(MCFixup::create(IsLittleEndian? 0 : 2, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_half16ds));
@@ -320,7 +320,7 @@ unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isReg()) return getMachineOpValue(MI, MO, Fixups, STI);
-
+
// Add a fixup for the TLS register, which simply provides a relocation
// hint to the linker that this statement is part of a relocation sequence.
// Return the thread-pointer register's encoding.
@@ -373,7 +373,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
return Encode;
}
-
+
assert(MO.isImm() &&
"Relocation required in an instruction that we cannot encode!");
return MO.getImm();
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
index fe7e7aeeb182..481ba3f09cc7 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -58,7 +58,7 @@ namespace PPC {
PRED_BIT_SET = 1024,
PRED_BIT_UNSET = 1025
};
-
+
// Bit for branch taken (plus) or not-taken (minus) hint
enum BranchHintBit {
BR_NO_HINT = 0x0,
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index dfdec246e868..bfc613af3dc0 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -66,7 +66,7 @@ namespace llvm {
extern char &PPCVSXFMAMutateID;
namespace PPCII {
-
+
/// Target Operand Flag enum.
enum TOF {
//===------------------------------------------------------------------===//
@@ -111,7 +111,7 @@ namespace llvm {
MO_TLS = 8 << 4
};
} // end namespace PPCII
-
+
} // end namespace llvm;
#endif
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index 64b8f1168beb..0d1bb9297bcb 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -130,7 +130,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
BlockSizes[MBB->getNumber()].first = BlockSize;
FuncSize += BlockSize;
}
-
+
// If the entire function is smaller than the displacement of a branch field,
// we know we don't need to shrink any branches in this function. This is a
// common case.
@@ -138,7 +138,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
BlockSizes.clear();
return false;
}
-
+
// For each conditional branch, if the offset to its destination is larger
// than the offset field allows, transform it into a long branch sequence
// like this:
@@ -153,7 +153,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
while (MadeChange) {
// Iteratively expand branches until we reach a fixed point.
MadeChange = false;
-
+
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
++MFI) {
MachineBasicBlock &MBB = *MFI;
@@ -175,7 +175,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
MBBStartOffset += TII->getInstSizeInBytes(*I);
continue;
}
-
+
// Determine the offset from the current branch to the destination
// block.
int BranchSize;
@@ -184,7 +184,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
// start of this block to this branch, plus the sizes of all blocks
// from this block to the dest.
BranchSize = MBBStartOffset;
-
+
for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i)
BranchSize += BlockSizes[i].first;
} else {
@@ -213,7 +213,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
// 2. Target MBB
PPC::Predicate Pred = (PPC::Predicate)I->getOperand(0).getImm();
unsigned CRReg = I->getOperand(1).getReg();
-
+
// Jump over the uncond branch inst (i.e. $PC+8) on opposite condition.
BuildMI(MBB, I, dl, TII->get(PPC::BCC))
.addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2);
@@ -234,7 +234,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
} else {
llvm_unreachable("Unhandled branch type!");
}
-
+
// Uncond branch to the real destination.
I = BuildMI(MBB, I, dl, TII->get(PPC::B)).addMBB(Dest);
@@ -277,7 +277,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
EverMadeChange |= MadeChange;
}
-
+
BlockSizes.clear();
return true;
}
diff --git a/lib/Target/PowerPC/PPCEarlyReturn.cpp b/lib/Target/PowerPC/PPCEarlyReturn.cpp
index ed5e496b32fd..ac931f7d0ec0 100644
--- a/lib/Target/PowerPC/PPCEarlyReturn.cpp
+++ b/lib/Target/PowerPC/PPCEarlyReturn.cpp
@@ -73,7 +73,7 @@ protected:
if ((*PI)->empty())
continue;
-
+
for (MachineBasicBlock::iterator J = (*PI)->getLastNonDebugInstr();;) {
if (J == (*PI)->end())
break;
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index b00655b50229..f212894035db 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -1697,7 +1697,7 @@ bool PPCFastISel::SelectRet(const Instruction *I) {
if (Ret->getNumOperands() > 0) {
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
+ GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index f0000c5bafd7..84dacf396462 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -174,7 +174,7 @@ const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
{PPC::V22, -160},
{PPC::V21, -176},
{PPC::V20, -192},
-
+
// SPE register save area (overlaps Vector save area).
{PPC::S31, -8},
{PPC::S30, -16},
@@ -1229,7 +1229,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
if (MBBI != MBB.end())
dl = MBBI->getDebugLoc();
-
+
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
@@ -1315,7 +1315,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
}
bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
-
+
if (IsReturnBlock) {
unsigned RetOpcode = MBBI->getOpcode();
bool UsesTCRet = RetOpcode == PPC::TCRETURNri ||
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 551220466901..793a4dd7f624 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -50,7 +50,7 @@ bool PPCDispatchGroupSBHazardRecognizer::isLoadAfterStore(SUnit *SU) {
return true;
}
- return false;
+ return false;
}
bool PPCDispatchGroupSBHazardRecognizer::isBCTRAfterSet(SUnit *SU) {
@@ -76,7 +76,7 @@ bool PPCDispatchGroupSBHazardRecognizer::isBCTRAfterSet(SUnit *SU) {
return true;
}
- return false;
+ return false;
}
// FIXME: Remove this when we don't need this:
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 1e3e14c71144..51ff8a5cf77e 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1224,6 +1224,7 @@ unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
}
unsigned PPCTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
+ CallingConv:: ID CC,
EVT VT) const {
if (Subtarget.hasSPE() && VT == MVT::f64)
return 2;
@@ -1231,6 +1232,7 @@ unsigned PPCTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
}
MVT PPCTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
+ CallingConv:: ID CC,
EVT VT) const {
if (Subtarget.hasSPE() && VT == MVT::f64)
return MVT::i32;
@@ -13102,8 +13104,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
SDValue
PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
- SelectionDAG &DAG,
- std::vector<SDNode *> *Created) const {
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const {
// fold (sdiv X, pow2)
EVT VT = N->getValueType(0);
if (VT == MVT::i64 && !Subtarget.isPPC64())
@@ -13120,13 +13122,11 @@ PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
- if (Created)
- Created->push_back(Op.getNode());
+ Created.push_back(Op.getNode());
if (IsNegPow2) {
Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
- if (Created)
- Created->push_back(Op.getNode());
+ Created.push_back(Op.getNode());
}
return Op;
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 9b8d6435515b..f174943a8004 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -665,7 +665,7 @@ namespace llvm {
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
- std::vector<SDNode *> *Created) const override;
+ SmallVectorImpl<SDNode *> &Created) const override;
unsigned getRegisterByName(const char* RegName, EVT VT,
SelectionDAG &DAG) const override;
@@ -872,9 +872,11 @@ namespace llvm {
MCContext &Ctx) const override;
unsigned getNumRegistersForCallingConv(LLVMContext &Context,
+ CallingConv:: ID CC,
EVT VT) const override;
MVT getRegisterTypeForCallingConv(LLVMContext &Context,
+ CallingConv:: ID CC,
EVT VT) const override;
private:
@@ -1141,7 +1143,7 @@ namespace llvm {
ISD::ArgFlagsTy &ArgFlags,
CCState &State);
- bool
+ bool
CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT,
MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 4669719744bc..0930f7d3b8d7 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -316,11 +316,11 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
}
// For opcodes with the ReMaterializable flag set, this function is called to
-// verify the instruction is really rematable.
+// verify the instruction is really rematable.
bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
AliasAnalysis *AA) const {
switch (MI.getOpcode()) {
- default:
+ default:
// This function should only be called for opcodes with the ReMaterializable
// flag set.
llvm_unreachable("Unknown rematerializable operation!");
diff --git a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
index 2217fa4693ce..0b57dd9b618d 100644
--- a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
+++ b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
@@ -360,7 +360,7 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
// generate direct offsets from both the pre-incremented and
// post-incremented pointer values. Thus, we'll pick the first non-prefetch
// instruction in each bucket, and adjust the recurrence and other offsets
- // accordingly.
+ // accordingly.
for (int j = 0, je = Buckets[i].Elements.size(); j != je; ++j) {
if (auto *II = dyn_cast<IntrinsicInst>(Buckets[i].Elements[j].Instr))
if (II->getIntrinsicID() == Intrinsic::prefetch)
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index 62a612feb55c..e731c0bc0c23 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -75,7 +75,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO,
}
return Sym;
}
-
+
return Sym;
}
@@ -130,7 +130,7 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
// Subtract off the PIC base if required.
if (MO.getTargetFlags() & PPCII::MO_PIC_FLAG) {
const MachineFunction *MF = MO.getParent()->getParent()->getParent();
-
+
const MCExpr *PB = MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
Expr = MCBinaryExpr::createSub(Expr, PB, Ctx);
}
@@ -151,7 +151,7 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
AsmPrinter &AP, bool isDarwin) {
OutMI.setOpcode(MI->getOpcode());
-
+
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MCOperand MCOp;
if (LowerPPCMachineOperandToMCOperand(MI->getOperand(i), MCOp, AP,
diff --git a/lib/Target/PowerPC/PPCMIPeephole.cpp b/lib/Target/PowerPC/PPCMIPeephole.cpp
index dbe1fe37ddf8..0068df19f0c8 100644
--- a/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -891,7 +891,7 @@ static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
auto BII = BB.getFirstInstrTerminator();
// We optimize BBs ending with a conditional branch.
// We check only for BCC here, not BCCLR, because BCCLR
- // will be formed only later in the pipeline.
+ // will be formed only later in the pipeline.
if (BB.succ_size() == 2 &&
BII != BB.instr_end() &&
(*BII).getOpcode() == PPC::BCC &&
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index b14bbad2039a..8a3f50aa9565 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -29,7 +29,7 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// stored. Also used as an anchor for instructions that need to be altered
/// when using frame pointers (dyna_add, dyna_sub.)
int FramePointerSaveIndex = 0;
-
+
/// ReturnAddrSaveIndex - Frame index of where the return address is stored.
///
int ReturnAddrSaveIndex = 0;
@@ -128,7 +128,7 @@ public:
int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
-
+
int getReturnAddrSaveIndex() const { return ReturnAddrSaveIndex; }
void setReturnAddrSaveIndex(int idx) { ReturnAddrSaveIndex = idx; }
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 6647ceace5eb..96923a97a82c 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -979,7 +979,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
SReg = MF.getRegInfo().createVirtualRegister(RC);
// Insert a set of rA with the full offset value before the ld, st, or add
- if (isInt<16>(Offset))
+ if (isInt<16>(Offset))
BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LI8 : PPC::LI), SReg)
.addImm(Offset);
else {
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 226c75f704f4..b0da9b5a6d70 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -201,7 +201,7 @@ unsigned PPCTTIImpl::getUserCost(const User *U,
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, U->getType());
return LT.first * BaseT::getUserCost(U, Operands);
}
-
+
return BaseT::getUserCost(U, Operands);
}
diff --git a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
index 1e8a1750ec3b..1be193e08c01 100644
--- a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -443,7 +443,7 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
// We can handle STXSDX and STXSSPX similarly to LXSDX and LXSSPX,
// by adding special handling for narrowing copies as well as
// widening ones. However, I've experimented with this, and in
- // practice we currently do not appear to use STXSDX fed by
+ // practice we currently do not appear to use STXSDX fed by
// a narrowing copy from a full vector register. Since I can't
// generate any useful test cases, I've left this alone for now.
case PPC::STXSDX:
diff --git a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index c7a5a1e8e6ee..35f52f7d279b 100644
--- a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -190,7 +190,7 @@ public:
Sparc::C8_C9, Sparc::C10_C11, Sparc::C12_C13, Sparc::C14_C15,
Sparc::C16_C17, Sparc::C18_C19, Sparc::C20_C21, Sparc::C22_C23,
Sparc::C24_C25, Sparc::C26_C27, Sparc::C28_C29, Sparc::C30_C31};
-
+
namespace {
/// SparcOperand - Instances of this class represent a parsed Sparc machine
@@ -459,7 +459,7 @@ public:
Op.Reg.Kind = rk_CoprocPairReg;
return true;
}
-
+
static std::unique_ptr<SparcOperand>
MorphToMEMrr(unsigned Base, std::unique_ptr<SparcOperand> Op) {
unsigned offsetReg = Op->getReg();
@@ -1000,7 +1000,7 @@ bool SparcAsmParser::matchRegisterName(const AsmToken &Tok, unsigned &RegNo,
RegKind = SparcOperand::rk_Special;
return true;
}
-
+
if (name.equals("wim")) {
RegNo = Sparc::WIM;
RegKind = SparcOperand::rk_Special;
@@ -1093,7 +1093,7 @@ bool SparcAsmParser::matchRegisterName(const AsmToken &Tok, unsigned &RegNo,
RegKind = SparcOperand::rk_CoprocReg;
return true;
}
-
+
if (name.equals("tpc")) {
RegNo = Sparc::TPC;
RegKind = SparcOperand::rk_Special;
diff --git a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
index 8e298e8316da..3e30dae1537f 100644
--- a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
+++ b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
@@ -350,18 +350,18 @@ DecodeStatus SparcDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
return MCDisassembler::Fail;
// Calling the auto-generated decoder function.
-
+
if (STI.getFeatureBits()[Sparc::FeatureV9])
{
Result = decodeInstruction(DecoderTableSparcV932, Instr, Insn, Address, this, STI);
}
else
{
- Result = decodeInstruction(DecoderTableSparcV832, Instr, Insn, Address, this, STI);
+ Result = decodeInstruction(DecoderTableSparcV832, Instr, Insn, Address, this, STI);
}
if (Result != MCDisassembler::Fail)
return Result;
-
+
Result =
decodeInstruction(DecoderTableSparc32, Instr, Insn, Address, this, STI);
@@ -662,7 +662,7 @@ static DecodeStatus DecodeTRAP(MCInst &MI, unsigned insn, uint64_t Address,
if (status != MCDisassembler::Success)
return status;
}
-
+
// Decode CC
MI.addOperand(MCOperand::createImm(cc));
diff --git a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
index 4981deae6af6..c1512cbdc44f 100644
--- a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
+++ b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
@@ -118,9 +118,9 @@ void SparcInstPrinter::printOperand(const MCInst *MI, int opNum,
if (MO.isImm()) {
switch (MI->getOpcode()) {
default:
- O << (int)MO.getImm();
+ O << (int)MO.getImm();
return;
-
+
case SP::TICCri: // Fall through
case SP::TICCrr: // Fall through
case SP::TRAPri: // Fall through
@@ -128,7 +128,7 @@ void SparcInstPrinter::printOperand(const MCInst *MI, int opNum,
case SP::TXCCri: // Fall through
case SP::TXCCrr: // Fall through
// Only seven-bit values up to 127.
- O << ((int) MO.getImm() & 0x7f);
+ O << ((int) MO.getImm() & 0x7f);
return;
}
}
diff --git a/lib/Target/Sparc/Sparc.h b/lib/Target/Sparc/Sparc.h
index 4135e4e1b61d..0cea53b359eb 100644
--- a/lib/Target/Sparc/Sparc.h
+++ b/lib/Target/Sparc/Sparc.h
@@ -73,7 +73,7 @@ namespace llvm {
FCC_LE = 13+16, // Less or Equal
FCC_ULE = 14+16, // Unordered or Less or Equal
FCC_O = 15+16, // Ordered
-
+
CPCC_A = 8+32, // Always
CPCC_N = 0+32, // Never
CPCC_3 = 7+32,
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index bf700d6a99d8..0cbbda787881 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -59,9 +59,9 @@ namespace llvm {
public:
SparcTargetLowering(const TargetMachine &TM, const SparcSubtarget &STI);
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
-
+
bool useSoftFloat() const override;
-
+
/// computeKnownBitsForTargetNode - Determine which of the bits specified
/// in Mask are known to be either zero or one and return them in the
/// KnownZero/KnownOne bitsets.
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
index 6750763d8ee5..47b42444b94d 100644
--- a/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -115,7 +115,7 @@ static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC)
case SPCC::FCC_UE: return SPCC::FCC_LG;
case SPCC::FCC_NE: return SPCC::FCC_E;
case SPCC::FCC_E: return SPCC::FCC_NE;
-
+
case SPCC::CPCC_A: return SPCC::CPCC_N;
case SPCC::CPCC_N: return SPCC::CPCC_A;
case SPCC::CPCC_3: LLVM_FALLTHROUGH;
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index a0d40653fd9b..07f9e7250bd9 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -100,7 +100,7 @@ SparcTargetMachine::SparcTargetMachine(
SparcTargetMachine::~SparcTargetMachine() {}
-const SparcSubtarget *
+const SparcSubtarget *
SparcTargetMachine::getSubtargetImpl(const Function &F) const {
Attribute CPUAttr = F.getFnAttribute("target-cpu");
Attribute FSAttr = F.getFnAttribute("target-features");
@@ -119,7 +119,7 @@ SparcTargetMachine::getSubtargetImpl(const Function &F) const {
F.hasFnAttribute("use-soft-float") &&
F.getFnAttribute("use-soft-float").getValueAsString() == "true";
- if (softFloat)
+ if (softFloat)
FS += FS.empty() ? "+soft-float" : ",+soft-float";
auto &I = SubtargetMap[CPU + FS];
diff --git a/lib/Target/SystemZ/SystemZHazardRecognizer.cpp b/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
index d300d1d88abc..b9e5788cf018 100644
--- a/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
+++ b/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
@@ -55,7 +55,7 @@ getNumDecoderSlots(SUnit *SU) const {
else
return 3; // Expanded/group-alone instruction
}
-
+
return 1; // Normal instruction
}
@@ -81,6 +81,7 @@ getHazardType(SUnit *m, int Stalls) {
void SystemZHazardRecognizer::Reset() {
CurrGroupSize = 0;
+ CurrGroupHas4RegOps = false;
clearProcResCounters();
GrpCount = 0;
LastFPdOpCycleIdx = UINT_MAX;
@@ -99,6 +100,12 @@ SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const {
if (SC->BeginGroup)
return (CurrGroupSize == 0);
+ // An instruction with 4 register operands will not fit in last slot.
+ assert ((CurrGroupSize < 2 || !CurrGroupHas4RegOps) &&
+ "Current decoder group is already full!");
+ if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
+ return false;
+
// Since a full group is handled immediately in EmitInstruction(),
// SU should fit into current group. NumSlots should be 1 or 0,
// since it is not a cracked or expanded instruction.
@@ -108,6 +115,23 @@ SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const {
return true;
}
+bool SystemZHazardRecognizer::has4RegOps(const MachineInstr *MI) const {
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
+ const MCInstrDesc &MID = MI->getDesc();
+ unsigned Count = 0;
+ for (unsigned OpIdx = 0; OpIdx < MID.getNumOperands(); OpIdx++) {
+ const TargetRegisterClass *RC = TII->getRegClass(MID, OpIdx, TRI, MF);
+ if (RC == nullptr)
+ continue;
+ if (OpIdx >= MID.getNumDefs() &&
+ MID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
+ continue;
+ Count++;
+ }
+ return Count >= 4;
+}
+
void SystemZHazardRecognizer::nextGroup() {
if (CurrGroupSize == 0)
return;
@@ -119,6 +143,7 @@ void SystemZHazardRecognizer::nextGroup() {
// Reset counter for next group.
CurrGroupSize = 0;
+ CurrGroupHas4RegOps = false;
// Decrease counters for execution units by one.
for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
@@ -142,7 +167,7 @@ void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const {
const MCSchedClassDesc *SC = getSchedClass(SU);
if (!SC->isValid())
return;
-
+
for (TargetSchedModel::ProcResIter
PI = SchedModel->getWriteProcResBegin(SC),
PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
@@ -172,6 +197,8 @@ void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const {
OS << "/EndsGroup";
if (SU->isUnbuffered)
OS << "/Unbuffered";
+ if (has4RegOps(SU->getInstr()))
+ OS << "/4RegOps";
}
void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const {
@@ -184,6 +211,7 @@ void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const {
dbgs() << "{ " << CurGroupDbg << " }";
dbgs() << " (" << CurrGroupSize << " decoder slot"
<< (CurrGroupSize > 1 ? "s":"")
+ << (CurrGroupHas4RegOps ? ", 4RegOps" : "")
<< ")\n";
}
}
@@ -294,11 +322,14 @@ EmitInstruction(SUnit *SU) {
// Insert SU into current group by increasing number of slots used
// in current group.
CurrGroupSize += getNumDecoderSlots(SU);
- assert (CurrGroupSize <= 3);
+ CurrGroupHas4RegOps |= has4RegOps(SU->getInstr());
+ unsigned GroupLim =
+ ((CurrGroupHas4RegOps && getNumDecoderSlots(SU) < 3) ? 2 : 3);
+ assert (CurrGroupSize <= GroupLim && "SU does not fit into decoder group!");
// Check if current group is now full/ended. If so, move on to next
// group to be ready to evaluate more candidates.
- if (CurrGroupSize == 3 || SC->EndGroup)
+ if (CurrGroupSize == GroupLim || SC->EndGroup)
nextGroup();
}
@@ -306,7 +337,7 @@ int SystemZHazardRecognizer::groupingCost(SUnit *SU) const {
const MCSchedClassDesc *SC = getSchedClass(SU);
if (!SC->isValid())
return 0;
-
+
// If SU begins new group, it can either break a current group early
// or fit naturally if current group is empty (negative cost).
if (SC->BeginGroup) {
@@ -325,6 +356,10 @@ int SystemZHazardRecognizer::groupingCost(SUnit *SU) const {
return -1;
}
+ // An instruction with 4 register operands will not fit in last slot.
+ if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
+ return 1;
+
// Most instructions can be placed in any decoder slot.
return 0;
}
diff --git a/lib/Target/SystemZ/SystemZHazardRecognizer.h b/lib/Target/SystemZ/SystemZHazardRecognizer.h
index 40cb3acc7009..6292feefbfea 100644
--- a/lib/Target/SystemZ/SystemZHazardRecognizer.h
+++ b/lib/Target/SystemZ/SystemZHazardRecognizer.h
@@ -45,15 +45,17 @@ namespace llvm {
/// SystemZHazardRecognizer maintains the state for one MBB during scheduling.
class SystemZHazardRecognizer : public ScheduleHazardRecognizer {
-#ifndef NDEBUG
const SystemZInstrInfo *TII;
-#endif
const TargetSchedModel *SchedModel;
/// Keep track of the number of decoder slots used in the current
/// decoder group.
unsigned CurrGroupSize;
+ /// True if an instruction with four reg operands have been scheduled into
+ /// the current decoder group.
+ bool CurrGroupHas4RegOps;
+
/// The tracking of resources here are quite similar to the common
/// code use of a critical resource. However, z13 differs in the way
/// that it has two processor sides which may be interesting to
@@ -73,6 +75,9 @@ class SystemZHazardRecognizer : public ScheduleHazardRecognizer {
/// Return true if MI fits into current decoder group.
bool fitsIntoCurrentGroup(SUnit *SU) const;
+ /// Return true if this instruction has four register operands.
+ bool has4RegOps(const MachineInstr *MI) const;
+
/// Two decoder groups per cycle are formed (for z13), meaning 2x3
/// instructions. This function returns a number between 0 and 5,
/// representing the current decoder slot of the current cycle. If an SU
@@ -105,11 +110,7 @@ class SystemZHazardRecognizer : public ScheduleHazardRecognizer {
public:
SystemZHazardRecognizer(const SystemZInstrInfo *tii,
const TargetSchedModel *SM)
- :
-#ifndef NDEBUG
- TII(tii),
-#endif
- SchedModel(SM) {
+ : TII(tii), SchedModel(SM) {
Reset();
}
@@ -134,7 +135,7 @@ public:
/// new decoder group, this is negative if this fits the schedule or
/// positive if it would mean ending a group prematurely. For normal
/// instructions this returns 0.
- int groupingCost(SUnit *SU) const;
+ int groupingCost(SUnit *SU) const;
/// Return the cost of SU in regards to processor resources usage.
/// A positive value means it would be better to wait with SU, while
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 302c7883f97b..e76fa71dacd7 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -527,10 +527,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::FP_ROUND);
setTargetDAGCombine(ISD::BSWAP);
- setTargetDAGCombine(ISD::SHL);
- setTargetDAGCombine(ISD::SRA);
- setTargetDAGCombine(ISD::SRL);
- setTargetDAGCombine(ISD::ROTL);
// Handle intrinsics.
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
@@ -609,7 +605,7 @@ struct AddressingMode {
// True if use of index register is supported.
bool IndexReg;
-
+
AddressingMode(bool LongDispl, bool IdxReg) :
LongDisplacement(LongDispl), IndexReg(IdxReg) {}
};
@@ -5524,76 +5520,6 @@ SDValue SystemZTargetLowering::combineBSWAP(
return SDValue();
}
-SDValue SystemZTargetLowering::combineSHIFTROT(
- SDNode *N, DAGCombinerInfo &DCI) const {
-
- SelectionDAG &DAG = DCI.DAG;
-
- // Shift/rotate instructions only use the last 6 bits of the second operand
- // register. If the second operand is the result of an AND with an immediate
- // value that has its last 6 bits set, we can safely remove the AND operation.
- //
- // If the AND operation doesn't have the last 6 bits set, we can't remove it
- // entirely, but we can still truncate it to a 16-bit value. This prevents
- // us from ending up with a NILL with a signed operand, which will cause the
- // instruction printer to abort.
- SDValue N1 = N->getOperand(1);
- if (N1.getOpcode() == ISD::AND) {
- SDValue AndMaskOp = N1->getOperand(1);
- auto *AndMask = dyn_cast<ConstantSDNode>(AndMaskOp);
-
- // The AND mask is constant
- if (AndMask) {
- auto AmtVal = AndMask->getZExtValue();
-
- // Bottom 6 bits are set
- if ((AmtVal & 0x3f) == 0x3f) {
- SDValue AndOp = N1->getOperand(0);
-
- // This is the only use, so remove the node
- if (N1.hasOneUse()) {
- // Combine the AND away
- DCI.CombineTo(N1.getNode(), AndOp);
-
- // Return N so it isn't rechecked
- return SDValue(N, 0);
-
- // The node will be reused, so create a new node for this one use
- } else {
- SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N),
- N->getValueType(0), N->getOperand(0),
- AndOp);
- DCI.AddToWorklist(Replace.getNode());
-
- return Replace;
- }
-
- // We can't remove the AND, but we can use NILL here (normally we would
- // use NILF). Only keep the last 16 bits of the mask. The actual
- // transformation will be handled by .td definitions.
- } else if (AmtVal >> 16 != 0) {
- SDValue AndOp = N1->getOperand(0);
-
- auto NewMask = DAG.getConstant(AndMask->getZExtValue() & 0x0000ffff,
- SDLoc(AndMaskOp),
- AndMaskOp.getValueType());
-
- auto NewAnd = DAG.getNode(N1.getOpcode(), SDLoc(N1), N1.getValueType(),
- AndOp, NewMask);
-
- SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N),
- N->getValueType(0), N->getOperand(0),
- NewAnd);
- DCI.AddToWorklist(Replace.getNode());
-
- return Replace;
- }
- }
- }
-
- return SDValue();
-}
-
static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
// We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
// set by the CCReg instruction using the CCValid / CCMask masks,
@@ -5752,10 +5678,6 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
case ISD::BSWAP: return combineBSWAP(N, DCI);
- case ISD::SHL:
- case ISD::SRA:
- case ISD::SRL:
- case ISD::ROTL: return combineSHIFTROT(N, DCI);
case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index 0ca93a38a016..267e31a85216 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -602,7 +602,6 @@ private:
SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const;
- SDValue combineSHIFTROT(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineBR_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSELECT_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineGET_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index 9d7312269957..bb5b7aae883b 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -1352,8 +1352,8 @@ def : Pat<(z_udivrem GR64:$src1, (i64 (load bdxaddr20only:$src2))),
//===----------------------------------------------------------------------===//
// Logical shift left.
-defm SLL : BinaryRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>;
-def SLLG : BinaryRSY<"sllg", 0xEB0D, shl, GR64>;
+defm SLL : BinaryRSAndK<"sll", 0x89, 0xEBDF, shiftop<shl>, GR32>;
+def SLLG : BinaryRSY<"sllg", 0xEB0D, shiftop<shl>, GR64>;
def SLDL : BinaryRS<"sldl", 0x8D, null_frag, GR128>;
// Arithmetic shift left.
@@ -1364,20 +1364,20 @@ let Defs = [CC] in {
}
// Logical shift right.
-defm SRL : BinaryRSAndK<"srl", 0x88, 0xEBDE, srl, GR32>;
-def SRLG : BinaryRSY<"srlg", 0xEB0C, srl, GR64>;
+defm SRL : BinaryRSAndK<"srl", 0x88, 0xEBDE, shiftop<srl>, GR32>;
+def SRLG : BinaryRSY<"srlg", 0xEB0C, shiftop<srl>, GR64>;
def SRDL : BinaryRS<"srdl", 0x8C, null_frag, GR128>;
// Arithmetic shift right.
let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in {
- defm SRA : BinaryRSAndK<"sra", 0x8A, 0xEBDC, sra, GR32>;
- def SRAG : BinaryRSY<"srag", 0xEB0A, sra, GR64>;
+ defm SRA : BinaryRSAndK<"sra", 0x8A, 0xEBDC, shiftop<sra>, GR32>;
+ def SRAG : BinaryRSY<"srag", 0xEB0A, shiftop<sra>, GR64>;
def SRDA : BinaryRS<"srda", 0x8E, null_frag, GR128>;
}
// Rotate left.
-def RLL : BinaryRSY<"rll", 0xEB1D, rotl, GR32>;
-def RLLG : BinaryRSY<"rllg", 0xEB1C, rotl, GR64>;
+def RLL : BinaryRSY<"rll", 0xEB1D, shiftop<rotl>, GR32>;
+def RLLG : BinaryRSY<"rllg", 0xEB1C, shiftop<rotl>, GR64>;
// Rotate second operand left and inserted selected bits into first operand.
// These can act like 32-bit operands provided that the constant start and
@@ -2162,29 +2162,29 @@ def : Pat<(and (xor GR64:$x, (i64 -1)), GR64:$y),
// Complexity is added so that we match this before we match NILF on the AND
// operation alone.
let AddedComplexity = 4 in {
- def : Pat<(shl GR32:$val, (and GR32:$shift, uimm32:$imm)),
- (SLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+ def : Pat<(shl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+ (SLL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
- def : Pat<(sra GR32:$val, (and GR32:$shift, uimm32:$imm)),
- (SRA GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+ def : Pat<(sra GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+ (SRA GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
- def : Pat<(srl GR32:$val, (and GR32:$shift, uimm32:$imm)),
- (SRL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+ def : Pat<(srl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+ (SRL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
- def : Pat<(shl GR64:$val, (and GR32:$shift, uimm32:$imm)),
- (SLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+ def : Pat<(shl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+ (SLLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
- def : Pat<(sra GR64:$val, (and GR32:$shift, uimm32:$imm)),
- (SRAG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+ def : Pat<(sra GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+ (SRAG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
- def : Pat<(srl GR64:$val, (and GR32:$shift, uimm32:$imm)),
- (SRLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+ def : Pat<(srl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+ (SRLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
- def : Pat<(rotl GR32:$val, (and GR32:$shift, uimm32:$imm)),
- (RLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+ def : Pat<(rotl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+ (RLL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
- def : Pat<(rotl GR64:$val, (and GR32:$shift, uimm32:$imm)),
- (RLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+ def : Pat<(rotl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+ (RLLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
}
// Peepholes for turning scalar operations into block operations.
diff --git a/lib/Target/SystemZ/SystemZMachineScheduler.cpp b/lib/Target/SystemZ/SystemZMachineScheduler.cpp
index fcbf4c4b5fe4..98e761ef87fe 100644
--- a/lib/Target/SystemZ/SystemZMachineScheduler.cpp
+++ b/lib/Target/SystemZ/SystemZMachineScheduler.cpp
@@ -129,7 +129,7 @@ SystemZPostRASchedStrategy::
SystemZPostRASchedStrategy(const MachineSchedContext *C)
: MLI(C->MLI),
TII(static_cast<const SystemZInstrInfo *>
- (C->MF->getSubtarget().getInstrInfo())),
+ (C->MF->getSubtarget().getInstrInfo())),
MBB(nullptr), HazardRec(nullptr) {
const TargetSubtargetInfo *ST = &C->MF->getSubtarget();
SchedModel.init(ST);
@@ -169,8 +169,7 @@ SUnit *SystemZPostRASchedStrategy::pickNode(bool &IsTopNode) {
return *Available.begin();
}
- // All nodes that are possible to schedule are stored by in the
- // Available set.
+ // All nodes that are possible to schedule are stored in the Available set.
LLVM_DEBUG(dbgs() << "** Available: "; Available.dump(*HazardRec););
Candidate Best;
diff --git a/lib/Target/SystemZ/SystemZMachineScheduler.h b/lib/Target/SystemZ/SystemZMachineScheduler.h
index cb0304825966..ab820e5d3e63 100644
--- a/lib/Target/SystemZ/SystemZMachineScheduler.h
+++ b/lib/Target/SystemZ/SystemZMachineScheduler.h
@@ -26,7 +26,7 @@
using namespace llvm;
namespace llvm {
-
+
/// A MachineSchedStrategy implementation for SystemZ post RA scheduling.
class SystemZPostRASchedStrategy : public MachineSchedStrategy {
@@ -37,7 +37,7 @@ class SystemZPostRASchedStrategy : public MachineSchedStrategy {
// non-scheduled instructions, so it would not always be possible to call
// DAG->getSchedClass(SU).
TargetSchedModel SchedModel;
-
+
/// A candidate during instruction evaluation.
struct Candidate {
SUnit *SU = nullptr;
diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td
index da682cb4e5ab..7bf32bf19a4a 100644
--- a/lib/Target/SystemZ/SystemZOperands.td
+++ b/lib/Target/SystemZ/SystemZOperands.td
@@ -357,6 +357,7 @@ def imm32zx16 : Immediate<i32, [{
}], UIMM16, "U16Imm">;
def imm32sx16trunc : Immediate<i32, [{}], SIMM16, "S16Imm">;
+def imm32zx16trunc : Immediate<i32, [{}], UIMM16, "U16Imm">;
// Full 32-bit immediates. we need both signed and unsigned versions
// because the assembler is picky. E.g. AFI requires signed operands
diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td
index 3cfe23aec417..5103867e2d9a 100644
--- a/lib/Target/SystemZ/SystemZOperators.td
+++ b/lib/Target/SystemZ/SystemZOperators.td
@@ -697,6 +697,16 @@ class storei<SDPatternOperator operator, SDPatternOperator store = store>
: PatFrag<(ops node:$addr),
(store (operator), node:$addr)>;
+// Create a shift operator that optionally ignores an AND of the
+// shift count with an immediate if the bottom 6 bits are all set.
+def imm32bottom6set : PatLeaf<(i32 imm), [{
+ return (N->getZExtValue() & 0x3f) == 0x3f;
+}]>;
+class shiftop<SDPatternOperator operator>
+ : PatFrags<(ops node:$val, node:$count),
+ [(operator node:$val, node:$count),
+ (operator node:$val, (and node:$count, imm32bottom6set))]>;
+
// Vector representation of all-zeros and all-ones.
def z_vzero : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 0))))>;
def z_vones : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 65535))))>;
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index e2a3efda5c5e..c5cdc22f2099 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -329,7 +329,7 @@ bool SystemZTTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) {
}
int SystemZTTIImpl::getArithmeticInstrCost(
- unsigned Opcode, Type *Ty,
+ unsigned Opcode, Type *Ty,
TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info,
TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo,
@@ -469,7 +469,7 @@ int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
assert (Tp->isVectorTy());
assert (ST->hasVector() && "getShuffleCost() called.");
unsigned NumVectors = getNumberOfParts(Tp);
-
+
// TODO: Since fp32 is expanded, the shuffle cost should always be 0.
// FP128 values are always in scalar registers, so there is no work
@@ -647,7 +647,7 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
return Cost;
}
}
-
+
if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP ||
Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI) {
// TODO: Fix base implementation which could simplify things a bit here
@@ -704,7 +704,7 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP)
return (SrcScalarBits >= 32 ? 1 : 2 /*i8/i16 extend*/);
-
+
if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
Src->isIntegerTy(1)) {
// This should be extension of a compare i1 result, which is done with
diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp
index 42d92622d6c8..f23ea72eb513 100644
--- a/lib/Target/Target.cpp
+++ b/lib/Target/Target.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the common infrastructure (including C bindings) for
+// This file implements the common infrastructure (including C bindings) for
// libLLVMTarget.a, which implements target information.
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index 907ecf46e8ff..6bcf60fafc3e 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -92,10 +92,10 @@ static bool IsNullTerminatedString(const Constant *C) {
if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(C)) {
unsigned NumElts = CDS->getNumElements();
assert(NumElts != 0 && "Can't have an empty CDS");
-
+
if (CDS->getElementAsInteger(NumElts-1) != 0)
return false; // Not null terminated.
-
+
// Verify that the null doesn't occur anywhere else in the string.
for (unsigned i = 0; i != NumElts-1; ++i)
if (CDS->getElementAsInteger(i) == 0)
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index b84c2d31a63e..fafbed0bd935 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -2603,11 +2603,11 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
bool HadVerifyError = false;
// Append default arguments to "ins[bwld]"
- if (Name.startswith("ins") &&
+ if (Name.startswith("ins") &&
(Operands.size() == 1 || Operands.size() == 3) &&
(Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
Name == "ins")) {
-
+
AddDefaultSrcDestOperands(TmpOperands,
X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
DefaultMemDIOperand(NameLoc));
@@ -2615,7 +2615,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
}
// Append default arguments to "outs[bwld]"
- if (Name.startswith("outs") &&
+ if (Name.startswith("outs") &&
(Operands.size() == 1 || Operands.size() == 3) &&
(Name == "outsb" || Name == "outsw" || Name == "outsl" ||
Name == "outsd" || Name == "outs")) {
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index 82e82fe1efd9..0e861d5ddbc9 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -92,7 +92,7 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
// the hex value of the immediate operand when it isn't in the range
// [-256,255].
if (CommentStream && !HasCustomInstComment && (Imm > 255 || Imm < -256)) {
- // Don't print unnecessary hex sign bits.
+ // Don't print unnecessary hex sign bits.
if (Imm == (int16_t)(Imm))
*CommentStream << format("imm = 0x%" PRIX16 "\n", (uint16_t)Imm);
else if (Imm == (int32_t)(Imm))
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index d030f26d98de..f1d15e66918b 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -307,10 +307,84 @@ class X86MCInstrAnalysis : public MCInstrAnalysis {
public:
X86MCInstrAnalysis(const MCInstrInfo *MCII) : MCInstrAnalysis(MCII) {}
+ bool isDependencyBreaking(const MCSubtargetInfo &STI,
+ const MCInst &Inst) const override;
bool clearsSuperRegisters(const MCRegisterInfo &MRI, const MCInst &Inst,
APInt &Mask) const override;
};
+bool X86MCInstrAnalysis::isDependencyBreaking(const MCSubtargetInfo &STI,
+ const MCInst &Inst) const {
+ if (STI.getCPU() == "btver2") {
+ // Reference: Agner Fog's microarchitecture.pdf - Section 20 "AMD Bobcat and
+ // Jaguar pipeline", subsection 8 "Dependency-breaking instructions".
+ switch (Inst.getOpcode()) {
+ default:
+ return false;
+ case X86::SUB32rr:
+ case X86::SUB64rr:
+ case X86::SBB32rr:
+ case X86::SBB64rr:
+ case X86::XOR32rr:
+ case X86::XOR64rr:
+ case X86::XORPSrr:
+ case X86::XORPDrr:
+ case X86::VXORPSrr:
+ case X86::VXORPDrr:
+ case X86::ANDNPSrr:
+ case X86::VANDNPSrr:
+ case X86::ANDNPDrr:
+ case X86::VANDNPDrr:
+ case X86::PXORrr:
+ case X86::VPXORrr:
+ case X86::PANDNrr:
+ case X86::VPANDNrr:
+ case X86::PSUBBrr:
+ case X86::PSUBWrr:
+ case X86::PSUBDrr:
+ case X86::PSUBQrr:
+ case X86::VPSUBBrr:
+ case X86::VPSUBWrr:
+ case X86::VPSUBDrr:
+ case X86::VPSUBQrr:
+ case X86::PCMPEQBrr:
+ case X86::PCMPEQWrr:
+ case X86::PCMPEQDrr:
+ case X86::PCMPEQQrr:
+ case X86::VPCMPEQBrr:
+ case X86::VPCMPEQWrr:
+ case X86::VPCMPEQDrr:
+ case X86::VPCMPEQQrr:
+ case X86::PCMPGTBrr:
+ case X86::PCMPGTWrr:
+ case X86::PCMPGTDrr:
+ case X86::PCMPGTQrr:
+ case X86::VPCMPGTBrr:
+ case X86::VPCMPGTWrr:
+ case X86::VPCMPGTDrr:
+ case X86::VPCMPGTQrr:
+ case X86::MMX_PXORirr:
+ case X86::MMX_PANDNirr:
+ case X86::MMX_PSUBBirr:
+ case X86::MMX_PSUBDirr:
+ case X86::MMX_PSUBQirr:
+ case X86::MMX_PSUBWirr:
+ case X86::MMX_PCMPGTBirr:
+ case X86::MMX_PCMPGTDirr:
+ case X86::MMX_PCMPGTWirr:
+ case X86::MMX_PCMPEQBirr:
+ case X86::MMX_PCMPEQDirr:
+ case X86::MMX_PCMPEQWirr:
+ return Inst.getOperand(1).getReg() == Inst.getOperand(2).getReg();
+ case X86::CMP32rr:
+ case X86::CMP64rr:
+ return Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg();
+ }
+ }
+
+ return false;
+}
+
bool X86MCInstrAnalysis::clearsSuperRegisters(const MCRegisterInfo &MRI,
const MCInst &Inst,
APInt &Mask) const {
diff --git a/lib/Target/X86/X86CallingConv.h b/lib/Target/X86/X86CallingConv.h
index c49a6838fa44..d0fcbd313312 100644
--- a/lib/Target/X86/X86CallingConv.h
+++ b/lib/Target/X86/X86CallingConv.h
@@ -66,7 +66,7 @@ inline bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT,
// not to split i64 and double between a register and stack
static const MCPhysReg RegList[] = {X86::EAX, X86::EDX, X86::ECX};
static const unsigned NumRegs = sizeof(RegList)/sizeof(RegList[0]);
-
+
SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
// If this is the first part of an double/i64/i128, or if we're already
diff --git a/lib/Target/X86/X86CmovConversion.cpp b/lib/Target/X86/X86CmovConversion.cpp
index f73455cc31b8..1c5f110d8c60 100644
--- a/lib/Target/X86/X86CmovConversion.cpp
+++ b/lib/Target/X86/X86CmovConversion.cpp
@@ -622,7 +622,7 @@ void X86CmovConverterPass::convertCmovInstsToBranches(
// If the CMOV group is not packed, e.g., there are debug instructions between
// first CMOV and last CMOV, then pack the group and make the CMOV instruction
- // consecutive by moving the debug instructions to after the last CMOV.
+ // consecutive by moving the debug instructions to after the last CMOV.
packCmovGroup(Group.front(), Group.back());
// To convert a CMOVcc instruction, we actually have to insert the diamond
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index de8b40f28a86..35a15577fe09 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -1195,7 +1195,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
if (Ret->getNumOperands() > 0) {
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
+ GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
@@ -2649,7 +2649,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(X86::VMOVPDI2DIrr), ResultReg)
.addReg(InputReg, RegState::Kill);
-
+
// The result value is in the lower 16-bits of ResultReg.
unsigned RegIdx = X86::sub_16bit;
ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, true, RegIdx);
@@ -3687,7 +3687,7 @@ X86FastISel::fastSelectInstruction(const Instruction *I) {
unsigned Reg = getRegForValue(I->getOperand(0));
if (Reg == 0)
return false;
-
+
// No instruction is needed for conversion. Reuse the register used by
// the fist operand.
updateValueMap(I, Reg);
diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp
index d85389a0a7f1..f3f7f6a37360 100644
--- a/lib/Target/X86/X86FixupLEAs.cpp
+++ b/lib/Target/X86/X86FixupLEAs.cpp
@@ -578,7 +578,7 @@ bool FixupLEAPass::processBasicBlock(MachineFunction &MF,
continue;
if (OptLEA) {
- if (MF.getSubtarget<X86Subtarget>().isSLM())
+ if (MF.getSubtarget<X86Subtarget>().slowLEA())
processInstructionForSLM(I, MFI);
else {
diff --git a/lib/Target/X86/X86FlagsCopyLowering.cpp b/lib/Target/X86/X86FlagsCopyLowering.cpp
index 1ba08d39c595..c17c51a7aeac 100644
--- a/lib/Target/X86/X86FlagsCopyLowering.cpp
+++ b/lib/Target/X86/X86FlagsCopyLowering.cpp
@@ -730,9 +730,12 @@ CondRegArray X86FlagsCopyLoweringPass::collectCondsInRegs(
for (MachineInstr &MI :
llvm::reverse(llvm::make_range(MBB.begin(), TestPos))) {
X86::CondCode Cond = X86::getCondFromSETOpc(MI.getOpcode());
- if (Cond != X86::COND_INVALID && MI.getOperand(0).isReg() &&
- TRI->isVirtualRegister(MI.getOperand(0).getReg()))
+ if (Cond != X86::COND_INVALID && !MI.mayStore() && MI.getOperand(0).isReg() &&
+ TRI->isVirtualRegister(MI.getOperand(0).getReg())) {
+ assert(MI.getOperand(0).isDef() &&
+ "A non-storing SETcc should always define a register!");
CondRegs[Cond] = MI.getOperand(0).getReg();
+ }
// Stop scanning when we see the first definition of the EFLAGS as prior to
// this we would potentially capture the wrong flag state.
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index ae748901164a..f330acff61a1 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -347,12 +347,12 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
LiveBundle &Bundle =
LiveBundles[Bundles->getBundle(Entry->getNumber(), false)];
-
+
// In regcall convention, some FP registers may not be passed through
// the stack, so they will need to be assigned to the stack first
if ((Entry->getParent()->getFunction().getCallingConv() ==
CallingConv::X86_RegCall) && (Bundle.Mask && !Bundle.FixCount)) {
- // In the register calling convention, up to one FP argument could be
+ // In the register calling convention, up to one FP argument could be
// saved in the first FP register.
// If bundle.mask is non-zero and Bundle.FixCount is zero, it means
// that the FP registers contain arguments.
@@ -991,7 +991,7 @@ void FPS::handleCall(MachineBasicBlock::iterator &I) {
assert(STReturns == 0 || (isMask_32(STReturns) && N <= 2));
// Reset the FP Stack - It is required because of possible leftovers from
- // passed arguments. The caller should assume that the FP stack is
+ // passed arguments. The caller should assume that the FP stack is
// returned empty (unless the callee returns values on FP stack).
while (StackTop > 0)
popReg();
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index a257ec41f75b..e207c343fac8 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -68,7 +68,7 @@ X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
// needsFrameIndexResolution - Do we need to perform FI resolution for
// this function. Normally, this is required only when the function
// has any stack objects. However, FI resolution actually has another job,
-// not apparent from the title - it resolves callframesetup/destroy
+// not apparent from the title - it resolves callframesetup/destroy
// that were not simplified earlier.
// So, this is required for x86 functions that have push sequences even
// when there are no stack objects.
@@ -607,8 +607,7 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
int64_t RCXShadowSlot = 0;
int64_t RDXShadowSlot = 0;
- // If inlining in the prolog, save RCX and RDX.
- // Future optimization: don't save or restore if not live in.
+ // If inlining in the prolog, save RCX and RDX.
if (InProlog) {
// Compute the offsets. We need to account for things already
// pushed onto the stack at this point: return address, frame
@@ -616,15 +615,30 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize();
const bool HasFP = hasFP(MF);
- RCXShadowSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
- RDXShadowSlot = RCXShadowSlot + 8;
- // Emit the saves.
- addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
- RCXShadowSlot)
- .addReg(X86::RCX);
- addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
- RDXShadowSlot)
- .addReg(X86::RDX);
+
+ // Check if we need to spill RCX and/or RDX.
+ // Here we assume that no earlier prologue instruction changes RCX and/or
+ // RDX, so checking the block live-ins is enough.
+ const bool IsRCXLiveIn = MBB.isLiveIn(X86::RCX);
+ const bool IsRDXLiveIn = MBB.isLiveIn(X86::RDX);
+ int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
+ // Assign the initial slot to both registers, then change RDX's slot if both
+ // need to be spilled.
+ if (IsRCXLiveIn)
+ RCXShadowSlot = InitSlot;
+ if (IsRDXLiveIn)
+ RDXShadowSlot = InitSlot;
+ if (IsRDXLiveIn && IsRCXLiveIn)
+ RDXShadowSlot += 8;
+ // Emit the saves if needed.
+ if (IsRCXLiveIn)
+ addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
+ RCXShadowSlot)
+ .addReg(X86::RCX);
+ if (IsRDXLiveIn)
+ addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
+ RDXShadowSlot)
+ .addReg(X86::RDX);
} else {
// Not in the prolog. Copy RAX to a virtual reg.
BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX);
@@ -661,6 +675,7 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
BuildMI(&MBB, DL, TII.get(X86::JAE_1)).addMBB(ContinueMBB);
// Add code to roundMBB to round the final stack pointer to a page boundary.
+ RoundMBB->addLiveIn(FinalReg);
BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg)
.addReg(FinalReg)
.addImm(PageMask);
@@ -677,6 +692,7 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
.addMBB(LoopMBB);
}
+ LoopMBB->addLiveIn(JoinReg);
addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg,
false, -PageSize);
@@ -688,6 +704,8 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
.addImm(0)
.addReg(0)
.addImm(0);
+
+ LoopMBB->addLiveIn(RoundedReg);
BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr))
.addReg(RoundedReg)
.addReg(ProbeReg);
@@ -697,16 +715,19 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
// If in prolog, restore RDX and RCX.
if (InProlog) {
- addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::MOV64rm),
- X86::RCX),
- X86::RSP, false, RCXShadowSlot);
- addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::MOV64rm),
- X86::RDX),
- X86::RSP, false, RDXShadowSlot);
+ if (RCXShadowSlot) // It means we spilled RCX in the prologue.
+ addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
+ TII.get(X86::MOV64rm), X86::RCX),
+ X86::RSP, false, RCXShadowSlot);
+ if (RDXShadowSlot) // It means we spilled RDX in the prologue.
+ addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
+ TII.get(X86::MOV64rm), X86::RDX),
+ X86::RSP, false, RDXShadowSlot);
}
// Now that the probing is done, add code to continueMBB to update
// the stack pointer for real.
+ ContinueMBB->addLiveIn(SizeReg);
BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
.addReg(X86::RSP)
.addReg(SizeReg);
@@ -734,8 +755,6 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
CMBBI->setFlag(MachineInstr::FrameSetup);
}
}
-
- // Possible TODO: physreg liveness for InProlog case.
}
void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
@@ -2694,7 +2713,7 @@ bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
Regs[FoundRegs++] = Regs[0];
for (int i = 0; i < NumPops; ++i)
- BuildMI(MBB, MBBI, DL,
+ BuildMI(MBB, MBBI, DL,
TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r), Regs[i]);
return true;
@@ -2984,7 +3003,7 @@ struct X86FrameSortingComparator {
// in general. Something to keep in mind, though.
if (DensityAScaled == DensityBScaled)
return A.ObjectAlignment < B.ObjectAlignment;
-
+
return DensityAScaled < DensityBScaled;
}
};
@@ -3020,7 +3039,7 @@ void X86FrameLowering::orderFrameObjects(
if (ObjectSize == 0)
// Variable size. Just use 4.
SortingObjects[Obj].ObjectSize = 4;
- else
+ else
SortingObjects[Obj].ObjectSize = ObjectSize;
}
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 7dcdb7967058..2820004cfc6d 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1800,17 +1800,19 @@ X86TargetLowering::getPreferredVectorAction(EVT VT) const {
}
MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
EVT VT) const {
if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
return MVT::v32i8;
- return TargetLowering::getRegisterTypeForCallingConv(Context, VT);
+ return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
}
unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
EVT VT) const {
if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
return 1;
- return TargetLowering::getNumRegistersForCallingConv(Context, VT);
+ return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}
EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
@@ -23366,7 +23368,7 @@ static SDValue convertShiftLeftToScale(SDValue Amt, const SDLoc &dl,
return DAG.getBuildVector(VT, dl, Elts);
}
- // If the target doesn't support variable shifts, use either FP conversion
+ // If the target doesn't support variable shifts, use either FP conversion
// or integer multiplication to avoid shifting each element individually.
if (VT == MVT::v4i32) {
Amt = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, dl, VT));
@@ -23509,6 +23511,24 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
if (SDValue Scale = convertShiftLeftToScale(Amt, dl, Subtarget, DAG))
return DAG.getNode(ISD::MUL, dl, VT, R, Scale);
+ // Constant ISD::SRL can be performed efficiently on vXi8/vXi16 vectors as we
+ // can replace with ISD::MULHU, creating scale factor from (NumEltBits - Amt).
+ // TODO: Improve support for the shift by zero special case.
+ if (Op.getOpcode() == ISD::SRL && ConstantAmt &&
+ ((Subtarget.hasSSE41() && VT == MVT::v8i16) ||
+ DAG.isKnownNeverZero(Amt)) &&
+ (VT == MVT::v16i8 || VT == MVT::v8i16 ||
+ ((VT == MVT::v32i8 || VT == MVT::v16i16) && Subtarget.hasInt256()))) {
+ SDValue EltBits = DAG.getConstant(VT.getScalarSizeInBits(), dl, VT);
+ SDValue RAmt = DAG.getNode(ISD::SUB, dl, VT, EltBits, Amt);
+ if (SDValue Scale = convertShiftLeftToScale(RAmt, dl, Subtarget, DAG)) {
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue ZAmt = DAG.getSetCC(dl, VT, Amt, Zero, ISD::SETEQ);
+ SDValue Res = DAG.getNode(ISD::MULHU, dl, VT, R, Scale);
+ return DAG.getSelect(dl, VT, ZAmt, R, Res);
+ }
+ }
+
// v4i32 Non Uniform Shifts.
// If the shift amount is constant we can shift each lane using the SSE2
// immediate shifts, else we need to zero-extend each lane to the lower i64
@@ -33425,33 +33445,32 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
}
}
- // Handle (CMOV C-1, (ADD (CTTZ X), C), (X != 0)) ->
- // (ADD (CMOV (CTTZ X), -1, (X != 0)), C) or
- // (CMOV (ADD (CTTZ X), C), C-1, (X == 0)) ->
- // (ADD (CMOV C-1, (CTTZ X), (X == 0)), C)
- if (CC == X86::COND_NE || CC == X86::COND_E) {
- auto *Cnst = CC == X86::COND_E ? dyn_cast<ConstantSDNode>(TrueOp)
- : dyn_cast<ConstantSDNode>(FalseOp);
- SDValue Add = CC == X86::COND_E ? FalseOp : TrueOp;
-
- if (Cnst && Add.getOpcode() == ISD::ADD && Add.hasOneUse()) {
- auto *AddOp1 = dyn_cast<ConstantSDNode>(Add.getOperand(1));
- SDValue AddOp2 = Add.getOperand(0);
- if (AddOp1 && (AddOp2.getOpcode() == ISD::CTTZ_ZERO_UNDEF ||
- AddOp2.getOpcode() == ISD::CTTZ)) {
- APInt Diff = Cnst->getAPIntValue() - AddOp1->getAPIntValue();
- if (CC == X86::COND_E) {
- Add = DAG.getNode(X86ISD::CMOV, DL, Add.getValueType(), AddOp2,
- DAG.getConstant(Diff, DL, Add.getValueType()),
- DAG.getConstant(CC, DL, MVT::i8), Cond);
- } else {
- Add = DAG.getNode(X86ISD::CMOV, DL, Add.getValueType(),
- DAG.getConstant(Diff, DL, Add.getValueType()),
- AddOp2, DAG.getConstant(CC, DL, MVT::i8), Cond);
- }
- return DAG.getNode(X86ISD::ADD, DL, Add.getValueType(), Add,
- SDValue(AddOp1, 0));
- }
+ // Fold (CMOV C1, (ADD (CTTZ X), C2), (X != 0)) ->
+ // (ADD (CMOV C1-C2, (CTTZ X), (X != 0)), C2)
+ // Or (CMOV (ADD (CTTZ X), C2), C1, (X == 0)) ->
+ // (ADD (CMOV (CTTZ X), C1-C2, (X == 0)), C2)
+ if ((CC == X86::COND_NE || CC == X86::COND_E) &&
+ Cond.getOpcode() == X86ISD::CMP && isNullConstant(Cond.getOperand(1))) {
+ SDValue Add = TrueOp;
+ SDValue Const = FalseOp;
+ // Canonicalize the condition code for easier matching and output.
+ if (CC == X86::COND_E) {
+ std::swap(Add, Const);
+ CC = X86::COND_NE;
+ }
+
+ // Ok, now make sure that Add is (add (cttz X), C2) and Const is a constant.
+ if (isa<ConstantSDNode>(Const) && Add.getOpcode() == ISD::ADD &&
+ Add.hasOneUse() && isa<ConstantSDNode>(Add.getOperand(1)) &&
+ (Add.getOperand(0).getOpcode() == ISD::CTTZ_ZERO_UNDEF ||
+ Add.getOperand(0).getOpcode() == ISD::CTTZ) &&
+ Add.getOperand(0).getOperand(0) == Cond.getOperand(0)) {
+ EVT VT = N->getValueType(0);
+ // This should constant fold.
+ SDValue Diff = DAG.getNode(ISD::SUB, DL, VT, Const, Add.getOperand(1));
+ SDValue CMov = DAG.getNode(X86ISD::CMOV, DL, VT, Diff, Add.getOperand(0),
+ DAG.getConstant(CC, DL, MVT::i8), Cond);
+ return DAG.getNode(ISD::ADD, DL, VT, CMov, Add.getOperand(1));
}
}
@@ -33873,31 +33892,42 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!C)
return SDValue();
- uint64_t MulAmt = C->getZExtValue();
- if (isPowerOf2_64(MulAmt))
+ if (isPowerOf2_64(C->getZExtValue()))
return SDValue();
+ int64_t SignMulAmt = C->getSExtValue();
+ assert(SignMulAmt != INT64_MIN && "Int min should have been handled!");
+ uint64_t AbsMulAmt = SignMulAmt < 0 ? -SignMulAmt : SignMulAmt;
+
SDLoc DL(N);
- if (MulAmt == 3 || MulAmt == 5 || MulAmt == 9)
- return DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
- N->getOperand(1));
+ if (AbsMulAmt == 3 || AbsMulAmt == 5 || AbsMulAmt == 9) {
+ SDValue NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
+ DAG.getConstant(AbsMulAmt, DL, VT));
+ if (SignMulAmt < 0)
+ NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ NewMul);
+
+ return NewMul;
+ }
uint64_t MulAmt1 = 0;
uint64_t MulAmt2 = 0;
- if ((MulAmt % 9) == 0) {
+ if ((AbsMulAmt % 9) == 0) {
MulAmt1 = 9;
- MulAmt2 = MulAmt / 9;
- } else if ((MulAmt % 5) == 0) {
+ MulAmt2 = AbsMulAmt / 9;
+ } else if ((AbsMulAmt % 5) == 0) {
MulAmt1 = 5;
- MulAmt2 = MulAmt / 5;
- } else if ((MulAmt % 3) == 0) {
+ MulAmt2 = AbsMulAmt / 5;
+ } else if ((AbsMulAmt % 3) == 0) {
MulAmt1 = 3;
- MulAmt2 = MulAmt / 3;
+ MulAmt2 = AbsMulAmt / 3;
}
SDValue NewMul;
+ // For negative multiply amounts, only allow MulAmt2 to be a power of 2.
if (MulAmt2 &&
- (isPowerOf2_64(MulAmt2) || MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)){
+ (isPowerOf2_64(MulAmt2) ||
+ (SignMulAmt >= 0 && (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)))) {
if (isPowerOf2_64(MulAmt2) &&
!(N->hasOneUse() && N->use_begin()->getOpcode() == ISD::ADD))
@@ -33919,17 +33949,19 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
else
NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul,
DAG.getConstant(MulAmt2, DL, VT));
+
+ // Negate the result.
+ if (SignMulAmt < 0)
+ NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ NewMul);
} else if (!Subtarget.slowLEA())
- NewMul = combineMulSpecial(MulAmt, N, DAG, VT, DL);
+ NewMul = combineMulSpecial(C->getZExtValue(), N, DAG, VT, DL);
if (!NewMul) {
- assert(MulAmt != 0 &&
- MulAmt != (VT == MVT::i64 ? UINT64_MAX : UINT32_MAX) &&
+ assert(C->getZExtValue() != 0 &&
+ C->getZExtValue() != (VT == MVT::i64 ? UINT64_MAX : UINT32_MAX) &&
"Both cases that could cause potential overflows should have "
"already been handled.");
- int64_t SignMulAmt = C->getSExtValue();
- assert(SignMulAmt != INT64_MIN && "Int min should have been handled!");
- uint64_t AbsMulAmt = SignMulAmt < 0 ? -SignMulAmt : SignMulAmt;
if (isPowerOf2_64(AbsMulAmt - 1)) {
// (mul x, 2^N + 1) => (add (shl x, N), x)
NewMul = DAG.getNode(
@@ -36738,6 +36770,145 @@ static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL,
return DAG.getNode(Opc, DL, VT, LHS, RHS);
}
+// Attempt to match PMADDUBSW, which multiplies corresponding unsigned bytes
+// from one vector with signed bytes from another vector, adds together
+// adjacent pairs of 16-bit products, and saturates the result before
+// truncating to 16-bits.
+//
+// Which looks something like this:
+// (i16 (ssat (add (mul (zext (even elts (i8 A))), (sext (even elts (i8 B)))),
+// (mul (zext (odd elts (i8 A)), (sext (odd elts (i8 B))))))))
+static SDValue detectPMADDUBSW(SDValue In, EVT VT, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget,
+ const SDLoc &DL) {
+ if (!VT.isVector() || !Subtarget.hasSSSE3())
+ return SDValue();
+
+ unsigned NumElems = VT.getVectorNumElements();
+ EVT ScalarVT = VT.getVectorElementType();
+ if (ScalarVT != MVT::i16 || NumElems < 8 || !isPowerOf2_32(NumElems))
+ return SDValue();
+
+ SDValue SSatVal = detectSSatPattern(In, VT);
+ if (!SSatVal || SSatVal.getOpcode() != ISD::ADD)
+ return SDValue();
+
+ // Ok this is a signed saturation of an ADD. See if this ADD is adding pairs
+ // of multiplies from even/odd elements.
+ SDValue N0 = SSatVal.getOperand(0);
+ SDValue N1 = SSatVal.getOperand(1);
+
+ if (N0.getOpcode() != ISD::MUL || N1.getOpcode() != ISD::MUL)
+ return SDValue();
+
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ SDValue N10 = N1.getOperand(0);
+ SDValue N11 = N1.getOperand(1);
+
+ // TODO: Handle constant vectors and use knownbits/computenumsignbits?
+ // Canonicalize zero_extend to LHS.
+ if (N01.getOpcode() == ISD::ZERO_EXTEND)
+ std::swap(N00, N01);
+ if (N11.getOpcode() == ISD::ZERO_EXTEND)
+ std::swap(N10, N11);
+
+ // Ensure we have a zero_extend and a sign_extend.
+ if (N00.getOpcode() != ISD::ZERO_EXTEND ||
+ N01.getOpcode() != ISD::SIGN_EXTEND ||
+ N10.getOpcode() != ISD::ZERO_EXTEND ||
+ N11.getOpcode() != ISD::SIGN_EXTEND)
+ return SDValue();
+
+ // Peek through the extends.
+ N00 = N00.getOperand(0);
+ N01 = N01.getOperand(0);
+ N10 = N10.getOperand(0);
+ N11 = N11.getOperand(0);
+
+ // Ensure the extend is from vXi8.
+ if (N00.getValueType().getVectorElementType() != MVT::i8 ||
+ N01.getValueType().getVectorElementType() != MVT::i8 ||
+ N10.getValueType().getVectorElementType() != MVT::i8 ||
+ N11.getValueType().getVectorElementType() != MVT::i8)
+ return SDValue();
+
+ // All inputs should be build_vectors.
+ if (N00.getOpcode() != ISD::BUILD_VECTOR ||
+ N01.getOpcode() != ISD::BUILD_VECTOR ||
+ N10.getOpcode() != ISD::BUILD_VECTOR ||
+ N11.getOpcode() != ISD::BUILD_VECTOR)
+ return SDValue();
+
+ // N00/N10 are zero extended. N01/N11 are sign extended.
+
+ // For each element, we need to ensure we have an odd element from one vector
+ // multiplied by the odd element of another vector and the even element from
+ // one of the same vectors being multiplied by the even element from the
+ // other vector. So we need to make sure for each element i, this operator
+ // is being performed:
+ // A[2 * i] * B[2 * i] + A[2 * i + 1] * B[2 * i + 1]
+ SDValue ZExtIn, SExtIn;
+ for (unsigned i = 0; i != NumElems; ++i) {
+ SDValue N00Elt = N00.getOperand(i);
+ SDValue N01Elt = N01.getOperand(i);
+ SDValue N10Elt = N10.getOperand(i);
+ SDValue N11Elt = N11.getOperand(i);
+ // TODO: Be more tolerant to undefs.
+ if (N00Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ N01Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ N10Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ N11Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+ auto *ConstN00Elt = dyn_cast<ConstantSDNode>(N00Elt.getOperand(1));
+ auto *ConstN01Elt = dyn_cast<ConstantSDNode>(N01Elt.getOperand(1));
+ auto *ConstN10Elt = dyn_cast<ConstantSDNode>(N10Elt.getOperand(1));
+ auto *ConstN11Elt = dyn_cast<ConstantSDNode>(N11Elt.getOperand(1));
+ if (!ConstN00Elt || !ConstN01Elt || !ConstN10Elt || !ConstN11Elt)
+ return SDValue();
+ unsigned IdxN00 = ConstN00Elt->getZExtValue();
+ unsigned IdxN01 = ConstN01Elt->getZExtValue();
+ unsigned IdxN10 = ConstN10Elt->getZExtValue();
+ unsigned IdxN11 = ConstN11Elt->getZExtValue();
+ // Add is commutative so indices can be reordered.
+ if (IdxN00 > IdxN10) {
+ std::swap(IdxN00, IdxN10);
+ std::swap(IdxN01, IdxN11);
+ }
+ // N0 indices be the even element. N1 indices must be the next odd element.
+ if (IdxN00 != 2 * i || IdxN10 != 2 * i + 1 ||
+ IdxN01 != 2 * i || IdxN11 != 2 * i + 1)
+ return SDValue();
+ SDValue N00In = N00Elt.getOperand(0);
+ SDValue N01In = N01Elt.getOperand(0);
+ SDValue N10In = N10Elt.getOperand(0);
+ SDValue N11In = N11Elt.getOperand(0);
+ // First time we find an input capture it.
+ if (!ZExtIn) {
+ ZExtIn = N00In;
+ SExtIn = N01In;
+ }
+ if (ZExtIn != N00In || SExtIn != N01In ||
+ ZExtIn != N10In || SExtIn != N11In)
+ return SDValue();
+ }
+
+ auto PMADDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
+ ArrayRef<SDValue> Ops) {
+ // Shrink by adding truncate nodes and let DAGCombine fold with the
+ // sources.
+ EVT InVT = Ops[0].getValueType();
+ assert(InVT.getScalarType() == MVT::i8 &&
+ "Unexpected scalar element type");
+ assert(InVT == Ops[1].getValueType() && "Operands' types mismatch");
+ EVT ResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
+ InVT.getVectorNumElements() / 2);
+ return DAG.getNode(X86ISD::VPMADDUBSW, DL, ResVT, Ops[0], Ops[1]);
+ };
+ return SplitOpsAndApply(DAG, Subtarget, DL, VT, { ZExtIn, SExtIn },
+ PMADDBuilder);
+}
+
static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
@@ -36752,6 +36923,10 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))
return Avg;
+ // Try to detect PMADD
+ if (SDValue PMAdd = detectPMADDUBSW(Src, VT, DAG, Subtarget, DL))
+ return PMAdd;
+
// Try to combine truncation with signed/unsigned saturation.
if (SDValue Val = combineTruncateWithSat(Src, VT, DL, DAG, Subtarget))
return Val;
@@ -36793,38 +36968,14 @@ static SDValue isFNEG(SDNode *N) {
if (!Op1.getValueType().isFloatingPoint())
return SDValue();
- SDValue Op0 = peekThroughBitcasts(Op.getOperand(0));
-
- unsigned EltBits = Op1.getScalarValueSizeInBits();
- auto isSignMask = [&](const ConstantFP *C) {
- return C->getValueAPF().bitcastToAPInt() == APInt::getSignMask(EltBits);
- };
-
- // There is more than one way to represent the same constant on
- // the different X86 targets. The type of the node may also depend on size.
- // - load scalar value and broadcast
- // - BUILD_VECTOR node
- // - load from a constant pool.
- // We check all variants here.
- if (Op1.getOpcode() == X86ISD::VBROADCAST) {
- if (auto *C = getTargetConstantFromNode(Op1.getOperand(0)))
- if (isSignMask(cast<ConstantFP>(C)))
- return Op0;
-
- } else if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op1)) {
- if (ConstantFPSDNode *CN = BV->getConstantFPSplatNode())
- if (isSignMask(CN->getConstantFPValue()))
- return Op0;
+ // Extract constant bits and see if they are all sign bit masks.
+ APInt UndefElts;
+ SmallVector<APInt, 16> EltBits;
+ if (getTargetConstantBitsFromNode(Op1, Op1.getScalarValueSizeInBits(),
+ UndefElts, EltBits, false, false))
+ if (llvm::all_of(EltBits, [](APInt &I) { return I.isSignMask(); }))
+ return peekThroughBitcasts(Op.getOperand(0));
- } else if (auto *C = getTargetConstantFromNode(Op1)) {
- if (C->getType()->isVectorTy()) {
- if (auto *SplatV = C->getSplatValue())
- if (isSignMask(cast<ConstantFP>(SplatV)))
- return Op0;
- } else if (auto *FPConst = dyn_cast<ConstantFP>(C))
- if (isSignMask(FPConst))
- return Op0;
- }
return SDValue();
}
@@ -37777,8 +37928,7 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
// Look through extract_vector_elts. If it comes from an FNEG, create a
// new extract from the FNEG input.
if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- isa<ConstantSDNode>(V.getOperand(1)) &&
- cast<ConstantSDNode>(V.getOperand(1))->getZExtValue() == 0) {
+ isNullConstant(V.getOperand(1))) {
if (SDValue NegVal = isFNEG(V.getOperand(0).getNode())) {
NegVal = DAG.getBitcast(V.getOperand(0).getValueType(), NegVal);
V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V), V.getValueType(),
@@ -38896,7 +39046,7 @@ static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDValue N0, SDValue N1,
std::swap(IdxN00, IdxN10);
std::swap(IdxN01, IdxN11);
}
- // N0 indices be the even elemtn. N1 indices must be the next odd element.
+ // N0 indices be the even element. N1 indices must be the next odd element.
if (IdxN00 != 2 * i || IdxN10 != 2 * i + 1 ||
IdxN01 != 2 * i || IdxN11 != 2 * i + 1)
return SDValue();
@@ -39322,8 +39472,7 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
if ((IdxVal == OpVT.getVectorNumElements() / 2) &&
Vec.getOpcode() == ISD::INSERT_SUBVECTOR &&
OpVT.getSizeInBits() == SubVecVT.getSizeInBits() * 2) {
- auto *Idx2 = dyn_cast<ConstantSDNode>(Vec.getOperand(2));
- if (Idx2 && Idx2->getZExtValue() == 0) {
+ if (isNullConstant(Vec.getOperand(2))) {
SDValue SubVec2 = Vec.getOperand(1);
// If needed, look through bitcasts to get to the load.
if (auto *FirstLd = dyn_cast<LoadSDNode>(peekThroughBitcasts(SubVec2))) {
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 32215b170a8c..ff5006d208e5 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -1097,10 +1097,11 @@ namespace llvm {
/// Customize the preferred legalization strategy for certain types.
LegalizeTypeAction getPreferredVectorAction(EVT VT) const override;
- MVT getRegisterTypeForCallingConv(LLVMContext &Context,
+ MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
EVT VT) const override;
unsigned getNumRegistersForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
EVT VT) const override;
bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
@@ -1125,8 +1126,8 @@ namespace llvm {
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
unsigned Factor) const override;
- SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
- SDValue Addr, SelectionDAG &DAG)
+ SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
+ SDValue Addr, SelectionDAG &DAG)
const override;
protected:
diff --git a/lib/Target/X86/X86InstrFoldTables.cpp b/lib/Target/X86/X86InstrFoldTables.cpp
index 5d8400595bfa..7d31cfab4137 100644
--- a/lib/Target/X86/X86InstrFoldTables.cpp
+++ b/lib/Target/X86/X86InstrFoldTables.cpp
@@ -1576,7 +1576,7 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::SUBSDrr_Int, X86::SUBSDrm_Int, TB_NO_REVERSE },
{ X86::SUBSSrr, X86::SUBSSrm, 0 },
{ X86::SUBSSrr_Int, X86::SUBSSrm_Int, TB_NO_REVERSE },
- // FIXME: TEST*rr -> swapped operand of TEST *mr.
+ // FIXME: TEST*rr -> swapped operand of TEST *mr.
{ X86::UNPCKHPDrr, X86::UNPCKHPDrm, TB_ALIGN_16 },
{ X86::UNPCKHPSrr, X86::UNPCKHPSrm, TB_ALIGN_16 },
{ X86::UNPCKLPDrr, X86::UNPCKLPDrm, TB_ALIGN_16 },
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 1b61accfb42b..96db8b4e7585 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -7725,7 +7725,7 @@ X86InstrInfo::insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
if (C.CallConstructionID == MachineOutlinerTailCall) {
// Yes, just insert a JMP.
It = MBB.insert(It,
- BuildMI(MF, DebugLoc(), get(X86::JMP_1))
+ BuildMI(MF, DebugLoc(), get(X86::TAILJMPd64))
.addGlobalAddress(M.getNamedValue(MF.getName())));
} else {
// No, insert a call.
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 7509b312c100..bc7afd32d494 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -1750,7 +1750,7 @@ def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>, // AH = flags
// Bit tests instructions: BT, BTS, BTR, BTC.
let Defs = [EFLAGS] in {
-let SchedRW = [WriteALU] in {
+let SchedRW = [WriteBitTest] in {
def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))]>,
@@ -1783,7 +1783,7 @@ let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteALULd] in {
[]>, TB, NotMemoryFoldable;
}
-let SchedRW = [WriteALU] in {
+let SchedRW = [WriteBitTest] in {
def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt GR16:$src1, i16immSExt8:$src2))]>,
@@ -1818,7 +1818,7 @@ def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
} // SchedRW
let hasSideEffects = 0 in {
-let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
def BTC16rr : I<0xBB, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB, NotMemoryFoldable;
@@ -1842,7 +1842,7 @@ def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
NotMemoryFoldable;
}
-let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
def BTC16ri8 : Ii8<0xBA, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
def BTC32ri8 : Ii8<0xBA, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
@@ -1861,7 +1861,7 @@ def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
Requires<[In64BitMode]>;
}
-let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
def BTR16rr : I<0xB3, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB, NotMemoryFoldable;
@@ -1885,7 +1885,7 @@ def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
NotMemoryFoldable;
}
-let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
def BTR16ri8 : Ii8<0xBA, MRM6r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB;
@@ -1908,7 +1908,7 @@ def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
Requires<[In64BitMode]>;
}
-let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
def BTS16rr : I<0xAB, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB, NotMemoryFoldable;
@@ -1932,7 +1932,7 @@ def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
NotMemoryFoldable;
}
-let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
def BTS16ri8 : Ii8<0xBA, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
def BTS32ri8 : Ii8<0xBA, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td
index ee3b01159174..023137634df1 100644
--- a/lib/Target/X86/X86InstrShiftRotate.td
+++ b/lib/Target/X86/X86InstrShiftRotate.td
@@ -650,9 +650,9 @@ def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst),
// Double shift instructions (generalizations of rotate)
//===----------------------------------------------------------------------===//
-let Constraints = "$src1 = $dst", SchedRW = [WriteShiftDouble] in {
+let Constraints = "$src1 = $dst" in {
-let Uses = [CL] in {
+let Uses = [CL], SchedRW = [WriteSHDrrcl] in {
def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst),
(ins GR16:$src1, GR16:$src2),
"shld{w}\t{%cl, $src2, $dst|$dst, $src2, cl}",
@@ -683,9 +683,9 @@ def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst),
"shrd{q}\t{%cl, $src2, $dst|$dst, $src2, cl}",
[(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))]>,
TB;
-}
+} // SchedRW
-let isCommutable = 1 in { // These instructions commute to each other.
+let isCommutable = 1, SchedRW = [WriteSHDrri] in { // These instructions commute to each other.
def SHLD16rri8 : Ii8<0xA4, MRMDestReg,
(outs GR16:$dst),
(ins GR16:$src1, GR16:$src2, u8imm:$src3),
@@ -728,11 +728,10 @@ def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
[(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2,
(i8 imm:$src3)))]>,
TB;
-}
-} // Constraints = "$src = $dst", SchedRW
+} // SchedRW
+} // Constraints = "$src = $dst"
-let SchedRW = [WriteShiftDoubleLd, WriteRMW] in {
-let Uses = [CL] in {
+let Uses = [CL], SchedRW = [WriteSHDmrcl] in {
def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
"shld{w}\t{%cl, $src2, $dst|$dst, $src2, cl}",
[(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL),
@@ -759,8 +758,9 @@ def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
"shrd{q}\t{%cl, $src2, $dst|$dst, $src2, cl}",
[(store (X86shrd (loadi64 addr:$dst), GR64:$src2, CL),
addr:$dst)]>, TB;
-}
+} // SchedRW
+let SchedRW = [WriteSHDmri] in {
def SHLD16mri8 : Ii8<0xA4, MRMDestMem,
(outs), (ins i16mem:$dst, GR16:$src2, u8imm:$src3),
"shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
diff --git a/lib/Target/X86/X86SchedBroadwell.td b/lib/Target/X86/X86SchedBroadwell.td
index c7713fea70fa..6334d9e89a60 100755
--- a/lib/Target/X86/X86SchedBroadwell.td
+++ b/lib/Target/X86/X86SchedBroadwell.td
@@ -119,8 +119,8 @@ defm : BWWriteResPair<WriteIDiv16, [BWPort0, BWDivider], 25, [1, 10]>;
defm : BWWriteResPair<WriteIDiv32, [BWPort0, BWDivider], 25, [1, 10]>;
defm : BWWriteResPair<WriteIDiv64, [BWPort0, BWDivider], 25, [1, 10]>;
-defm : BWWriteResPair<WriteBSWAP32,[BWPort15], 1>; //
-defm : BWWriteResPair<WriteBSWAP64,[BWPort06, BWPort15], 2, [1, 1], 2>; //
+defm : X86WriteRes<WriteBSWAP32, [BWPort15], 1, [1], 1>;
+defm : X86WriteRes<WriteBSWAP64, [BWPort06, BWPort15], 2, [1, 1], 2>;
defm : BWWriteResPair<WriteCRC32, [BWPort1], 3>;
def : WriteRes<WriteIMulH, []> { let Latency = 3; } // Integer multiplication, high part.
@@ -137,6 +137,7 @@ def : WriteRes<WriteSETCCStore, [BWPort06,BWPort4,BWPort237]> {
let NumMicroOps = 3;
}
def : WriteRes<WriteLAHFSAHF, [BWPort06]>;
+def : WriteRes<WriteBitTest,[BWPort06]>; // Bit Test instrs
// Bit counts.
defm : BWWriteResPair<WriteBSF, [BWPort1], 3>;
@@ -148,8 +149,11 @@ defm : BWWriteResPair<WritePOPCNT, [BWPort1], 3>;
// Integer shifts and rotates.
defm : BWWriteResPair<WriteShift, [BWPort06], 1>;
-// Double shift instructions.
-defm : BWWriteResPair<WriteShiftDouble, [BWPort06], 1>;
+// SHLD/SHRD.
+defm : X86WriteRes<WriteSHDrri, [BWPort1], 3, [1], 1>;
+defm : X86WriteRes<WriteSHDrrcl,[BWPort1,BWPort06,BWPort0156], 6, [1, 1, 2], 4>;
+defm : X86WriteRes<WriteSHDmri, [BWPort1,BWPort23,BWPort237,BWPort0156], 9, [1, 1, 1, 1], 4>;
+defm : X86WriteRes<WriteSHDmrcl,[BWPort1,BWPort23,BWPort237,BWPort06,BWPort0156], 11, [1, 1, 1, 1, 2], 6>;
// BMI1 BEXTR, BMI2 BZHI
defm : BWWriteResPair<WriteBEXTR, [BWPort06,BWPort15], 2, [1,1], 2>;
@@ -600,14 +604,6 @@ def BWWriteResGroup6 : SchedWriteRes<[BWPort06]> {
let ResourceCycles = [1];
}
def: InstRW<[BWWriteResGroup6], (instrs CDQ, CQO)>;
-def: InstRW<[BWWriteResGroup6], (instregex "BT(16|32|64)ri8",
- "BT(16|32|64)rr",
- "BTC(16|32|64)ri8",
- "BTC(16|32|64)rr",
- "BTR(16|32|64)ri8",
- "BTR(16|32|64)rr",
- "BTS(16|32|64)ri8",
- "BTS(16|32|64)rr")>;
def BWWriteResGroup7 : SchedWriteRes<[BWPort15]> {
let Latency = 1;
@@ -746,8 +742,6 @@ def BWWriteResGroup27 : SchedWriteRes<[BWPort1]> {
def: InstRW<[BWWriteResGroup27], (instregex "MMX_CVTPI2PSirr",
"PDEP(32|64)rr",
"PEXT(32|64)rr",
- "SHLD(16|32|64)rri8",
- "SHRD(16|32|64)rri8",
"(V?)CVTDQ2PS(Y?)rr")>;
def BWWriteResGroup27_16 : SchedWriteRes<[BWPort1, BWPort0156]> {
@@ -1055,14 +1049,6 @@ def BWWriteResGroup66 : SchedWriteRes<[BWPort23,BWPort0156]> {
def: InstRW<[BWWriteResGroup66], (instrs POP16r, POP32r, POP64r)>;
def: InstRW<[BWWriteResGroup66], (instregex "POP(16|32|64)rmr")>;
-def BWWriteResGroup67 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> {
- let Latency = 6;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[BWWriteResGroup67], (instregex "SHLD(16|32|64)rrCL",
- "SHRD(16|32|64)rrCL")>;
-
def BWWriteResGroup68 : SchedWriteRes<[BWPort1,BWPort6,BWPort06,BWPort0156]> {
let Latency = 6;
let NumMicroOps = 4;
@@ -1307,14 +1293,6 @@ def BWWriteResGroup108 : SchedWriteRes<[BWPort5,BWPort23,BWPort015]> {
def: InstRW<[BWWriteResGroup108], (instregex "VPBROADCASTB(Y?)rm",
"VPBROADCASTW(Y?)rm")>;
-def BWWriteResGroup111 : SchedWriteRes<[BWPort1,BWPort23,BWPort237,BWPort0156]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[BWWriteResGroup111], (instregex "SHLD(16|32|64)mri8",
- "SHRD(16|32|64)mri8")>;
-
def BWWriteResGroup112 : SchedWriteRes<[BWPort23,BWPort06,BWPort0156]> {
let Latency = 9;
let NumMicroOps = 5;
@@ -1380,14 +1358,6 @@ def BWWriteResGroup128 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> {
}
def: InstRW<[BWWriteResGroup128], (instregex "VCVTDQ2PDYrm")>;
-def BWWriteResGroup130 : SchedWriteRes<[BWPort1,BWPort23,BWPort237,BWPort06,BWPort0156]> {
- let Latency = 11;
- let NumMicroOps = 6;
- let ResourceCycles = [1,1,1,1,2];
-}
-def: InstRW<[BWWriteResGroup130], (instregex "SHLD(16|32|64)mrCL",
- "SHRD(16|32|64)mrCL")>;
-
def BWWriteResGroup131 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> {
let Latency = 11;
let NumMicroOps = 7;
diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td
index 189dd4183839..876c3e4162cf 100644
--- a/lib/Target/X86/X86SchedHaswell.td
+++ b/lib/Target/X86/X86SchedHaswell.td
@@ -118,17 +118,26 @@ defm : X86WriteRes<WriteLoad, [HWPort23], 5, [1], 1>;
defm : X86WriteRes<WriteMove, [HWPort0156], 1, [1], 1>;
def : WriteRes<WriteZero, []>;
+// Arithmetic.
defm : HWWriteResPair<WriteALU, [HWPort0156], 1>;
-defm : HWWriteResPair<WriteADC, [HWPort06,HWPort0156], 2, [1,1], 2>;
+defm : HWWriteResPair<WriteADC, [HWPort06, HWPort0156], 2, [1,1], 2>;
defm : HWWriteResPair<WriteIMul, [HWPort1], 3>;
defm : HWWriteResPair<WriteIMul64, [HWPort1], 3>;
-defm : HWWriteResPair<WriteBSWAP32,[HWPort15], 1>;
-defm : HWWriteResPair<WriteBSWAP64,[HWPort06, HWPort15], 2, [1,1], 2>;
+defm : X86WriteRes<WriteBSWAP32, [HWPort15], 1, [1], 1>;
+defm : X86WriteRes<WriteBSWAP64, [HWPort06, HWPort15], 2, [1,1], 2>;
def : WriteRes<WriteIMulH, []> { let Latency = 3; }
+
+// Integer shifts and rotates.
defm : HWWriteResPair<WriteShift, [HWPort06], 1>;
-defm : HWWriteResPair<WriteShiftDouble, [HWPort06], 1>;
+
+// SHLD/SHRD.
+defm : X86WriteRes<WriteSHDrri, [HWPort1], 3, [1], 1>;
+defm : X86WriteRes<WriteSHDrrcl,[HWPort1, HWPort06, HWPort0156], 6, [1, 1, 2], 4>;
+defm : X86WriteRes<WriteSHDmri, [HWPort1, HWPort23, HWPort237, HWPort0156], 10, [1, 1, 1, 1], 4>;
+defm : X86WriteRes<WriteSHDmrcl,[HWPort1, HWPort23, HWPort237, HWPort06, HWPort0156], 12, [1, 1, 1, 1, 2], 6>;
+
defm : HWWriteResPair<WriteJump, [HWPort06], 1>;
defm : HWWriteResPair<WriteCRC32, [HWPort1], 3>;
@@ -141,6 +150,7 @@ def : WriteRes<WriteSETCCStore, [HWPort06,HWPort4,HWPort237]> {
let NumMicroOps = 3;
}
def : WriteRes<WriteLAHFSAHF, [HWPort06]>;
+def : WriteRes<WriteBitTest,[HWPort06]>;
// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
@@ -886,14 +896,6 @@ def HWWriteResGroup7 : SchedWriteRes<[HWPort06]> {
let ResourceCycles = [1];
}
def: InstRW<[HWWriteResGroup7], (instrs CDQ, CQO)>;
-def: InstRW<[HWWriteResGroup7], (instregex "BT(16|32|64)ri8",
- "BT(16|32|64)rr",
- "BTC(16|32|64)ri8",
- "BTC(16|32|64)rr",
- "BTR(16|32|64)ri8",
- "BTR(16|32|64)rr",
- "BTS(16|32|64)ri8",
- "BTS(16|32|64)rr")>;
def HWWriteResGroup8 : SchedWriteRes<[HWPort15]> {
let Latency = 1;
@@ -1240,8 +1242,6 @@ def HWWriteResGroup50 : SchedWriteRes<[HWPort1]> {
def: InstRW<[HWWriteResGroup50], (instregex "MMX_CVTPI2PSirr",
"PDEP(32|64)rr",
"PEXT(32|64)rr",
- "SHLD(16|32|64)rri8",
- "SHRD(16|32|64)rri8",
"(V?)CVTDQ2PS(Y?)rr")>;
def HWWriteResGroup50_16i : SchedWriteRes<[HWPort1, HWPort0156]> {
@@ -1513,14 +1513,6 @@ def HWWriteResGroup83 : SchedWriteRes<[HWPort1,HWPort6,HWPort0156]> {
}
def: InstRW<[HWWriteResGroup83], (instregex "LAR(16|32|64)rr")>;
-def HWWriteResGroup86 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort0156]> {
- let Latency = 10;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[HWWriteResGroup86], (instregex "SHLD(16|32|64)mri8",
- "SHRD(16|32|64)mri8")>;
-
def HWWriteResGroup87 : SchedWriteRes<[HWPort1,HWPort6,HWPort23,HWPort0156]> {
let Latency = 9;
let NumMicroOps = 5;
@@ -1638,14 +1630,6 @@ def HWWriteResGroup104 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
}
def: InstRW<[HWWriteResGroup104], (instregex "VCVTDQ2PDYrm")>;
-def HWWriteResGroup105 : SchedWriteRes<[HWPort1,HWPort06,HWPort0156]> {
- let Latency = 6;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[HWWriteResGroup105], (instregex "SHLD(16|32|64)rrCL",
- "SHRD(16|32|64)rrCL")>;
-
def HWWriteResGroup107 : SchedWriteRes<[HWPort1,HWPort6,HWPort06,HWPort0156]> {
let Latency = 6;
let NumMicroOps = 4;
@@ -1660,14 +1644,6 @@ def HWWriteResGroup108 : SchedWriteRes<[HWPort6,HWPort0156]> {
}
def: InstRW<[HWWriteResGroup108], (instrs STD)>;
-def HWWriteResGroup109 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort06,HWPort0156]> {
- let Latency = 12;
- let NumMicroOps = 6;
- let ResourceCycles = [1,1,1,1,2];
-}
-def: InstRW<[HWWriteResGroup109], (instregex "SHLD(16|32|64)mrCL",
- "SHRD(16|32|64)mrCL")>;
-
def HWWriteResGroup114 : SchedWriteRes<[HWPort6,HWPort06,HWPort15,HWPort0156]> {
let Latency = 7;
let NumMicroOps = 7;
diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td
index 3b543c680ef4..6b7bbdea860a 100644
--- a/lib/Target/X86/X86SchedSandyBridge.td
+++ b/lib/Target/X86/X86SchedSandyBridge.td
@@ -106,13 +106,14 @@ def : WriteRes<WriteLoad, [SBPort23]> { let Latency = 5; }
def : WriteRes<WriteMove, [SBPort015]>;
def : WriteRes<WriteZero, []>;
+// Arithmetic.
defm : SBWriteResPair<WriteALU, [SBPort015], 1>;
defm : SBWriteResPair<WriteADC, [SBPort05,SBPort015], 2, [1,1], 2>;
defm : SBWriteResPair<WriteIMul, [SBPort1], 3>;
defm : SBWriteResPair<WriteIMul64, [SBPort1], 3>;
-defm : SBWriteResPair<WriteBSWAP32,[SBPort1], 1>;
-defm : SBWriteResPair<WriteBSWAP64,[SBPort1,SBPort05], 2, [1,1], 2>;
+defm : X86WriteRes<WriteBSWAP32, [SBPort1], 1, [1], 1>;
+defm : X86WriteRes<WriteBSWAP64, [SBPort1,SBPort05], 2, [1,1], 2>;
defm : SBWriteResPair<WriteDiv8, [SBPort0, SBDivider], 25, [1, 10]>;
defm : SBWriteResPair<WriteDiv16, [SBPort0, SBDivider], 25, [1, 10]>;
@@ -125,8 +126,13 @@ defm : SBWriteResPair<WriteIDiv64, [SBPort0, SBDivider], 25, [1, 10]>;
def : WriteRes<WriteIMulH, []> { let Latency = 3; }
+// SHLD/SHRD.
+defm : X86WriteRes<WriteSHDrri, [SBPort05, SBPort015], 2, [1, 1], 2>;
+defm : X86WriteRes<WriteSHDrrcl,[SBPort05, SBPort015], 4, [3, 1], 4>;
+defm : X86WriteRes<WriteSHDmri, [SBPort4,SBPort23,SBPort05,SBPort015], 8, [1, 2, 1, 1], 5>;
+defm : X86WriteRes<WriteSHDmrcl,[SBPort4,SBPort23,SBPort05,SBPort015], 10, [1, 2, 3, 1], 7>;
+
defm : SBWriteResPair<WriteShift, [SBPort05], 1>;
-defm : SBWriteResPair<WriteShiftDouble, [SBPort05], 1>;
defm : SBWriteResPair<WriteJump, [SBPort5], 1>;
defm : SBWriteResPair<WriteCRC32, [SBPort1], 3, [1], 1, 5>;
@@ -139,6 +145,7 @@ def : WriteRes<WriteSETCCStore, [SBPort05,SBPort4,SBPort23]> {
let NumMicroOps = 3;
}
def : WriteRes<WriteLAHFSAHF, [SBPort05]>;
+def : WriteRes<WriteBitTest,[SBPort05]>;
// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
@@ -564,14 +571,6 @@ def SBWriteResGroup4 : SchedWriteRes<[SBPort05]> {
let ResourceCycles = [1];
}
def: InstRW<[SBWriteResGroup4], (instrs CDQ, CQO)>;
-def: InstRW<[SBWriteResGroup4], (instregex "BT(16|32|64)ri8",
- "BT(16|32|64)rr",
- "BTC(16|32|64)ri8",
- "BTC(16|32|64)rr",
- "BTR(16|32|64)ri8",
- "BTR(16|32|64)rr",
- "BTS(16|32|64)ri8",
- "BTS(16|32|64)rr")>;
def SBWriteResGroup5 : SchedWriteRes<[SBPort15]> {
let Latency = 1;
@@ -630,14 +629,6 @@ def SBWriteResGroup18 : SchedWriteRes<[SBPort5,SBPort015]> {
def: InstRW<[SBWriteResGroup18], (instrs JCXZ, JECXZ, JRCXZ)>;
def: InstRW<[SBWriteResGroup18], (instregex "MMX_MOVDQ2Qrr")>;
-def SBWriteResGroup19 : SchedWriteRes<[SBPort05,SBPort015]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup19], (instregex "SHLD(16|32|64)rri8",
- "SHRD(16|32|64)rri8")>;
-
def SBWriteResGroup21 : SchedWriteRes<[SBPort1]> {
let Latency = 3;
let NumMicroOps = 1;
@@ -728,14 +719,6 @@ def SBWriteResGroup29_2 : SchedWriteRes<[SBPort5,SBPort015]> {
}
def: InstRW<[SBWriteResGroup29_2], (instrs PAUSE)>;
-def SBWriteResGroup29_3 : SchedWriteRes<[SBPort05,SBPort015]> {
- let Latency = 4;
- let NumMicroOps = 4;
- let ResourceCycles = [3,1];
-}
-def: InstRW<[SBWriteResGroup29_3], (instregex "SHLD(16|32|64)rrCL",
- "SHRD(16|32|64)rrCL")>;
-
def SBWriteResGroup30 : SchedWriteRes<[SBPort0]> {
let Latency = 5;
let NumMicroOps = 1;
@@ -1027,14 +1010,6 @@ def SBWriteResGroup87 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> {
}
def: InstRW<[SBWriteResGroup87], (instrs FARCALL64)>;
-def SBWriteResGroup88 : SchedWriteRes<[SBPort4,SBPort23,SBPort05,SBPort015]> {
- let Latency = 8;
- let NumMicroOps = 5;
- let ResourceCycles = [1,2,1,1];
-}
-def: InstRW<[SBWriteResGroup88], (instregex "SHLD(16|32|64)mri8",
- "SHRD(16|32|64)mri8")>;
-
def SBWriteResGroup93 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
let Latency = 9;
let NumMicroOps = 3;
@@ -1130,14 +1105,6 @@ def SBWriteResGroup101 : SchedWriteRes<[SBPort1,SBPort23]> {
def: InstRW<[SBWriteResGroup101], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
"ILD_F(16|32|64)m")>;
-def SBWriteResGroup103_2 : SchedWriteRes<[SBPort4,SBPort23,SBPort05,SBPort015]> {
- let Latency = 10;
- let NumMicroOps = 7;
- let ResourceCycles = [1,2,3,1];
-}
-def: InstRW<[SBWriteResGroup103_2], (instregex "SHLD(16|32|64)mrCL",
- "SHRD(16|32|64)mrCL")>;
-
def SBWriteResGroup104 : SchedWriteRes<[SBPort0,SBPort23]> {
let Latency = 11;
let NumMicroOps = 2;
diff --git a/lib/Target/X86/X86SchedSkylakeClient.td b/lib/Target/X86/X86SchedSkylakeClient.td
index 1417799d76be..bda088e1512f 100644
--- a/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/lib/Target/X86/X86SchedSkylakeClient.td
@@ -110,8 +110,8 @@ defm : SKLWriteResPair<WriteADC, [SKLPort06], 1>; // Integer ALU + flags op
defm : SKLWriteResPair<WriteIMul, [SKLPort1], 3>; // Integer multiplication.
defm : SKLWriteResPair<WriteIMul64, [SKLPort1], 3>; // Integer 64-bit multiplication.
-defm : SKLWriteResPair<WriteBSWAP32,[SKLPort15], 1>; //
-defm : SKLWriteResPair<WriteBSWAP64,[SKLPort06, SKLPort15], 2, [1,1], 2>; //
+defm : X86WriteRes<WriteBSWAP32, [SKLPort15], 1, [1], 1>;
+defm : X86WriteRes<WriteBSWAP64, [SKLPort06, SKLPort15], 2, [1,1], 2>;
defm : SKLWriteResPair<WriteDiv8, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
defm : SKLWriteResPair<WriteDiv16, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
@@ -136,6 +136,7 @@ def : WriteRes<WriteSETCCStore, [SKLPort06,SKLPort4,SKLPort237]> {
let NumMicroOps = 3;
}
def : WriteRes<WriteLAHFSAHF, [SKLPort06]>;
+def : WriteRes<WriteBitTest,[SKLPort06]>; //
// Bit counts.
defm : SKLWriteResPair<WriteBSF, [SKLPort1], 3>;
@@ -147,8 +148,11 @@ defm : SKLWriteResPair<WritePOPCNT, [SKLPort1], 3>;
// Integer shifts and rotates.
defm : SKLWriteResPair<WriteShift, [SKLPort06], 1>;
-// Double shift instructions.
-defm : SKLWriteResPair<WriteShiftDouble, [SKLPort06], 1>;
+// SHLD/SHRD.
+defm : X86WriteRes<WriteSHDrri, [SKLPort1], 3, [1], 1>;
+defm : X86WriteRes<WriteSHDrrcl,[SKLPort1,SKLPort06,SKLPort0156], 6, [1, 2, 1], 4>;
+defm : X86WriteRes<WriteSHDmri, [SKLPort1,SKLPort23,SKLPort237,SKLPort0156], 9, [1, 1, 1, 1], 4>;
+defm : X86WriteRes<WriteSHDmrcl,[SKLPort1,SKLPort23,SKLPort237,SKLPort06,SKLPort0156], 11, [1, 1, 1, 2, 1], 6>;
// BMI1 BEXTR, BMI2 BZHI
defm : SKLWriteResPair<WriteBEXTR, [SKLPort06,SKLPort15], 2, [1,1], 2>;
@@ -602,14 +606,6 @@ def SKLWriteResGroup7 : SchedWriteRes<[SKLPort06]> {
let ResourceCycles = [1];
}
def: InstRW<[SKLWriteResGroup7], (instrs CDQ, CQO, CLAC, STAC)>;
-def: InstRW<[SKLWriteResGroup7], (instregex "BT(16|32|64)ri8",
- "BT(16|32|64)rr",
- "BTC(16|32|64)ri8",
- "BTC(16|32|64)rr",
- "BTR(16|32|64)ri8",
- "BTR(16|32|64)rr",
- "BTS(16|32|64)ri8",
- "BTS(16|32|64)rr")>;
def SKLWriteResGroup8 : SchedWriteRes<[SKLPort15]> {
let Latency = 1;
@@ -743,9 +739,7 @@ def SKLWriteResGroup29 : SchedWriteRes<[SKLPort1]> {
let ResourceCycles = [1];
}
def: InstRW<[SKLWriteResGroup29], (instregex "PDEP(32|64)rr",
- "PEXT(32|64)rr",
- "SHLD(16|32|64)rri8",
- "SHRD(16|32|64)rri8")>;
+ "PEXT(32|64)rr")>;
def SKLWriteResGroup29_16i : SchedWriteRes<[SKLPort1, SKLPort0156]> {
let Latency = 4;
@@ -1096,14 +1090,6 @@ def SKLWriteResGroup78 : SchedWriteRes<[SKLPort5,SKLPort01]> {
}
def: InstRW<[SKLWriteResGroup78], (instregex "(V?)CVTSI642SSrr")>;
-def SKLWriteResGroup79 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> {
- let Latency = 6;
- let NumMicroOps = 4;
- let ResourceCycles = [1,2,1];
-}
-def: InstRW<[SKLWriteResGroup79], (instregex "SHLD(16|32|64)rrCL",
- "SHRD(16|32|64)rrCL")>;
-
def SKLWriteResGroup80 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort06,SKLPort0156]> {
let Latency = 6;
let NumMicroOps = 4;
@@ -1392,14 +1378,6 @@ def SKLWriteResGroup128 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> {
def: InstRW<[SKLWriteResGroup128], (instregex "(V?)PHADDSWrm",
"(V?)PHSUBSWrm")>;
-def SKLWriteResGroup130 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort0156]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SKLWriteResGroup130], (instregex "SHLD(16|32|64)mri8",
- "SHRD(16|32|64)mri8")>;
-
def SKLWriteResGroup131 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort23,SKLPort0156]> {
let Latency = 9;
let NumMicroOps = 5;
@@ -1519,14 +1497,6 @@ def: InstRW<[SKLWriteResGroup152], (instregex "CVTPD2PSrm",
"CVT(T?)PD2DQrm",
"MMX_CVT(T?)PD2PIirm")>;
-def SKLWriteResGroup153 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> {
- let Latency = 11;
- let NumMicroOps = 6;
- let ResourceCycles = [1,1,1,2,1];
-}
-def: InstRW<[SKLWriteResGroup153], (instregex "SHLD(16|32|64)mrCL",
- "SHRD(16|32|64)mrCL")>;
-
def SKLWriteResGroup154 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> {
let Latency = 11;
let NumMicroOps = 7;
diff --git a/lib/Target/X86/X86SchedSkylakeServer.td b/lib/Target/X86/X86SchedSkylakeServer.td
index 7095ec081bd9..9d5f8555c505 100755
--- a/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/lib/Target/X86/X86SchedSkylakeServer.td
@@ -110,8 +110,8 @@ defm : SKXWriteResPair<WriteADC, [SKXPort06], 1>; // Integer ALU + flags op
defm : SKXWriteResPair<WriteIMul, [SKXPort1], 3>; // Integer multiplication.
defm : SKXWriteResPair<WriteIMul64, [SKXPort1], 3>; // Integer 64-bit multiplication.
-defm : SKXWriteResPair<WriteBSWAP32,[SKXPort15], 1>; //
-defm : SKXWriteResPair<WriteBSWAP64,[SKXPort06, SKXPort15], 2, [1,1], 2>; //
+defm : X86WriteRes<WriteBSWAP32, [SKXPort15], 1, [1], 1>;
+defm : X86WriteRes<WriteBSWAP64, [SKXPort06, SKXPort15], 2, [1,1], 2>;
defm : SKXWriteResPair<WriteDiv8, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
defm : SKXWriteResPair<WriteDiv16, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
@@ -136,12 +136,16 @@ def : WriteRes<WriteSETCCStore, [SKXPort06,SKXPort4,SKXPort237]> {
let NumMicroOps = 3;
}
def : WriteRes<WriteLAHFSAHF, [SKXPort06]>;
+def : WriteRes<WriteBitTest,[SKXPort06]>; //
// Integer shifts and rotates.
defm : SKXWriteResPair<WriteShift, [SKXPort06], 1>;
-// Double shift instructions.
-defm : SKXWriteResPair<WriteShiftDouble, [SKXPort06], 1>;
+// SHLD/SHRD.
+defm : X86WriteRes<WriteSHDrri, [SKXPort1], 3, [1], 1>;
+defm : X86WriteRes<WriteSHDrrcl,[SKXPort1,SKXPort06,SKXPort0156], 6, [1, 2, 1], 4>;
+defm : X86WriteRes<WriteSHDmri, [SKXPort1,SKXPort23,SKXPort237,SKXPort0156], 9, [1, 1, 1, 1], 4>;
+defm : X86WriteRes<WriteSHDmrcl,[SKXPort1,SKXPort23,SKXPort237,SKXPort06,SKXPort0156], 11, [1, 1, 1, 2, 1], 6>;
// Bit counts.
defm : SKXWriteResPair<WriteBSF, [SKXPort1], 3>;
@@ -615,14 +619,6 @@ def SKXWriteResGroup7 : SchedWriteRes<[SKXPort06]> {
let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup7], (instrs CDQ, CQO, CLAC, STAC)>;
-def: InstRW<[SKXWriteResGroup7], (instregex "BT(16|32|64)ri8",
- "BT(16|32|64)rr",
- "BTC(16|32|64)ri8",
- "BTC(16|32|64)rr",
- "BTR(16|32|64)ri8",
- "BTR(16|32|64)rr",
- "BTS(16|32|64)ri8",
- "BTS(16|32|64)rr")>;
def SKXWriteResGroup8 : SchedWriteRes<[SKXPort15]> {
let Latency = 1;
@@ -783,9 +779,7 @@ def SKXWriteResGroup31 : SchedWriteRes<[SKXPort1]> {
let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup31], (instregex "PDEP(32|64)rr",
- "PEXT(32|64)rr",
- "SHLD(16|32|64)rri8",
- "SHRD(16|32|64)rri8")>;
+ "PEXT(32|64)rr")>;
def SKXWriteResGroup31_16i : SchedWriteRes<[SKXPort1, SKXPort0156]> {
let Latency = 4;
@@ -1270,14 +1264,6 @@ def: InstRW<[SKXWriteResGroup82], (instregex "(V?)CVTSI642SSrr",
"VCVTSI642SSZrr",
"VCVTUSI642SSZrr")>;
-def SKXWriteResGroup83 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> {
- let Latency = 6;
- let NumMicroOps = 4;
- let ResourceCycles = [1,2,1];
-}
-def: InstRW<[SKXWriteResGroup83], (instregex "SHLD(16|32|64)rrCL",
- "SHRD(16|32|64)rrCL")>;
-
def SKXWriteResGroup84 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort06,SKXPort0156]> {
let Latency = 6;
let NumMicroOps = 4;
@@ -1830,14 +1816,6 @@ def SKXWriteResGroup143 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23]> {
def: InstRW<[SKXWriteResGroup143], (instregex "(V?)PHADDSWrm",
"(V?)PHSUBSWrm")>;
-def SKXWriteResGroup145 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort0156]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SKXWriteResGroup145], (instregex "SHLD(16|32|64)mri8",
- "SHRD(16|32|64)mri8")>;
-
def SKXWriteResGroup146 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort23,SKXPort0156]> {
let Latency = 9;
let NumMicroOps = 5;
@@ -2033,14 +2011,6 @@ def SKXWriteResGroup167 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
}
def: InstRW<[SKXWriteResGroup167], (instregex "VPCONFLICTQZ128rm(b?)")>;
-def SKXWriteResGroup168 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
- let Latency = 11;
- let NumMicroOps = 6;
- let ResourceCycles = [1,1,1,2,1];
-}
-def: InstRW<[SKXWriteResGroup168], (instregex "SHLD(16|32|64)mrCL",
- "SHRD(16|32|64)mrCL")>;
-
def SKXWriteResGroup169 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> {
let Latency = 11;
let NumMicroOps = 7;
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
index d0167753ccd4..ef9ce94706df 100644
--- a/lib/Target/X86/X86Schedule.td
+++ b/lib/Target/X86/X86Schedule.td
@@ -118,8 +118,8 @@ defm WriteIMul64 : X86SchedWritePair; // Integer 64-bit multiplication.
def WriteIMulH : SchedWrite; // Integer multiplication, high part.
def WriteLEA : SchedWrite; // LEA instructions can't fold loads.
-defm WriteBSWAP32: X86SchedWritePair; // Byte Order (Endiannes) Swap
-defm WriteBSWAP64: X86SchedWritePair; // Byte Order (Endiannes) Swap
+def WriteBSWAP32 : SchedWrite; // Byte Order (Endianness) 32-bit Swap.
+def WriteBSWAP64 : SchedWrite; // Byte Order (Endianness) 64-bit Swap.
// Integer division.
defm WriteDiv8 : X86SchedWritePair;
@@ -142,11 +142,15 @@ def WriteFCMOV : SchedWrite; // X87 conditional move.
def WriteSETCC : SchedWrite; // Set register based on condition code.
def WriteSETCCStore : SchedWrite;
def WriteLAHFSAHF : SchedWrite; // Load/Store flags in AH.
+def WriteBitTest : SchedWrite; // Bit Test - TODO add memory folding support
// Integer shifts and rotates.
defm WriteShift : X86SchedWritePair;
// Double shift instructions.
-defm WriteShiftDouble : X86SchedWritePair;
+def WriteSHDrri : SchedWrite;
+def WriteSHDrrcl : SchedWrite;
+def WriteSHDmri : SchedWrite;
+def WriteSHDmrcl : SchedWrite;
// BMI1 BEXTR, BMI2 BZHI
defm WriteBEXTR : X86SchedWritePair;
diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td
index d1e902e6c43f..a7f461c456bd 100644
--- a/lib/Target/X86/X86ScheduleAtom.td
+++ b/lib/Target/X86/X86ScheduleAtom.td
@@ -81,8 +81,8 @@ defm : AtomWriteResPair<WriteADC, [AtomPort01], [AtomPort0]>;
defm : AtomWriteResPair<WriteIMul, [AtomPort01], [AtomPort01], 7, 7, [7], [7]>;
defm : AtomWriteResPair<WriteIMul64, [AtomPort01], [AtomPort01], 12, 12, [12], [12]>;
-defm : AtomWriteResPair<WriteBSWAP32, [AtomPort0], [AtomPort0]>;
-defm : AtomWriteResPair<WriteBSWAP64, [AtomPort0], [AtomPort0]>;
+defm : X86WriteRes<WriteBSWAP32, [AtomPort0], 1, [1], 1>;
+defm : X86WriteRes<WriteBSWAP64, [AtomPort0], 1, [1], 1>;
defm : AtomWriteResPair<WriteDiv8, [AtomPort01], [AtomPort01], 50, 68, [50], [68]>;
defm : AtomWriteResPair<WriteDiv16, [AtomPort01], [AtomPort01], 50, 50, [50], [50]>;
@@ -108,6 +108,7 @@ def : WriteRes<WriteLAHFSAHF, [AtomPort01]> {
let Latency = 2;
let ResourceCycles = [2];
}
+def : WriteRes<WriteBitTest,[AtomPort01]>;
defm : X86WriteResUnsupported<WriteIMulH>;
@@ -150,11 +151,10 @@ defm : X86WriteResPairUnsupported<WriteBZHI>;
defm : AtomWriteResPair<WriteShift, [AtomPort0], [AtomPort0]>;
-////////////////////////////////////////////////////////////////////////////////
-// Double shift instructions.
-////////////////////////////////////////////////////////////////////////////////
-
-defm : AtomWriteResPair<WriteShiftDouble, [AtomPort0], [AtomPort0]>;
+defm : X86WriteRes<WriteSHDrri, [AtomPort01], 2, [2], 1>;
+defm : X86WriteRes<WriteSHDrrcl,[AtomPort01], 2, [2], 1>;
+defm : X86WriteRes<WriteSHDmri, [AtomPort01], 4, [4], 1>;
+defm : X86WriteRes<WriteSHDmrcl,[AtomPort01], 4, [4], 1>;
////////////////////////////////////////////////////////////////////////////////
// Loads, stores, and moves, not folded with other operations.
@@ -562,9 +562,7 @@ def AtomWrite01_2 : SchedWriteRes<[AtomPort01]> {
def : InstRW<[AtomWrite01_2], (instrs LEAVE, LEAVE64, POP16r,
PUSH16rmm, PUSH32rmm, PUSH64rmm,
LODSB, LODSL, LODSQ, LODSW,
- SCASB, SCASL, SCASQ, SCASW,
- SHLD32rrCL, SHRD32rrCL,
- SHLD32rri8, SHRD32rri8)>;
+ SCASB, SCASL, SCASQ, SCASW)>;
def : InstRW<[AtomWrite01_2], (instregex "BT(C|R|S)(16|32|64)mi8",
"PUSH(CS|DS|ES|FS|GS|SS)(16|32|64)",
"XADD(8|16|32|64)rr",
@@ -598,8 +596,6 @@ def AtomWrite01_4 : SchedWriteRes<[AtomPort01]> {
}
def : InstRW<[AtomWrite01_4], (instrs CBW, CWD, CWDE, CDQ, CDQE, CQO,
JCXZ, JECXZ, JRCXZ,
- SHLD32mrCL, SHRD32mrCL,
- SHLD32mri8, SHRD32mri8,
LD_F80m)>;
def : InstRW<[AtomWrite01_4], (instregex "PH(ADD|SUB)Drm",
"(MMX_)?PEXTRWrr(_REV)?")>;
diff --git a/lib/Target/X86/X86ScheduleBtVer2.td b/lib/Target/X86/X86ScheduleBtVer2.td
index d78c343ebd5c..719e71cd25e5 100644
--- a/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/lib/Target/X86/X86ScheduleBtVer2.td
@@ -168,8 +168,8 @@ defm : JWriteResIntPair<WriteIMul, [JALU1, JMul], 3, [1, 1], 2>; // i8/i16/i32
defm : JWriteResIntPair<WriteIMul64, [JALU1, JMul], 6, [1, 4], 2>; // i64 multiplication
defm : X86WriteRes<WriteIMulH, [JALU1], 6, [4], 1>;
-defm : JWriteResIntPair<WriteBSWAP32,[JALU01], 1>;
-defm : JWriteResIntPair<WriteBSWAP64,[JALU01], 1>;
+defm : X86WriteRes<WriteBSWAP32, [JALU01], 1, [1], 1>;
+defm : X86WriteRes<WriteBSWAP64, [JALU01], 1, [1], 1>;
defm : JWriteResIntPair<WriteDiv8, [JALU1, JDiv], 12, [1, 12], 1>;
defm : JWriteResIntPair<WriteDiv16, [JALU1, JDiv], 17, [1, 17], 2>;
@@ -188,6 +188,7 @@ defm : X86WriteRes<WriteFCMOV, [JFPU0, JFPA], 3, [1,1], 1>; // x87 conditional m
def : WriteRes<WriteSETCC, [JALU01]>; // Setcc.
def : WriteRes<WriteSETCCStore, [JALU01,JSAGU]>;
def : WriteRes<WriteLAHFSAHF, [JALU01]>;
+def : WriteRes<WriteBitTest,[JALU01]>;
// This is for simple LEAs with one or two input operands.
def : WriteRes<WriteLEA, [JALU01]>;
@@ -209,33 +210,11 @@ defm : X86WriteResPairUnsupported<WriteBZHI>;
defm : JWriteResIntPair<WriteShift, [JALU01], 1>;
-defm : JWriteResIntPair<WriteShiftDouble, [JALU01], 1>;
-
-def JWriteSHLDrri : SchedWriteRes<[JALU01]> {
- let Latency = 3;
- let ResourceCycles = [6];
- let NumMicroOps = 6;
-}
-def: InstRW<[JWriteSHLDrri], (instrs SHLD16rri8, SHLD32rri8, SHLD64rri8,
- SHRD16rri8, SHRD32rri8, SHRD64rri8)>;
-
-def JWriteSHLDrrCL : SchedWriteRes<[JALU01]> {
- let Latency = 4;
- let ResourceCycles = [8];
- let NumMicroOps = 7;
-}
-def: InstRW<[JWriteSHLDrrCL], (instrs SHLD16rrCL, SHLD32rrCL, SHLD64rrCL,
- SHRD16rrCL, SHRD32rrCL, SHRD64rrCL)>;
-
-def JWriteSHLDm : SchedWriteRes<[JLAGU, JALU01]> {
- let Latency = 9;
- let ResourceCycles = [1, 22];
- let NumMicroOps = 8;
-}
-def: InstRW<[JWriteSHLDm],(instrs SHLD16mri8, SHLD32mri8, SHLD64mri8,
- SHLD16mrCL, SHLD32mrCL, SHLD64mrCL,
- SHRD16mri8, SHRD32mri8, SHRD64mri8,
- SHRD16mrCL, SHRD32mrCL, SHRD64mrCL)>;
+// SHLD/SHRD.
+defm : X86WriteRes<WriteSHDrri, [JALU01], 3, [6], 6>;
+defm : X86WriteRes<WriteSHDrrcl,[JALU01], 4, [8], 7>;
+defm : X86WriteRes<WriteSHDmri, [JLAGU, JALU01], 9, [1, 22], 8>;
+defm : X86WriteRes<WriteSHDmrcl,[JLAGU, JALU01], 9, [1, 22], 8>;
////////////////////////////////////////////////////////////////////////////////
// Loads, stores, and moves, not folded with other operations.
diff --git a/lib/Target/X86/X86ScheduleSLM.td b/lib/Target/X86/X86ScheduleSLM.td
index c938a4a8939e..b1e843013707 100644
--- a/lib/Target/X86/X86ScheduleSLM.td
+++ b/lib/Target/X86/X86ScheduleSLM.td
@@ -98,11 +98,16 @@ defm : SLMWriteResPair<WriteADC, [SLM_IEC_RSV01], 1>;
defm : SLMWriteResPair<WriteIMul, [SLM_IEC_RSV1], 3>;
defm : SLMWriteResPair<WriteIMul64, [SLM_IEC_RSV1], 3>;
-defm : SLMWriteResPair<WriteBSWAP32,[SLM_IEC_RSV01], 1>;
-defm : SLMWriteResPair<WriteBSWAP64,[SLM_IEC_RSV01], 1>;
+defm : X86WriteRes<WriteBSWAP32, [SLM_IEC_RSV01], 1, [1], 1>;
+defm : X86WriteRes<WriteBSWAP64, [SLM_IEC_RSV01], 1, [1], 1>;
defm : SLMWriteResPair<WriteShift, [SLM_IEC_RSV0], 1>;
-defm : SLMWriteResPair<WriteShiftDouble, [SLM_IEC_RSV0], 1>;
+
+defm : X86WriteRes<WriteSHDrri, [SLM_IEC_RSV0], 1, [1], 1>;
+defm : X86WriteRes<WriteSHDrrcl,[SLM_IEC_RSV0], 1, [1], 1>;
+defm : X86WriteRes<WriteSHDmri, [SLM_MEC_RSV, SLM_IEC_RSV0], 4, [2, 1], 2>;
+defm : X86WriteRes<WriteSHDmrcl,[SLM_MEC_RSV, SLM_IEC_RSV0], 4, [2, 1], 2>;
+
defm : SLMWriteResPair<WriteJump, [SLM_IEC_RSV1], 1>;
defm : SLMWriteResPair<WriteCRC32, [SLM_IEC_RSV1], 3>;
@@ -115,6 +120,7 @@ def : WriteRes<WriteSETCCStore, [SLM_IEC_RSV01, SLM_MEC_RSV]> {
let ResourceCycles = [2,1];
}
def : WriteRes<WriteLAHFSAHF, [SLM_IEC_RSV01]>;
+def : WriteRes<WriteBitTest,[SLM_IEC_RSV01]>;
// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
diff --git a/lib/Target/X86/X86ScheduleZnver1.td b/lib/Target/X86/X86ScheduleZnver1.td
index d28d58580752..7184b850a195 100644
--- a/lib/Target/X86/X86ScheduleZnver1.td
+++ b/lib/Target/X86/X86ScheduleZnver1.td
@@ -180,11 +180,16 @@ defm : ZnWriteResPair<WriteADC, [ZnALU], 1>;
defm : ZnWriteResPair<WriteIMul, [ZnALU1, ZnMultiplier], 4>;
defm : ZnWriteResPair<WriteIMul64, [ZnALU1, ZnMultiplier], 4, [1,1], 2>;
-defm : ZnWriteResPair<WriteBSWAP32,[ZnALU], 1, [4]>;
-defm : ZnWriteResPair<WriteBSWAP64,[ZnALU], 1, [4]>;
+defm : X86WriteRes<WriteBSWAP32, [ZnALU], 1, [4], 1>;
+defm : X86WriteRes<WriteBSWAP64, [ZnALU], 1, [4], 1>;
defm : ZnWriteResPair<WriteShift, [ZnALU], 1>;
-defm : ZnWriteResPair<WriteShiftDouble, [ZnALU], 1>;
+
+defm : X86WriteRes<WriteSHDrri, [ZnALU], 1, [1], 1>;
+defm : X86WriteResUnsupported<WriteSHDrrcl>;
+defm : X86WriteResUnsupported<WriteSHDmri>;
+defm : X86WriteResUnsupported<WriteSHDmrcl>;
+
defm : ZnWriteResPair<WriteJump, [ZnALU], 1>;
defm : ZnWriteResFpuPair<WriteCRC32, [ZnFPU0], 3>;
@@ -193,6 +198,7 @@ defm : ZnWriteResPair<WriteCMOV2, [ZnALU], 1>;
def : WriteRes<WriteSETCC, [ZnALU]>;
def : WriteRes<WriteSETCCStore, [ZnALU, ZnAGU]>;
defm : X86WriteRes<WriteLAHFSAHF, [ZnALU], 2, [1], 2>;
+def : WriteRes<WriteBitTest,[ZnALU]>;
// Bit counts.
defm : ZnWriteResPair<WriteBSF, [ZnALU], 3>;
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index fedb13f89e19..85e8256a6e94 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -51,7 +51,7 @@ enum Style {
} // end namespace PICStyles
class X86Subtarget final : public X86GenSubtargetInfo {
-public:
+public:
enum X86ProcFamilyEnum {
Others,
IntelAtom,
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index bae2ef80c365..865462622627 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2274,8 +2274,8 @@ int X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
// Sign-extend all constants to a multiple of 64-bit.
APInt ImmVal = Imm;
- if (BitSize & 0x3f)
- ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
+ if (BitSize % 64 != 0)
+ ImmVal = Imm.sext(alignTo(BitSize, 64));
// Split the constant into 64-bit chunks and calculate the cost for each
// chunk.
@@ -2332,9 +2332,15 @@ int X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
// immediates here as the normal path expects bit 31 to be sign extended.
if (Idx == 1 && Imm.getBitWidth() == 64 && isUInt<32>(Imm.getZExtValue()))
return TTI::TCC_Free;
- LLVM_FALLTHROUGH;
+ ImmIdx = 1;
+ break;
case Instruction::Add:
case Instruction::Sub:
+ // For add/sub, we can use the opposite instruction for INT32_MIN.
+ if (Idx == 1 && Imm.getBitWidth() == 64 && Imm.getZExtValue() == 0x80000000)
+ return TTI::TCC_Free;
+ ImmIdx = 1;
+ break;
case Instruction::Mul:
case Instruction::UDiv:
case Instruction::SDiv:
@@ -2366,7 +2372,7 @@ int X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
}
if (Idx == ImmIdx) {
- int NumConstants = (BitSize + 63) / 64;
+ int NumConstants = divideCeil(BitSize, 64);
int Cost = X86TTIImpl::getIntImmCost(Imm, Ty);
return (Cost <= NumConstants * TTI::TCC_Basic)
? static_cast<int>(TTI::TCC_Free)
diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp
index 8f7c8a82380a..916bca6392de 100644
--- a/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -146,7 +146,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
}
EmitAlignment(Align > 2 ? Align : 2, GV);
-
+
if (GV->isThreadLocal()) {
report_fatal_error("TLS is not supported by this target!");
}
@@ -162,7 +162,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// are padded to 32 bits.
if (Size < 4)
OutStreamer->EmitZeros(4 - Size);
-
+
// Mark the end of the global
getTargetStreamer().emitCCBottomData(GVSym->getName());
}
@@ -295,6 +295,6 @@ void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
// Force static initialization.
-extern "C" void LLVMInitializeXCoreAsmPrinter() {
+extern "C" void LLVMInitializeXCoreAsmPrinter() {
RegisterAsmPrinter<XCoreAsmPrinter> X(getTheXCoreTarget());
}
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index d5e276788f71..b0de048672df 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -63,7 +63,7 @@ static bool isZeroImm(const MachineOperand &op) {
unsigned XCoreInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
int Opcode = MI.getOpcode();
- if (Opcode == XCore::LDWFI)
+ if (Opcode == XCore::LDWFI)
{
if ((MI.getOperand(1).isFI()) && // is a stack slot
(MI.getOperand(2).isImm()) && // the imm is zero
@@ -74,7 +74,7 @@ unsigned XCoreInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
}
return 0;
}
-
+
/// isStoreToStackSlot - If the specified machine instruction is a direct
/// store to a stack slot, return the virtual or physical register number of
/// the source reg along with the FrameIndex of the loaded stack slot. If
@@ -129,9 +129,9 @@ static inline bool IsBR_JT(unsigned BrOpc) {
|| BrOpc == XCore::BR_JT32;
}
-/// GetCondFromBranchOpc - Return the XCore CC that matches
+/// GetCondFromBranchOpc - Return the XCore CC that matches
/// the correspondent Branch instruction opcode.
-static XCore::CondCode GetCondFromBranchOpc(unsigned BrOpc)
+static XCore::CondCode GetCondFromBranchOpc(unsigned BrOpc)
{
if (IsBRT(BrOpc)) {
return XCore::COND_TRUE;
@@ -144,7 +144,7 @@ static XCore::CondCode GetCondFromBranchOpc(unsigned BrOpc)
/// GetCondBranchFromCond - Return the Branch instruction
/// opcode that matches the cc.
-static inline unsigned GetCondBranchFromCond(XCore::CondCode CC)
+static inline unsigned GetCondBranchFromCond(XCore::CondCode CC)
{
switch (CC) {
default: llvm_unreachable("Illegal condition code!");
@@ -153,7 +153,7 @@ static inline unsigned GetCondBranchFromCond(XCore::CondCode CC)
}
}
-/// GetOppositeBranchCondition - Return the inverse of the specified
+/// GetOppositeBranchCondition - Return the inverse of the specified
/// condition, e.g. turning COND_E to COND_NE.
static inline XCore::CondCode GetOppositeBranchCondition(XCore::CondCode CC)
{
@@ -209,11 +209,11 @@ bool XCoreInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
TBB = LastInst->getOperand(0).getMBB();
return false;
}
-
+
XCore::CondCode BranchCode = GetCondFromBranchOpc(LastInst->getOpcode());
if (BranchCode == XCore::COND_INVALID)
return true; // Can't handle indirect branch.
-
+
// Conditional branch
// Block ends with fall-through condbranch.
@@ -222,17 +222,17 @@ bool XCoreInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
Cond.push_back(LastInst->getOperand(0));
return false;
}
-
+
// Get the instruction before it if it's a terminator.
MachineInstr *SecondLastInst = &*I;
// If there are three terminators, we don't know what sort of block this is.
if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
return true;
-
+
unsigned SecondLastOpc = SecondLastInst->getOpcode();
XCore::CondCode BranchCode = GetCondFromBranchOpc(SecondLastOpc);
-
+
// If the block ends with conditional branch followed by unconditional,
// handle it.
if (BranchCode != XCore::COND_INVALID
@@ -245,10 +245,10 @@ bool XCoreInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
FBB = LastInst->getOperand(0).getMBB();
return false;
}
-
+
// If the block ends with two unconditional branches, handle it. The second
// one is not executed, so remove it.
- if (IsBRU(SecondLastInst->getOpcode()) &&
+ if (IsBRU(SecondLastInst->getOpcode()) &&
IsBRU(LastInst->getOpcode())) {
TBB = SecondLastInst->getOperand(0).getMBB();
I = LastInst;
@@ -293,7 +293,7 @@ unsigned XCoreInstrInfo::insertBranch(MachineBasicBlock &MBB,
}
return 1;
}
-
+
// Two-way Conditional branch.
assert(Cond.size() == 2 && "Unexpected number of components!");
unsigned Opc = GetCondBranchFromCond((XCore::CondCode)Cond[0].getImm());
@@ -313,17 +313,17 @@ XCoreInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved) const {
if (!IsBRU(I->getOpcode()) && !IsCondBranch(I->getOpcode()))
return 0;
-
+
// Remove the branch.
I->eraseFromParent();
-
+
I = MBB.end();
if (I == MBB.begin()) return 1;
--I;
if (!IsCondBranch(I->getOpcode()))
return 1;
-
+
// Remove the branch.
I->eraseFromParent();
return 2;
@@ -342,7 +342,7 @@ void XCoreInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addImm(0);
return;
}
-
+
if (GRDest && SrcReg == XCore::SP) {
BuildMI(MBB, I, DL, get(XCore::LDAWSP_ru6), DestReg).addImm(0);
return;
diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.h b/lib/Target/XCore/XCoreMachineFunctionInfo.h
index cf469ec3cf1a..6c05ab3f10df 100644
--- a/lib/Target/XCore/XCoreMachineFunctionInfo.h
+++ b/lib/Target/XCore/XCoreMachineFunctionInfo.h
@@ -43,11 +43,11 @@ class XCoreFunctionInfo : public MachineFunctionInfo {
public:
XCoreFunctionInfo() = default;
-
+
explicit XCoreFunctionInfo(MachineFunction &MF) {}
-
+
~XCoreFunctionInfo() override = default;
-
+
void setVarArgsFrameIndex(int off) { VarArgsFrameIndex = off; }
int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index 1915aaedc35d..e119d9555f9d 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -296,12 +296,12 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// fold constant into offset.
Offset += MI.getOperand(FIOperandNum + 1).getImm();
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
-
+
assert(Offset%4 == 0 && "Misaligned stack offset");
LLVM_DEBUG(errs() << "Offset : " << Offset << "\n"
<< "<--------->\n");
Offset/=4;
-
+
unsigned Reg = MI.getOperand(0).getReg();
assert(XCore::GRRegsRegClass.contains(Reg) && "Unexpected register operand");
diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h
index c31f5d5a7c44..9451a05d8d58 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/lib/Target/XCore/XCoreRegisterInfo.h
@@ -32,7 +32,7 @@ public:
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
-
+
bool enableMultipleCopyHints() const override { return true; }
bool requiresRegisterScavenging(const MachineFunction &MF) const override;
diff --git a/lib/Target/XCore/XCoreSubtarget.h b/lib/Target/XCore/XCoreSubtarget.h
index 140ddba68aab..ed9936ebf2b8 100644
--- a/lib/Target/XCore/XCoreSubtarget.h
+++ b/lib/Target/XCore/XCoreSubtarget.h
@@ -43,7 +43,7 @@ public:
XCoreSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM);
- /// ParseSubtargetFeatures - Parses features string setting specified
+ /// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 31e771da3bd3..cd2bd734eb26 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -56,7 +56,7 @@ using namespace llvm;
STATISTIC(NumArgumentsEliminated, "Number of unread args removed");
STATISTIC(NumRetValsEliminated , "Number of unused return values removed");
-STATISTIC(NumArgumentsReplacedWithUndef,
+STATISTIC(NumArgumentsReplacedWithUndef,
"Number of unread args replaced with undef");
namespace {
@@ -109,7 +109,7 @@ namespace {
char DAH::ID = 0;
-INITIALIZE_PASS(DAH, "deadarghaX0r",
+INITIALIZE_PASS(DAH, "deadarghaX0r",
"Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)",
false, false)
@@ -256,7 +256,7 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) {
return true;
}
-/// RemoveDeadArgumentsFromCallers - Checks if the given function has any
+/// RemoveDeadArgumentsFromCallers - Checks if the given function has any
/// arguments that are unused, and changes the caller parameters to be undefined
/// instead.
bool DeadArgumentEliminationPass::RemoveDeadArgumentsFromCallers(Function &Fn) {
@@ -640,7 +640,7 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
Result = Live;
} else {
// See what the effect of this use is (recording any uses that cause
- // MaybeLive in MaybeLiveArgUses).
+ // MaybeLive in MaybeLiveArgUses).
Result = SurveyUses(&*AI, MaybeLiveArgUses);
}
@@ -777,7 +777,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
// argument.
// 2) Retain the 'returned' attribute and treat the return value (but not the
// entire function) as live so that it is not eliminated.
- //
+ //
// It's not clear in the general case which option is more profitable because,
// even in the absence of explicit uses of the return value, code generation
// is free to use the 'returned' attribute to do things like eliding
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 2797da6c0abd..010b0a29807d 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -617,7 +617,7 @@ static bool addArgumentAttrsFromCallsites(Function &F) {
if (!isGuaranteedToTransferExecutionToSuccessor(&I))
break;
}
-
+
return Changed;
}
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 1af7e6894777..1761d7faff57 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -357,6 +357,41 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
return Changed;
}
+static bool isSafeSROAElementUse(Value *V);
+
+/// Return true if the specified GEP is a safe user of a derived
+/// expression from a global that we want to SROA.
+static bool isSafeSROAGEP(User *U) {
+ // Check to see if this ConstantExpr GEP is SRA'able. In particular, we
+ // don't like < 3 operand CE's, and we don't like non-constant integer
+ // indices. This enforces that all uses are 'gep GV, 0, C, ...' for some
+ // value of C.
+ if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) ||
+ !cast<Constant>(U->getOperand(1))->isNullValue())
+ return false;
+
+ gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U);
+ ++GEPI; // Skip over the pointer index.
+
+ // For all other level we require that the indices are constant and inrange.
+ // In particular, consider: A[0][i]. We cannot know that the user isn't doing
+ // invalid things like allowing i to index an out-of-range subscript that
+ // accesses A[1]. This can also happen between different members of a struct
+ // in llvm IR.
+ for (; GEPI != E; ++GEPI) {
+ if (GEPI.isStruct())
+ continue;
+
+ ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
+ if (!IdxVal || (GEPI.isBoundedSequential() &&
+ IdxVal->getZExtValue() >= GEPI.getSequentialNumElements()))
+ return false;
+ }
+
+ return llvm::all_of(U->users(),
+ [](User *UU) { return isSafeSROAElementUse(UU); });
+}
+
/// Return true if the specified instruction is a safe user of a derived
/// expression from a global that we want to SROA.
static bool isSafeSROAElementUse(Value *V) {
@@ -374,84 +409,25 @@ static bool isSafeSROAElementUse(Value *V) {
if (StoreInst *SI = dyn_cast<StoreInst>(I))
return SI->getOperand(0) != V;
- // Otherwise, it must be a GEP.
- GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I);
- if (!GEPI) return false;
-
- if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) ||
- !cast<Constant>(GEPI->getOperand(1))->isNullValue())
- return false;
-
- for (User *U : GEPI->users())
- if (!isSafeSROAElementUse(U))
- return false;
- return true;
-}
-
-/// U is a direct user of the specified global value. Look at it and its uses
-/// and decide whether it is safe to SROA this global.
-static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
- // The user of the global must be a GEP Inst or a ConstantExpr GEP.
- if (!isa<GetElementPtrInst>(U) &&
- (!isa<ConstantExpr>(U) ||
- cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
- return false;
-
- // Check to see if this ConstantExpr GEP is SRA'able. In particular, we
- // don't like < 3 operand CE's, and we don't like non-constant integer
- // indices. This enforces that all uses are 'gep GV, 0, C, ...' for some
- // value of C.
- if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) ||
- !cast<Constant>(U->getOperand(1))->isNullValue() ||
- !isa<ConstantInt>(U->getOperand(2)))
- return false;
-
- gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U);
- ++GEPI; // Skip over the pointer index.
-
- // If this is a use of an array allocation, do a bit more checking for sanity.
- if (GEPI.isSequential()) {
- ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2));
-
- // Check to make sure that index falls within the array. If not,
- // something funny is going on, so we won't do the optimization.
- //
- if (GEPI.isBoundedSequential() &&
- Idx->getZExtValue() >= GEPI.getSequentialNumElements())
- return false;
-
- // We cannot scalar repl this level of the array unless any array
- // sub-indices are in-range constants. In particular, consider:
- // A[0][i]. We cannot know that the user isn't doing invalid things like
- // allowing i to index an out-of-range subscript that accesses A[1].
- //
- // Scalar replacing *just* the outer index of the array is probably not
- // going to be a win anyway, so just give up.
- for (++GEPI; // Skip array index.
- GEPI != E;
- ++GEPI) {
- if (GEPI.isStruct())
- continue;
-
- ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
- if (!IdxVal ||
- (GEPI.isBoundedSequential() &&
- IdxVal->getZExtValue() >= GEPI.getSequentialNumElements()))
- return false;
- }
- }
-
- return llvm::all_of(U->users(),
- [](User *UU) { return isSafeSROAElementUse(UU); });
+ // Otherwise, it must be a GEP. Check it and its users are safe to SRA.
+ return isa<GetElementPtrInst>(I) && isSafeSROAGEP(I);
}
/// Look at all uses of the global and decide whether it is safe for us to
/// perform this transformation.
static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
- for (User *U : GV->users())
- if (!IsUserOfGlobalSafeForSRA(U, GV))
+ for (User *U : GV->users()) {
+ // The user of the global must be a GEP Inst or a ConstantExpr GEP.
+ if (!isa<GetElementPtrInst>(U) &&
+ (!isa<ConstantExpr>(U) ||
+ cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
return false;
+ // Check the gep and it's users are safe to SRA
+ if (!isSafeSROAGEP(U))
+ return false;
+ }
+
return true;
}
diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp
index f79b61037f1d..7d55ebecbf92 100644
--- a/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -61,12 +61,12 @@ static bool PropagateConstantsIntoArguments(Function &F) {
User *UR = U.getUser();
// Ignore blockaddress uses.
if (isa<BlockAddress>(UR)) continue;
-
+
// Used by a non-instruction, or not the callee of a function, do not
// transform.
if (!isa<CallInst>(UR) && !isa<InvokeInst>(UR))
return false;
-
+
CallSite CS(cast<Instruction>(UR));
if (!CS.isCallee(&U))
return false;
@@ -77,11 +77,11 @@ static bool PropagateConstantsIntoArguments(Function &F) {
Function::arg_iterator Arg = F.arg_begin();
for (unsigned i = 0, e = ArgumentConstants.size(); i != e;
++i, ++AI, ++Arg) {
-
+
// If this argument is known non-constant, ignore it.
if (ArgumentConstants[i].second)
continue;
-
+
Constant *C = dyn_cast<Constant>(*AI);
if (C && ArgumentConstants[i].first == nullptr) {
ArgumentConstants[i].first = C; // First constant seen.
@@ -108,7 +108,7 @@ static bool PropagateConstantsIntoArguments(Function &F) {
if (ArgumentConstants[i].second || AI->use_empty() ||
AI->hasInAllocaAttr() || (AI->hasByValAttr() && !F.onlyReadsMemory()))
continue;
-
+
Value *V = ArgumentConstants[i].first;
if (!V) V = UndefValue::get(AI->getType());
AI->replaceAllUsesWith(V);
@@ -147,7 +147,7 @@ static bool PropagateConstantReturn(Function &F) {
SmallVector<Value *,4> RetVals;
StructType *STy = dyn_cast<StructType>(F.getReturnType());
if (STy)
- for (unsigned i = 0, e = STy->getNumElements(); i < e; ++i)
+ for (unsigned i = 0, e = STy->getNumElements(); i < e; ++i)
RetVals.push_back(UndefValue::get(STy->getElementType(i)));
else
RetVals.push_back(UndefValue::get(F.getReturnType()));
@@ -172,7 +172,7 @@ static bool PropagateConstantReturn(Function &F) {
// Ignore undefs, we can change them into anything
if (isa<UndefValue>(V))
continue;
-
+
// Try to see if all the rets return the same constant or argument.
if (isa<Constant>(V) || isa<Argument>(V)) {
if (isa<UndefValue>(RV)) {
@@ -206,7 +206,7 @@ static bool PropagateConstantReturn(Function &F) {
// directly?
if (!Call || !CS.isCallee(&U))
continue;
-
+
// Call result not used?
if (Call->use_empty())
continue;
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 139941127dee..3bebb96c6d35 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -27,7 +27,7 @@
// -- We define Function* container class with custom "operator<" (FunctionPtr).
// -- "FunctionPtr" instances are stored in std::set collection, so every
// std::set::insert operation will give you result in log(N) time.
-//
+//
// As an optimization, a hash of the function structure is calculated first, and
// two functions are only compared if they have the same hash. This hash is
// cheap to compute, and has the property that if function F == G according to
@@ -383,7 +383,7 @@ bool MergeFunctions::runOnModule(Module &M) {
for (Function &Func : M) {
if (!Func.isDeclaration() && !Func.hasAvailableExternallyLinkage()) {
HashedFuncs.push_back({FunctionComparator::functionHash(Func), &Func});
- }
+ }
}
std::stable_sort(
@@ -402,7 +402,7 @@ bool MergeFunctions::runOnModule(Module &M) {
Deferred.push_back(WeakTrackingVH(I->second));
}
}
-
+
do {
std::vector<WeakTrackingVH> Worklist;
Deferred.swap(Worklist);
@@ -802,11 +802,11 @@ void MergeFunctions::replaceFunctionInTree(const FunctionNode &FN,
Function *F = FN.getFunc();
assert(FunctionComparator(F, G, &GlobalNumbers).compare() == 0 &&
"The two functions must be equal");
-
+
auto I = FNodesInTree.find(F);
assert(I != FNodesInTree.end() && "F should be in FNodesInTree");
assert(FNodesInTree.count(G) == 0 && "FNodesInTree should not contain G");
-
+
FnTreeType::iterator IterToFNInFnTree = I->second;
assert(&(*IterToFNInFnTree) == &FN && "F should map to FN in FNodesInTree.");
// Remove F -> FN and insert G -> FN
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index 27d791857314..2be654258aa8 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -77,13 +77,13 @@ static bool runImpl(CallGraphSCC &SCC, CallGraph &CG) {
// Next, check to see if any callees might throw or if there are any external
// functions in this SCC: if so, we cannot prune any functions in this SCC.
- // Definitions that are weak and not declared non-throwing might be
+ // Definitions that are weak and not declared non-throwing might be
// overridden at linktime with something that throws, so assume that.
// If this SCC includes the unwind instruction, we KNOW it throws, so
// obviously the SCC might throw.
//
bool SCCMightUnwind = false, SCCMightReturn = false;
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end();
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end();
(!SCCMightUnwind || !SCCMightReturn) && I != E; ++I) {
Function *F = (*I)->getFunction();
if (!F) {
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index aa31e0d850dd..83054588a9aa 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -926,7 +926,13 @@ Instruction *InstCombiner::foldAddWithConstant(BinaryOperator &Add) {
if (Instruction *NV = foldBinOpIntoSelectOrPhi(Add))
return NV;
- Value *X;
+ Value *X, *Y;
+
+ // add (sub X, Y), -1 --> add (not Y), X
+ if (match(Op0, m_OneUse(m_Sub(m_Value(X), m_Value(Y)))) &&
+ match(Op1, m_AllOnes()))
+ return BinaryOperator::CreateAdd(Builder.CreateNot(Y), X);
+
// zext(bool) + C -> bool ? C + 1 : C
if (match(Op0, m_ZExt(m_Value(X))) &&
X->getType()->getScalarSizeInBits() == 1)
@@ -1608,6 +1614,14 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
if (match(Op0, m_Not(m_Value(X))) && match(Op1, m_Not(m_Value(Y))))
return BinaryOperator::CreateSub(Y, X);
+ // (X + -1) - Y --> ~Y + X
+ if (match(Op0, m_OneUse(m_Add(m_Value(X), m_AllOnes()))))
+ return BinaryOperator::CreateAdd(Builder.CreateNot(Op1), X);
+
+ // Y - (X + 1) --> ~X + Y
+ if (match(Op1, m_OneUse(m_Add(m_Value(X), m_One()))))
+ return BinaryOperator::CreateAdd(Builder.CreateNot(X), Op0);
+
if (Constant *C = dyn_cast<Constant>(Op0)) {
bool IsNegate = match(C, m_ZeroInt());
Value *X;
@@ -1858,7 +1872,7 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
Constant *C;
if (match(Op1, m_Constant(C)) && !isa<ConstantExpr>(Op1))
return BinaryOperator::CreateFAddFMF(Op0, ConstantExpr::getFNeg(C), &I);
-
+
// X - (-Y) --> X + Y
if (match(Op1, m_FNeg(m_Value(Y))))
return BinaryOperator::CreateFAddFMF(Op0, Y, &I);
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 372bc41f780e..3d758e2fe7c9 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1550,31 +1550,13 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
return DeMorgan;
{
- Value *A = nullptr, *B = nullptr, *C = nullptr;
- // A&(A^B) => A & ~B
- {
- Value *tmpOp0 = Op0;
- Value *tmpOp1 = Op1;
- if (match(Op0, m_OneUse(m_Xor(m_Value(A), m_Value(B))))) {
- if (A == Op1 || B == Op1 ) {
- tmpOp1 = Op0;
- tmpOp0 = Op1;
- // Simplify below
- }
- }
-
- if (match(tmpOp1, m_OneUse(m_Xor(m_Value(A), m_Value(B))))) {
- if (B == tmpOp0) {
- std::swap(A, B);
- }
- // Notice that the pattern (A&(~B)) is actually (A&(-1^B)), so if
- // A is originally -1 (or a vector of -1 and undefs), then we enter
- // an endless loop. By checking that A is non-constant we ensure that
- // we will never get to the loop.
- if (A == tmpOp0 && !isa<Constant>(A)) // A&(A^B) -> A & ~B
- return BinaryOperator::CreateAnd(A, Builder.CreateNot(B));
- }
- }
+ Value *A, *B, *C;
+ // A & (A ^ B) --> A & ~B
+ if (match(Op1, m_OneUse(m_c_Xor(m_Specific(Op0), m_Value(B)))))
+ return BinaryOperator::CreateAnd(Op0, Builder.CreateNot(B));
+ // (A ^ B) & A --> A & ~B
+ if (match(Op0, m_OneUse(m_c_Xor(m_Specific(Op1), m_Value(B)))))
+ return BinaryOperator::CreateAnd(Op1, Builder.CreateNot(B));
// (A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C
if (match(Op0, m_Xor(m_Value(A), m_Value(B))))
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index e8ea7396a96a..fd59c3a7c0c3 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -2243,6 +2243,12 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
Type *DstElTy = DstPTy->getElementType();
Type *SrcElTy = SrcPTy->getElementType();
+ // Casting pointers between the same type, but with different address spaces
+ // is an addrspace cast rather than a bitcast.
+ if ((DstElTy == SrcElTy) &&
+ (DstPTy->getAddressSpace() != SrcPTy->getAddressSpace()))
+ return new AddrSpaceCastInst(Src, DestTy);
+
// If we are casting a alloca to a pointer to a type of the same
// size, rewrite the allocation instruction to allocate the "right" type.
// There is no need to modify malloc calls because it is their bitcast that
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 742caf649007..62769f077b47 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -518,7 +518,7 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value *V) {
assert((!SI.isAtomic() || isSupportedAtomicType(V->getType())) &&
"can't fold an atomic store of requested type");
-
+
Value *Ptr = SI.getPointerOperand();
unsigned AS = SI.getPointerAddressSpace();
SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 4867808478a3..796b4021d273 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -54,6 +54,36 @@ static Value *createMinMax(InstCombiner::BuilderTy &Builder,
return Builder.CreateSelect(Builder.CreateICmp(Pred, A, B), A, B);
}
+/// Fold
+/// %A = icmp eq/ne i8 %x, 0
+/// %B = op i8 %x, %z
+/// %C = select i1 %A, i8 %B, i8 %y
+/// To
+/// %C = select i1 %A, i8 %z, i8 %y
+/// OP: binop with an identity constant
+/// TODO: support for non-commutative and FP opcodes
+static Instruction *foldSelectBinOpIdentity(SelectInst &Sel) {
+
+ Value *Cond = Sel.getCondition();
+ Value *X, *Z;
+ Constant *C;
+ CmpInst::Predicate Pred;
+ if (!match(Cond, m_ICmp(Pred, m_Value(X), m_Constant(C))) ||
+ !ICmpInst::isEquality(Pred))
+ return nullptr;
+
+ bool IsEq = Pred == ICmpInst::ICMP_EQ;
+ auto *BO =
+ dyn_cast<BinaryOperator>(IsEq ? Sel.getTrueValue() : Sel.getFalseValue());
+ // TODO: support for undefs
+ if (BO && match(BO, m_c_BinOp(m_Specific(X), m_Value(Z))) &&
+ ConstantExpr::getBinOpIdentity(BO->getOpcode(), X->getType()) == C) {
+ Sel.setOperand(IsEq ? 1 : 2, Z);
+ return &Sel;
+ }
+ return nullptr;
+}
+
/// This folds:
/// select (icmp eq (and X, C1)), TC, FC
/// iff C1 is a power 2 and the difference between TC and FC is a power-of-2.
@@ -1961,5 +1991,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (Instruction *Select = foldSelectCmpXchg(SI))
return Select;
+ if (Instruction *Select = foldSelectBinOpIdentity(SI))
+ return Select;
+
return nullptr;
}
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 34f8037e519f..1ca75f3989d4 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -570,7 +570,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
m_OneUse(m_BinOp(FBO))))) {
const APInt *C;
if (!isa<Constant>(TrueVal) && FBO->getOperand(0) == TrueVal &&
- match(FBO->getOperand(1), m_APInt(C)) &&
+ match(FBO->getOperand(1), m_APInt(C)) &&
canShiftBinOpWithConstantRHS(I, FBO, *C)) {
Constant *NewRHS = ConstantExpr::get(I.getOpcode(),
cast<Constant>(FBO->getOperand(1)), Op1);
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 2560feb37d66..1c2de6352fa5 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -605,7 +605,7 @@ static Instruction *foldInsSequenceIntoBroadcast(InsertElementInst &InsElt) {
return nullptr;
Value *SplatVal = InsElt.getOperand(1);
- InsertElementInst *CurrIE = &InsElt;
+ InsertElementInst *CurrIE = &InsElt;
SmallVector<bool, 16> ElementPresent(NumElements, false);
InsertElementInst *FirstIE = nullptr;
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 12fcc8752ea9..cff0d5447290 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1424,7 +1424,7 @@ Instruction *InstCombiner::foldShuffledBinop(BinaryOperator &Inst) {
bool ConstOp1 = isa<Constant>(Inst.getOperand(1));
if (Inst.isIntDivRem() || (Inst.isShift() && ConstOp1))
NewC = getSafeVectorConstantForBinop(Inst.getOpcode(), NewC, ConstOp1);
-
+
// Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
// Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
Value *NewLHS = isa<Constant>(LHS) ? NewC : V1;
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index b3f659194558..6af44354225c 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -2464,10 +2464,10 @@ bool AddressSanitizer::runOnFunction(Function &F) {
// If needed, insert __asan_init before checking for SanitizeAddress attr.
// This function needs to be called even if the function body is not
- // instrumented.
+ // instrumented.
if (maybeInsertAsanInitAtFunctionEntry(F))
FunctionModified = true;
-
+
// Leave if the function doesn't need instrumentation.
if (!F.hasFnAttribute(Attribute::SanitizeAddress)) return FunctionModified;
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index acd27c2e226f..132e8089fe3b 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -148,7 +148,7 @@ public:
}
StringRef getPassName() const override { return "GCOV Profiler"; }
- bool runOnModule(Module &M) override {
+ bool runOnModule(Module &M) override {
auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
return Profiler.runOnModule(M, TLI);
}
diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 22076f04d6ad..4d5dfb0aa66b 100644
--- a/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -898,7 +898,7 @@ void InstrProfiling::emitRegistration() {
IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", RegisterF));
for (Value *Data : UsedVars)
- if (Data != NamesVar)
+ if (Data != NamesVar && !isa<Function>(Data))
IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy));
if (NamesVar) {
diff --git a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index fa7bcec677f7..0830ff5dd042 100644
--- a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -280,7 +280,7 @@ bool AlignmentFromAssumptionsPass::extractAlignmentInfo(CallInst *I,
return false;
// Sign extend the offset to 64 bits (so that it is like all of the other
- // expressions).
+ // expressions).
unsigned OffSCEVBits = OffSCEV->getType()->getPrimitiveSizeInBits();
if (OffSCEVBits < 64)
OffSCEV = SE->getSignExtendExpr(OffSCEV, Int64Ty);
diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp
index 3a675b979017..55759e8b1661 100644
--- a/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -781,7 +781,7 @@ bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI,
this->TTI = &TTI;
this->DT = &DT;
this->BFI = BFI;
- this->Entry = &Entry;
+ this->Entry = &Entry;
// Collect all constant candidates.
collectConstantCandidates(Fn);
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index ea148b728a10..2f2d7f620a29 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -473,7 +473,7 @@ static bool processCallSite(CallSite CS, LazyValueInfo *LVI) {
// relatively expensive analysis for constants which are obviously either
// null or non-null to start with.
if (Type && !CS.paramHasAttr(ArgNo, Attribute::NonNull) &&
- !isa<Constant>(V) &&
+ !isa<Constant>(V) &&
LVI->getPredicateAt(ICmpInst::ICMP_EQ, V,
ConstantPointerNull::get(Type),
CS.getInstruction()) == LazyValueInfo::False)
@@ -670,12 +670,12 @@ static Constant *getConstantAt(Value *V, Instruction *At, LazyValueInfo *LVI) {
Value *Op0 = C->getOperand(0);
Constant *Op1 = dyn_cast<Constant>(C->getOperand(1));
if (!Op1) return nullptr;
-
+
LazyValueInfo::Tristate Result =
LVI->getPredicateAt(C->getPredicate(), Op0, Op1, At);
if (Result == LazyValueInfo::Unknown)
return nullptr;
-
+
return (Result == LazyValueInfo::True) ?
ConstantInt::getTrue(C->getContext()) :
ConstantInt::getFalse(C->getContext());
@@ -747,7 +747,7 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT,
if (auto *C = getConstantAt(RetVal, RI, LVI)) {
++NumReturns;
RI->replaceUsesOfWith(RetVal, C);
- BBChanged = true;
+ BBChanged = true;
}
}
}
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index dd1a2a6adb82..9a7405e98e7d 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -188,7 +188,7 @@ static bool hasAnalyzableMemoryWrite(Instruction *I,
/// returns true, this function and getLocForRead completely describe the memory
/// operations for this instruction.
static MemoryLocation getLocForWrite(Instruction *Inst) {
-
+
if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
return MemoryLocation::get(SI);
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index 565745d12e99..533d16e088c8 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -384,7 +384,7 @@ public:
LoadMapAllocator>;
LoadHTType AvailableLoads;
-
+
// A scoped hash table mapping memory locations (represented as typed
// addresses) to generation numbers at which that memory location became
// (henceforth indefinitely) invariant.
@@ -844,7 +844,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// start a scope in the current generaton which is true for all future
// generations. Also, we dont need to consume the last store since the
// semantics of invariant.start allow us to perform DSE of the last
- // store, if there was a store following invariant.start. Consider:
+ // store, if there was a store following invariant.start. Consider:
//
// store 30, i8* p
// invariant.start(p)
@@ -852,7 +852,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// We can DSE the store to 30, since the store 40 to invariant location p
// causes undefined behaviour.
if (match(Inst, m_Intrinsic<Intrinsic::invariant_start>())) {
- // If there are any uses, the scope might end.
+ // If there are any uses, the scope might end.
if (!Inst->use_empty())
continue;
auto *CI = cast<CallInst>(Inst);
diff --git a/lib/Transforms/Scalar/GVNSink.cpp b/lib/Transforms/Scalar/GVNSink.cpp
index 28c5940db1e0..8959038de596 100644
--- a/lib/Transforms/Scalar/GVNSink.cpp
+++ b/lib/Transforms/Scalar/GVNSink.cpp
@@ -568,7 +568,7 @@ public:
ReversePostOrderTraversal<Function*> RPOT(&F);
for (auto *N : RPOT)
NumSunk += sinkBB(N);
-
+
return NumSunk > 0;
}
diff --git a/lib/Transforms/Scalar/GuardWidening.cpp b/lib/Transforms/Scalar/GuardWidening.cpp
index ad1598d7b8bf..055fcbc8436f 100644
--- a/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/lib/Transforms/Scalar/GuardWidening.cpp
@@ -43,6 +43,7 @@
#include <functional>
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/PostDominators.h"
@@ -61,6 +62,8 @@ using namespace llvm;
#define DEBUG_TYPE "guard-widening"
+STATISTIC(GuardsEliminated, "Number of eliminated guards");
+
namespace {
class GuardWideningImpl {
@@ -75,21 +78,33 @@ class GuardWideningImpl {
/// The set of guards whose conditions have been widened into dominating
/// guards.
- SmallVector<IntrinsicInst *, 16> EliminatedGuards;
+ SmallVector<Instruction *, 16> EliminatedGuards;
/// The set of guards which have been widened to include conditions to other
/// guards.
- DenseSet<IntrinsicInst *> WidenedGuards;
+ DenseSet<Instruction *> WidenedGuards;
/// Try to eliminate guard \p Guard by widening it into an earlier dominating
/// guard. \p DFSI is the DFS iterator on the dominator tree that is
/// currently visiting the block containing \p Guard, and \p GuardsPerBlock
/// maps BasicBlocks to the set of guards seen in that block.
bool eliminateGuardViaWidening(
- IntrinsicInst *Guard, const df_iterator<DomTreeNode *> &DFSI,
- const DenseMap<BasicBlock *, SmallVector<IntrinsicInst *, 8>> &
+ Instruction *Guard, const df_iterator<DomTreeNode *> &DFSI,
+ const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> &
GuardsPerBlock);
+ // Get the condition from \p GuardInst.
+ Value *getGuardCondition(Instruction *GuardInst);
+
+ // Set the condition for \p GuardInst.
+ void setGuardCondition(Instruction *GuardInst, Value *NewCond);
+
+ // Whether or not the particular instruction is a guard.
+ bool isGuard(const Instruction *I);
+
+ // Eliminates the guard instruction properly.
+ void eliminateGuard(Instruction *GuardInst);
+
/// Used to keep track of which widening potential is more effective.
enum WideningScore {
/// Don't widen.
@@ -113,9 +128,9 @@ class GuardWideningImpl {
/// Compute the score for widening the condition in \p DominatedGuard
/// (contained in \p DominatedGuardLoop) into \p DominatingGuard (contained in
/// \p DominatingGuardLoop).
- WideningScore computeWideningScore(IntrinsicInst *DominatedGuard,
+ WideningScore computeWideningScore(Instruction *DominatedGuard,
Loop *DominatedGuardLoop,
- IntrinsicInst *DominatingGuard,
+ Instruction *DominatingGuard,
Loop *DominatingGuardLoop);
/// Helper to check if \p V can be hoisted to \p InsertPos.
@@ -206,10 +221,10 @@ class GuardWideningImpl {
/// Widen \p ToWiden to fail if \p NewCondition is false (in addition to
/// whatever it is already checking).
- void widenGuard(IntrinsicInst *ToWiden, Value *NewCondition) {
+ void widenGuard(Instruction *ToWiden, Value *NewCondition) {
Value *Result;
- widenCondCommon(ToWiden->getArgOperand(0), NewCondition, ToWiden, Result);
- ToWiden->setArgOperand(0, Result);
+ widenCondCommon(ToWiden->getOperand(0), NewCondition, ToWiden, Result);
+ setGuardCondition(ToWiden, Result);
}
public:
@@ -225,9 +240,7 @@ public:
}
bool GuardWideningImpl::run() {
- using namespace llvm::PatternMatch;
-
- DenseMap<BasicBlock *, SmallVector<IntrinsicInst *, 8>> GuardsInBlock;
+ DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> GuardsInBlock;
bool Changed = false;
for (auto DFI = df_begin(Root), DFE = df_end(Root);
@@ -239,8 +252,8 @@ bool GuardWideningImpl::run() {
auto &CurrentList = GuardsInBlock[BB];
for (auto &I : *BB)
- if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>()))
- CurrentList.push_back(cast<IntrinsicInst>(&I));
+ if (isGuard(&I))
+ CurrentList.push_back(cast<Instruction>(&I));
for (auto *II : CurrentList)
Changed |= eliminateGuardViaWidening(II, DFI, GuardsInBlock);
@@ -249,16 +262,16 @@ bool GuardWideningImpl::run() {
assert(EliminatedGuards.empty() || Changed);
for (auto *II : EliminatedGuards)
if (!WidenedGuards.count(II))
- II->eraseFromParent();
+ eliminateGuard(II);
return Changed;
}
bool GuardWideningImpl::eliminateGuardViaWidening(
- IntrinsicInst *GuardInst, const df_iterator<DomTreeNode *> &DFSI,
- const DenseMap<BasicBlock *, SmallVector<IntrinsicInst *, 8>> &
+ Instruction *GuardInst, const df_iterator<DomTreeNode *> &DFSI,
+ const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> &
GuardsInBlock) {
- IntrinsicInst *BestSoFar = nullptr;
+ Instruction *BestSoFar = nullptr;
auto BestScoreSoFar = WS_IllegalOrNegative;
auto *GuardInstLoop = LI.getLoopFor(GuardInst->getParent());
@@ -302,8 +315,8 @@ bool GuardWideningImpl::eliminateGuardViaWidening(
for (auto *Candidate : make_range(I, E)) {
auto Score =
computeWideningScore(GuardInst, GuardInstLoop, Candidate, CurLoop);
- LLVM_DEBUG(dbgs() << "Score between " << *GuardInst->getArgOperand(0)
- << " and " << *Candidate->getArgOperand(0) << " is "
+ LLVM_DEBUG(dbgs() << "Score between " << *getGuardCondition(GuardInst)
+ << " and " << *getGuardCondition(Candidate) << " is "
<< scoreTypeToString(Score) << "\n");
if (Score > BestScoreSoFar) {
BestScoreSoFar = Score;
@@ -323,16 +336,41 @@ bool GuardWideningImpl::eliminateGuardViaWidening(
LLVM_DEBUG(dbgs() << "Widening " << *GuardInst << " into " << *BestSoFar
<< " with score " << scoreTypeToString(BestScoreSoFar)
<< "\n");
- widenGuard(BestSoFar, GuardInst->getArgOperand(0));
- GuardInst->setArgOperand(0, ConstantInt::getTrue(GuardInst->getContext()));
+ widenGuard(BestSoFar, getGuardCondition(GuardInst));
+ setGuardCondition(GuardInst, ConstantInt::getTrue(GuardInst->getContext()));
EliminatedGuards.push_back(GuardInst);
WidenedGuards.insert(BestSoFar);
return true;
}
+Value *GuardWideningImpl::getGuardCondition(Instruction *GuardInst) {
+ IntrinsicInst *GI = cast<IntrinsicInst>(GuardInst);
+ assert(GI->getIntrinsicID() == Intrinsic::experimental_guard &&
+ "Bad guard intrinsic?");
+ return GI->getArgOperand(0);
+}
+
+void GuardWideningImpl::setGuardCondition(Instruction *GuardInst,
+ Value *NewCond) {
+ IntrinsicInst *GI = cast<IntrinsicInst>(GuardInst);
+ assert(GI->getIntrinsicID() == Intrinsic::experimental_guard &&
+ "Bad guard intrinsic?");
+ GI->setArgOperand(0, NewCond);
+}
+
+bool GuardWideningImpl::isGuard(const Instruction* I) {
+ using namespace llvm::PatternMatch;
+ return match(I, m_Intrinsic<Intrinsic::experimental_guard>());
+}
+
+void GuardWideningImpl::eliminateGuard(Instruction *GuardInst) {
+ GuardInst->eraseFromParent();
+ ++GuardsEliminated;
+}
+
GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
- IntrinsicInst *DominatedGuard, Loop *DominatedGuardLoop,
- IntrinsicInst *DominatingGuard, Loop *DominatingGuardLoop) {
+ Instruction *DominatedGuard, Loop *DominatedGuardLoop,
+ Instruction *DominatingGuard, Loop *DominatingGuardLoop) {
bool HoistingOutOfLoop = false;
if (DominatingGuardLoop != DominatedGuardLoop) {
@@ -345,7 +383,7 @@ GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
HoistingOutOfLoop = true;
}
- if (!isAvailableAt(DominatedGuard->getArgOperand(0), DominatingGuard))
+ if (!isAvailableAt(getGuardCondition(DominatedGuard), DominatingGuard))
return WS_IllegalOrNegative;
// If the guard was conditional executed, it may never be reached
@@ -355,9 +393,9 @@ GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
// case. At the moment, we really only consider the second in our heuristic
// here. TODO: evaluate cost model for spurious deopt
// NOTE: As written, this also lets us hoist right over another guard which
- // is essentially just another spelling for control flow.
- if (isWideningCondProfitable(DominatedGuard->getArgOperand(0),
- DominatingGuard->getArgOperand(0)))
+ // is essentially just another spelling for control flow.
+ if (isWideningCondProfitable(getGuardCondition(DominatedGuard),
+ getGuardCondition(DominatingGuard)))
return HoistingOutOfLoop ? WS_VeryPositive : WS_Positive;
if (HoistingOutOfLoop)
@@ -369,7 +407,7 @@ GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
auto MaybeHoistingOutOfIf = [&]() {
auto *DominatingBlock = DominatingGuard->getParent();
auto *DominatedBlock = DominatedGuard->getParent();
-
+
// Same Block?
if (DominatedBlock == DominatingBlock)
return false;
diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index e2f29705f2dd..c5ed6d5c1b87 100644
--- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -735,7 +735,7 @@ static bool isSafeDecreasingBound(const SCEV *Start,
assert(LatchBrExitIdx == 0 &&
"LatchBrExitIdx should be either 0 or 1");
-
+
const SCEV *StepPlusOne = SE.getAddExpr(Step, SE.getOne(Step->getType()));
unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth();
APInt Min = IsSigned ? APInt::getSignedMinValue(BitWidth) :
@@ -786,7 +786,7 @@ static bool isSafeIncreasingBound(const SCEV *Start,
const SCEV *StepMinusOne =
SE.getMinusSCEV(Step, SE.getOne(Step->getType()));
unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth();
- APInt Max = IsSigned ? APInt::getSignedMaxValue(BitWidth) :
+ APInt Max = IsSigned ? APInt::getSignedMaxValue(BitWidth) :
APInt::getMaxValue(BitWidth);
const SCEV *Limit = SE.getMinusSCEV(SE.getConstant(Max), StepMinusOne);
@@ -798,7 +798,7 @@ static bool isSafeIncreasingBound(const SCEV *Start,
static bool CannotBeMinInLoop(const SCEV *BoundSCEV, Loop *L,
ScalarEvolution &SE, bool Signed) {
unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth();
- APInt Min = Signed ? APInt::getSignedMinValue(BitWidth) :
+ APInt Min = Signed ? APInt::getSignedMinValue(BitWidth) :
APInt::getMinValue(BitWidth);
auto Predicate = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
return SE.isAvailableAtLoopEntry(BoundSCEV, L) &&
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index ff66632f0391..c4ea43a43249 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -455,7 +455,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
// Keep track of whether the prefix of instructions visited so far are such
// that the next instruction visited is guaranteed to execute if the loop
- // is entered.
+ // is entered.
bool IsMustExecute = CurLoop->getHeader() == BB;
for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
@@ -1186,9 +1186,9 @@ bool isKnownNonEscaping(Value *Object, const TargetLibraryInfo *TLI) {
if (isa<AllocaInst>(Object))
// Since the alloca goes out of scope, we know the caller can't retain a
// reference to it and be well defined. Thus, we don't need to check for
- // capture.
+ // capture.
return true;
-
+
// For all other objects we need to know that the caller can't possibly
// have gotten a reference to the object. There are two components of
// that:
@@ -1282,7 +1282,7 @@ bool llvm::promoteLoopAccessesToScalars(
// That said, we can't actually make the unwind edge explicit. Therefore,
// we have to prove that the store is dead along the unwind edge. We do
// this by proving that the caller can't have a reference to the object
- // after return and thus can't possibly load from the object.
+ // after return and thus can't possibly load from the object.
Value *Object = GetUnderlyingObject(SomePtr, MDL);
if (!isKnownNonEscaping(Object, TLI))
return false;
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index d8692198f7a3..653948717fb9 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1573,7 +1573,7 @@ void LoopIdiomRecognize::transformLoopToCountable(
InitXNext =
Builder.CreateLShr(InitX, ConstantInt::get(InitX->getType(), 1));
else
- llvm_unreachable("Unexpected opcode!");
+ llvm_unreachable("Unexpected opcode!");
} else
InitXNext = InitX;
CTLZ = createCTLZIntrinsic(Builder, InitXNext, DL, ZeroCheck);
diff --git a/lib/Transforms/Scalar/LoopPredication.cpp b/lib/Transforms/Scalar/LoopPredication.cpp
index 561ceea1d880..cbb6594cf8f4 100644
--- a/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/lib/Transforms/Scalar/LoopPredication.cpp
@@ -74,7 +74,7 @@
// }
//
// One solution for M is M = forall X . (G(X) && B(X)) => G(X + Step)
-//
+//
// Informal proof that the transformation above is correct:
//
// By the definition of guards we can rewrite the guard condition to:
@@ -83,7 +83,7 @@
// Let's prove that for each iteration of the loop:
// G(0) && M => G(I)
// And the condition above can be simplified to G(Start) && M.
-//
+//
// Induction base.
// G(0) && M => G(0)
//
@@ -379,7 +379,7 @@ Value *LoopPredication::expandCheck(SCEVExpander &Expander,
ICmpInst::Predicate Pred, const SCEV *LHS,
const SCEV *RHS, Instruction *InsertAt) {
// TODO: we can check isLoopEntryGuardedByCond before emitting the check
-
+
Type *Ty = LHS->getType();
assert(Ty == RHS->getType() && "expandCheck operands have different types?");
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 634215c9770f..e955821effa0 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -888,7 +888,7 @@ bool llvm::computeUnrollCount(
UP.Count = 0;
return false;
}
-
+
// Check if the runtime trip count is too small when profile is available.
if (L->getHeader()->getParent()->hasProfileData()) {
if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
@@ -897,7 +897,7 @@ bool llvm::computeUnrollCount(
else
UP.AllowExpensiveTripCount = true;
}
- }
+ }
// Reduce count based on the type of unrolling and the threshold values.
UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index b12586758925..6aad077ff19e 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -708,7 +708,7 @@ bool LoopUnswitch::processCurrentLoop() {
// Unswitch only those branches that are reachable.
if (isUnreachableDueToPreviousUnswitching(*I))
continue;
-
+
// If this isn't branching on an invariant condition, we can't unswitch
// it.
if (BI->isConditional()) {
@@ -754,7 +754,7 @@ bool LoopUnswitch::processCurrentLoop() {
// We are unswitching ~0 out.
UnswitchVal = AllOne;
} else {
- assert(OpChain == OC_OpChainNone &&
+ assert(OpChain == OC_OpChainNone &&
"Expect to unswitch on trivial chain");
// Do not process same value again and again.
// At this point we have some cases already unswitched and
@@ -1440,11 +1440,11 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
// This in-loop instruction has been simplified w.r.t. its context,
// i.e. LIC != Val, make sure we propagate its replacement value to
// all its users.
- //
+ //
// We can not yet delete UI, the LIC user, yet, because that would invalidate
// the LIC->users() iterator !. However, we can make this instruction
// dead by replacing all its users and push it onto the worklist so that
- // it can be properly deleted and its operands simplified.
+ // it can be properly deleted and its operands simplified.
UI->replaceAllUsesWith(Replacement);
}
}
@@ -1609,7 +1609,7 @@ Value *LoopUnswitch::SimplifyInstructionWithNotEqual(Instruction *Inst,
LLVMContext &Ctx = Inst->getContext();
if (CI->getPredicate() == CmpInst::ICMP_EQ)
return ConstantInt::getFalse(Ctx);
- else
+ else
return ConstantInt::getTrue(Ctx);
}
}
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index 2eb887c986be..3e47e9441d15 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -2007,7 +2007,7 @@ NewGVN::performSymbolicEvaluation(Value *V,
case Instruction::Load:
E = performSymbolicLoadEvaluation(I);
break;
- case Instruction::BitCast:
+ case Instruction::BitCast:
E = createExpression(I);
break;
case Instruction::ICmp:
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index c81ac70d99e6..1df0a9c49fb1 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -1179,7 +1179,7 @@ static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd,
// and both "Res" and "ConstOpnd" remain unchanged.
bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
APInt &ConstOpnd, Value *&Res) {
- // Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2
+ // Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2
// = ((x | c1) ^ c1) ^ (c1 ^ c2)
// = (x & ~c1) ^ (c1 ^ c2)
// It is useful only when c1 == c2.
@@ -1202,12 +1202,12 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
RedoInsts.insert(T);
return true;
}
-
+
// Helper function of OptimizeXor(). It tries to simplify
// "Opnd1 ^ Opnd2 ^ ConstOpnd" into "R ^ C", where C would be 0, and R is a
-// symbolic value.
-//
-// If it was successful, true is returned, and the "R" and "C" is returned
+// symbolic value.
+//
+// If it was successful, true is returned, and the "R" and "C" is returned
// via "Res" and "ConstOpnd", respectively (If the entire expression is
// evaluated to a constant, the Res is set to NULL); otherwise, false is
// returned, and both "Res" and "ConstOpnd" remain unchanged.
@@ -1254,7 +1254,7 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
const APInt &C1 = Opnd1->getConstPart();
const APInt &C2 = Opnd2->getConstPart();
APInt C3 = C1 ^ C2;
-
+
// Do not increase code size
if (!C3.isNullValue() && !C3.isAllOnesValue()) {
int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
@@ -1290,7 +1290,7 @@ Value *ReassociatePass::OptimizeXor(Instruction *I,
SmallVectorImpl<ValueEntry> &Ops) {
if (Value *V = OptimizeAndOrXor(Instruction::Xor, Ops))
return V;
-
+
if (Ops.size() == 1)
return nullptr;
@@ -1365,7 +1365,7 @@ Value *ReassociatePass::OptimizeXor(Instruction *I,
}
// step 3.2: When previous and current operands share the same symbolic
- // value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd"
+ // value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd"
if (CombineXorOpnd(I, CurrOpnd, PrevOpnd, ConstOpnd, CV)) {
// Remove previous operand
PrevOpnd->Invalidate();
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 391e43f79121..0de2bc72b522 100644
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -401,7 +401,7 @@ namespace {
/// defining value. The 'base defining value' for 'Def' is the transitive
/// closure of this relation stopping at the first instruction which has no
/// immediate base defining value. The b.d.v. might itself be a base pointer,
-/// but it can also be an arbitrary derived pointer.
+/// but it can also be an arbitrary derived pointer.
struct BaseDefiningValueResult {
/// Contains the value which is the base defining value.
Value * const BDV;
@@ -427,13 +427,13 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I);
/// Return a base defining value for the 'Index' element of the given vector
/// instruction 'I'. If Index is null, returns a BDV for the entire vector
-/// 'I'. As an optimization, this method will try to determine when the
+/// 'I'. As an optimization, this method will try to determine when the
/// element is known to already be a base pointer. If this can be established,
/// the second value in the returned pair will be true. Note that either a
/// vector or a pointer typed value can be returned. For the former, the
/// vector returned is a BDV (and possibly a base) of the entire vector 'I'.
/// If the later, the return pointer is a BDV (or possibly a base) for the
-/// particular element in 'I'.
+/// particular element in 'I'.
static BaseDefiningValueResult
findBaseDefiningValueOfVector(Value *I) {
// Each case parallels findBaseDefiningValue below, see that code for
@@ -444,7 +444,7 @@ findBaseDefiningValueOfVector(Value *I) {
return BaseDefiningValueResult(I, true);
if (isa<Constant>(I))
- // Base of constant vector consists only of constant null pointers.
+ // Base of constant vector consists only of constant null pointers.
// For reasoning see similar case inside 'findBaseDefiningValue' function.
return BaseDefiningValueResult(ConstantAggregateZero::get(I->getType()),
true);
@@ -508,11 +508,11 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
if (isa<Constant>(I)) {
// We assume that objects with a constant base (e.g. a global) can't move
// and don't need to be reported to the collector because they are always
- // live. Besides global references, all kinds of constants (e.g. undef,
+ // live. Besides global references, all kinds of constants (e.g. undef,
// constant expressions, null pointers) can be introduced by the inliner or
// the optimizer, especially on dynamically dead paths.
// Here we treat all of them as having single null base. By doing this we
- // trying to avoid problems reporting various conflicts in a form of
+ // trying to avoid problems reporting various conflicts in a form of
// "phi (const1, const2)" or "phi (const, regular gc ptr)".
// See constant.ll file for relevant test cases.
@@ -1285,14 +1285,14 @@ static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
return Index;
};
Module *M = StatepointToken->getModule();
-
+
// All gc_relocate are generated as i8 addrspace(1)* (or a vector type whose
// element type is i8 addrspace(1)*). We originally generated unique
// declarations for each pointer type, but this proved problematic because
// the intrinsic mangling code is incomplete and fragile. Since we're moving
// towards a single unified pointer type anyways, we can just cast everything
// to an i8* of the right address space. A bitcast is added later to convert
- // gc_relocate to the actual value's type.
+ // gc_relocate to the actual value's type.
auto getGCRelocateDecl = [&] (Type *Ty) {
assert(isHandledGCPointerType(Ty));
auto AS = Ty->getScalarType()->getPointerAddressSpace();
@@ -1413,7 +1413,7 @@ static StringRef getDeoptLowering(CallSite CS) {
}
return "live-through";
}
-
+
static void
makeStatepointExplicitImpl(const CallSite CS, /* to replace */
const SmallVectorImpl<Value *> &BasePtrs,
@@ -2570,7 +2570,7 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
}
// Before we start introducing relocations, we want to tweak the IR a bit to
- // avoid unfortunate code generation effects. The main example is that we
+ // avoid unfortunate code generation effects. The main example is that we
// want to try to make sure the comparison feeding a branch is after any
// safepoints. Otherwise, we end up with a comparison of pre-relocation
// values feeding a branch after relocation. This is semantically correct,
@@ -2593,7 +2593,7 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
TerminatorInst *TI = BB.getTerminator();
if (auto *Cond = getConditionInst(TI))
// TODO: Handle more than just ICmps here. We should be able to move
- // most instructions without side effects or memory access.
+ // most instructions without side effects or memory access.
if (isa<ICmpInst>(Cond) && Cond->hasOneUse()) {
MadeChange = true;
Cond->moveBefore(TI);
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 6c3f012c6280..de16b608f752 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -3730,7 +3730,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
PartPtrTy, BasePtr->getName() + "."),
getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
LI->getName());
- PLoad->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access);
+ PLoad->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access);
// Append this load onto the list of split loads so we can find it later
// to rewrite the stores.
diff --git a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 34510cb40732..5834b619046b 100644
--- a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -459,9 +459,11 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
*ParentBB, *OldPH, FullUnswitch);
// Now we need to update the dominator tree.
- DT.insertEdge(OldPH, UnswitchedBB);
+ SmallVector<DominatorTree::UpdateType, 2> DTUpdates;
+ DTUpdates.push_back({DT.Insert, OldPH, UnswitchedBB});
if (FullUnswitch)
- DT.deleteEdge(ParentBB, UnswitchedBB);
+ DTUpdates.push_back({DT.Delete, ParentBB, LoopExitBB});
+ DT.applyUpdates(DTUpdates);
// The constant we can replace all of our invariants with inside the loop
// body. If any of the invariants have a value other than this the loop won't
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index 5f5c4150d3bb..d0396e6ce47d 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -911,7 +911,7 @@ static void appendTypeSuffix(Value *Op, StringRef &Name,
NameBuffer += 'l';
Name = NameBuffer;
- }
+ }
}
Value *llvm::emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
diff --git a/lib/Transforms/Utils/CallPromotionUtils.cpp b/lib/Transforms/Utils/CallPromotionUtils.cpp
index 4d9c22e57a68..6d18d0614611 100644
--- a/lib/Transforms/Utils/CallPromotionUtils.cpp
+++ b/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -392,7 +392,7 @@ Instruction *llvm::promoteCall(CallSite CS, Function *Callee,
auto CalleeType = Callee->getFunctionType();
auto CalleeParamNum = CalleeType->getNumParams();
for (unsigned ArgNo = 0; ArgNo < CalleeParamNum; ++ArgNo) {
- auto *Arg = CS.getArgument(ArgNo);
+ auto *Arg = CS.getArgument(ArgNo);
Type *FormalTy = CalleeType->getParamType(ArgNo);
Type *ActualTy = Arg->getType();
if (FormalTy != ActualTy) {
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 61448e9acb57..807360340055 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -290,7 +290,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
// Have we already cloned this block?
if (BBEntry) return;
-
+
// Nope, clone it now.
BasicBlock *NewBB;
BBEntry = NewBB = BasicBlock::Create(BB->getContext());
@@ -363,7 +363,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
hasDynamicAllocas = true;
}
}
-
+
// Finally, clone over the terminator.
const TerminatorInst *OldTI = BB->getTerminator();
bool TerminatorDone = false;
@@ -400,7 +400,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
TerminatorDone = true;
}
}
-
+
if (!TerminatorDone) {
Instruction *NewInst = OldTI->clone();
if (OldTI->hasName())
@@ -418,11 +418,11 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
for (const BasicBlock *Succ : TI->successors())
ToClone.push_back(Succ);
}
-
+
if (CodeInfo) {
CodeInfo->ContainsCalls |= hasCalls;
CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas;
- CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&
+ CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&
BB != &BB->getParent()->front();
}
}
@@ -468,7 +468,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
CloneWorklist.pop_back();
PFC.CloneBlock(BB, BB->begin(), CloneWorklist);
}
-
+
// Loop over all of the basic blocks in the old function. If the block was
// reachable, we have cloned it and the old block is now in the value map:
// insert it into the new function in the right order. If not, ignore it.
@@ -500,7 +500,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
TypeMapper, Materializer);
}
-
+
// Defer PHI resolution until rest of function is resolved, PHI resolution
// requires the CFG to be up-to-date.
for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) {
@@ -519,7 +519,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
Value *V = VMap.lookup(PN->getIncomingBlock(pred));
if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) {
Value *InVal = MapValue(PN->getIncomingValue(pred),
- VMap,
+ VMap,
ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
assert(InVal && "Unknown input value?");
PN->setIncomingValue(pred, InVal);
@@ -529,9 +529,9 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
--pred; // Revisit the next entry.
--e;
}
- }
+ }
}
-
+
// The loop above has removed PHI entries for those blocks that are dead
// and has updated others. However, if a block is live (i.e. copied over)
// but its terminator has been changed to not go to this block, then our
@@ -546,11 +546,11 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB);
PI != E; ++PI)
--PredCount[*PI];
-
+
// Figure out how many entries to remove from each PHI.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
++PredCount[PN->getIncomingBlock(i)];
-
+
// At this point, the excess predecessor entries are positive in the
// map. Loop over all of the PHIs and remove excess predecessor
// entries.
@@ -563,7 +563,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
}
}
}
-
+
// If the loops above have made these phi nodes have 0 or 1 operand,
// replace them with undef or the input value. We must do this for
// correctness, because 0-operand phis are not valid.
@@ -655,7 +655,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
if (!BI || BI->isConditional()) { ++I; continue; }
-
+
BasicBlock *Dest = BI->getSuccessor(0);
if (!Dest->getSinglePredecessor()) {
++I; continue;
@@ -668,16 +668,16 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
// We know all single-entry PHI nodes in the inlined function have been
// removed, so we just need to splice the blocks.
BI->eraseFromParent();
-
+
// Make all PHI nodes that referred to Dest now refer to I as their source.
Dest->replaceAllUsesWith(&*I);
// Move all the instructions in the succ to the pred.
I->getInstList().splice(I->end(), Dest->getInstList());
-
+
// Remove the dest block.
Dest->eraseFromParent();
-
+
// Do not increment I, iteratively merge all things this block branches to.
}
@@ -703,7 +703,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
ValueToValueMapTy &VMap,
bool ModuleLevelChanges,
SmallVectorImpl<ReturnInst*> &Returns,
- const char *NameSuffix,
+ const char *NameSuffix,
ClonedCodeInfo *CodeInfo,
Instruction *TheCall) {
CloneAndPruneIntoFromInst(NewFunc, OldFunc, &OldFunc->front().front(), VMap,
@@ -730,7 +730,7 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
const Twine &NameSuffix, LoopInfo *LI,
DominatorTree *DT,
SmallVectorImpl<BasicBlock *> &Blocks) {
- assert(OrigLoop->getSubLoops().empty() &&
+ assert(OrigLoop->getSubLoops().empty() &&
"Loop to be cloned cannot have inner loop");
Function *F = OrigLoop->getHeader()->getParent();
Loop *ParentLoop = OrigLoop->getParentLoop();
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index 35c7511a24b9..c7d68bab8170 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -61,7 +61,7 @@ std::unique_ptr<Module> llvm::CloneModule(
//
for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
I != E; ++I) {
- GlobalVariable *GV = new GlobalVariable(*New,
+ GlobalVariable *GV = new GlobalVariable(*New,
I->getValueType(),
I->isConstant(), I->getLinkage(),
(Constant*) nullptr, I->getName(),
@@ -110,7 +110,7 @@ std::unique_ptr<Module> llvm::CloneModule(
GA->copyAttributesFrom(&*I);
VMap[&*I] = GA;
}
-
+
// Now that all of the things that global variable initializer can refer to
// have been created, loop through and copy the global variable referrers
// over... We also set the attributes on the global now.
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index f31dab9f96af..cb349e34606c 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -1020,7 +1020,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
} else {
// Otherwise we must have code extracted an unwind or something, just
// return whatever we want.
- ReturnInst::Create(Context,
+ ReturnInst::Create(Context,
Constant::getNullValue(OldFnRetTy), TheSwitch);
}
@@ -1158,13 +1158,13 @@ Function *CodeExtractor::extractCodeRegion() {
splitReturnBlocks();
// This takes place of the original loop
- BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(),
+ BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(),
"codeRepl", oldFunction,
header);
// The new function needs a root node because other nodes can branch to the
// head of the region, but the entry node of a function cannot have preds.
- BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(),
+ BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(),
"newFuncRoot");
auto *BranchI = BranchInst::Create(header);
// If the original function has debug info, we have to add a debug location
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 0315aac1cf84..ddc6e07e2f59 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -1199,7 +1199,7 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
// Only copy the edge if the call was inlined!
if (VMI == VMap.end() || VMI->second == nullptr)
continue;
-
+
// If the call was inlined, but then constant folded, there is no edge to
// add. Check for this case.
Instruction *NewCall = dyn_cast<Instruction>(VMI->second);
@@ -1211,7 +1211,7 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
CallSite CS = CallSite(NewCall);
if (CS && CS.getCalledFunction() && CS.getCalledFunction()->isIntrinsic())
continue;
-
+
// Remember that this call site got inlined for the client of
// InlineFunction.
IFI.InlinedCalls.push_back(NewCall);
@@ -1231,7 +1231,7 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
CallerNode->addCalledFunction(CallSite(NewCall), I->second);
}
-
+
// Update the call graph by deleting the edge from Callee to Caller. We must
// do this after the loop above in case Caller and Callee are the same.
CallerNode->removeCallEdgeFor(CS);
@@ -1380,7 +1380,7 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
if (CalleeHasDebugInfo)
continue;
-
+
// If the inlined instruction has no line number, make it look as if it
// originates from the call location. This is important for
// ((__always_inline__, __nodebug__)) functions which must use caller
@@ -1777,7 +1777,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
E = FirstNewBlock->end(); I != E; ) {
AllocaInst *AI = dyn_cast<AllocaInst>(I++);
if (!AI) continue;
-
+
// If the alloca is now dead, remove it. This often occurs due to code
// specialization.
if (AI->use_empty()) {
@@ -1787,10 +1787,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
if (!allocaWouldBeStaticInEntry(AI))
continue;
-
+
// Keep track of the static allocas that we inline into the caller.
IFI.StaticAllocas.push_back(AI);
-
+
// Scan for the block of allocas that we can move over, and move them
// all at once.
while (isa<AllocaInst>(I) &&
diff --git a/lib/Transforms/Utils/IntegerDivision.cpp b/lib/Transforms/Utils/IntegerDivision.cpp
index 3fbb3487884b..4a359b99bebd 100644
--- a/lib/Transforms/Utils/IntegerDivision.cpp
+++ b/lib/Transforms/Utils/IntegerDivision.cpp
@@ -476,10 +476,10 @@ bool llvm::expandDivision(BinaryOperator *Div) {
return true;
}
-/// Generate code to compute the remainder of two integers of bitwidth up to
+/// Generate code to compute the remainder of two integers of bitwidth up to
/// 32 bits. Uses the above routines and extends the inputs/truncates the
/// outputs to operate in 32 bits; that is, these routines are good for targets
-/// that have no or very little suppport for smaller than 32 bit integer
+/// that have no or very little suppport for smaller than 32 bit integer
/// arithmetic.
///
/// Replace Rem with emulation code.
@@ -527,7 +527,7 @@ bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) {
return expandRemainder(cast<BinaryOperator>(ExtRem));
}
-/// Generate code to compute the remainder of two integers of bitwidth up to
+/// Generate code to compute the remainder of two integers of bitwidth up to
/// 64 bits. Uses the above routines and extends the inputs/truncates the
/// outputs to operate in 64 bits.
///
@@ -613,7 +613,7 @@ bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) {
} else {
ExtDividend = Builder.CreateZExt(Div->getOperand(0), Int32Ty);
ExtDivisor = Builder.CreateZExt(Div->getOperand(1), Int32Ty);
- ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor);
+ ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor);
}
Trunc = Builder.CreateTrunc(ExtDiv, DivTy);
@@ -662,7 +662,7 @@ bool llvm::expandDivisionUpTo64Bits(BinaryOperator *Div) {
} else {
ExtDividend = Builder.CreateZExt(Div->getOperand(0), Int64Ty);
ExtDivisor = Builder.CreateZExt(Div->getOperand(1), Int64Ty);
- ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor);
+ ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor);
}
Trunc = Builder.CreateTrunc(ExtDiv, DivTy);
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index 956d0387c7a8..a1f8e7484bcf 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -10,7 +10,7 @@
// This pass transforms loops by placing phi nodes at the end of the loops for
// all values that are live across the loop boundary. For example, it turns
// the left into the right code:
-//
+//
// for (...) for (...)
// if (c) if (c)
// X1 = ... X1 = ...
@@ -21,8 +21,8 @@
// ... = X4 + 4
//
// This is still valid LLVM; the extra phi nodes are purely redundant, and will
-// be trivially eliminated by InstCombine. The major benefit of this
-// transformation is that it makes many other loop optimizations, such as
+// be trivially eliminated by InstCombine. The major benefit of this
+// transformation is that it makes many other loop optimizations, such as
// LoopUnswitching, simpler.
//
//===----------------------------------------------------------------------===//
@@ -144,7 +144,8 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
PHINode *PN = PHINode::Create(I->getType(), PredCache.size(ExitBB),
I->getName() + ".lcssa", &ExitBB->front());
-
+ // Get the debug location from the original instruction.
+ PN->setDebugLoc(I->getDebugLoc());
// Add inputs from inside the loop for this PHI.
for (BasicBlock *Pred : PredCache.get(ExitBB)) {
PN->addIncoming(I, Pred);
diff --git a/lib/Transforms/Utils/LoopUnrollPeel.cpp b/lib/Transforms/Utils/LoopUnrollPeel.cpp
index 13794c53f24b..78afe748e596 100644
--- a/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -344,7 +344,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
/// Update the branch weights of the latch of a peeled-off loop
/// iteration.
/// This sets the branch weights for the latch of the recently peeled off loop
-/// iteration correctly.
+/// iteration correctly.
/// Our goal is to make sure that:
/// a) The total weight of all the copies of the loop body is preserved.
/// b) The total weight of the loop exit is preserved.
@@ -544,7 +544,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
//
// Each following iteration will split the current bottom anchor in two,
// and put the new copy of the loop body between these two blocks. That is,
- // after peeling another iteration from the example above, we'll split
+ // after peeling another iteration from the example above, we'll split
// InsertBot, and get:
//
// InsertTop:
diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp
index 323f2552ca80..88d595ee02ab 100644
--- a/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/lib/Transforms/Utils/MetaRenamer.cpp
@@ -68,7 +68,7 @@ namespace {
PRNG prng;
};
-
+
struct MetaRenamer : public ModulePass {
// Pass identification, replacement for typeid
static char ID;
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index ca184ed7c4e3..4a1fd8d571aa 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -201,13 +201,13 @@ void SSAUpdater::RewriteUse(Use &U) {
void SSAUpdater::RewriteUseAfterInsertions(Use &U) {
Instruction *User = cast<Instruction>(U.getUser());
-
+
Value *V;
if (PHINode *UserPN = dyn_cast<PHINode>(User))
V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U));
else
V = GetValueAtEndOfBlock(User->getParent());
-
+
U.set(V);
}
@@ -235,7 +235,7 @@ public:
PHI_iterator(PHINode *P, bool) // end iterator
: PHI(P), idx(PHI->getNumIncomingValues()) {}
- PHI_iterator &operator++() { ++idx; return *this; }
+ PHI_iterator &operator++() { ++idx; return *this; }
bool operator==(const PHI_iterator& x) const { return idx == x.idx; }
bool operator!=(const PHI_iterator& x) const { return !operator==(x); }
@@ -333,7 +333,7 @@ LoadAndStorePromoter::
LoadAndStorePromoter(ArrayRef<const Instruction *> Insts,
SSAUpdater &S, StringRef BaseName) : SSA(S) {
if (Insts.empty()) return;
-
+
const Value *SomeVal;
if (const LoadInst *LI = dyn_cast<LoadInst>(Insts[0]))
SomeVal = LI;
@@ -354,7 +354,7 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {
for (Instruction *User : Insts)
UsesByBlock[User->getParent()].push_back(User);
-
+
// Okay, now we can iterate over all the blocks in the function with uses,
// processing them. Keep track of which loads are loading a live-in value.
// Walk the uses in the use-list order to be determinstic.
@@ -364,10 +364,10 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {
for (Instruction *User : Insts) {
BasicBlock *BB = User->getParent();
TinyPtrVector<Instruction *> &BlockUses = UsesByBlock[BB];
-
+
// If this block has already been processed, ignore this repeat use.
if (BlockUses.empty()) continue;
-
+
// Okay, this is the first use in the block. If this block just has a
// single user in it, we can rewrite it trivially.
if (BlockUses.size() == 1) {
@@ -375,13 +375,13 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {
if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
updateDebugInfo(SI);
SSA.AddAvailableValue(BB, SI->getOperand(0));
- } else
+ } else
// Otherwise it is a load, queue it to rewrite as a live-in load.
LiveInLoads.push_back(cast<LoadInst>(User));
BlockUses.clear();
continue;
}
-
+
// Otherwise, check to see if this block is all loads.
bool HasStore = false;
for (Instruction *I : BlockUses) {
@@ -390,7 +390,7 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {
break;
}
}
-
+
// If so, we can queue them all as live in loads. We don't have an
// efficient way to tell which on is first in the block and don't want to
// scan large blocks, so just add all loads as live ins.
@@ -400,7 +400,7 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {
BlockUses.clear();
continue;
}
-
+
// Otherwise, we have mixed loads and stores (or just a bunch of stores).
// Since SSAUpdater is purely for cross-block values, we need to determine
// the order of these instructions in the block. If the first use in the
@@ -411,7 +411,7 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {
if (LoadInst *L = dyn_cast<LoadInst>(&I)) {
// If this is a load from an unrelated pointer, ignore it.
if (!isInstInList(L, Insts)) continue;
-
+
// If we haven't seen a store yet, this is a live in use, otherwise
// use the stored value.
if (StoredValue) {
@@ -433,13 +433,13 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {
StoredValue = SI->getOperand(0);
}
}
-
+
// The last stored value that happened is the live-out for the block.
assert(StoredValue && "Already checked that there is a store in block");
SSA.AddAvailableValue(BB, StoredValue);
BlockUses.clear();
}
-
+
// Okay, now we rewrite all loads that use live-in values in the loop,
// inserting PHI nodes as necessary.
for (LoadInst *ALoad : LiveInLoads) {
@@ -451,10 +451,10 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {
ALoad->replaceAllUsesWith(NewVal);
ReplacedLoads[ALoad] = NewVal;
}
-
+
// Allow the client to do stuff before we start nuking things.
doExtraRewritesBeforeFinalDeletion();
-
+
// Now that everything is rewritten, delete the old instructions from the
// function. They should all be dead now.
for (Instruction *User : Insts) {
@@ -465,7 +465,7 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {
if (!User->use_empty()) {
Value *NewVal = ReplacedLoads[User];
assert(NewVal && "not a replaced load?");
-
+
// Propagate down to the ultimate replacee. The intermediately loads
// could theoretically already have been deleted, so we don't want to
// dereference the Value*'s.
@@ -474,11 +474,11 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {
NewVal = RLI->second;
RLI = ReplacedLoads.find(NewVal);
}
-
+
replaceLoadWithValue(cast<LoadInst>(User), NewVal);
User->replaceAllUsesWith(NewVal);
}
-
+
instructionDeleted(User);
User->eraseFromParent();
}
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index e381fbc34ab4..65b23f4d94a1 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -196,7 +196,7 @@ bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp,
SmallDenseMap<const SCEV*, Value*> CheapExpansions;
CheapExpansions[S] = ICmp->getOperand(IVOperIdx);
CheapExpansions[X] = ICmp->getOperand(1 - IVOperIdx);
-
+
// TODO: Support multiple entry loops? (We currently bail out of these in
// the IndVarSimplify pass)
if (auto *BB = L->getLoopPredecessor()) {
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 8c48597fc2e4..15e035874002 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -890,7 +890,7 @@ static Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B,
return nullptr;
// Replace the malloc with a calloc. We need the data layout to know what the
- // actual size of a 'size_t' parameter is.
+ // actual size of a 'size_t' parameter is.
B.SetInsertPoint(Malloc->getParent(), ++Malloc->getIterator());
const DataLayout &DL = Malloc->getModule()->getDataLayout();
IntegerType *SizeType = DL.getIntPtrType(B.GetInsertBlock()->getContext());
@@ -970,7 +970,7 @@ static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
Value *V = valueHasFloatPrecision(CI->getArgOperand(0));
if (V == nullptr)
return nullptr;
-
+
// If call isn't an intrinsic, check that it isn't within a function with the
// same name as the float version of this call.
//
@@ -1126,165 +1126,164 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) {
if (!Pow->isFast())
return nullptr;
- const APFloat *Arg1C;
- if (!match(Pow->getArgOperand(1), m_APFloat(Arg1C)))
- return nullptr;
- if (!Arg1C->isExactlyValue(0.5) && !Arg1C->isExactlyValue(-0.5))
+ Value *Sqrt, *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
+ Type *Ty = Pow->getType();
+
+ const APFloat *ExpoF;
+ if (!match(Expo, m_APFloat(ExpoF)) ||
+ (!ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)))
return nullptr;
- // Fast-math flags from the pow() are propagated to all replacement ops.
- IRBuilder<>::FastMathFlagGuard Guard(B);
- B.setFastMathFlags(Pow->getFastMathFlags());
- Type *Ty = Pow->getType();
- Value *Sqrt;
+ // If errno is never set, then use the intrinsic for sqrt().
if (Pow->hasFnAttr(Attribute::ReadNone)) {
- // We know that errno is never set, so replace with an intrinsic:
- // pow(x, 0.5) --> llvm.sqrt(x)
- // llvm.pow(x, 0.5) --> llvm.sqrt(x)
- auto *F = Intrinsic::getDeclaration(Pow->getModule(), Intrinsic::sqrt, Ty);
- Sqrt = B.CreateCall(F, Pow->getArgOperand(0));
- } else if (hasUnaryFloatFn(TLI, Ty, LibFunc_sqrt, LibFunc_sqrtf,
- LibFunc_sqrtl)) {
- // Errno could be set, so we must use a sqrt libcall.
- // TODO: We also should check that the target can in fact lower the sqrt
- // libcall. We currently have no way to ask this question, so we ask
- // whether the target has a sqrt libcall which is not exactly the same.
- Sqrt = emitUnaryFloatFnCall(Pow->getArgOperand(0),
- TLI->getName(LibFunc_sqrt), B,
+ Function *SqrtFn = Intrinsic::getDeclaration(Pow->getModule(),
+ Intrinsic::sqrt, Ty);
+ Sqrt = B.CreateCall(SqrtFn, Base);
+ }
+ // Otherwise, use the libcall for sqrt().
+ else if (hasUnaryFloatFn(TLI, Ty, LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl))
+ // TODO: We also should check that the target can in fact lower the sqrt()
+ // libcall. We currently have no way to ask this question, so we ask if
+ // the target has a sqrt() libcall, which is not exactly the same.
+ Sqrt = emitUnaryFloatFnCall(Base, TLI->getName(LibFunc_sqrt), B,
Pow->getCalledFunction()->getAttributes());
- } else {
- // We can't replace with an intrinsic or a libcall.
+ else
return nullptr;
- }
- // If this is pow(x, -0.5), get the reciprocal.
- if (Arg1C->isExactlyValue(-0.5))
- Sqrt = B.CreateFDiv(ConstantFP::get(Ty, 1.0), Sqrt);
+ // If the exponent is negative, then get the reciprocal.
+ if (ExpoF->isNegative())
+ Sqrt = B.CreateFDiv(ConstantFP::get(Ty, 1.0), Sqrt, "reciprocal");
return Sqrt;
}
-Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- Value *Ret = nullptr;
+Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
+ Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
+ Function *Callee = Pow->getCalledFunction();
+ AttributeList Attrs = Callee->getAttributes();
StringRef Name = Callee->getName();
- if (UnsafeFPShrink && Name == "pow" && hasFloatVersion(Name))
- Ret = optimizeUnaryDoubleFP(CI, B, true);
+ Module *Module = Pow->getModule();
+ Type *Ty = Pow->getType();
+ Value *Shrunk = nullptr;
+ bool Ignored;
+
+ if (UnsafeFPShrink &&
+ Name == TLI->getName(LibFunc_pow) && hasFloatVersion(Name))
+ Shrunk = optimizeUnaryDoubleFP(Pow, B, true);
+
+ // Propagate the math semantics from the call to any created instructions.
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ B.setFastMathFlags(Pow->getFastMathFlags());
- Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1);
+ // Evaluate special cases related to the base.
// pow(1.0, x) -> 1.0
- if (match(Op1, m_SpecificFP(1.0)))
- return Op1;
- // pow(2.0, x) -> llvm.exp2(x)
- if (match(Op1, m_SpecificFP(2.0))) {
- Value *Exp2 = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::exp2,
- CI->getType());
- return B.CreateCall(Exp2, Op2, "exp2");
- }
-
- // There's no llvm.exp10 intrinsic yet, but, maybe, some day there will
- // be one.
- if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
- // pow(10.0, x) -> exp10(x)
- if (Op1C->isExactlyValue(10.0) &&
- hasUnaryFloatFn(TLI, Op1->getType(), LibFunc_exp10, LibFunc_exp10f,
- LibFunc_exp10l))
- return emitUnaryFloatFnCall(Op2, TLI->getName(LibFunc_exp10), B,
- Callee->getAttributes());
+ if (match(Base, m_SpecificFP(1.0)))
+ return Base;
+
+ // pow(2.0, x) -> exp2(x)
+ if (match(Base, m_SpecificFP(2.0))) {
+ Value *Exp2 = Intrinsic::getDeclaration(Module, Intrinsic::exp2, Ty);
+ return B.CreateCall(Exp2, Expo, "exp2");
}
+ // pow(10.0, x) -> exp10(x)
+ if (ConstantFP *BaseC = dyn_cast<ConstantFP>(Base))
+ // There's no exp10 intrinsic yet, but, maybe, some day there shall be one.
+ if (BaseC->isExactlyValue(10.0) &&
+ hasUnaryFloatFn(TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l))
+ return emitUnaryFloatFnCall(Expo, TLI->getName(LibFunc_exp10), B, Attrs);
+
// pow(exp(x), y) -> exp(x * y)
// pow(exp2(x), y) -> exp2(x * y)
// We enable these only with fast-math. Besides rounding differences, the
// transformation changes overflow and underflow behavior quite dramatically.
// Example: x = 1000, y = 0.001.
// pow(exp(x), y) = pow(inf, 0.001) = inf, whereas exp(x*y) = exp(1).
- auto *OpC = dyn_cast<CallInst>(Op1);
- if (OpC && OpC->isFast() && CI->isFast()) {
- LibFunc Func;
- Function *OpCCallee = OpC->getCalledFunction();
- if (OpCCallee && TLI->getLibFunc(OpCCallee->getName(), Func) &&
- TLI->has(Func) && (Func == LibFunc_exp || Func == LibFunc_exp2)) {
+ auto *BaseFn = dyn_cast<CallInst>(Base);
+ if (BaseFn && BaseFn->isFast() && Pow->isFast()) {
+ LibFunc LibFn;
+ Function *CalleeFn = BaseFn->getCalledFunction();
+ if (CalleeFn && TLI->getLibFunc(CalleeFn->getName(), LibFn) &&
+ (LibFn == LibFunc_exp || LibFn == LibFunc_exp2) && TLI->has(LibFn)) {
IRBuilder<>::FastMathFlagGuard Guard(B);
- B.setFastMathFlags(CI->getFastMathFlags());
- Value *FMul = B.CreateFMul(OpC->getArgOperand(0), Op2, "mul");
- return emitUnaryFloatFnCall(FMul, OpCCallee->getName(), B,
- OpCCallee->getAttributes());
+ B.setFastMathFlags(Pow->getFastMathFlags());
+
+ Value *FMul = B.CreateFMul(BaseFn->getArgOperand(0), Expo, "mul");
+ return emitUnaryFloatFnCall(FMul, CalleeFn->getName(), B,
+ CalleeFn->getAttributes());
}
}
- if (Value *Sqrt = replacePowWithSqrt(CI, B))
+ // Evaluate special cases related to the exponent.
+
+ if (Value *Sqrt = replacePowWithSqrt(Pow, B))
return Sqrt;
- ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2);
- if (!Op2C)
- return Ret;
+ ConstantFP *ExpoC = dyn_cast<ConstantFP>(Expo);
+ if (!ExpoC)
+ return Shrunk;
- if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0
- return ConstantFP::get(CI->getType(), 1.0);
+ // pow(x, -1.0) -> 1.0 / x
+ if (ExpoC->isExactlyValue(-1.0))
+ return B.CreateFDiv(ConstantFP::get(Ty, 1.0), Base, "reciprocal");
- // FIXME: Correct the transforms and pull this into replacePowWithSqrt().
- if (Op2C->isExactlyValue(0.5) &&
- hasUnaryFloatFn(TLI, Op2->getType(), LibFunc_sqrt, LibFunc_sqrtf,
- LibFunc_sqrtl)) {
- // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))).
- // This is faster than calling pow, and still handles negative zero
- // and negative infinity correctly.
- // TODO: In finite-only mode, this could be just fabs(sqrt(x)).
- Value *Inf = ConstantFP::getInfinity(CI->getType());
- Value *NegInf = ConstantFP::getInfinity(CI->getType(), true);
+ // pow(x, 0.0) -> 1.0
+ if (ExpoC->getValueAPF().isZero())
+ return ConstantFP::get(Ty, 1.0);
- // TODO: As above, we should lower to the sqrt intrinsic if the pow is an
- // intrinsic, to match errno semantics.
- Value *Sqrt = emitUnaryFloatFnCall(Op1, "sqrt", B, Callee->getAttributes());
+ // pow(x, 1.0) -> x
+ if (ExpoC->isExactlyValue(1.0))
+ return Base;
- Module *M = Callee->getParent();
- Function *FabsF = Intrinsic::getDeclaration(M, Intrinsic::fabs,
- CI->getType());
- Value *FAbs = B.CreateCall(FabsF, Sqrt);
+ // pow(x, 2.0) -> x * x
+ if (ExpoC->isExactlyValue(2.0))
+ return B.CreateFMul(Base, Base, "square");
- Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf);
- Value *Sel = B.CreateSelect(FCmp, Inf, FAbs);
- return Sel;
+ // FIXME: Correct the transforms and pull this into replacePowWithSqrt().
+ if (ExpoC->isExactlyValue(0.5) &&
+ hasUnaryFloatFn(TLI, Ty, LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl)) {
+ // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))).
+ // This is faster than calling pow(), and still handles -0.0 and
+ // negative infinity correctly.
+ // TODO: In finite-only mode, this could be just fabs(sqrt(x)).
+ Value *PosInf = ConstantFP::getInfinity(Ty);
+ Value *NegInf = ConstantFP::getInfinity(Ty, true);
+
+ // TODO: As above, we should lower to the sqrt() intrinsic if the pow() is
+ // an intrinsic, to match errno semantics.
+ Value *Sqrt = emitUnaryFloatFnCall(Base, TLI->getName(LibFunc_sqrt),
+ B, Attrs);
+ Function *FAbsFn = Intrinsic::getDeclaration(Module, Intrinsic::fabs, Ty);
+ Value *FAbs = B.CreateCall(FAbsFn, Sqrt, "abs");
+ Value *FCmp = B.CreateFCmpOEQ(Base, NegInf, "isinf");
+ Sqrt = B.CreateSelect(FCmp, PosInf, FAbs);
+ return Sqrt;
}
- // Propagate fast-math-flags from the call to any created instructions.
- IRBuilder<>::FastMathFlagGuard Guard(B);
- B.setFastMathFlags(CI->getFastMathFlags());
- // pow(x, 1.0) --> x
- if (Op2C->isExactlyValue(1.0))
- return Op1;
- // pow(x, 2.0) --> x * x
- if (Op2C->isExactlyValue(2.0))
- return B.CreateFMul(Op1, Op1, "pow2");
- // pow(x, -1.0) --> 1.0 / x
- if (Op2C->isExactlyValue(-1.0))
- return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip");
-
- // In -ffast-math, generate repeated fmul instead of generating pow(x, n).
- if (CI->isFast()) {
- APFloat V = abs(Op2C->getValueAPF());
- // We limit to a max of 7 fmul(s). Thus max exponent is 32.
+ // pow(x, n) -> x * x * x * ....
+ if (Pow->isFast()) {
+ APFloat ExpoA = abs(ExpoC->getValueAPF());
+ // We limit to a max of 7 fmul(s). Thus the maximum exponent is 32.
// This transformation applies to integer exponents only.
- if (V.compare(APFloat(V.getSemantics(), 32.0)) == APFloat::cmpGreaterThan ||
- !V.isInteger())
+ if (!ExpoA.isInteger() ||
+ ExpoA.compare
+ (APFloat(ExpoA.getSemantics(), 32.0)) == APFloat::cmpGreaterThan)
return nullptr;
// We will memoize intermediate products of the Addition Chain.
Value *InnerChain[33] = {nullptr};
- InnerChain[1] = Op1;
- InnerChain[2] = B.CreateFMul(Op1, Op1);
+ InnerChain[1] = Base;
+ InnerChain[2] = B.CreateFMul(Base, Base, "square");
// We cannot readily convert a non-double type (like float) to a double.
- // So we first convert V to something which could be converted to double.
- bool Ignored;
- V.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored);
-
- Value *FMul = getPow(InnerChain, V.convertToDouble(), B);
- // For negative exponents simply compute the reciprocal.
- if (Op2C->isNegative())
- FMul = B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), FMul);
+ // So we first convert it to something which could be converted to double.
+ ExpoA.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored);
+ Value *FMul = getPow(InnerChain, ExpoA.convertToDouble(), B);
+
+ // If the exponent is negative, then get the reciprocal.
+ if (ExpoC->isNegative())
+ FMul = B.CreateFDiv(ConstantFP::get(Ty, 1.0), FMul, "reciprocal");
return FMul;
}
diff --git a/lib/Transforms/Utils/SymbolRewriter.cpp b/lib/Transforms/Utils/SymbolRewriter.cpp
index 3640541e63cc..fd0da79487f1 100644
--- a/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -536,7 +536,7 @@ private:
char RewriteSymbolsLegacyPass::ID = 0;
RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass() : ModulePass(ID) {
- initializeRewriteSymbolsLegacyPassPass(*PassRegistry::getPassRegistry());
+ initializeRewriteSymbolsLegacyPassPass(*PassRegistry::getPassRegistry());
}
RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass(
diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index e633ac0c874d..d49b26472548 100644
--- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -61,7 +61,7 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
} else if (UnreachableBlocks.size() == 1) {
UnreachableBlock = UnreachableBlocks.front();
} else {
- UnreachableBlock = BasicBlock::Create(F.getContext(),
+ UnreachableBlock = BasicBlock::Create(F.getContext(),
"UnifiedUnreachableBlock", &F);
new UnreachableInst(F.getContext(), UnreachableBlock);
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3c693f5d5ee0..859d0c92ca5a 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -535,13 +535,13 @@ protected:
/// Returns true if we should generate a scalar version of \p IV.
bool needsScalarInduction(Instruction *IV) const;
- /// If there is a cast involved in the induction variable \p ID, which should
- /// be ignored in the vectorized loop body, this function records the
- /// VectorLoopValue of the respective Phi also as the VectorLoopValue of the
- /// cast. We had already proved that the casted Phi is equal to the uncasted
- /// Phi in the vectorized loop (under a runtime guard), and therefore
- /// there is no need to vectorize the cast - the same value can be used in the
- /// vector loop for both the Phi and the cast.
+ /// If there is a cast involved in the induction variable \p ID, which should
+ /// be ignored in the vectorized loop body, this function records the
+ /// VectorLoopValue of the respective Phi also as the VectorLoopValue of the
+ /// cast. We had already proved that the casted Phi is equal to the uncasted
+ /// Phi in the vectorized loop (under a runtime guard), and therefore
+ /// there is no need to vectorize the cast - the same value can be used in the
+ /// vector loop for both the Phi and the cast.
/// If \p VectorLoopValue is a scalarized value, \p Lane is also specified,
/// Otherwise, \p VectorLoopValue is a widened/vectorized value.
///
@@ -5443,7 +5443,7 @@ bool LoopVectorizationCostModel::useEmulatedMaskMemRefHack(Instruction *I){
// high enough value to practically disable vectorization with such
// operations, except where previously deployed legality hack allowed
// using very low cost values. This is to avoid regressions coming simply
- // from moving "masked load/store" check from legality to cost model.
+ // from moving "masked load/store" check from legality to cost model.
// Masked Load/Gather emulation was previously never allowed.
// Limited number of Masked Store/Scatter emulation was allowed.
assert(isScalarWithPredication(I) &&
@@ -6412,12 +6412,12 @@ void LoopVectorizationPlanner::collectTriviallyDeadInstructions(
}))
DeadInstructions.insert(IndUpdate);
- // We record as "Dead" also the type-casting instructions we had identified
+ // We record as "Dead" also the type-casting instructions we had identified
// during induction analysis. We don't need any handling for them in the
- // vectorized loop because we have proven that, under a proper runtime
- // test guarding the vectorized loop, the value of the phi, and the casted
+ // vectorized loop because we have proven that, under a proper runtime
+ // test guarding the vectorized loop, the value of the phi, and the casted
// value of the phi, are the same. The last instruction in this casting chain
- // will get its scalar/vector/widened def from the scalar/vector/widened def
+ // will get its scalar/vector/widened def from the scalar/vector/widened def
// of the respective phi node. Any other casts in the induction def-use chain
// have no other uses outside the phi update chain, and will be ignored.
InductionDescriptor &IndDes = Induction.second;
@@ -7060,8 +7060,8 @@ LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
auto Plan = llvm::make_unique<VPlan>();
// Build hierarchical CFG
- VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI);
- HCFGBuilder.buildHierarchicalCFG(*Plan.get());
+ VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
+ HCFGBuilder.buildHierarchicalCFG();
return Plan;
}
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index ac8c4f046c6f..5c2efe885e22 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -345,7 +345,7 @@ static Value *isOneOf(const InstructionsState &S, Value *Op) {
}
/// \returns analysis of the Instructions in \p VL described in
-/// InstructionsState, the Opcode that we suppose the whole list
+/// InstructionsState, the Opcode that we suppose the whole list
/// could be vectorized even if its structure is diverse.
static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
unsigned BaseIndex = 0) {
@@ -3111,6 +3111,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
// TODO: Merge this shuffle with the ReorderShuffleMask.
if (!E->ReorderIndices.empty())
Builder.SetInsertPoint(VL0);
+ else if (auto *I = dyn_cast<Instruction>(V))
+ Builder.SetInsertPoint(I->getParent(),
+ std::next(I->getIterator()));
+ else
+ Builder.SetInsertPoint(&F->getEntryBlock(),
+ F->getEntryBlock().getFirstInsertionPt());
V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
E->ReuseShuffleIndices, "shuffle");
}
diff --git a/lib/Transforms/Vectorize/VPlan.cpp b/lib/Transforms/Vectorize/VPlan.cpp
index f7b07b722bb1..0780e70809d0 100644
--- a/lib/Transforms/Vectorize/VPlan.cpp
+++ b/lib/Transforms/Vectorize/VPlan.cpp
@@ -18,6 +18,7 @@
//===----------------------------------------------------------------------===//
#include "VPlan.h"
+#include "VPlanDominatorTree.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallVector.h"
@@ -25,7 +26,6 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
@@ -34,6 +34,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GenericDomTreeConstruction.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -576,3 +577,5 @@ void VPWidenMemoryInstructionRecipe::print(raw_ostream &O,
}
O << "\\l\"";
}
+
+template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT);
diff --git a/lib/Transforms/Vectorize/VPlan.h b/lib/Transforms/Vectorize/VPlan.h
index 866951cb79a4..883e6f52369a 100644
--- a/lib/Transforms/Vectorize/VPlan.h
+++ b/lib/Transforms/Vectorize/VPlan.h
@@ -26,8 +26,10 @@
#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
+#include "VPlanLoopInfo.h"
#include "VPlanValue.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -51,7 +53,6 @@ class BasicBlock;
class DominatorTree;
class InnerLoopVectorizer;
class InterleaveGroup;
-class LoopInfo;
class raw_ostream;
class Value;
class VPBasicBlock;
@@ -516,6 +517,23 @@ public:
/// Delete all blocks reachable from a given VPBlockBase, inclusive.
static void deleteCFG(VPBlockBase *Entry);
+
+ void printAsOperand(raw_ostream &OS, bool PrintType) const {
+ OS << getName();
+ }
+
+ void print(raw_ostream &OS) const {
+ // TODO: Only printing VPBB name for now since we only have dot printing
+ // support for VPInstructions/Recipes.
+ printAsOperand(OS, false);
+ }
+
+ /// Return true if it is legal to hoist instructions into this block.
+ bool isLegalToHoistInto() {
+ // There are currently no constraints that prevent an instruction to be
+ // hoisted into a VPBlockBase.
+ return true;
+ }
};
/// VPRecipeBase is a base class modeling a sequence of one or more output IR
@@ -1037,6 +1055,12 @@ public:
EntryBlock->setParent(this);
}
+ // FIXME: DominatorTreeBase is doing 'A->getParent()->front()'. 'front' is a
+ // specific interface of llvm::Function, instead of using
+ // GraphTraints::getEntryNode. We should add a new template parameter to
+ // DominatorTreeBase representing the Graph type.
+ VPBlockBase &front() const { return *Entry; }
+
const VPBlockBase *getExit() const { return Exit; }
VPBlockBase *getExit() { return Exit; }
@@ -1087,6 +1111,9 @@ private:
/// VPlan.
Value2VPValueTy Value2VPValue;
+ /// Holds the VPLoopInfo analysis for this VPlan.
+ VPLoopInfo VPLInfo;
+
public:
VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) {}
@@ -1133,6 +1160,10 @@ public:
return Value2VPValue[V];
}
+ /// Return the VPLoopInfo analysis for this VPlan.
+ VPLoopInfo &getVPLoopInfo() { return VPLInfo; }
+ const VPLoopInfo &getVPLoopInfo() const { return VPLInfo; }
+
private:
/// Add to the given dominator tree the header block and every new basic block
/// that was created between it and the latch block, inclusive.
@@ -1210,12 +1241,15 @@ inline raw_ostream &operator<<(raw_ostream &OS, VPlan &Plan) {
return OS;
}
-//===--------------------------------------------------------------------===//
-// GraphTraits specializations for VPlan/VPRegionBlock Control-Flow Graphs //
-//===--------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// GraphTraits specializations for VPlan Hierarchical Control-Flow Graphs //
+//===----------------------------------------------------------------------===//
-// Provide specializations of GraphTraits to be able to treat a VPBlockBase as a
-// graph of VPBlockBase nodes...
+// The following set of template specializations implement GraphTraits to treat
+// any VPBlockBase as a node in a graph of VPBlockBases. It's important to note
+// that VPBlockBase traits don't recurse into VPRegioBlocks, i.e., if the
+// VPBlockBase is a VPRegionBlock, this specialization provides access to its
+// successors/predecessors but not to the blocks inside the region.
template <> struct GraphTraits<VPBlockBase *> {
using NodeRef = VPBlockBase *;
@@ -1247,17 +1281,13 @@ template <> struct GraphTraits<const VPBlockBase *> {
}
};
-// Provide specializations of GraphTraits to be able to treat a VPBlockBase as a
-// graph of VPBlockBase nodes... and to walk it in inverse order. Inverse order
-// for a VPBlockBase is considered to be when traversing the predecessors of a
-// VPBlockBase instead of its successors.
+// Inverse order specialization for VPBasicBlocks. Predecessors are used instead
+// of successors for the inverse traversal.
template <> struct GraphTraits<Inverse<VPBlockBase *>> {
using NodeRef = VPBlockBase *;
using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::iterator;
- static Inverse<VPBlockBase *> getEntryNode(Inverse<VPBlockBase *> B) {
- return B;
- }
+ static NodeRef getEntryNode(Inverse<NodeRef> B) { return B.Graph; }
static inline ChildIteratorType child_begin(NodeRef N) {
return N->getPredecessors().begin();
@@ -1268,6 +1298,71 @@ template <> struct GraphTraits<Inverse<VPBlockBase *>> {
}
};
+// The following set of template specializations implement GraphTraits to
+// treat VPRegionBlock as a graph and recurse inside its nodes. It's important
+// to note that the blocks inside the VPRegionBlock are treated as VPBlockBases
+// (i.e., no dyn_cast is performed, VPBlockBases specialization is used), so
+// there won't be automatic recursion into other VPBlockBases that turn to be
+// VPRegionBlocks.
+
+template <>
+struct GraphTraits<VPRegionBlock *> : public GraphTraits<VPBlockBase *> {
+ using GraphRef = VPRegionBlock *;
+ using nodes_iterator = df_iterator<NodeRef>;
+
+ static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); }
+
+ static nodes_iterator nodes_begin(GraphRef N) {
+ return nodes_iterator::begin(N->getEntry());
+ }
+
+ static nodes_iterator nodes_end(GraphRef N) {
+ // df_iterator::end() returns an empty iterator so the node used doesn't
+ // matter.
+ return nodes_iterator::end(N);
+ }
+};
+
+template <>
+struct GraphTraits<const VPRegionBlock *>
+ : public GraphTraits<const VPBlockBase *> {
+ using GraphRef = const VPRegionBlock *;
+ using nodes_iterator = df_iterator<NodeRef>;
+
+ static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); }
+
+ static nodes_iterator nodes_begin(GraphRef N) {
+ return nodes_iterator::begin(N->getEntry());
+ }
+
+ static nodes_iterator nodes_end(GraphRef N) {
+ // df_iterator::end() returns an empty iterator so the node used doesn't
+ // matter.
+ return nodes_iterator::end(N);
+ }
+};
+
+template <>
+struct GraphTraits<Inverse<VPRegionBlock *>>
+ : public GraphTraits<Inverse<VPBlockBase *>> {
+ using GraphRef = VPRegionBlock *;
+ using nodes_iterator = df_iterator<NodeRef>;
+
+ static NodeRef getEntryNode(Inverse<GraphRef> N) {
+ return N.Graph->getExit();
+ }
+
+ static nodes_iterator nodes_begin(GraphRef N) {
+ return nodes_iterator::begin(N->getExit());
+ }
+
+ static nodes_iterator nodes_end(GraphRef N) {
+ // df_iterator::end() returns an empty iterator so the node used doesn't
+ // matter.
+ return nodes_iterator::end(N);
+ }
+};
+
//===----------------------------------------------------------------------===//
// VPlan Utilities
//===----------------------------------------------------------------------===//
diff --git a/lib/Transforms/Vectorize/VPlanDominatorTree.h b/lib/Transforms/Vectorize/VPlanDominatorTree.h
new file mode 100644
index 000000000000..1b81097b6d31
--- /dev/null
+++ b/lib/Transforms/Vectorize/VPlanDominatorTree.h
@@ -0,0 +1,41 @@
+//===-- VPlanDominatorTree.h ------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements dominator tree analysis for a single level of a VPlan's
+/// H-CFG.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANDOMINATORTREE_H
+#define LLVM_TRANSFORMS_VECTORIZE_VPLANDOMINATORTREE_H
+
+#include "VPlan.h"
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/IR/Dominators.h"
+
+namespace llvm {
+
+/// Template specialization of the standard LLVM dominator tree utility for
+/// VPBlockBases.
+using VPDominatorTree = DomTreeBase<VPBlockBase>;
+
+using VPDomTreeNode = DomTreeNodeBase<VPBlockBase>;
+
+/// Template specializations of GraphTraits for VPDomTreeNode.
+template <>
+struct GraphTraits<VPDomTreeNode *>
+ : public DomTreeGraphTraitsBase<VPDomTreeNode, VPDomTreeNode::iterator> {};
+
+template <>
+struct GraphTraits<const VPDomTreeNode *>
+ : public DomTreeGraphTraitsBase<const VPDomTreeNode,
+ VPDomTreeNode::const_iterator> {};
+} // namespace llvm
+#endif // LLVM_TRANSFORMS_VECTORIZE_VPLANDOMINATORTREE_H
diff --git a/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
index 08129b74cddf..b6307acb9474 100644
--- a/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
+++ b/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
@@ -324,13 +324,28 @@ VPRegionBlock *PlainCFGBuilder::buildPlainCFG() {
return TopRegion;
}
+VPRegionBlock *VPlanHCFGBuilder::buildPlainCFG() {
+ PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan);
+ return PCFGBuilder.buildPlainCFG();
+}
+
// Public interface to build a H-CFG.
-void VPlanHCFGBuilder::buildHierarchicalCFG(VPlan &Plan) {
+void VPlanHCFGBuilder::buildHierarchicalCFG() {
// Build Top Region enclosing the plain CFG and set it as VPlan entry.
- PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan);
- VPRegionBlock *TopRegion = PCFGBuilder.buildPlainCFG();
+ VPRegionBlock *TopRegion = buildPlainCFG();
Plan.setEntry(TopRegion);
LLVM_DEBUG(Plan.setName("HCFGBuilder: Plain CFG\n"); dbgs() << Plan);
Verifier.verifyHierarchicalCFG(TopRegion);
+
+ // Compute plain CFG dom tree for VPLInfo.
+ VPDomTree.recalculate(*TopRegion);
+ LLVM_DEBUG(dbgs() << "Dominator Tree after building the plain CFG.\n";
+ VPDomTree.print(dbgs()));
+
+ // Compute VPLInfo and keep it in Plan.
+ VPLoopInfo &VPLInfo = Plan.getVPLoopInfo();
+ VPLInfo.analyze(VPDomTree);
+ LLVM_DEBUG(dbgs() << "VPLoop Info After buildPlainCFG:\n";
+ VPLInfo.print(dbgs()));
}
diff --git a/lib/Transforms/Vectorize/VPlanHCFGBuilder.h b/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
index c4e69843615a..3f11dcb5164d 100644
--- a/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
+++ b/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
@@ -26,14 +26,18 @@
#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_VPLANHCFGBUILDER_H
#include "VPlan.h"
+#include "VPlanDominatorTree.h"
#include "VPlanVerifier.h"
namespace llvm {
class Loop;
+class VPlanTestBase;
/// Main class to build the VPlan H-CFG for an incoming IR.
class VPlanHCFGBuilder {
+ friend VPlanTestBase;
+
private:
// The outermost loop of the input loop nest considered for vectorization.
Loop *TheLoop;
@@ -41,14 +45,27 @@ private:
// Loop Info analysis.
LoopInfo *LI;
+ // The VPlan that will contain the H-CFG we are building.
+ VPlan &Plan;
+
// VPlan verifier utility.
VPlanVerifier Verifier;
+ // Dominator analysis for VPlan plain CFG to be used in the
+ // construction of the H-CFG. This analysis is no longer valid once regions
+ // are introduced.
+ VPDominatorTree VPDomTree;
+
+ /// Build plain CFG for TheLoop. Return a new VPRegionBlock (TopRegion)
+ /// enclosing the plain CFG.
+ VPRegionBlock *buildPlainCFG();
+
public:
- VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI) : TheLoop(Lp), LI(LI) {}
+ VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P)
+ : TheLoop(Lp), LI(LI), Plan(P) {}
- /// Build H-CFG for TheLoop and update \p Plan accordingly.
- void buildHierarchicalCFG(VPlan &Plan);
+ /// Build H-CFG for TheLoop and update Plan accordingly.
+ void buildHierarchicalCFG();
};
} // namespace llvm
diff --git a/lib/Transforms/Vectorize/VPlanLoopInfo.h b/lib/Transforms/Vectorize/VPlanLoopInfo.h
new file mode 100644
index 000000000000..5c2485fc2145
--- /dev/null
+++ b/lib/Transforms/Vectorize/VPlanLoopInfo.h
@@ -0,0 +1,45 @@
+//===-- VPLoopInfo.h --------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines VPLoopInfo analysis and VPLoop class. VPLoopInfo is a
+/// specialization of LoopInfoBase for VPBlockBase. VPLoops is a specialization
+/// of LoopBase that is used to hold loop metadata from VPLoopInfo. Further
+/// information can be found in VectorizationPlanner.rst.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLOOPINFO_H
+#define LLVM_TRANSFORMS_VECTORIZE_VPLOOPINFO_H
+
+#include "llvm/Analysis/LoopInfoImpl.h"
+
+namespace llvm {
+class VPBlockBase;
+
+/// Hold analysis information for every loop detected by VPLoopInfo. It is an
+/// instantiation of LoopBase.
+class VPLoop : public LoopBase<VPBlockBase, VPLoop> {
+private:
+ friend class LoopInfoBase<VPBlockBase, VPLoop>;
+ explicit VPLoop(VPBlockBase *VPB) : LoopBase<VPBlockBase, VPLoop>(VPB) {}
+};
+
+/// VPLoopInfo provides analysis of natural loop for VPBlockBase-based
+/// Hierarchical CFG. It is a specialization of LoopInfoBase class.
+// TODO: VPLoopInfo is initially computed on top of the VPlan plain CFG, which
+// is the same as the incoming IR CFG. If it's more efficient than running the
+// whole loop detection algorithm, we may want to create a mechanism to
+// translate LoopInfo into VPLoopInfo. However, that would require significant
+// changes in LoopInfoBase class.
+typedef LoopInfoBase<VPBlockBase, VPLoop> VPLoopInfo;
+
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_VECTORIZE_VPLOOPINFO_H
diff --git a/test/Analysis/BasicAA/invalidation.ll b/test/Analysis/BasicAA/invalidation.ll
index 0eaf7752f89b..27e94cb6a2e2 100644
--- a/test/Analysis/BasicAA/invalidation.ll
+++ b/test/Analysis/BasicAA/invalidation.ll
@@ -24,6 +24,18 @@
; CHECK-LI-INVALIDATE: Invalidating analysis: BasicAA
; CHECK-LI-INVALIDATE: Running pass: AAEvaluator
; CHECK-LI-INVALIDATE: Running analysis: BasicAA
+;
+; Check PhiValues specifically.
+; RUN: opt -disable-output -disable-verify -debug-pass-manager %s 2>&1 \
+; RUN: -passes='require<phi-values>,require<aa>,invalidate<phi-values>,aa-eval' -aa-pipeline='basic-aa' \
+; RUN: | FileCheck %s --check-prefix=CHECK-PV-INVALIDATE
+; CHECK-PV-INVALIDATE: Running pass: RequireAnalysisPass
+; CHECK-PV-INVALIDATE: Running analysis: BasicAA
+; CHECK-PV-INVALIDATE: Running pass: InvalidateAnalysisPass
+; CHECK-PV-INVALIDATE: Invalidating analysis: PhiValuesAnalysis
+; CHECK-PV-INVALIDATE: Invalidating analysis: BasicAA
+; CHECK-PV-INVALIDATE: Running pass: AAEvaluator
+; CHECK-PV-INVALIDATE: Running analysis: BasicAA
; Some code that will result in actual AA queries, including inside of a loop.
; FIXME: Sadly, none of these queries managed to use either the domtree or
diff --git a/test/Analysis/BasicAA/phi-aa.ll b/test/Analysis/BasicAA/phi-aa.ll
index e410520bc0fc..e57dd40f0047 100644
--- a/test/Analysis/BasicAA/phi-aa.ll
+++ b/test/Analysis/BasicAA/phi-aa.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
-; RUN: opt < %s -aa-pipeline=basic-aa -passes=aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+; RUN: opt < %s -phi-values -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+; RUN: opt < %s -aa-pipeline=basic-aa -passes='require<phi-values>,aa-eval' -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@@ -78,3 +78,39 @@ declare i1 @cond(i32*)
declare void @inc(i32*)
+; When we have a chain of phis in nested loops we should recognise if there's
+; actually only one underlying value.
+; CHECK-LABEL: loop_phi_chain
+; CHECK: NoAlias: i32* %val1, i32* @Y
+; CHECK: NoAlias: i32* %val2, i32* @Y
+; CHECK: NoAlias: i32* %val3, i32* @Y
+define void @loop_phi_chain(i32 %a, i32 %b, i32 %c) {
+entry:
+ br label %loop1
+
+loop1:
+ %n1 = phi i32 [ 0, %entry ], [ %add1, %loop2 ]
+ %val1 = phi i32* [ @X, %entry ], [ %val2, %loop2 ]
+ %add1 = add i32 %n1, 1
+ %cmp1 = icmp ne i32 %n1, 32
+ br i1 %cmp1, label %loop2, label %end
+
+loop2:
+ %n2 = phi i32 [ 0, %loop1 ], [ %add2, %loop3 ]
+ %val2 = phi i32* [ %val1, %loop1 ], [ %val3, %loop3 ]
+ %add2 = add i32 %n2, 1
+ %cmp2 = icmp ne i32 %n2, 32
+ br i1 %cmp2, label %loop3, label %loop1
+
+loop3:
+ %n3 = phi i32 [ 0, %loop2 ], [ %add3, %loop3 ]
+ %val3 = phi i32* [ %val2, %loop2 ], [ %val3, %loop3 ]
+ store i32 0, i32* %val3, align 4
+ store i32 0, i32* @Y, align 4
+ %add3 = add i32 %n3, 1
+ %cmp3 = icmp ne i32 %n3, 32
+ br i1 %cmp3, label %loop3, label %loop2
+
+end:
+ ret void
+}
diff --git a/test/Analysis/BasicAA/phi-values-usage.ll b/test/Analysis/BasicAA/phi-values-usage.ll
new file mode 100644
index 000000000000..c5120a31f43c
--- /dev/null
+++ b/test/Analysis/BasicAA/phi-values-usage.ll
@@ -0,0 +1,50 @@
+; RUN: opt -debug-pass=Executions -phi-values -memcpyopt -instcombine -disable-output < %s 2>&1 | FileCheck %s
+
+; Check that phi values is not run when it's not already available, and that
+; basicaa is freed after a pass that preserves CFG.
+
+; CHECK: Executing Pass 'Phi Values Analysis'
+; CHECK: Executing Pass 'Basic Alias Analysis (stateless AA impl)'
+; CHECK: Executing Pass 'Memory Dependence Analysis'
+; CHECK: Executing Pass 'MemCpy Optimization'
+; CHECK-DAG: Freeing Pass 'MemCpy Optimization'
+; CHECK-DAG: Freeing Pass 'Phi Values Analysis'
+; CHECK-DAG: Freeing Pass 'Memory Dependence Analysis'
+; CHECK-DAG: Freeing Pass 'Basic Alias Analysis (stateless AA impl)'
+; CHECK-NOT: Executing Pass 'Phi Values Analysis'
+; CHECK: Executing Pass 'Basic Alias Analysis (stateless AA impl)'
+; CHECK: Executing Pass 'Combine redundant instructions'
+
+declare void @otherfn([4 x i8]*)
+declare i32 @__gxx_personality_v0(...)
+
+; This function is one where if we didn't free basicaa after memcpyopt then the
+; usage of basicaa in instcombine would cause a segfault due to stale phi-values
+; results being used.
+define void @fn(i8* %this, i64* %ptr) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ %arr = alloca [4 x i8], align 8
+ %gep1 = getelementptr inbounds [4 x i8], [4 x i8]* %arr, i64 0, i32 0
+ br i1 undef, label %then, label %if
+
+if:
+ br label %then
+
+then:
+ %phi = phi i64* [ %ptr, %if ], [ null, %entry ]
+ store i8 1, i8* %gep1, align 8
+ %load = load i64, i64* %phi, align 8
+ %gep2 = getelementptr inbounds i8, i8* undef, i64 %load
+ %gep3 = getelementptr inbounds i8, i8* %gep2, i64 40
+ invoke i32 undef(i8* undef)
+ to label %invoke unwind label %lpad
+
+invoke:
+ unreachable
+
+lpad:
+ landingpad { i8*, i32 }
+ catch i8* null
+ call void @otherfn([4 x i8]* nonnull %arr)
+ unreachable
+}
diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
index 9c9d22d8ff6c..c0a1a2a149db 100644
--- a/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
+++ b/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
@@ -141,7 +141,7 @@ define fp128 @test_quad_dump() {
ret fp128 0xL00000000000000004000000000000000
}
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %0:_(p0) = G_EXTRACT_VECTOR_ELT %1:_(<2 x p0>), %2:_(s32) (in function: vector_of_pointers_extractelement)
+; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %2:_(p0) = G_EXTRACT_VECTOR_ELT %0:_(<2 x p0>), %3:_(s32) (in function: vector_of_pointers_extractelement)
; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for vector_of_pointers_extractelement
; FALLBACK-WITH-REPORT-OUT-LABEL: vector_of_pointers_extractelement:
@var = global <2 x i16*> zeroinitializer
@@ -158,7 +158,7 @@ end:
br label %block
}
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: G_STORE %0:_(<2 x p0>), %5:_(p0) :: (store 16 into `<2 x i16*>* undef`) (in function: vector_of_pointers_insertelement)
+; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: G_STORE %2:_(<2 x p0>), %1:_(p0) :: (store 16 into `<2 x i16*>* undef`) (in function: vector_of_pointers_insertelement)
; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for vector_of_pointers_insertelement
; FALLBACK-WITH-REPORT-OUT-LABEL: vector_of_pointers_insertelement:
define void @vector_of_pointers_insertelement() {
diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
index 65c6a4f90c70..2c8ee439d247 100644
--- a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
+++ b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
@@ -138,9 +138,9 @@ false:
; CHECK: %0:_(s32) = COPY $w0
; CHECK: %[[reg100:[0-9]+]]:_(s32) = G_CONSTANT i32 100
; CHECK: %[[reg200:[0-9]+]]:_(s32) = G_CONSTANT i32 200
-; CHECK: %[[reg0:[0-9]+]]:_(s32) = G_CONSTANT i32 0
-; CHECK: %[[reg1:[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK: %[[reg2:[0-9]+]]:_(s32) = G_CONSTANT i32 2
+; CHECK: %[[reg1:[0-9]+]]:_(s32) = G_CONSTANT i32 1
+; CHECK: %[[reg0:[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: %[[regicmp100:[0-9]+]]:_(s1) = G_ICMP intpred(eq), %[[reg100]](s32), %0
; CHECK: G_BRCOND %[[regicmp100]](s1), %[[BB_CASE100]]
; CHECK: G_BR %[[BB_NOTCASE100_CHECKNEXT]]
@@ -413,9 +413,9 @@ define i64* @trivial_bitcast(i8* %a) {
; CHECK: G_BR %[[CAST:bb\.[0-9]+]]
; CHECK: [[END:bb\.[0-9]+]].{{[a-zA-Z0-9.]+}}:
+; CHECK: $x0 = COPY [[A]]
; CHECK: [[CAST]].{{[a-zA-Z0-9.]+}}:
-; CHECK: {{%[0-9]+}}:_(p0) = COPY [[A]]
; CHECK: G_BR %[[END]]
define i64* @trivial_bitcast_with_copy(i8* %a) {
br label %cast
@@ -2147,3 +2147,15 @@ define i32 @test_atomicrmw_umax(i256* %addr) {
%oldval.trunc = trunc i256 %oldval to i32
ret i32 %oldval.trunc
}
+
+@addr = global i8* null
+
+define void @test_blockaddress() {
+; CHECK-LABEL: name: test_blockaddress
+; CHECK: [[BADDR:%[0-9]+]]:_(p0) = G_BLOCK_ADDR blockaddress(@test_blockaddress, %ir-block.block)
+; CHECK: G_STORE [[BADDR]](p0)
+ store i8* blockaddress(@test_blockaddress, %block), i8** @addr
+ indirectbr i8* blockaddress(@test_blockaddress, %block), [label %block]
+block:
+ ret void
+}
diff --git a/test/CodeGen/AArch64/GlobalISel/irtranslator-block-order.ll b/test/CodeGen/AArch64/GlobalISel/irtranslator-block-order.ll
new file mode 100644
index 000000000000..89e2fecbe0f5
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/irtranslator-block-order.ll
@@ -0,0 +1,19 @@
+; RUN: llc -O0 -o - %s | FileCheck %s
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+; CHECK-LABEL: testfn
+; CHECK: ret
+define void @testfn() {
+start:
+ br label %bb2
+
+bb1:
+ store i8 %0, i8* undef, align 4
+ ret void
+
+bb2:
+ %0 = extractvalue { i32, i8 } undef, 1
+ br label %bb1
+}
+
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-blockaddress.mir b/test/CodeGen/AArch64/GlobalISel/legalize-blockaddress.mir
new file mode 100644
index 000000000000..60b47db98ed6
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/legalize-blockaddress.mir
@@ -0,0 +1,45 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-linux-gnu -O0 -run-pass=legalizer %s -o - | FileCheck %s
+--- |
+ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+ target triple = "aarch64-none-linux-gnu"
+
+ @addr = global i8* null
+
+ define void @test_blockaddress() {
+ store i8* blockaddress(@test_blockaddress, %block), i8** @addr
+ indirectbr i8* blockaddress(@test_blockaddress, %block), [label %block]
+
+ block: ; preds = %0
+ ret void
+ }
+
+...
+---
+name: test_blockaddress
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+body: |
+ ; CHECK-LABEL: name: test_blockaddress
+ ; CHECK: bb.0 (%ir-block.0):
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK: [[BLOCK_ADDR:%[0-9]+]]:_(p0) = G_BLOCK_ADDR blockaddress(@test_blockaddress, %ir-block.block)
+ ; CHECK: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @addr
+ ; CHECK: G_STORE [[BLOCK_ADDR]](p0), [[GV]](p0) :: (store 8 into @addr)
+ ; CHECK: G_BRINDIRECT [[BLOCK_ADDR]](p0)
+ ; CHECK: bb.1.block (address-taken):
+ ; CHECK: RET_ReallyLR
+ bb.1 (%ir-block.0):
+ %0:_(p0) = G_BLOCK_ADDR blockaddress(@test_blockaddress, %ir-block.block)
+ %1:_(p0) = G_GLOBAL_VALUE @addr
+ G_STORE %0(p0), %1(p0) :: (store 8 into @addr)
+ G_BRINDIRECT %0(p0)
+
+ bb.2.block (address-taken):
+ RET_ReallyLR
+
+...
+
diff --git a/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir b/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir
new file mode 100644
index 000000000000..43e77eba48f3
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir
@@ -0,0 +1,64 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-unknown-unknown -o - -verify-machineinstrs -run-pass=instruction-select %s | FileCheck %s
+# RUN: llc -mtriple=aarch64-unknown-unknown -o - -verify-machineinstrs -run-pass=instruction-select -code-model=large %s | FileCheck %s --check-prefix=LARGE
+--- |
+ ; ModuleID = 'blockaddress.ll'
+ source_filename = "blockaddress.ll"
+ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+ target triple = "aarch64-none-linux-gnu"
+
+ @addr = global i8* null
+
+ define void @test_blockaddress() {
+ store i8* blockaddress(@test_blockaddress, %block), i8** @addr
+ indirectbr i8* blockaddress(@test_blockaddress, %block), [label %block]
+
+ block: ; preds = %0
+ ret void
+ }
+
+...
+---
+name: test_blockaddress
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr }
+ - { id: 1, class: gpr }
+body: |
+ ; CHECK-LABEL: name: test_blockaddress
+ ; CHECK: bb.0 (%ir-block.0):
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK: [[MOVaddrBA:%[0-9]+]]:gpr64 = MOVaddrBA target-flags(aarch64-page) blockaddress(@test_blockaddress, %ir-block.block), target-flags(aarch64-pageoff, aarch64-nc) blockaddress(@test_blockaddress, %ir-block.block)
+ ; CHECK: [[MOVaddr:%[0-9]+]]:gpr64common = MOVaddr target-flags(aarch64-page) @addr, target-flags(aarch64-pageoff, aarch64-nc) @addr
+ ; CHECK: STRXui [[MOVaddrBA]], [[MOVaddr]], 0 :: (store 8 into @addr)
+ ; CHECK: BR [[MOVaddrBA]]
+ ; CHECK: bb.1.block (address-taken):
+ ; CHECK: RET_ReallyLR
+ ; LARGE-LABEL: name: test_blockaddress
+ ; LARGE: bb.0 (%ir-block.0):
+ ; LARGE: successors: %bb.1(0x80000000)
+ ; LARGE: [[MOVZXi:%[0-9]+]]:gpr64 = MOVZXi target-flags(aarch64-g0, aarch64-nc) blockaddress(@test_blockaddress, %ir-block.block), 0
+ ; LARGE: [[MOVKXi:%[0-9]+]]:gpr64 = MOVKXi [[MOVZXi]], target-flags(aarch64-g1, aarch64-nc) blockaddress(@test_blockaddress, %ir-block.block), 16
+ ; LARGE: [[MOVKXi1:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi]], target-flags(aarch64-g2, aarch64-nc) blockaddress(@test_blockaddress, %ir-block.block), 32
+ ; LARGE: [[MOVKXi2:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi1]], target-flags(aarch64-g3) blockaddress(@test_blockaddress, %ir-block.block), 48
+ ; LARGE: [[MOVZXi1:%[0-9]+]]:gpr64 = MOVZXi target-flags(aarch64-g0, aarch64-nc) @addr, 0
+ ; LARGE: [[MOVKXi3:%[0-9]+]]:gpr64 = MOVKXi [[MOVZXi1]], target-flags(aarch64-g1, aarch64-nc) @addr, 16
+ ; LARGE: [[MOVKXi4:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi3]], target-flags(aarch64-g2, aarch64-nc) @addr, 32
+ ; LARGE: [[MOVKXi5:%[0-9]+]]:gpr64common = MOVKXi [[MOVKXi4]], target-flags(aarch64-g3) @addr, 48
+ ; LARGE: STRXui [[MOVKXi2]], [[MOVKXi5]], 0 :: (store 8 into @addr)
+ ; LARGE: BR [[MOVKXi2]]
+ ; LARGE: bb.1.block (address-taken):
+ ; LARGE: RET_ReallyLR
+ bb.1 (%ir-block.0):
+ %0:gpr(p0) = G_BLOCK_ADDR blockaddress(@test_blockaddress, %ir-block.block)
+ %1:gpr(p0) = G_GLOBAL_VALUE @addr
+ G_STORE %0(p0), %1(p0) :: (store 8 into @addr)
+ G_BRINDIRECT %0(p0)
+
+ bb.2.block (address-taken):
+ RET_ReallyLR
+
+...
diff --git a/test/CodeGen/AArch64/O3-pipeline.ll b/test/CodeGen/AArch64/O3-pipeline.ll
index e482682fc9d9..f0c7e4e67c12 100644
--- a/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/test/CodeGen/AArch64/O3-pipeline.ll
@@ -154,6 +154,8 @@
; CHECK-NEXT: Insert fentry calls
; CHECK-NEXT: Insert XRay ops
; CHECK-NEXT: Implement the 'patchable-function' attribute
+; CHECK-NEXT: Machine Outliner
+; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: AArch64 Assembly Printer
diff --git a/test/CodeGen/AArch64/arm64-cse.ll b/test/CodeGen/AArch64/arm64-cse.ll
index 030857df7779..36aa036c3c0f 100644
--- a/test/CodeGen/AArch64/arm64-cse.ll
+++ b/test/CodeGen/AArch64/arm64-cse.ll
@@ -12,7 +12,7 @@ entry:
; CHECK-NOT: sub
; CHECK: b.ge
; CHECK: sub
-; CHECK: sub
+; CHECK-NEXT: add
; CHECK-NOT: sub
; CHECK: ret
%0 = load i32, i32* %offset, align 4
diff --git a/test/CodeGen/AArch64/arm64-memset-to-bzero.ll b/test/CodeGen/AArch64/arm64-memset-to-bzero.ll
index 0afe1c006b0f..ab819a42729a 100644
--- a/test/CodeGen/AArch64/arm64-memset-to-bzero.ll
+++ b/test/CodeGen/AArch64/arm64-memset-to-bzero.ll
@@ -1,6 +1,6 @@
-; RUN: llc %s -mtriple=arm64-apple-darwin -o - | \
-; RUN: FileCheck --check-prefixes=CHECK,CHECK-DARWIN %s
-; RUN: llc %s -mtriple=arm64-linux-gnu -o - | \
+; RUN: llc %s -enable-machine-outliner=never -mtriple=arm64-apple-darwin -o - \
+; RUN: | FileCheck --check-prefixes=CHECK,CHECK-DARWIN %s
+; RUN: llc %s -enable-machine-outliner=never -mtriple=arm64-linux-gnu -o - | \
; RUN: FileCheck --check-prefixes=CHECK,CHECK-LINUX %s
; <rdar://problem/14199482> ARM64: Calls to bzero() replaced with calls to memset()
diff --git a/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll b/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll
index 7efb4bf6d596..79cf99008433 100644
--- a/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll
+++ b/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll
@@ -1,9 +1,11 @@
; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -pass-remarks-analysis=asm-printer \
+; RUN: -verify-machineinstrs \
; RUN: -pass-remarks-with-hotness=1 -asm-verbose=0 \
; RUN: -debug-only=lazy-machine-block-freq,block-freq \
; RUN: -debug-pass=Executions 2>&1 | FileCheck %s -check-prefix=HOTNESS
; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -pass-remarks-analysis=asm-printer \
+; RUN: -verify-machineinstrs \
; RUN: -pass-remarks-with-hotness=0 -asm-verbose=0 \
; RUN: -debug-only=lazy-machine-block-freq,block-freq \
; RUN: -debug-pass=Executions 2>&1 | FileCheck %s -check-prefix=NO_HOTNESS
@@ -26,8 +28,10 @@
; requested. (This hard-codes the previous pass to the Assembly Printer,
; please adjust accordingly.)
-; HOTNESS: Executing Pass 'Implement the 'patchable-function' attribute'
-; HOTNESS-NEXT: Freeing Pass 'Implement the 'patchable-function' attribute'
+; HOTNESS: Freeing Pass 'Machine Outliner'
+; HOTNESS-NEXT: Executing Pass 'Function Pass Manager'
+; HOTNESS-NEXT: Executing Pass 'Verify generated machine code'
+; HOTNESS-NEXT: Freeing Pass 'Verify generated machine code'
; HOTNESS-NEXT: Executing Pass 'Lazy Machine Block Frequency Analysis'
; HOTNESS-NEXT: Executing Pass 'Machine Optimization Remark Emitter'
; HOTNESS-NEXT: Building MachineBlockFrequencyInfo on the fly
@@ -41,8 +45,10 @@
; HOTNESS: arm64-summary-remarks.ll:5:0: 1 instructions in function (hotness: 33)
-; NO_HOTNESS: Executing Pass 'Implement the 'patchable-function' attribute'
-; NO_HOTNESS-NEXT: Freeing Pass 'Implement the 'patchable-function' attribute'
+; NO_HOTNESS: Freeing Pass 'Machine Outliner'
+; NO_HOTNESS-NEXT: Executing Pass 'Function Pass Manager'
+; NO_HOTNESS-NEXT: Executing Pass 'Verify generated machine code'
+; NO_HOTNESS-NEXT: Freeing Pass 'Verify generated machine code'
; NO_HOTNESS-NEXT: Executing Pass 'Lazy Machine Block Frequency Analysis'
; NO_HOTNESS-NEXT: Executing Pass 'Machine Optimization Remark Emitter'
; NO_HOTNESS-NEXT: Executing Pass 'AArch64 Assembly Printer'
diff --git a/test/CodeGen/AArch64/cond-sel.ll b/test/CodeGen/AArch64/cond-sel.ll
index b39cea1f6192..691cbcf1a5df 100644
--- a/test/CodeGen/AArch64/cond-sel.ll
+++ b/test/CodeGen/AArch64/cond-sel.ll
@@ -1,5 +1,5 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mcpu=cyclone | FileCheck %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
+; RUN: llc -enable-machine-outliner=never -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mcpu=cyclone | FileCheck %s
+; RUN: llc -enable-machine-outliner=never -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
@var32 = global i32 0
@var64 = global i64 0
diff --git a/test/CodeGen/AArch64/machine-outliner-default.mir b/test/CodeGen/AArch64/machine-outliner-default.mir
new file mode 100644
index 000000000000..698a2fc55b58
--- /dev/null
+++ b/test/CodeGen/AArch64/machine-outliner-default.mir
@@ -0,0 +1,71 @@
+# RUN: llc -mtriple=aarch64--- -run-pass=machine-outliner \
+# RUN: -verify-machineinstrs %s -o - | FileCheck %s
+
+--- |
+ define void @outline_1() #0 { ret void }
+ define void @outline_2() #0 { ret void }
+ define void @outline_3() #0 { ret void }
+ define void @dont_outline() #1 { ret void }
+
+ attributes #0 = { noredzone minsize optsize }
+ attributes #1 = { noredzone }
+...
+---
+
+name: outline_1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: bb.0:
+ ; CHECK: OUTLINED
+ liveins: $w8, $wzr
+ $w8 = ORRWri $wzr, 1
+ $w8 = ORRWri $wzr, 2
+ $w8 = ORRWri $wzr, 3
+ $w8 = ORRWri $wzr, 4
+ RET undef $lr
+...
+---
+
+name: outline_2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: bb.0:
+ ; CHECK: OUTLINED
+ liveins: $w8, $wzr
+ $w8 = ORRWri $wzr, 1
+ $w8 = ORRWri $wzr, 2
+ $w8 = ORRWri $wzr, 3
+ $w8 = ORRWri $wzr, 4
+ RET undef $lr
+...
+---
+
+name: outline_3
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: bb.0:
+ ; CHECK: OUTLINED
+ liveins: $w8, $wzr
+ $w8 = ORRWri $wzr, 1
+ $w8 = ORRWri $wzr, 2
+ $w8 = ORRWri $wzr, 3
+ $w8 = ORRWri $wzr, 4
+ RET undef $lr
+...
+---
+
+name: dont_outline
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: bb.0:
+ ; CHECK-NOT: BL
+ liveins: $w8, $wzr
+ $w8 = ORRWri $wzr, 1
+ $w8 = ORRWri $wzr, 2
+ $w8 = ORRWri $wzr, 3
+ $w8 = ORRWri $wzr, 4
+ RET undef $lr
diff --git a/test/CodeGen/AArch64/machine-outliner-flags.ll b/test/CodeGen/AArch64/machine-outliner-flags.ll
index e00a19099cf2..c435093b794e 100644
--- a/test/CodeGen/AArch64/machine-outliner-flags.ll
+++ b/test/CodeGen/AArch64/machine-outliner-flags.ll
@@ -14,7 +14,7 @@
; RUN: | FileCheck %s -check-prefix=NEVER
; RUN: llc %s -debug-pass=Structure -verify-machineinstrs \
-; RUN: -mtriple arm64---- -o /dev/null 2>&1 \
+; RUN: --debug-only=machine-outliner -mtriple arm64---- -o /dev/null 2>&1 \
; RUN: | FileCheck %s -check-prefix=NOT-ADDED
; RUN: llc %s -O=0 -debug-pass=Structure -verify-machineinstrs \
@@ -27,10 +27,11 @@
; Cases where it should be added:
; * -enable-machine-outliner
; * -enable-machine-outliner=always
+; * -enable-machine-outliner is not passed (AArch64 supports
+; target-default outlining)
;
; Cases where it should not be added:
; * -O0 or equivalent
-; * -enable-machine-outliner is not passed
; * -enable-machine-outliner=never is passed
; ALWAYS: Machine Outliner
@@ -38,7 +39,8 @@
; ENABLE: Machine Outliner
; ENABLE: Machine Outliner: Running on all functions
; NEVER-NOT: Machine Outliner
-; NOT-ADDED-NOT: Machine Outliner
+; NOT-ADDED: Machine Outliner
+; NOT-ADDED: Machine Outliner: Running on target-default functions
; OPTNONE-NOT: Machine Outliner
define void @foo() {
diff --git a/test/CodeGen/AArch64/machine-outliner-regsave.mir b/test/CodeGen/AArch64/machine-outliner-regsave.mir
new file mode 100644
index 000000000000..6d00bd39cde7
--- /dev/null
+++ b/test/CodeGen/AArch64/machine-outliner-regsave.mir
@@ -0,0 +1,112 @@
+# RUN: llc -mtriple=aarch64-apple-darwin -run-pass=prologepilog \
+# RUN: -run-pass=machine-outliner -verify-machineinstrs %s -o - | FileCheck %s
+# Check that we save LR to a callee-saved register when possible.
+# foo() should use a callee-saved register. However, bar() should not.
+--- |
+
+ define void @foo() #0 {
+ ret void
+ }
+
+ define void @bar() #0 {
+ ret void
+ }
+
+ attributes #0 = { minsize noinline noredzone "no-frame-pointer-elim"="true" }
+...
+---
+# Make sure that when we outline and a register is available, we
+# use it to save + restore LR instead of SP.
+# CHECK: name: foo
+# CHECK-DAG: bb.0
+# CHECK-DAG: $x[[REG:[0-9]+]] = ORRXrs $xzr, $lr, 0
+# CHECK-NEXT: BL
+# CHECK-NEXT: $lr = ORRXrs $xzr, $x[[REG]], 0
+# CHECK-DAG: bb.1
+# CHECK-DAG: $x[[REG]] = ORRXrs $xzr, $lr, 0
+# CHECK-NEXT: BL
+# CHECK-NEXT: $lr = ORRXrs $xzr, $x[[REG]], 0
+# CHECK-DAG: bb.2
+# CHECK-DAG: $x[[REG]] = ORRXrs $xzr, $lr, 0
+# CHECK-NEXT: BL
+# CHECK-NEXT: $lr = ORRXrs $xzr, $x[[REG]], 0
+name: foo
+tracksRegLiveness: true
+fixedStack:
+body: |
+ bb.0:
+ liveins: $lr, $w9
+ $x25 = ORRXri $xzr, 1
+ $w9 = ORRWri $wzr, 1
+ $w9 = ORRWri $wzr, 1
+ $w9 = ORRWri $wzr, 1
+ $w9 = ORRWri $wzr, 1
+ $w9 = ORRWri $wzr, 1
+ $w9 = ORRWri $wzr, 2
+ bb.1:
+ liveins: $lr, $w9
+ $w9 = ORRWri $wzr, 1
+ $w9 = ORRWri $wzr, 1
+ $w9 = ORRWri $wzr, 1
+ $w9 = ORRWri $wzr, 1
+ $w9 = ORRWri $wzr, 1
+ $w9 = ORRWri $wzr, 2
+ bb.2:
+ liveins: $lr, $w9
+ $w9 = ORRWri $wzr, 1
+ $w9 = ORRWri $wzr, 1
+ $w9 = ORRWri $wzr, 1
+ $w9 = ORRWri $wzr, 1
+ $w9 = ORRWri $wzr, 1
+ $w9 = ORRWri $wzr, 2
+ RET undef $lr
+
+...
+---
+# Convoluted case that shows that we'll still save to the stack when there are
+# no appropriate registers available.
+# The live-in lists do not contain x16 or x17 since including them would cause
+# nothing to be outlined.
+# They also deliberately don't contain x18 to show that on Darwin we won't store
+# to that.
+# CHECK-LABEL: name: bar
+# CHECK: early-clobber $sp = STRXpre $lr, $sp, -16
+# CHECK-NEXT: BL
+# CHECK-DAG: early-clobber $sp, $lr = LDRXpost $sp, 16
+# CHECK: early-clobber $sp = STRXpre $lr, $sp, -16
+# CHECK-NEXT: BL
+# CHECK-DAG: early-clobber $sp, $lr = LDRXpost $sp, 16
+# CHECK: early-clobber $sp = STRXpre $lr, $sp, -16
+# CHECK-NEXT: BL
+# CHECK-NEXT: early-clobber $sp, $lr = LDRXpost $sp, 16
+name: bar
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $lr, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x19, $x20, $x21, $x22, $x23, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28
+ $w10 = ORRWri $wzr, 1
+ $w10 = ORRWri $wzr, 1
+ $w10 = ORRWri $wzr, 1
+ $w10 = ORRWri $wzr, 1
+ $w10 = ORRWri $wzr, 1
+ $w12 = ORRWri $wzr, 2
+ bb.1:
+ liveins: $lr, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x19, $x20, $x21, $x22, $x23, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28
+ $w10 = ORRWri $wzr, 1
+ $w10 = ORRWri $wzr, 1
+ $w10 = ORRWri $wzr, 1
+ $w10 = ORRWri $wzr, 1
+ $w10 = ORRWri $wzr, 1
+ $w12 = ORRWri $wzr, 2
+ bb.2:
+ liveins: $lr, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x19, $x20, $x21, $x22, $x23, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28
+ $w10 = ORRWri $wzr, 1
+ $w10 = ORRWri $wzr, 1
+ $w10 = ORRWri $wzr, 1
+ $w10 = ORRWri $wzr, 1
+ $w10 = ORRWri $wzr, 1
+ $w12 = ORRWri $wzr, 2
+ bb.3:
+ liveins: $lr, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x19, $x20, $x21, $x22, $x23, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28
+ RET undef $lr
+
diff --git a/test/CodeGen/AArch64/machine-outliner.ll b/test/CodeGen/AArch64/machine-outliner.ll
index 1b45409b799a..9d922c27f884 100644
--- a/test/CodeGen/AArch64/machine-outliner.ll
+++ b/test/CodeGen/AArch64/machine-outliner.ll
@@ -82,17 +82,17 @@ define void @dog() #0 {
; CHECK: .p2align 2
; CHECK-NEXT: [[OUTLINED]]:
; CHECK: orr w8, wzr, #0x1
-; CHECK-NEXT: str w8, [sp, #44]
+; CHECK-NEXT: str w8, [sp, #28]
; CHECK-NEXT: orr w8, wzr, #0x2
-; CHECK-NEXT: str w8, [sp, #40]
+; CHECK-NEXT: str w8, [sp, #24]
; CHECK-NEXT: orr w8, wzr, #0x3
-; CHECK-NEXT: str w8, [sp, #36]
+; CHECK-NEXT: str w8, [sp, #20]
; CHECK-NEXT: orr w8, wzr, #0x4
-; CHECK-NEXT: str w8, [sp, #32]
+; CHECK-NEXT: str w8, [sp, #16]
; CHECK-NEXT: mov w8, #5
-; CHECK-NEXT: str w8, [sp, #28]
+; CHECK-NEXT: str w8, [sp, #12]
; CHECK-NEXT: orr w8, wzr, #0x6
-; CHECK-NEXT: str w8, [sp, #24]
+; CHECK-NEXT: str w8, [sp, #8]
; CHECK-NEXT: ret
attributes #0 = { noredzone "target-cpu"="cyclone" }
diff --git a/test/CodeGen/AArch64/machine-outliner.mir b/test/CodeGen/AArch64/machine-outliner.mir
index 024bee47075b..bd1abdccd44c 100644
--- a/test/CodeGen/AArch64/machine-outliner.mir
+++ b/test/CodeGen/AArch64/machine-outliner.mir
@@ -28,19 +28,19 @@
# CHECK-LABEL: name: main
# CHECK: BL @OUTLINED_FUNCTION_[[F0:[0-9]+]]
-# CHECK-NEXT: early-clobber $sp, $lr = LDRXpost $sp, 16
+# CHECK-NEXT: $lr = ORRXrs $xzr, $x[[REG:[0-9]+]], 0
# CHECK-NEXT: $x16 = ADDXri $sp, 48, 0
# CHECK-NEXT: STRHHroW $w16, $x9, $w30, 1, 1
# CHECK-NEXT: $lr = ORRXri $xzr, 1
# CHECK: BL @OUTLINED_FUNCTION_[[F0]]
-# CHECK-NEXT: early-clobber $sp, $lr = LDRXpost $sp, 16
+# CHECK-NEXT: $lr = ORRXrs $xzr, $x[[REG]], 0
# CHECK-NEXT: $x16 = ADDXri $sp, 48, 0
# CHECK-NEXT: STRHHroW $w16, $x9, $w30, 1, 1
# CHECK-NEXT: $lr = ORRXri $xzr, 1
# CHECK: BL @OUTLINED_FUNCTION_[[F0]]
-# CHECK-NEXT: early-clobber $sp, $lr = LDRXpost $sp, 16
+# CHECK-NEXT: $lr = ORRXrs $xzr, $x[[REG]], 0
# CHECK-NEXT: $x16 = ADDXri $sp, 48, 0
# CHECK-NEXT: STRHHroW $w16, $x9, $w30, 1, 1
# CHECK-NEXT: $lr = ORRXri $xzr, 1
diff --git a/test/CodeGen/AArch64/max-jump-table.ll b/test/CodeGen/AArch64/max-jump-table.ll
index 612eba8f2ceb..44dde7b1cd06 100644
--- a/test/CodeGen/AArch64/max-jump-table.ll
+++ b/test/CodeGen/AArch64/max-jump-table.ll
@@ -89,6 +89,7 @@ entry:
; CHECKM1-NOT: %jump-table.1
; CHECKM3-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.5 %bb.6{{$}}
; CHECKM3-NOT: %jump-table.1
+; CHECK-DAG: End machine code for function jt2.
bb1: tail call void @ext(i32 1) br label %return
bb2: tail call void @ext(i32 2) br label %return
diff --git a/test/CodeGen/AArch64/rotate-extract.ll b/test/CodeGen/AArch64/rotate-extract.ll
index 4f5313ca4a77..41664294b1c2 100644
--- a/test/CodeGen/AArch64/rotate-extract.ll
+++ b/test/CodeGen/AArch64/rotate-extract.ll
@@ -11,9 +11,8 @@
define i64 @ror_extract_shl(i64 %i) nounwind {
; CHECK-LABEL: ror_extract_shl:
; CHECK: // %bb.0:
-; CHECK-NEXT: lsl x8, x0, #10
-; CHECK-NEXT: bfxil x8, x0, #54, #7
-; CHECK-NEXT: mov x0, x8
+; CHECK-NEXT: lsl x8, x0, #3
+; CHECK-NEXT: ror x0, x8, #57
; CHECK-NEXT: ret
%lhs_mul = shl i64 %i, 3
%rhs_mul = shl i64 %i, 10
@@ -25,8 +24,8 @@ define i64 @ror_extract_shl(i64 %i) nounwind {
define i32 @ror_extract_shrl(i32 %i) nounwind {
; CHECK-LABEL: ror_extract_shrl:
; CHECK: // %bb.0:
-; CHECK-NEXT: ror w8, w0, #7
-; CHECK-NEXT: and w0, w8, #0xf1ffffff
+; CHECK-NEXT: lsr w8, w0, #3
+; CHECK-NEXT: ror w0, w8, #4
; CHECK-NEXT: ret
%lhs_div = lshr i32 %i, 7
%rhs_div = lshr i32 %i, 3
@@ -54,8 +53,8 @@ define i64 @ror_extract_udiv(i64 %i) nounwind {
; CHECK-NEXT: mov x8, #-6148914691236517206
; CHECK-NEXT: movk x8, #43691
; CHECK-NEXT: umulh x8, x0, x8
-; CHECK-NEXT: ror x8, x8, #5
-; CHECK-NEXT: and x0, x8, #0xf7ffffffffffffff
+; CHECK-NEXT: lsr x8, x8, #1
+; CHECK-NEXT: ror x0, x8, #4
; CHECK-NEXT: ret
%lhs_div = udiv i64 %i, 3
%rhs_div = udiv i64 %i, 48
@@ -67,11 +66,9 @@ define i64 @ror_extract_udiv(i64 %i) nounwind {
define i64 @ror_extract_mul_with_mask(i64 %i) nounwind {
; CHECK-LABEL: ror_extract_mul_with_mask:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w0, lsl #3
-; CHECK-NEXT: lsl w8, w8, #7
-; CHECK-NEXT: add x9, x0, x0, lsl #3
-; CHECK-NEXT: and x0, x8, #0x80
-; CHECK-NEXT: bfxil x0, x9, #57, #7
+; CHECK-NEXT: add x8, x0, x0, lsl #3
+; CHECK-NEXT: ror x8, x8, #57
+; CHECK-NEXT: and x0, x8, #0xff
; CHECK-NEXT: ret
%lhs_mul = mul i64 %i, 1152
%rhs_mul = mul i64 %i, 9
diff --git a/test/CodeGen/AArch64/signbit-shift.ll b/test/CodeGen/AArch64/signbit-shift.ll
index b554ce15872c..250290aa2348 100644
--- a/test/CodeGen/AArch64/signbit-shift.ll
+++ b/test/CodeGen/AArch64/signbit-shift.ll
@@ -150,8 +150,8 @@ define i32 @sext_ifneg(i32 %x) {
define i32 @add_sext_ifneg(i32 %x) {
; CHECK-LABEL: add_sext_ifneg:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #42
-; CHECK-NEXT: sub w0, w8, w0, lsr #31
+; CHECK-NEXT: asr w8, w0, #31
+; CHECK-NEXT: add w0, w8, #42 // =42
; CHECK-NEXT: ret
%c = icmp slt i32 %x, 0
%e = sext i1 %c to i32
@@ -225,7 +225,7 @@ define <4 x i32> @sub_lshr_not_vec_splat(<4 x i32> %x) {
define i32 @sub_lshr(i32 %x, i32 %y) {
; CHECK-LABEL: sub_lshr:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w0, w1, w0, lsr #31
+; CHECK-NEXT: add w0, w1, w0, asr #31
; CHECK-NEXT: ret
%sh = lshr i32 %x, 31
%r = sub i32 %y, %sh
@@ -235,8 +235,8 @@ define i32 @sub_lshr(i32 %x, i32 %y) {
define <4 x i32> @sub_lshr_vec(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: sub_lshr_vec:
; CHECK: // %bb.0:
-; CHECK-NEXT: ushr v0.4s, v0.4s, #31
-; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: ssra v1.4s, v0.4s, #31
+; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: ret
%sh = lshr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
%r = sub <4 x i32> %y, %sh
@@ -246,8 +246,8 @@ define <4 x i32> @sub_lshr_vec(<4 x i32> %x, <4 x i32> %y) {
define i32 @sub_const_op_lshr(i32 %x) {
; CHECK-LABEL: sub_const_op_lshr:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #43
-; CHECK-NEXT: sub w0, w8, w0, lsr #31
+; CHECK-NEXT: asr w8, w0, #31
+; CHECK-NEXT: add w0, w8, #43 // =43
; CHECK-NEXT: ret
%sh = lshr i32 %x, 31
%r = sub i32 43, %sh
@@ -257,9 +257,9 @@ define i32 @sub_const_op_lshr(i32 %x) {
define <4 x i32> @sub_const_op_lshr_vec(<4 x i32> %x) {
; CHECK-LABEL: sub_const_op_lshr_vec:
; CHECK: // %bb.0:
-; CHECK-NEXT: ushr v0.4s, v0.4s, #31
; CHECK-NEXT: movi v1.4s, #42
-; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: ssra v1.4s, v0.4s, #31
+; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: ret
%sh = lshr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
%r = sub <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %sh
diff --git a/test/CodeGen/AMDGPU/bfi_int.ll b/test/CodeGen/AMDGPU/bfi_int.ll
index 77c5e53481e7..66f8a2b111a5 100644
--- a/test/CodeGen/AMDGPU/bfi_int.ll
+++ b/test/CodeGen/AMDGPU/bfi_int.ll
@@ -54,8 +54,8 @@ entry:
; FUNC-LABEL: {{^}}v_bitselect_v2i32_pat1:
; GCN: s_waitcnt
-; GCN-NEXT: v_bfi_b32 v1, v3, v1, v5
; GCN-NEXT: v_bfi_b32 v0, v2, v0, v4
+; GCN-NEXT: v_bfi_b32 v1, v3, v1, v5
; GCN-NEXT: s_setpc_b64
define <2 x i32> @v_bitselect_v2i32_pat1(<2 x i32> %a, <2 x i32> %b, <2 x i32> %mask) {
%xor.0 = xor <2 x i32> %a, %mask
diff --git a/test/CodeGen/AMDGPU/call-argument-types.ll b/test/CodeGen/AMDGPU/call-argument-types.ll
index b0998355395d..2cea1414507b 100644
--- a/test/CodeGen/AMDGPU/call-argument-types.ll
+++ b/test/CodeGen/AMDGPU/call-argument-types.ll
@@ -17,16 +17,27 @@ declare void @external_void_func_i16_zeroext(i16 zeroext) #0
declare void @external_void_func_i32(i32) #0
declare void @external_void_func_i64(i64) #0
+declare void @external_void_func_v2i64(<2 x i64>) #0
+declare void @external_void_func_v3i64(<3 x i64>) #0
+declare void @external_void_func_v4i64(<4 x i64>) #0
declare void @external_void_func_f16(half) #0
declare void @external_void_func_f32(float) #0
declare void @external_void_func_f64(double) #0
+declare void @external_void_func_v2f32(<2 x float>) #0
+declare void @external_void_func_v2f64(<2 x double>) #0
+declare void @external_void_func_v3f64(<3 x double>) #0
declare void @external_void_func_v2i16(<2 x i16>) #0
declare void @external_void_func_v2f16(<2 x half>) #0
+declare void @external_void_func_v3i16(<3 x i16>) #0
+declare void @external_void_func_v3f16(<3 x half>) #0
+declare void @external_void_func_v4i16(<4 x i16>) #0
+declare void @external_void_func_v4f16(<4 x half>) #0
declare void @external_void_func_v2i32(<2 x i32>) #0
declare void @external_void_func_v3i32(<3 x i32>) #0
+declare void @external_void_func_v3i32_i32(<3 x i32>, i32) #0
declare void @external_void_func_v4i32(<4 x i32>) #0
declare void @external_void_func_v8i32(<8 x i32>) #0
declare void @external_void_func_v16i32(<16 x i32>) #0
@@ -255,6 +266,57 @@ define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 {
ret void
}
+; GCN-LABEL: {{^}}test_call_external_void_func_v2i64:
+; GCN: buffer_load_dwordx4 v[0:3]
+; GCN: s_waitcnt
+; GCN-NEXT: s_swappc_b64
+define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 {
+ %val = load <2 x i64>, <2 x i64> addrspace(1)* null
+ call void @external_void_func_v2i64(<2 x i64> %val)
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_call_external_void_func_v2i64_imm:
+; GCN-DAG: v_mov_b32_e32 v0, 1
+; GCN-DAG: v_mov_b32_e32 v1, 2
+; GCN-DAG: v_mov_b32_e32 v2, 3
+; GCN-DAG: v_mov_b32_e32 v3, 4
+; GCN: s_swappc_b64
+define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 {
+ call void @external_void_func_v2i64(<2 x i64> <i64 8589934593, i64 17179869187>)
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_call_external_void_func_v3i64:
+; GCN: buffer_load_dwordx4 v[0:3]
+; GCN: v_mov_b32_e32 v4, 1
+; GCN: v_mov_b32_e32 v5, 2
+; GCN: s_waitcnt
+; GCN-NEXT: s_swappc_b64
+define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
+ %load = load <2 x i64>, <2 x i64> addrspace(1)* null
+ %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2>
+
+ call void @external_void_func_v3i64(<3 x i64> %val)
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_call_external_void_func_v4i64:
+; GCN: buffer_load_dwordx4 v[0:3]
+; GCN-DAG: v_mov_b32_e32 v4, 1
+; GCN-DAG: v_mov_b32_e32 v5, 2
+; GCN-DAG: v_mov_b32_e32 v6, 3
+; GCN-DAG: v_mov_b32_e32 v7, 4
+
+; GCN: s_waitcnt
+; GCN-NEXT: s_swappc_b64
+define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
+ %load = load <2 x i64>, <2 x i64> addrspace(1)* null
+ %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 17179869187>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ call void @external_void_func_v4i64(<4 x i64> %val)
+ ret void
+}
+
; GCN-LABEL: {{^}}test_call_external_void_func_f16_imm:
; VI: v_mov_b32_e32 v0, 0x4400
; CI: v_mov_b32_e32 v0, 4.0
@@ -274,6 +336,15 @@ define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 {
ret void
}
+; GCN-LABEL: {{^}}test_call_external_void_func_v2f32_imm:
+; GCN-DAG: v_mov_b32_e32 v0, 1.0
+; GCN-DAG: v_mov_b32_e32 v1, 2.0
+; GCN: s_swappc_b64
+define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 {
+ call void @external_void_func_v2f32(<2 x float> <float 1.0, float 2.0>)
+ ret void
+}
+
; GCN-LABEL: {{^}}test_call_external_void_func_f64_imm:
; GCN: v_mov_b32_e32 v0, 0{{$}}
; GCN: v_mov_b32_e32 v1, 0x40100000
@@ -283,6 +354,30 @@ define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 {
ret void
}
+; GCN-LABEL: {{^}}test_call_external_void_func_v2f64_imm:
+; GCN: v_mov_b32_e32 v0, 0{{$}}
+; GCN: v_mov_b32_e32 v1, 2.0
+; GCN: v_mov_b32_e32 v2, 0{{$}}
+; GCN: v_mov_b32_e32 v3, 0x40100000
+; GCN: s_swappc_b64
+define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 {
+ call void @external_void_func_v2f64(<2 x double> <double 2.0, double 4.0>)
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_call_external_void_func_v3f64_imm:
+; GCN-DAG: v_mov_b32_e32 v0, 0{{$}}
+; GCN-DAG: v_mov_b32_e32 v1, 2.0
+; GCN-DAG: v_mov_b32_e32 v2, 0{{$}}
+; GCN-DAG: v_mov_b32_e32 v3, 0x40100000
+; GCN-DAG: v_mov_b32_e32 v4, 0{{$}}
+; GCN-DAG: v_mov_b32_e32 v5, 0x40200000
+; GCN-DAG: s_swappc_b64
+define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
+ call void @external_void_func_v3f64(<3 x double> <double 2.0, double 4.0, double 8.0>)
+ ret void
+}
+
; GCN-LABEL: {{^}}test_call_external_void_func_v2i16:
; GFX9: buffer_load_dword v0
; GFX9-NOT: v0
@@ -293,6 +388,49 @@ define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 {
ret void
}
+; GCN-LABEL: {{^}}test_call_external_void_func_v3i16:
+; GFX9: buffer_load_dwordx2 v[0:1]
+; GFX9-NOT: v0
+; GFX9-NOT: v1
+; GFX9: s_swappc_b64
+define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 {
+ %val = load <3 x i16>, <3 x i16> addrspace(1)* undef
+ call void @external_void_func_v3i16(<3 x i16> %val)
+ ret void
+}
+
+; FIXME: materialize constant directly in VGPR
+; GCN-LABEL: {{^}}test_call_external_void_func_v3i16_imm:
+; GFX9-DAG: s_mov_b32 [[K01:s[0-9]+]], 0x20001
+; GFX9-DAG: s_pack_ll_b32_b16 [[K23:s[0-9]+]], 3, s{{[0-9]+}}
+; GFX9: v_mov_b32_e32 v0, [[K01]]
+; GFX9: v_mov_b32_e32 v1, [[K23]]
+; GFX9: s_swappc_b64
+define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 {
+ call void @external_void_func_v3i16(<3 x i16> <i16 1, i16 2, i16 3>)
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_call_external_void_func_v4i16:
+; GFX9: buffer_load_dwordx2 v[0:1]
+; GFX9-NOT: v0
+; GFX9-NOT: v1
+; GFX9: s_swappc_b64
+define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 {
+ %val = load <4 x i16>, <4 x i16> addrspace(1)* undef
+ call void @external_void_func_v4i16(<4 x i16> %val)
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_call_external_void_func_v4i16_imm:
+; GFX9-DAG: v_mov_b32_e32 v0, 0x20001
+; GFX9-DAG: v_mov_b32_e32 v1, 0x40003
+; GFX9: s_swappc_b64
+define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 {
+ call void @external_void_func_v4i16(<4 x i16> <i16 1, i16 2, i16 3, i16 4>)
+ ret void
+}
+
; GCN-LABEL: {{^}}test_call_external_void_func_v2f16:
; GFX9: buffer_load_dword v0
; GFX9-NOT: v0
@@ -313,15 +451,23 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 {
ret void
}
-; FIXME: Passing 4th
+; GCN-LABEL: {{^}}test_call_external_void_func_v2i32_imm:
+; GCN-DAG: v_mov_b32_e32 v0, 1
+; GCN-DAG: v_mov_b32_e32 v1, 2
+; GCN: s_swappc_b64
+define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 {
+ call void @external_void_func_v2i32(<2 x i32> <i32 1, i32 2>)
+ ret void
+}
+
; GCN-LABEL: {{^}}test_call_external_void_func_v3i32_imm:
; HSA-DAG: s_mov_b32 s33, s9
; MESA-DAG: s_mov_b32 s33, s3{{$}}
-; GCN-DAG: v_mov_b32_e32 v0
-; GCN-DAG: v_mov_b32_e32 v1
-; GCN-DAG: v_mov_b32_e32 v2
-; GCN-DAG: v_mov_b32_e32 v3
+; GCN-DAG: v_mov_b32_e32 v0, 3
+; GCN-DAG: v_mov_b32_e32 v1, 4
+; GCN-DAG: v_mov_b32_e32 v2, 5
+; GCN-NOT: v3
; GCN: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
@@ -329,6 +475,16 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
ret void
}
+; GCN-LABEL: {{^}}test_call_external_void_func_v3i32_i32:
+; GCN-DAG: v_mov_b32_e32 v0, 3
+; GCN-DAG: v_mov_b32_e32 v1, 4
+; GCN-DAG: v_mov_b32_e32 v2, 5
+; GCN-DAG: v_mov_b32_e32 v3, 6
+define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 {
+ call void @external_void_func_v3i32_i32(<3 x i32> <i32 3, i32 4, i32 5>, i32 6)
+ ret void
+}
+
; GCN-LABEL: {{^}}test_call_external_void_func_v4i32:
; GCN: buffer_load_dwordx4 v[0:3]
; GCN: s_waitcnt
@@ -339,6 +495,17 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 {
ret void
}
+; GCN-LABEL: {{^}}test_call_external_void_func_v4i32_imm:
+; GCN-DAG: v_mov_b32_e32 v0, 1
+; GCN-DAG: v_mov_b32_e32 v1, 2
+; GCN-DAG: v_mov_b32_e32 v2, 3
+; GCN-DAG: v_mov_b32_e32 v3, 4
+; GCN: s_swappc_b64
+define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 {
+ call void @external_void_func_v4i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>)
+ ret void
+}
+
; GCN-LABEL: {{^}}test_call_external_void_func_v8i32:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
@@ -351,6 +518,21 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
ret void
}
+; GCN-LABEL: {{^}}test_call_external_void_func_v8i32_imm:
+; GCN-DAG: v_mov_b32_e32 v0, 1
+; GCN-DAG: v_mov_b32_e32 v1, 2
+; GCN-DAG: v_mov_b32_e32 v2, 3
+; GCN-DAG: v_mov_b32_e32 v3, 4
+; GCN-DAG: v_mov_b32_e32 v4, 5
+; GCN-DAG: v_mov_b32_e32 v5, 6
+; GCN-DAG: v_mov_b32_e32 v6, 7
+; GCN-DAG: v_mov_b32_e32 v7, 8
+; GCN: s_swappc_b64
+define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
+ call void @external_void_func_v8i32(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>)
+ ret void
+}
+
; GCN-LABEL: {{^}}test_call_external_void_func_v16i32:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
diff --git a/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll b/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
index a4db46b47500..d72dbf9e6ec0 100644
--- a/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
+++ b/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
@@ -225,7 +225,7 @@ define amdgpu_kernel void @test_fold_canonicalize_fpround_value_v2f16_v2f32(<2 x
}
; GCN-LABEL: test_no_fold_canonicalize_fneg_value_f32:
-; GCN-FLUSH: v_mul_f32_e64 v{{[0-9]+}}, 1.0, -v{{[0-9]+}}
+; GCN-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, -1.0, v{{[0-9]+}}
; GCN-DENORM: v_max_f32_e64 v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
define amdgpu_kernel void @test_no_fold_canonicalize_fneg_value_f32(float addrspace(1)* %arg) {
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
diff --git a/test/CodeGen/AMDGPU/fcanonicalize.f16.ll b/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
index 7cc556ce168d..52d891964c48 100644
--- a/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
+++ b/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
@@ -1,5 +1,6 @@
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,GFX89 %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX89 %s
+; RUN: llc -march=amdgcn -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
declare half @llvm.fabs.f16(half) #0
declare half @llvm.canonicalize.f16(half) #0
@@ -9,18 +10,21 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0
; GCN-LABEL: {{^}}v_test_canonicalize_var_f16:
-; GCN: v_max_f16_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
-; GCN: buffer_store_short [[REG]]
+; GFX89: v_max_f16_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
+; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
+
+; CI: v_cvt_f32_f16_e32
+; CI: v_mul_f32_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}}
define amdgpu_kernel void @v_test_canonicalize_var_f16(half addrspace(1)* %out) #1 {
%val = load half, half addrspace(1)* %out
%canonicalized = call half @llvm.canonicalize.f16(half %val)
- store half %canonicalized, half addrspace(1)* %out
+ store half %canonicalized, half addrspace(1)* undef
ret void
}
; GCN-LABEL: {{^}}s_test_canonicalize_var_f16:
-; GCN: v_max_f16_e64 [[REG:v[0-9]+]], {{s[0-9]+}}, {{s[0-9]+}}
-; GCN: buffer_store_short [[REG]]
+; GFX89: v_max_f16_e64 [[REG:v[0-9]+]], {{s[0-9]+}}, {{s[0-9]+}}
+; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @s_test_canonicalize_var_f16(half addrspace(1)* %out, i16 zeroext %val.arg) #1 {
%val = bitcast i16 %val.arg to half
%canonicalized = call half @llvm.canonicalize.f16(half %val)
@@ -29,8 +33,8 @@ define amdgpu_kernel void @s_test_canonicalize_var_f16(half addrspace(1)* %out,
}
; GCN-LABEL: {{^}}v_test_canonicalize_fabs_var_f16:
-; GCN: v_max_f16_e64 [[REG:v[0-9]+]], |{{v[0-9]+}}|, |{{v[0-9]+}}|
-; GCN: buffer_store_short [[REG]]
+; GFX89: v_max_f16_e64 [[REG:v[0-9]+]], |{{v[0-9]+}}|, |{{v[0-9]+}}|
+; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fabs_var_f16(half addrspace(1)* %out) #1 {
%val = load half, half addrspace(1)* %out
%val.fabs = call half @llvm.fabs.f16(half %val)
@@ -40,8 +44,11 @@ define amdgpu_kernel void @v_test_canonicalize_fabs_var_f16(half addrspace(1)* %
}
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_fabs_var_f16:
-; GCN: v_max_f16_e64 [[REG:v[0-9]+]], -|{{v[0-9]+}}|, -|{{v[0-9]+}}|
-; GCN: buffer_store_short [[REG]]
+; GFX89: v_max_f16_e64 [[REG:v[0-9]+]], -|{{v[0-9]+}}|, -|{{v[0-9]+}}|
+; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
+
+; CI: v_cvt_f32_f16_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|
+; CI: v_mul_f32_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}}
define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f16(half addrspace(1)* %out) #1 {
%val = load half, half addrspace(1)* %out
%val.fabs = call half @llvm.fabs.f16(half %val)
@@ -52,8 +59,11 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f16(half addrspace(
}
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_var_f16:
-; GCN: v_max_f16_e64 [[REG:v[0-9]+]], -{{v[0-9]+}}, -{{v[0-9]+}}
-; GCN: buffer_store_short [[REG]]
+; GFX89: v_max_f16_e64 [[REG:v[0-9]+]], -{{v[0-9]+}}, -{{v[0-9]+}}
+; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
+
+; CI: v_cvt_f32_f16_e64 {{v[0-9]+}}, -{{v[0-9]+}}
+; CI: v_mul_f32_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}}
define amdgpu_kernel void @v_test_canonicalize_fneg_var_f16(half addrspace(1)* %out) #1 {
%val = load half, half addrspace(1)* %out
%val.fneg = fsub half -0.0, %val
@@ -62,9 +72,35 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_var_f16(half addrspace(1)* %
ret void
}
+; GCN-LABEL: {{^}}v_test_no_denormals_canonicalize_fneg_var_f16:
+; GFX89: v_mul_f16_e32 [[REG:v[0-9]+]], -1.0, v{{[0-9]+}}
+; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
+define amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_var_f16(half addrspace(1)* %out) #2 {
+ %val = load half, half addrspace(1)* %out
+ %val.fneg = fsub half -0.0, %val
+ %canonicalized = call half @llvm.canonicalize.f16(half %val.fneg)
+ store half %canonicalized, half addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_test_no_denormals_canonicalize_fneg_fabs_var_f16:
+; GFX89: v_mul_f16_e64 [[REG:v[0-9]+]], -1.0, |v{{[0-9]+}}|
+; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
+
+; CI: v_cvt_f32_f16_e64 {{v[0-9]+}}, -|{{v[0-9]+}}|
+; CI: v_mul_f32_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}}
+define amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_fabs_var_f16(half addrspace(1)* %out) #2 {
+ %val = load half, half addrspace(1)* %out
+ %val.fabs = call half @llvm.fabs.f16(half %val)
+ %val.fabs.fneg = fsub half -0.0, %val.fabs
+ %canonicalized = call half @llvm.canonicalize.f16(half %val.fabs.fneg)
+ store half %canonicalized, half addrspace(1)* %out
+ ret void
+}
+
; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
-; GCN: buffer_store_short [[REG]]
+; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
+; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_p0_f16(half addrspace(1)* %out) #1 {
%canonicalized = call half @llvm.canonicalize.f16(half 0.0)
store half %canonicalized, half addrspace(1)* %out
@@ -72,8 +108,8 @@ define amdgpu_kernel void @test_fold_canonicalize_p0_f16(half addrspace(1)* %out
}
; GCN-LABEL: {{^}}test_fold_canonicalize_n0_f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff8000{{$}}
-; GCN: buffer_store_short [[REG]]
+; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff8000{{$}}
+; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_n0_f16(half addrspace(1)* %out) #1 {
%canonicalized = call half @llvm.canonicalize.f16(half -0.0)
store half %canonicalized, half addrspace(1)* %out
@@ -81,8 +117,8 @@ define amdgpu_kernel void @test_fold_canonicalize_n0_f16(half addrspace(1)* %out
}
; GCN-LABEL: {{^}}test_fold_canonicalize_p1_f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3c00{{$}}
-; GCN: buffer_store_short [[REG]]
+; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3c00{{$}}
+; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_p1_f16(half addrspace(1)* %out) #1 {
%canonicalized = call half @llvm.canonicalize.f16(half 1.0)
store half %canonicalized, half addrspace(1)* %out
@@ -90,8 +126,8 @@ define amdgpu_kernel void @test_fold_canonicalize_p1_f16(half addrspace(1)* %out
}
; GCN-LABEL: {{^}}test_fold_canonicalize_n1_f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffbc00{{$}}
-; GCN: buffer_store_short [[REG]]
+; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffbc00{{$}}
+; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_n1_f16(half addrspace(1)* %out) #1 {
%canonicalized = call half @llvm.canonicalize.f16(half -1.0)
store half %canonicalized, half addrspace(1)* %out
@@ -99,8 +135,8 @@ define amdgpu_kernel void @test_fold_canonicalize_n1_f16(half addrspace(1)* %out
}
; GCN-LABEL: {{^}}test_fold_canonicalize_literal_f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x4c00{{$}}
-; GCN: buffer_store_short [[REG]]
+; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x4c00{{$}}
+; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_literal_f16(half addrspace(1)* %out) #1 {
%canonicalized = call half @llvm.canonicalize.f16(half 16.0)
store half %canonicalized, half addrspace(1)* %out
@@ -108,8 +144,8 @@ define amdgpu_kernel void @test_fold_canonicalize_literal_f16(half addrspace(1)*
}
; GCN-LABEL: {{^}}test_default_denormals_fold_canonicalize_denormal0_f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff{{$}}
-; GCN: buffer_store_short [[REG]]
+; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff{{$}}
+; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_default_denormals_fold_canonicalize_denormal0_f16(half addrspace(1)* %out) #1 {
%canonicalized = call half @llvm.canonicalize.f16(half 0xH03FF)
store half %canonicalized, half addrspace(1)* %out
@@ -117,8 +153,8 @@ define amdgpu_kernel void @test_default_denormals_fold_canonicalize_denormal0_f1
}
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff{{$}}
-; GCN: buffer_store_short [[REG]]
+; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff{{$}}
+; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f16(half addrspace(1)* %out) #3 {
%canonicalized = call half @llvm.canonicalize.f16(half 0xH03FF)
store half %canonicalized, half addrspace(1)* %out
@@ -126,8 +162,8 @@ define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f16(half a
}
; GCN-LABEL: {{^}}test_default_denormals_fold_canonicalize_denormal1_f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff83ff{{$}}
-; GCN: buffer_store_short [[REG]]
+; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff83ff{{$}}
+; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_default_denormals_fold_canonicalize_denormal1_f16(half addrspace(1)* %out) #1 {
%canonicalized = call half @llvm.canonicalize.f16(half 0xH83FF)
store half %canonicalized, half addrspace(1)* %out
@@ -135,8 +171,8 @@ define amdgpu_kernel void @test_default_denormals_fold_canonicalize_denormal1_f1
}
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff83ff{{$}}
-; GCN: buffer_store_short [[REG]]
+; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff83ff{{$}}
+; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f16(half addrspace(1)* %out) #3 {
%canonicalized = call half @llvm.canonicalize.f16(half 0xH83FF)
store half %canonicalized, half addrspace(1)* %out
@@ -144,8 +180,8 @@ define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f16(half a
}
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7c00{{$}}
-; GCN: buffer_store_short [[REG]]
+; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7c00{{$}}
+; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_f16(half addrspace(1)* %out) #1 {
%canonicalized = call half @llvm.canonicalize.f16(half 0xH7C00)
store half %canonicalized, half addrspace(1)* %out
@@ -153,8 +189,8 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_f16(half addrspace(1)* %o
}
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg1_f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
-; GCN: buffer_store_short [[REG]]
+; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
+; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f16(half addrspace(1)* %out) #1 {
%canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 -1 to half))
store half %canonicalized, half addrspace(1)* %out
@@ -162,8 +198,8 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f16(half addrs
}
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg2_f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
-; GCN: buffer_store_short [[REG]]
+; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
+; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f16(half addrspace(1)* %out) #1 {
%canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 -2 to half))
store half %canonicalized, half addrspace(1)* %out
@@ -171,8 +207,8 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f16(half addrs
}
; GCN-LABEL: {{^}}test_fold_canonicalize_snan0_value_f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
-; GCN: buffer_store_short [[REG]]
+; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
+; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f16(half addrspace(1)* %out) #1 {
%canonicalized = call half @llvm.canonicalize.f16(half 0xH7C01)
store half %canonicalized, half addrspace(1)* %out
@@ -180,8 +216,8 @@ define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f16(half addrspace
}
; GCN-LABEL: {{^}}test_fold_canonicalize_snan1_value_f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
-; GCN: buffer_store_short [[REG]]
+; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
+; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f16(half addrspace(1)* %out) #1 {
%canonicalized = call half @llvm.canonicalize.f16(half 0xH7DFF)
store half %canonicalized, half addrspace(1)* %out
@@ -189,8 +225,8 @@ define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f16(half addrspace
}
; GCN-LABEL: {{^}}test_fold_canonicalize_snan2_value_f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
-; GCN: buffer_store_short [[REG]]
+; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
+; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f16(half addrspace(1)* %out) #1 {
%canonicalized = call half @llvm.canonicalize.f16(half 0xHFDFF)
store half %canonicalized, half addrspace(1)* %out
@@ -198,8 +234,8 @@ define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f16(half addrspace
}
; GCN-LABEL: {{^}}test_fold_canonicalize_snan3_value_f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
-; GCN: buffer_store_short [[REG]]
+; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
+; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f16(half addrspace(1)* %out) #1 {
%canonicalized = call half @llvm.canonicalize.f16(half 0xHFC01)
store half %canonicalized, half addrspace(1)* %out
@@ -212,7 +248,7 @@ define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f16(half addrspace
; VI-NOT: v_and_b32
; GFX9: v_pk_max_f16 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+$}}
-; GFX9: buffer_store_dword [[REG]]
+; GFX9: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @v_test_canonicalize_var_v2f16(<2 x half> addrspace(1)* %out) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
@@ -230,7 +266,7 @@ define amdgpu_kernel void @v_test_canonicalize_var_v2f16(<2 x half> addrspace(1)
; GFX9: v_and_b32_e32 [[ABS:v[0-9]+]], 0x7fff7fff, v{{[0-9]+}}
; GFX9: v_pk_max_f16 [[REG:v[0-9]+]], [[ABS]], [[ABS]]{{$}}
-; GCN: buffer_store_dword
+; GFX89: {{flat|global}}_store_dword
define amdgpu_kernel void @v_test_canonicalize_fabs_var_v2f16(<2 x half> addrspace(1)* %out) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
@@ -248,7 +284,12 @@ define amdgpu_kernel void @v_test_canonicalize_fabs_var_v2f16(<2 x half> addrspa
; GFX9: v_and_b32_e32 [[ABS:v[0-9]+]], 0x7fff7fff, v{{[0-9]+}}
; GFX9: v_pk_max_f16 [[REG:v[0-9]+]], [[ABS]], [[ABS]] neg_lo:[1,1] neg_hi:[1,1]{{$}}
-; GCN: buffer_store_dword
+; GFX89: {{flat|global}}_store_dword
+
+; CI: v_cvt_f32_f16
+; CI: v_cvt_f32_f16
+; CI: v_mul_f32_e32 v{{[0-9]+}}, 1.0
+; CI: v_mul_f32_e32 v{{[0-9]+}}, 1.0
define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_v2f16(<2 x half> addrspace(1)* %out) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
@@ -266,7 +307,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_v2f16(<2 x half> ad
; VI-NOT: 0xffff
; GFX9: v_pk_max_f16 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}} neg_lo:[1,1] neg_hi:[1,1]{{$}}
-; GFX9: buffer_store_dword [[REG]]
+; GFX9: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_var_v2f16(<2 x half> addrspace(1)* %out) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
@@ -283,7 +324,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_var_v2f16(<2 x half> addrspa
; VI-NOT: v_and_b32
; GFX9: v_pk_max_f16 [[REG:v[0-9]+]], {{s[0-9]+}}, {{s[0-9]+$}}
-; GFX9: buffer_store_dword [[REG]]
+; GFX9: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @s_test_canonicalize_var_v2f16(<2 x half> addrspace(1)* %out, i32 zeroext %val.arg) #1 {
%val = bitcast i32 %val.arg to <2 x half>
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %val)
@@ -292,8 +333,8 @@ define amdgpu_kernel void @s_test_canonicalize_var_v2f16(<2 x half> addrspace(1)
}
; GCN-LABEL: {{^}}test_fold_canonicalize_p0_v2f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
-; GCN: buffer_store_dword [[REG]]
+; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
+; GFX89: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_p0_v2f16(<2 x half> addrspace(1)* %out) #1 {
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> zeroinitializer)
store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
@@ -301,8 +342,8 @@ define amdgpu_kernel void @test_fold_canonicalize_p0_v2f16(<2 x half> addrspace(
}
; GCN-LABEL: {{^}}test_fold_canonicalize_n0_v2f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80008000{{$}}
-; GCN: buffer_store_dword [[REG]]
+; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80008000{{$}}
+; GFX89: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_n0_v2f16(<2 x half> addrspace(1)* %out) #1 {
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half -0.0, half -0.0>)
store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
@@ -310,8 +351,8 @@ define amdgpu_kernel void @test_fold_canonicalize_n0_v2f16(<2 x half> addrspace(
}
; GCN-LABEL: {{^}}test_fold_canonicalize_p1_v2f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3c003c00{{$}}
-; GCN: buffer_store_dword [[REG]]
+; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3c003c00{{$}}
+; GFX89: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_p1_v2f16(<2 x half> addrspace(1)* %out) #1 {
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 1.0, half 1.0>)
store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
@@ -319,8 +360,8 @@ define amdgpu_kernel void @test_fold_canonicalize_p1_v2f16(<2 x half> addrspace(
}
; GCN-LABEL: {{^}}test_fold_canonicalize_n1_v2f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xbc00bc00{{$}}
-; GCN: buffer_store_dword [[REG]]
+; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0xbc00bc00{{$}}
+; GFX89: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_n1_v2f16(<2 x half> addrspace(1)* %out) #1 {
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half -1.0, half -1.0>)
store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
@@ -328,8 +369,8 @@ define amdgpu_kernel void @test_fold_canonicalize_n1_v2f16(<2 x half> addrspace(
}
; GCN-LABEL: {{^}}test_fold_canonicalize_literal_v2f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x4c004c00{{$}}
-; GCN: buffer_store_dword [[REG]]
+; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x4c004c00{{$}}
+; GFX89: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_literal_v2f16(<2 x half> addrspace(1)* %out) #1 {
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 16.0, half 16.0>)
store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
@@ -337,8 +378,8 @@ define amdgpu_kernel void @test_fold_canonicalize_literal_v2f16(<2 x half> addrs
}
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_v2f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff03ff{{$}}
-; GCN: buffer_store_dword [[REG]]
+; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff03ff{{$}}
+; GFX89: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_v2f16(<2 x half> addrspace(1)* %out) #1 {
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH03FF, half 0xH03FF>)
store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
@@ -346,8 +387,8 @@ define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_v2f16(<
}
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_v2f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff03ff{{$}}
-; GCN: buffer_store_dword [[REG]]
+; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff03ff{{$}}
+; GFX89: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_v2f16(<2 x half> addrspace(1)* %out) #3 {
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH03FF, half 0xH03FF>)
store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
@@ -355,8 +396,8 @@ define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_v2f16(<2 x
}
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_v2f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x83ff83ff{{$}}
-; GCN: buffer_store_dword [[REG]]
+; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x83ff83ff{{$}}
+; GFX89: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_v2f16(<2 x half> addrspace(1)* %out) #1 {
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH83FF, half 0xH83FF>)
store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
@@ -364,8 +405,8 @@ define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_v2f16(<
}
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_v2f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x83ff83ff{{$}}
-; GCN: buffer_store_dword [[REG]]
+; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x83ff83ff{{$}}
+; GFX89: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_v2f16(<2 x half> addrspace(1)* %out) #3 {
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH83FF, half 0xH83FF>)
store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
@@ -373,8 +414,8 @@ define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_v2f16(<2 x
}
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_v2f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7c007c00{{$}}
-; GCN: buffer_store_dword [[REG]]
+; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7c007c00{{$}}
+; GFX89: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_v2f16(<2 x half> addrspace(1)* %out) #1 {
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH7C00, half 0xH7C00>)
store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
@@ -382,8 +423,8 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_v2f16(<2 x half> addrspac
}
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg1_v2f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
-; GCN: buffer_store_dword [[REG]]
+; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
+; GFX89: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_v2f16(<2 x half> addrspace(1)* %out) #1 {
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> bitcast (i32 -1 to <2 x half>))
store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
@@ -391,8 +432,8 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_v2f16(<2 x hal
}
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg2_v2f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
-; GCN: buffer_store_dword [[REG]]
+; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
+; GFX89: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_v2f16(<2 x half> addrspace(1)* %out) #1 {
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half bitcast (i16 -2 to half), half bitcast (i16 -2 to half)>)
store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
@@ -400,8 +441,8 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_v2f16(<2 x hal
}
; GCN-LABEL: {{^}}test_fold_canonicalize_snan0_value_v2f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
-; GCN: buffer_store_dword [[REG]]
+; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
+; GFX89: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan0_value_v2f16(<2 x half> addrspace(1)* %out) #1 {
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH7C01, half 0xH7C01>)
store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
@@ -409,8 +450,8 @@ define amdgpu_kernel void @test_fold_canonicalize_snan0_value_v2f16(<2 x half> a
}
; GCN-LABEL: {{^}}test_fold_canonicalize_snan1_value_v2f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
-; GCN: buffer_store_dword [[REG]]
+; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
+; GFX89: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan1_value_v2f16(<2 x half> addrspace(1)* %out) #1 {
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH7DFF, half 0xH7DFF>)
store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
@@ -418,8 +459,8 @@ define amdgpu_kernel void @test_fold_canonicalize_snan1_value_v2f16(<2 x half> a
}
; GCN-LABEL: {{^}}test_fold_canonicalize_snan2_value_v2f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
-; GCN: buffer_store_dword [[REG]]
+; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
+; GFX89: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan2_value_v2f16(<2 x half> addrspace(1)* %out) #1 {
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xHFDFF, half 0xHFDFF>)
store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
@@ -427,8 +468,8 @@ define amdgpu_kernel void @test_fold_canonicalize_snan2_value_v2f16(<2 x half> a
}
; GCN-LABEL: {{^}}test_fold_canonicalize_snan3_value_v2f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
-; GCN: buffer_store_dword [[REG]]
+; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
+; GFX89: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan3_value_v2f16(<2 x half> addrspace(1)* %out) #1 {
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xHFC01, half 0xHFC01>)
store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
diff --git a/test/CodeGen/AMDGPU/fcanonicalize.ll b/test/CodeGen/AMDGPU/fcanonicalize.ll
index 1c6d176c6762..6b2d58db804e 100644
--- a/test/CodeGen/AMDGPU/fcanonicalize.ll
+++ b/test/CodeGen/AMDGPU/fcanonicalize.ll
@@ -40,7 +40,7 @@ define amdgpu_kernel void @v_test_canonicalize_fabs_var_f32(float addrspace(1)*
}
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_fabs_var_f32:
-; GCN: v_mul_f32_e64 [[REG:v[0-9]+]], 1.0, -|{{v[0-9]+}}|
+; GCN: v_mul_f32_e64 [[REG:v[0-9]+]], -1.0, |{{v[0-9]+}}|
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f32(float addrspace(1)* %out) #1 {
%val = load float, float addrspace(1)* %out
@@ -52,7 +52,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f32(float addrspace
}
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_var_f32:
-; GCN: v_mul_f32_e64 [[REG:v[0-9]+]], 1.0, -{{v[0-9]+}}
+; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], -1.0, {{v[0-9]+}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_var_f32(float addrspace(1)* %out) #1 {
%val = load float, float addrspace(1)* %out
@@ -62,6 +62,15 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_var_f32(float addrspace(1)*
ret void
}
+; GCN-LABEL: {{^}}test_fold_canonicalize_undef_f32:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
+; GCN: buffer_store_dword [[REG]]
+define amdgpu_kernel void @test_fold_canonicalize_undef_f32(float addrspace(1)* %out) #1 {
+ %canonicalized = call float @llvm.canonicalize.f32(float undef)
+ store float %canonicalized, float addrspace(1)* %out
+ ret void
+}
+
; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GCN: buffer_store_dword [[REG]]
diff --git a/test/CodeGen/AMDGPU/fmax3.ll b/test/CodeGen/AMDGPU/fmax3.ll
index 39455acad484..1f67ace72df7 100644
--- a/test/CodeGen/AMDGPU/fmax3.ll
+++ b/test/CodeGen/AMDGPU/fmax3.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
; GCN-LABEL: {{^}}test_fmax3_olt_0_f32:
; GCN: buffer_load_dword [[REGC:v[0-9]+]]
@@ -38,20 +38,23 @@ define amdgpu_kernel void @test_fmax3_olt_1_f32(float addrspace(1)* %out, float
}
; GCN-LABEL: {{^}}test_fmax3_olt_0_f16:
-; GCN: buffer_load_ushort [[REGC:v[0-9]+]]
-; GCN: buffer_load_ushort [[REGB:v[0-9]+]]
; GCN: buffer_load_ushort [[REGA:v[0-9]+]]
+; GCN: buffer_load_ushort [[REGB:v[0-9]+]]
+; GCN: buffer_load_ushort [[REGC:v[0-9]+]]
-; SI: v_max3_f32 [[RESULT_F32:v[0-9]+]],
-; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[RESULT]]
+; SI-DAG: v_cvt_f32_f16_e32 [[CVT_A:v[0-9]+]], [[REGA]]
+; SI-DAG: v_cvt_f32_f16_e32 [[CVT_B:v[0-9]+]], [[REGB]]
+; SI-DAG: v_cvt_f32_f16_e32 [[CVT_C:v[0-9]+]], [[REGC]]
+; SI: v_max3_f32 [[RESULT_F32:v[0-9]+]], [[CVT_A]], [[CVT_B]], [[CVT_C]]
+; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[RESULT_F32]]
; VI: v_max_f16_e32
; VI: v_max_f16_e32 [[RESULT:v[0-9]+]],
-; GFX9: v_max3_f16 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; GFX9: v_max3_f16 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], [[REGC]]
; GCN: buffer_store_short [[RESULT]],
define amdgpu_kernel void @test_fmax3_olt_0_f16(half addrspace(1)* %out, half addrspace(1)* %aptr, half addrspace(1)* %bptr, half addrspace(1)* %cptr) #0 {
- %a = load volatile half, half addrspace(1)* %aptr, align 2
+ %a = load volatile half, half addrspace(1)* %aptr, align 2
%b = load volatile half, half addrspace(1)* %bptr, align 2
%c = load volatile half, half addrspace(1)* %cptr, align 2
%f0 = call half @llvm.maxnum.f16(half %a, half %b)
@@ -62,17 +65,20 @@ define amdgpu_kernel void @test_fmax3_olt_0_f16(half addrspace(1)* %out, half ad
; Commute operand of second fmax
; GCN-LABEL: {{^}}test_fmax3_olt_1_f16:
-; GCN: buffer_load_ushort [[REGB:v[0-9]+]]
; GCN: buffer_load_ushort [[REGA:v[0-9]+]]
+; GCN: buffer_load_ushort [[REGB:v[0-9]+]]
; GCN: buffer_load_ushort [[REGC:v[0-9]+]]
-; SI: v_max3_f32 [[RESULT_F32:v[0-9]+]],
-; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[RESULT]]
+; SI-DAG: v_cvt_f32_f16_e32 [[CVT_A:v[0-9]+]], [[REGA]]
+; SI-DAG: v_cvt_f32_f16_e32 [[CVT_B:v[0-9]+]], [[REGB]]
+; SI-DAG: v_cvt_f32_f16_e32 [[CVT_C:v[0-9]+]], [[REGC]]
+; SI: v_max3_f32 [[RESULT_F32:v[0-9]+]], [[CVT_C]], [[CVT_A]], [[CVT_B]]
+; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[RESULT_F32]]
; VI: v_max_f16_e32
; VI: v_max_f16_e32 [[RESULT:v[0-9]+]],
-; GFX9: v_max3_f16 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; GFX9: v_max3_f16 [[RESULT:v[0-9]+]], [[REGC]], [[REGA]], [[REGB]]
; GCN: buffer_store_short [[RESULT]],
define amdgpu_kernel void @test_fmax3_olt_1_f16(half addrspace(1)* %out, half addrspace(1)* %aptr, half addrspace(1)* %bptr, half addrspace(1)* %cptr) #0 {
%a = load volatile half, half addrspace(1)* %aptr, align 2
diff --git a/test/CodeGen/AMDGPU/fmaxnum.ll b/test/CodeGen/AMDGPU/fmaxnum.ll
index 277b8ce04c4e..58b5b5282b09 100644
--- a/test/CodeGen/AMDGPU/fmaxnum.ll
+++ b/test/CodeGen/AMDGPU/fmaxnum.ll
@@ -1,283 +1,214 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
-declare float @llvm.maxnum.f32(float, float) #0
-declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) #0
-declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) #0
-declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>) #0
-declare <16 x float> @llvm.maxnum.v16f32(<16 x float>, <16 x float>) #0
-
-declare double @llvm.maxnum.f64(double, double)
-
-; FUNC-LABEL: @test_fmax_f32
-; SI: v_max_f32_e32
-
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
-; EG: MAX_DX10 {{.*}}[[OUT]]
-define amdgpu_kernel void @test_fmax_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
- %val = call float @llvm.maxnum.f32(float %a, float %b) #0
+; GCN-LABEL: {{^}}test_fmax_f32:
+; GCN: v_max_f32_e32
+define amdgpu_kernel void @test_fmax_f32(float addrspace(1)* %out, float %a, float %b) #0 {
+ %val = call float @llvm.maxnum.f32(float %a, float %b)
store float %val, float addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @test_fmax_v2f32
-; SI: v_max_f32_e32
-; SI: v_max_f32_e32
-
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]]
-; EG: MAX_DX10 {{.*}}[[OUT]]
-; EG: MAX_DX10 {{.*}}[[OUT]]
-define amdgpu_kernel void @test_fmax_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) nounwind {
- %val = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %a, <2 x float> %b) #0
+; GCN-LABEL: {{^}}test_fmax_v2f32:
+; GCN: v_max_f32_e32
+; GCN: v_max_f32_e32
+define amdgpu_kernel void @test_fmax_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
+ %val = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %a, <2 x float> %b)
store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8
ret void
}
-; FUNC-LABEL: @test_fmax_v4f32
-; SI: v_max_f32_e32
-; SI: v_max_f32_e32
-; SI: v_max_f32_e32
-; SI: v_max_f32_e32
+; GCN-LABEL: {{^}}test_fmax_v3f32:
+; GCN: v_max_f32_e32
+; GCN: v_max_f32_e32
+; GCN: v_max_f32_e32
+; GCN-NOT: v_max_f32
+define amdgpu_kernel void @test_fmax_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %a, <3 x float> %b) nounwind {
+ %val = call <3 x float> @llvm.maxnum.v3f32(<3 x float> %a, <3 x float> %b) #0
+ store <3 x float> %val, <3 x float> addrspace(1)* %out, align 16
+ ret void
+}
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]]
-; EG: MAX_DX10 {{.*}}[[OUT]]
-; EG: MAX_DX10 {{.*}}[[OUT]]
-; EG: MAX_DX10 {{.*}}[[OUT]]
-; EG: MAX_DX10 {{.*}}[[OUT]]
-define amdgpu_kernel void @test_fmax_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) nounwind {
- %val = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b) #0
+; GCN-LABEL: {{^}}test_fmax_v4f32:
+; GCN: v_max_f32_e32
+; GCN: v_max_f32_e32
+; GCN: v_max_f32_e32
+; GCN: v_max_f32_e32
+define amdgpu_kernel void @test_fmax_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) #0 {
+ %val = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b)
store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16
ret void
}
-; FUNC-LABEL: @test_fmax_v8f32
-; SI: v_max_f32_e32
-; SI: v_max_f32_e32
-; SI: v_max_f32_e32
-; SI: v_max_f32_e32
-; SI: v_max_f32_e32
-; SI: v_max_f32_e32
-; SI: v_max_f32_e32
-; SI: v_max_f32_e32
-
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT1:T[0-9]+]]
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT2:T[0-9]+]]
-; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].X
-; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Y
-; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Z
-; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].W
-; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].X
-; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Y
-; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Z
-; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].W
-define amdgpu_kernel void @test_fmax_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) nounwind {
- %val = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %a, <8 x float> %b) #0
+; GCN-LABEL: {{^}}test_fmax_v8f32:
+; GCN: v_max_f32_e32
+; GCN: v_max_f32_e32
+; GCN: v_max_f32_e32
+; GCN: v_max_f32_e32
+; GCN: v_max_f32_e32
+; GCN: v_max_f32_e32
+; GCN: v_max_f32_e32
+; GCN: v_max_f32_e32
+define amdgpu_kernel void @test_fmax_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) #0 {
+ %val = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %a, <8 x float> %b)
store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32
ret void
}
-; FUNC-LABEL: @test_fmax_v16f32
-; SI: v_max_f32_e32
-; SI: v_max_f32_e32
-; SI: v_max_f32_e32
-; SI: v_max_f32_e32
-; SI: v_max_f32_e32
-; SI: v_max_f32_e32
-; SI: v_max_f32_e32
-; SI: v_max_f32_e32
-; SI: v_max_f32_e32
-; SI: v_max_f32_e32
-; SI: v_max_f32_e32
-; SI: v_max_f32_e32
-; SI: v_max_f32_e32
-; SI: v_max_f32_e32
-; SI: v_max_f32_e32
-; SI: v_max_f32_e32
-
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT1:T[0-9]+]]
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT2:T[0-9]+]]
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT3:T[0-9]+]]
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT4:T[0-9]+]]
-; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].X
-; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Y
-; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Z
-; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].W
-; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].X
-; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Y
-; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Z
-; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].W
-; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].X
-; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].Y
-; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].Z
-; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].W
-; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].X
-; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].Y
-; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].Z
-; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].W
-define amdgpu_kernel void @test_fmax_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) nounwind {
- %val = call <16 x float> @llvm.maxnum.v16f32(<16 x float> %a, <16 x float> %b) #0
+; GCN-LABEL: {{^}}test_fmax_v16f32:
+; GCN: v_max_f32_e32
+; GCN: v_max_f32_e32
+; GCN: v_max_f32_e32
+; GCN: v_max_f32_e32
+; GCN: v_max_f32_e32
+; GCN: v_max_f32_e32
+; GCN: v_max_f32_e32
+; GCN: v_max_f32_e32
+; GCN: v_max_f32_e32
+; GCN: v_max_f32_e32
+; GCN: v_max_f32_e32
+; GCN: v_max_f32_e32
+; GCN: v_max_f32_e32
+; GCN: v_max_f32_e32
+; GCN: v_max_f32_e32
+; GCN: v_max_f32_e32
+define amdgpu_kernel void @test_fmax_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) #0 {
+ %val = call <16 x float> @llvm.maxnum.v16f32(<16 x float> %a, <16 x float> %b)
store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64
ret void
}
-; FUNC-LABEL: @constant_fold_fmax_f32
-; SI-NOT: v_max_f32_e32
-; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 2.0
-; SI: buffer_store_dword [[REG]]
-
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
-; EG-NOT: MAX_DX10
-; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmax_f32(float addrspace(1)* %out) nounwind {
- %val = call float @llvm.maxnum.f32(float 1.0, float 2.0) #0
+; GCN-LABEL: {{^}}constant_fold_fmax_f32:
+; GCN-NOT: v_max_f32_e32
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 2.0
+; GCN: buffer_store_dword [[REG]]
+define amdgpu_kernel void @constant_fold_fmax_f32(float addrspace(1)* %out) #0 {
+ %val = call float @llvm.maxnum.f32(float 1.0, float 2.0)
store float %val, float addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @constant_fold_fmax_f32_nan_nan
-; SI-NOT: v_max_f32_e32
-; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000
-; SI: buffer_store_dword [[REG]]
-
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
-; EG-NOT: MAX_DX10
-; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-; EG: 2143289344(nan)
-define amdgpu_kernel void @constant_fold_fmax_f32_nan_nan(float addrspace(1)* %out) nounwind {
- %val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000) #0
+; GCN-LABEL: {{^}}constant_fold_fmax_f32_nan_nan:
+; GCN-NOT: v_max_f32_e32
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000
+; GCN: buffer_store_dword [[REG]]
+define amdgpu_kernel void @constant_fold_fmax_f32_nan_nan(float addrspace(1)* %out) #0 {
+ %val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000)
store float %val, float addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @constant_fold_fmax_f32_val_nan
-; SI-NOT: v_max_f32_e32
-; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
-; SI: buffer_store_dword [[REG]]
-
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
-; EG-NOT: MAX_DX10
-; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmax_f32_val_nan(float addrspace(1)* %out) nounwind {
- %val = call float @llvm.maxnum.f32(float 1.0, float 0x7FF8000000000000) #0
+; GCN-LABEL: {{^}}constant_fold_fmax_f32_val_nan:
+; GCN-NOT: v_max_f32_e32
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
+; GCN: buffer_store_dword [[REG]]
+define amdgpu_kernel void @constant_fold_fmax_f32_val_nan(float addrspace(1)* %out) #0 {
+ %val = call float @llvm.maxnum.f32(float 1.0, float 0x7FF8000000000000)
store float %val, float addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @constant_fold_fmax_f32_nan_val
-; SI-NOT: v_max_f32_e32
-; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
-; SI: buffer_store_dword [[REG]]
-
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
-; EG-NOT: MAX_DX10
-; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmax_f32_nan_val(float addrspace(1)* %out) nounwind {
- %val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 1.0) #0
+; GCN-LABEL: {{^}}constant_fold_fmax_f32_nan_val:
+; GCN-NOT: v_max_f32_e32
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
+; GCN: buffer_store_dword [[REG]]
+define amdgpu_kernel void @constant_fold_fmax_f32_nan_val(float addrspace(1)* %out) #0 {
+ %val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 1.0)
store float %val, float addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @constant_fold_fmax_f32_p0_p0
-; SI-NOT: v_max_f32_e32
-; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
-; SI: buffer_store_dword [[REG]]
-
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
-; EG-NOT: MAX_DX10
-; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmax_f32_p0_p0(float addrspace(1)* %out) nounwind {
- %val = call float @llvm.maxnum.f32(float 0.0, float 0.0) #0
+; GCN-LABEL: {{^}}constant_fold_fmax_f32_p0_p0:
+; GCN-NOT: v_max_f32_e32
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0
+; GCN: buffer_store_dword [[REG]]
+define amdgpu_kernel void @constant_fold_fmax_f32_p0_p0(float addrspace(1)* %out) #0 {
+ %val = call float @llvm.maxnum.f32(float 0.0, float 0.0)
store float %val, float addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @constant_fold_fmax_f32_p0_n0
-; SI-NOT: v_max_f32_e32
-; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
-; SI: buffer_store_dword [[REG]]
-
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
-; EG-NOT: MAX_DX10
-; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmax_f32_p0_n0(float addrspace(1)* %out) nounwind {
- %val = call float @llvm.maxnum.f32(float 0.0, float -0.0) #0
+; GCN-LABEL: {{^}}constant_fold_fmax_f32_p0_n0:
+; GCN-NOT: v_max_f32_e32
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0
+; GCN: buffer_store_dword [[REG]]
+define amdgpu_kernel void @constant_fold_fmax_f32_p0_n0(float addrspace(1)* %out) #0 {
+ %val = call float @llvm.maxnum.f32(float 0.0, float -0.0)
store float %val, float addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @constant_fold_fmax_f32_n0_p0
-; SI-NOT: v_max_f32_e32
-; SI: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
-; SI: buffer_store_dword [[REG]]
-
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
-; EG-NOT: MAX_DX10
-; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmax_f32_n0_p0(float addrspace(1)* %out) nounwind {
- %val = call float @llvm.maxnum.f32(float -0.0, float 0.0) #0
+; GCN-LABEL: {{^}}constant_fold_fmax_f32_n0_p0:
+; GCN-NOT: v_max_f32_e32
+; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
+; GCN: buffer_store_dword [[REG]]
+define amdgpu_kernel void @constant_fold_fmax_f32_n0_p0(float addrspace(1)* %out) #0 {
+ %val = call float @llvm.maxnum.f32(float -0.0, float 0.0)
store float %val, float addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @constant_fold_fmax_f32_n0_n0
-; SI-NOT: v_max_f32_e32
-; SI: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
-; SI: buffer_store_dword [[REG]]
-
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
-; EG-NOT: MAX_DX10
-; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmax_f32_n0_n0(float addrspace(1)* %out) nounwind {
- %val = call float @llvm.maxnum.f32(float -0.0, float -0.0) #0
+; GCN-LABEL: {{^}}constant_fold_fmax_f32_n0_n0:
+; GCN-NOT: v_max_f32_e32
+; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
+; GCN: buffer_store_dword [[REG]]
+define amdgpu_kernel void @constant_fold_fmax_f32_n0_n0(float addrspace(1)* %out) #0 {
+ %val = call float @llvm.maxnum.f32(float -0.0, float -0.0)
store float %val, float addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @fmax_var_immediate_f32
-; SI: v_max_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, 2.0
-
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
-; EG-NOT: MAX_DX10
-; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @fmax_var_immediate_f32(float addrspace(1)* %out, float %a) nounwind {
- %val = call float @llvm.maxnum.f32(float %a, float 2.0) #0
+; GCN-LABEL: {{^}}fmax_var_immediate_f32:
+; GCN: v_max_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, 2.0
+define amdgpu_kernel void @fmax_var_immediate_f32(float addrspace(1)* %out, float %a) #0 {
+ %val = call float @llvm.maxnum.f32(float %a, float 2.0)
store float %val, float addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @fmax_immediate_var_f32
-; SI: v_max_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, 2.0
-
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
-; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
-define amdgpu_kernel void @fmax_immediate_var_f32(float addrspace(1)* %out, float %a) nounwind {
- %val = call float @llvm.maxnum.f32(float 2.0, float %a) #0
+; GCN-LABEL: {{^}}fmax_immediate_var_f32:
+; GCN: v_max_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, 2.0
+define amdgpu_kernel void @fmax_immediate_var_f32(float addrspace(1)* %out, float %a) #0 {
+ %val = call float @llvm.maxnum.f32(float 2.0, float %a)
store float %val, float addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @fmax_var_literal_f32
-; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
-; SI: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
-
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
-; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
-define amdgpu_kernel void @fmax_var_literal_f32(float addrspace(1)* %out, float %a) nounwind {
- %val = call float @llvm.maxnum.f32(float %a, float 99.0) #0
+; GCN-LABEL: {{^}}fmax_var_literal_f32:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
+; GCN: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
+define amdgpu_kernel void @fmax_var_literal_f32(float addrspace(1)* %out, float %a) #0 {
+ %val = call float @llvm.maxnum.f32(float %a, float 99.0)
store float %val, float addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @fmax_literal_var_f32
-; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
-; SI: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
-
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
-; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
-define amdgpu_kernel void @fmax_literal_var_f32(float addrspace(1)* %out, float %a) nounwind {
- %val = call float @llvm.maxnum.f32(float 99.0, float %a) #0
+; GCN-LABEL: {{^}}fmax_literal_var_f32:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
+; GCN: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
+define amdgpu_kernel void @fmax_literal_var_f32(float addrspace(1)* %out, float %a) #0 {
+ %val = call float @llvm.maxnum.f32(float 99.0, float %a)
store float %val, float addrspace(1)* %out, align 4
ret void
}
-attributes #0 = { nounwind readnone }
+; GCN-LABEL: {{^}}test_func_fmax_v3f32:
+; GCN: v_max_f32_e32
+; GCN: v_max_f32_e32
+; GCN: v_max_f32_e32
+; GCN-NOT: v_max_f32
+define <3 x float> @test_func_fmax_v3f32(<3 x float> %a, <3 x float> %b) nounwind {
+ %val = call <3 x float> @llvm.maxnum.v3f32(<3 x float> %a, <3 x float> %b) #0
+ ret <3 x float> %val
+}
+
+declare float @llvm.maxnum.f32(float, float) #1
+declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) #1
+declare <3 x float> @llvm.maxnum.v3f32(<3 x float>, <3 x float>) #1
+declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) #1
+declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>) #1
+declare <16 x float> @llvm.maxnum.v16f32(<16 x float>, <16 x float>) #1
+declare double @llvm.maxnum.f64(double, double)
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/AMDGPU/fmaxnum.r600.ll b/test/CodeGen/AMDGPU/fmaxnum.r600.ll
new file mode 100644
index 000000000000..71bb4afa64ef
--- /dev/null
+++ b/test/CodeGen/AMDGPU/fmaxnum.r600.ll
@@ -0,0 +1,203 @@
+; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -enable-var-scope -check-prefix=EG %s
+
+; EG-LABEL: {{^}}test_fmax_f32:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG: MAX_DX10 {{.*}}[[OUT]]
+define amdgpu_kernel void @test_fmax_f32(float addrspace(1)* %out, float %a, float %b) #0 {
+ %val = call float @llvm.maxnum.f32(float %a, float %b)
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-LABEL: {{^}}test_fmax_v2f32:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]]
+; EG: MAX_DX10 {{.*}}[[OUT]]
+; EG: MAX_DX10 {{.*}}[[OUT]]
+define amdgpu_kernel void @test_fmax_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
+ %val = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %a, <2 x float> %b)
+ store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8
+ ret void
+}
+
+; EG-LABEL: {{^}}test_fmax_v4f32:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]]
+; EG: MAX_DX10 {{.*}}[[OUT]]
+; EG: MAX_DX10 {{.*}}[[OUT]]
+; EG: MAX_DX10 {{.*}}[[OUT]]
+; EG: MAX_DX10 {{.*}}[[OUT]]
+define amdgpu_kernel void @test_fmax_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) #0 {
+ %val = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b)
+ store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16
+ ret void
+}
+
+; EG-LABEL: {{^}}test_fmax_v8f32:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT1:T[0-9]+]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT2:T[0-9]+]]
+; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].X
+; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Y
+; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Z
+; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].W
+; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].X
+; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Y
+; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Z
+; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].W
+define amdgpu_kernel void @test_fmax_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) #0 {
+ %val = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %a, <8 x float> %b)
+ store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32
+ ret void
+}
+
+; EG-LABEL: {{^}}test_fmax_v16f32:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT1:T[0-9]+]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT2:T[0-9]+]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT3:T[0-9]+]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT4:T[0-9]+]]
+; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].X
+; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Y
+; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Z
+; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].W
+; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].X
+; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Y
+; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Z
+; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].W
+; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].X
+; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].Y
+; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].Z
+; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].W
+; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].X
+; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].Y
+; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].Z
+; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].W
+define amdgpu_kernel void @test_fmax_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) #0 {
+ %val = call <16 x float> @llvm.maxnum.v16f32(<16 x float> %a, <16 x float> %b)
+ store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64
+ ret void
+}
+
+; EG-LABEL: {{^}}constant_fold_fmax_f32:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MAX_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
+define amdgpu_kernel void @constant_fold_fmax_f32(float addrspace(1)* %out) #0 {
+ %val = call float @llvm.maxnum.f32(float 1.0, float 2.0)
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-LABEL: {{^}}constant_fold_fmax_f32_nan_nan:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MAX_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
+; EG: 2143289344(nan)
+define amdgpu_kernel void @constant_fold_fmax_f32_nan_nan(float addrspace(1)* %out) #0 {
+ %val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000)
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-LABEL: {{^}}constant_fold_fmax_f32_val_nan:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MAX_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
+define amdgpu_kernel void @constant_fold_fmax_f32_val_nan(float addrspace(1)* %out) #0 {
+ %val = call float @llvm.maxnum.f32(float 1.0, float 0x7FF8000000000000)
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-LABEL: {{^}}constant_fold_fmax_f32_nan_val:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MAX_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
+define amdgpu_kernel void @constant_fold_fmax_f32_nan_val(float addrspace(1)* %out) #0 {
+ %val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 1.0)
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-LABEL: {{^}}constant_fold_fmax_f32_p0_p0:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MAX_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
+define amdgpu_kernel void @constant_fold_fmax_f32_p0_p0(float addrspace(1)* %out) #0 {
+ %val = call float @llvm.maxnum.f32(float 0.0, float 0.0)
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-LABEL: {{^}}constant_fold_fmax_f32_p0_n0:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MAX_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
+define amdgpu_kernel void @constant_fold_fmax_f32_p0_n0(float addrspace(1)* %out) #0 {
+ %val = call float @llvm.maxnum.f32(float 0.0, float -0.0)
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-LABEL: {{^}}constant_fold_fmax_f32_n0_p0:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MAX_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
+define amdgpu_kernel void @constant_fold_fmax_f32_n0_p0(float addrspace(1)* %out) #0 {
+ %val = call float @llvm.maxnum.f32(float -0.0, float 0.0)
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-LABEL: {{^}}constant_fold_fmax_f32_n0_n0:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MAX_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
+define amdgpu_kernel void @constant_fold_fmax_f32_n0_n0(float addrspace(1)* %out) #0 {
+ %val = call float @llvm.maxnum.f32(float -0.0, float -0.0)
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-LABEL: {{^}}fmax_var_immediate_f32:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG: MAX_DX10 * [[OUT]]
+define amdgpu_kernel void @fmax_var_immediate_f32(float addrspace(1)* %out, float %a) #0 {
+ %val = call float @llvm.maxnum.f32(float %a, float 2.0)
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-LABEL: {{^}}fmax_immediate_var_f32:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
+define amdgpu_kernel void @fmax_immediate_var_f32(float addrspace(1)* %out, float %a) #0 {
+ %val = call float @llvm.maxnum.f32(float 2.0, float %a)
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-LABEL: {{^}}fmax_var_literal_f32:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
+define amdgpu_kernel void @fmax_var_literal_f32(float addrspace(1)* %out, float %a) #0 {
+ %val = call float @llvm.maxnum.f32(float %a, float 99.0)
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-LABEL: {{^}}fmax_literal_var_f32:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
+define amdgpu_kernel void @fmax_literal_var_f32(float addrspace(1)* %out, float %a) #0 {
+ %val = call float @llvm.maxnum.f32(float 99.0, float %a)
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+declare float @llvm.maxnum.f32(float, float) #1
+declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) #1
+declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) #1
+declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>) #1
+declare <16 x float> @llvm.maxnum.v16f32(<16 x float>, <16 x float>) #1
+declare double @llvm.maxnum.f64(double, double)
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/AMDGPU/fmin3.ll b/test/CodeGen/AMDGPU/fmin3.ll
index 06befaa64b5c..fa93fbcfb917 100644
--- a/test/CodeGen/AMDGPU/fmin3.ll
+++ b/test/CodeGen/AMDGPU/fmin3.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
; GCN-LABEL: {{^}}test_fmin3_olt_0_f32:
; GCN: buffer_load_dword [[REGC:v[0-9]+]]
@@ -60,17 +60,20 @@ define amdgpu_kernel void @test_fmin3_olt_0_f16(half addrspace(1)* %out, half ad
; Commute operand of second fmin
; GCN-LABEL: {{^}}test_fmin3_olt_1_f16:
-; GCN: buffer_load_ushort [[REGB:v[0-9]+]]
; GCN: buffer_load_ushort [[REGA:v[0-9]+]]
+; GCN: buffer_load_ushort [[REGB:v[0-9]+]]
; GCN: buffer_load_ushort [[REGC:v[0-9]+]]
-; SI: v_min3_f32 [[RESULT_F32:v[0-9]+]],
-; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[RESULT]]
+; SI-DAG: v_cvt_f32_f16_e32 [[CVT_A:v[0-9]+]], [[REGA]]
+; SI-DAG: v_cvt_f32_f16_e32 [[CVT_B:v[0-9]+]], [[REGB]]
+; SI-DAG: v_cvt_f32_f16_e32 [[CVT_C:v[0-9]+]], [[REGC]]
+; SI: v_min3_f32 [[RESULT_F32:v[0-9]+]], [[CVT_C]], [[CVT_A]], [[CVT_B]]
+; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[RESULT_F32]]
; VI: v_min_f16_e32
; VI: v_min_f16_e32 [[RESULT:v[0-9]+]],
-; GFX9: v_min3_f16 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; GFX9: v_min3_f16 [[RESULT:v[0-9]+]], [[REGC]], [[REGA]], [[REGB]]
; GCN: buffer_store_short [[RESULT]],
define amdgpu_kernel void @test_fmin3_olt_1_f16(half addrspace(1)* %out, half addrspace(1)* %aptr, half addrspace(1)* %bptr, half addrspace(1)* %cptr) #0 {
%a = load volatile half, half addrspace(1)* %aptr, align 2
diff --git a/test/CodeGen/AMDGPU/fminnum.ll b/test/CodeGen/AMDGPU/fminnum.ll
index 9e997c7a1045..a0642e211f13 100644
--- a/test/CodeGen/AMDGPU/fminnum.ll
+++ b/test/CodeGen/AMDGPU/fminnum.ll
@@ -1,281 +1,202 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
-declare float @llvm.minnum.f32(float, float) #0
-declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) #0
-declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) #0
-declare <8 x float> @llvm.minnum.v8f32(<8 x float>, <8 x float>) #0
-declare <16 x float> @llvm.minnum.v16f32(<16 x float>, <16 x float>) #0
-
-; FUNC-LABEL: @test_fmin_f32
-; SI: v_min_f32_e32
-
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
-; EG: MIN_DX10 {{.*}}[[OUT]]
-define amdgpu_kernel void @test_fmin_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
- %val = call float @llvm.minnum.f32(float %a, float %b) #0
+; GCN-LABEL: {{^}}test_fmin_f32:
+; GCN: v_min_f32_e32
+define amdgpu_kernel void @test_fmin_f32(float addrspace(1)* %out, float %a, float %b) #0 {
+ %val = call float @llvm.minnum.f32(float %a, float %b)
store float %val, float addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @test_fmin_v2f32
-; SI: v_min_f32_e32
-; SI: v_min_f32_e32
-
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]]
-; EG: MIN_DX10 {{.*}}[[OUT]]
-; EG: MIN_DX10 {{.*}}[[OUT]]
-define amdgpu_kernel void @test_fmin_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) nounwind {
- %val = call <2 x float> @llvm.minnum.v2f32(<2 x float> %a, <2 x float> %b) #0
+; GCN-LABEL: {{^}}test_fmin_v2f32:
+; GCN: v_min_f32_e32
+; GCN: v_min_f32_e32
+define amdgpu_kernel void @test_fmin_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
+ %val = call <2 x float> @llvm.minnum.v2f32(<2 x float> %a, <2 x float> %b)
store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8
ret void
}
-; FUNC-LABEL: @test_fmin_v4f32
-; SI: v_min_f32_e32
-; SI: v_min_f32_e32
-; SI: v_min_f32_e32
-; SI: v_min_f32_e32
-
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]]
-; EG: MIN_DX10 {{.*}}[[OUT]]
-; EG: MIN_DX10 {{.*}}[[OUT]]
-; EG: MIN_DX10 {{.*}}[[OUT]]
-; EG: MIN_DX10 {{.*}}[[OUT]]
-define amdgpu_kernel void @test_fmin_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) nounwind {
- %val = call <4 x float> @llvm.minnum.v4f32(<4 x float> %a, <4 x float> %b) #0
+; GCN-LABEL: {{^}}test_fmin_v4f32:
+; GCN: v_min_f32_e32
+; GCN: v_min_f32_e32
+; GCN: v_min_f32_e32
+; GCN: v_min_f32_e32
+define amdgpu_kernel void @test_fmin_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) #0 {
+ %val = call <4 x float> @llvm.minnum.v4f32(<4 x float> %a, <4 x float> %b)
store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16
ret void
}
-; FUNC-LABEL: @test_fmin_v8f32
-; SI: v_min_f32_e32
-; SI: v_min_f32_e32
-; SI: v_min_f32_e32
-; SI: v_min_f32_e32
-; SI: v_min_f32_e32
-; SI: v_min_f32_e32
-; SI: v_min_f32_e32
-; SI: v_min_f32_e32
-
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT1:T[0-9]+]]
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT2:T[0-9]+]]
-; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].X
-; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].Y
-; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].Z
-; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].W
-; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].X
-; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].Y
-; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].Z
-; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].W
-define amdgpu_kernel void @test_fmin_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) nounwind {
- %val = call <8 x float> @llvm.minnum.v8f32(<8 x float> %a, <8 x float> %b) #0
+; GCN-LABEL: {{^}}test_fmin_v8f32:
+; GCN: v_min_f32_e32
+; GCN: v_min_f32_e32
+; GCN: v_min_f32_e32
+; GCN: v_min_f32_e32
+; GCN: v_min_f32_e32
+; GCN: v_min_f32_e32
+; GCN: v_min_f32_e32
+; GCN: v_min_f32_e32
+define amdgpu_kernel void @test_fmin_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) #0 {
+ %val = call <8 x float> @llvm.minnum.v8f32(<8 x float> %a, <8 x float> %b)
store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32
ret void
}
-; FUNC-LABEL: @test_fmin_v16f32
-; SI: v_min_f32_e32
-; SI: v_min_f32_e32
-; SI: v_min_f32_e32
-; SI: v_min_f32_e32
-; SI: v_min_f32_e32
-; SI: v_min_f32_e32
-; SI: v_min_f32_e32
-; SI: v_min_f32_e32
-; SI: v_min_f32_e32
-; SI: v_min_f32_e32
-; SI: v_min_f32_e32
-; SI: v_min_f32_e32
-; SI: v_min_f32_e32
-; SI: v_min_f32_e32
-; SI: v_min_f32_e32
-; SI: v_min_f32_e32
-
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT1:T[0-9]+]]
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT2:T[0-9]+]]
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT3:T[0-9]+]]
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT4:T[0-9]+]]
-; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].X
-; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].Y
-; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].Z
-; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].W
-; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].X
-; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].Y
-; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].Z
-; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].W
-; EG-DAG: MIN_DX10 {{.*}}[[OUT3]].X
-; EG-DAG: MIN_DX10 {{.*}}[[OUT3]].Y
-; EG-DAG: MIN_DX10 {{.*}}[[OUT3]].Z
-; EG-DAG: MIN_DX10 {{.*}}[[OUT3]].W
-; EG-DAG: MIN_DX10 {{.*}}[[OUT4]].X
-; EG-DAG: MIN_DX10 {{.*}}[[OUT4]].Y
-; EG-DAG: MIN_DX10 {{.*}}[[OUT4]].Z
-; EG-DAG: MIN_DX10 {{.*}}[[OUT4]].W
-define amdgpu_kernel void @test_fmin_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) nounwind {
- %val = call <16 x float> @llvm.minnum.v16f32(<16 x float> %a, <16 x float> %b) #0
+; GCN-LABEL: {{^}}test_fmin_v16f32:
+; GCN: v_min_f32_e32
+; GCN: v_min_f32_e32
+; GCN: v_min_f32_e32
+; GCN: v_min_f32_e32
+; GCN: v_min_f32_e32
+; GCN: v_min_f32_e32
+; GCN: v_min_f32_e32
+; GCN: v_min_f32_e32
+; GCN: v_min_f32_e32
+; GCN: v_min_f32_e32
+; GCN: v_min_f32_e32
+; GCN: v_min_f32_e32
+; GCN: v_min_f32_e32
+; GCN: v_min_f32_e32
+; GCN: v_min_f32_e32
+; GCN: v_min_f32_e32
+define amdgpu_kernel void @test_fmin_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) #0 {
+ %val = call <16 x float> @llvm.minnum.v16f32(<16 x float> %a, <16 x float> %b)
store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64
ret void
}
-; FUNC-LABEL: @constant_fold_fmin_f32
-; SI-NOT: v_min_f32_e32
-; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
-; SI: buffer_store_dword [[REG]]
-
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
-; EG-NOT: MIN_DX10
-; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmin_f32(float addrspace(1)* %out) nounwind {
- %val = call float @llvm.minnum.f32(float 1.0, float 2.0) #0
+; GCN-LABEL: {{^}}constant_fold_fmin_f32:
+; GCN-NOT: v_min_f32_e32
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
+; GCN: buffer_store_dword [[REG]]
+define amdgpu_kernel void @constant_fold_fmin_f32(float addrspace(1)* %out) #0 {
+ %val = call float @llvm.minnum.f32(float 1.0, float 2.0)
store float %val, float addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @constant_fold_fmin_f32_nan_nan
-; SI-NOT: v_min_f32_e32
-; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000
-; SI: buffer_store_dword [[REG]]
-
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
-; EG-NOT: MIN_DX10
-; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-; EG: 2143289344({{nan|1\.#QNAN0e\+00}})
-define amdgpu_kernel void @constant_fold_fmin_f32_nan_nan(float addrspace(1)* %out) nounwind {
- %val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000) #0
+; GCN-LABEL: {{^}}constant_fold_fmin_f32_nan_nan:
+; GCN-NOT: v_min_f32_e32
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000
+; GCN: buffer_store_dword [[REG]]
+define amdgpu_kernel void @constant_fold_fmin_f32_nan_nan(float addrspace(1)* %out) #0 {
+ %val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000)
store float %val, float addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @constant_fold_fmin_f32_val_nan
-; SI-NOT: v_min_f32_e32
-; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
-; SI: buffer_store_dword [[REG]]
-
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
-; EG-NOT: MIN_DX10
-; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmin_f32_val_nan(float addrspace(1)* %out) nounwind {
- %val = call float @llvm.minnum.f32(float 1.0, float 0x7FF8000000000000) #0
+; GCN-LABEL: {{^}}constant_fold_fmin_f32_val_nan:
+; GCN-NOT: v_min_f32_e32
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
+; GCN: buffer_store_dword [[REG]]
+define amdgpu_kernel void @constant_fold_fmin_f32_val_nan(float addrspace(1)* %out) #0 {
+ %val = call float @llvm.minnum.f32(float 1.0, float 0x7FF8000000000000)
store float %val, float addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @constant_fold_fmin_f32_nan_val
-; SI-NOT: v_min_f32_e32
-; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
-; SI: buffer_store_dword [[REG]]
-
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
-; EG-NOT: MIN_DX10
-; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmin_f32_nan_val(float addrspace(1)* %out) nounwind {
- %val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 1.0) #0
+; GCN-LABEL: {{^}}constant_fold_fmin_f32_nan_val:
+; GCN-NOT: v_min_f32_e32
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
+; GCN: buffer_store_dword [[REG]]
+define amdgpu_kernel void @constant_fold_fmin_f32_nan_val(float addrspace(1)* %out) #0 {
+ %val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 1.0)
store float %val, float addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @constant_fold_fmin_f32_p0_p0
-; SI-NOT: v_min_f32_e32
-; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
-; SI: buffer_store_dword [[REG]]
-
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
-; EG-NOT: MIN_DX10
-; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmin_f32_p0_p0(float addrspace(1)* %out) nounwind {
- %val = call float @llvm.minnum.f32(float 0.0, float 0.0) #0
+; GCN-LABEL: {{^}}constant_fold_fmin_f32_p0_p0:
+; GCN-NOT: v_min_f32_e32
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0
+; GCN: buffer_store_dword [[REG]]
+define amdgpu_kernel void @constant_fold_fmin_f32_p0_p0(float addrspace(1)* %out) #0 {
+ %val = call float @llvm.minnum.f32(float 0.0, float 0.0)
store float %val, float addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @constant_fold_fmin_f32_p0_n0
-; SI-NOT: v_min_f32_e32
-; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
-; SI: buffer_store_dword [[REG]]
-
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
-; EG-NOT: MIN_DX10
-; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmin_f32_p0_n0(float addrspace(1)* %out) nounwind {
- %val = call float @llvm.minnum.f32(float 0.0, float -0.0) #0
+; GCN-LABEL: {{^}}constant_fold_fmin_f32_p0_n0:
+; GCN-NOT: v_min_f32_e32
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0
+; GCN: buffer_store_dword [[REG]]
+define amdgpu_kernel void @constant_fold_fmin_f32_p0_n0(float addrspace(1)* %out) #0 {
+ %val = call float @llvm.minnum.f32(float 0.0, float -0.0)
store float %val, float addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @constant_fold_fmin_f32_n0_p0
-; SI-NOT: v_min_f32_e32
-; SI: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
-; SI: buffer_store_dword [[REG]]
-
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
-; EG-NOT: MIN_DX10
-; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmin_f32_n0_p0(float addrspace(1)* %out) nounwind {
- %val = call float @llvm.minnum.f32(float -0.0, float 0.0) #0
+; GCN-LABEL: {{^}}constant_fold_fmin_f32_n0_p0:
+; GCN-NOT: v_min_f32_e32
+; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
+; GCN: buffer_store_dword [[REG]]
+define amdgpu_kernel void @constant_fold_fmin_f32_n0_p0(float addrspace(1)* %out) #0 {
+ %val = call float @llvm.minnum.f32(float -0.0, float 0.0)
store float %val, float addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @constant_fold_fmin_f32_n0_n0
-; SI-NOT: v_min_f32_e32
-; SI: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
-; SI: buffer_store_dword [[REG]]
-
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
-; EG-NOT: MIN_DX10
-; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmin_f32_n0_n0(float addrspace(1)* %out) nounwind {
- %val = call float @llvm.minnum.f32(float -0.0, float -0.0) #0
+; GCN-LABEL: {{^}}constant_fold_fmin_f32_n0_n0:
+; GCN-NOT: v_min_f32_e32
+; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
+; GCN: buffer_store_dword [[REG]]
+define amdgpu_kernel void @constant_fold_fmin_f32_n0_n0(float addrspace(1)* %out) #0 {
+ %val = call float @llvm.minnum.f32(float -0.0, float -0.0)
store float %val, float addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @fmin_var_immediate_f32
-; SI: v_min_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, 2.0
-
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
-; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
-define amdgpu_kernel void @fmin_var_immediate_f32(float addrspace(1)* %out, float %a) nounwind {
- %val = call float @llvm.minnum.f32(float %a, float 2.0) #0
+; GCN-LABEL: {{^}}fmin_var_immediate_f32:
+; GCN: v_min_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, 2.0
+define amdgpu_kernel void @fmin_var_immediate_f32(float addrspace(1)* %out, float %a) #0 {
+ %val = call float @llvm.minnum.f32(float %a, float 2.0)
store float %val, float addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @fmin_immediate_var_f32
-; SI: v_min_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, 2.0
-
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
-; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
-define amdgpu_kernel void @fmin_immediate_var_f32(float addrspace(1)* %out, float %a) nounwind {
- %val = call float @llvm.minnum.f32(float 2.0, float %a) #0
+; GCN-LABEL: {{^}}fmin_immediate_var_f32:
+; GCN: v_min_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, 2.0
+define amdgpu_kernel void @fmin_immediate_var_f32(float addrspace(1)* %out, float %a) #0 {
+ %val = call float @llvm.minnum.f32(float 2.0, float %a)
store float %val, float addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @fmin_var_literal_f32
-; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
-; SI: v_min_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
-
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
-; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
-define amdgpu_kernel void @fmin_var_literal_f32(float addrspace(1)* %out, float %a) nounwind {
- %val = call float @llvm.minnum.f32(float %a, float 99.0) #0
+; GCN-LABEL: {{^}}fmin_var_literal_f32:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
+; GCN: v_min_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
+define amdgpu_kernel void @fmin_var_literal_f32(float addrspace(1)* %out, float %a) #0 {
+ %val = call float @llvm.minnum.f32(float %a, float 99.0)
store float %val, float addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @fmin_literal_var_f32
-; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
-; SI: v_min_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
-
-; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
-; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
-define amdgpu_kernel void @fmin_literal_var_f32(float addrspace(1)* %out, float %a) nounwind {
- %val = call float @llvm.minnum.f32(float 99.0, float %a) #0
+; GCN-LABEL: {{^}}fmin_literal_var_f32:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
+; GCN: v_min_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
+define amdgpu_kernel void @fmin_literal_var_f32(float addrspace(1)* %out, float %a) #0 {
+ %val = call float @llvm.minnum.f32(float 99.0, float %a)
store float %val, float addrspace(1)* %out, align 4
ret void
}
-attributes #0 = { nounwind readnone }
+; GCN-LABEL: {{^}}test_func_fmin_v3f32:
+; GCN: v_min_f32_e32
+; GCN: v_min_f32_e32
+; GCN: v_min_f32_e32
+; GCN-NOT: v_min_f32
+define <3 x float> @test_func_fmin_v3f32(<3 x float> %a, <3 x float> %b) nounwind {
+ %val = call <3 x float> @llvm.minnum.v3f32(<3 x float> %a, <3 x float> %b) #0
+ ret <3 x float> %val
+}
+
+declare float @llvm.minnum.f32(float, float) #1
+declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) #1
+declare <3 x float> @llvm.minnum.v3f32(<3 x float>, <3 x float>) #1
+declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) #1
+declare <8 x float> @llvm.minnum.v8f32(<8 x float>, <8 x float>) #1
+declare <16 x float> @llvm.minnum.v16f32(<16 x float>, <16 x float>) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/AMDGPU/fminnum.r600.ll b/test/CodeGen/AMDGPU/fminnum.r600.ll
new file mode 100644
index 000000000000..713e95c7f46e
--- /dev/null
+++ b/test/CodeGen/AMDGPU/fminnum.r600.ll
@@ -0,0 +1,202 @@
+; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -enable-var-scope -check-prefix=EG %s
+
+; EG-LABEL: {{^}}test_fmin_f32:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG: MIN_DX10 {{.*}}[[OUT]]
+define amdgpu_kernel void @test_fmin_f32(float addrspace(1)* %out, float %a, float %b) #0 {
+ %val = call float @llvm.minnum.f32(float %a, float %b)
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-LABEL: {{^}}test_fmin_v2f32:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]]
+; EG: MIN_DX10 {{.*}}[[OUT]]
+; EG: MIN_DX10 {{.*}}[[OUT]]
+define amdgpu_kernel void @test_fmin_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
+ %val = call <2 x float> @llvm.minnum.v2f32(<2 x float> %a, <2 x float> %b)
+ store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8
+ ret void
+}
+
+; EG-LABEL: {{^}}test_fmin_v4f32:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]]
+; EG: MIN_DX10 {{.*}}[[OUT]]
+; EG: MIN_DX10 {{.*}}[[OUT]]
+; EG: MIN_DX10 {{.*}}[[OUT]]
+; EG: MIN_DX10 {{.*}}[[OUT]]
+define amdgpu_kernel void @test_fmin_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) #0 {
+ %val = call <4 x float> @llvm.minnum.v4f32(<4 x float> %a, <4 x float> %b)
+ store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16
+ ret void
+}
+
+; EG-LABEL: {{^}}test_fmin_v8f32:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT1:T[0-9]+]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT2:T[0-9]+]]
+; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].X
+; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].Y
+; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].Z
+; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].W
+; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].X
+; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].Y
+; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].Z
+; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].W
+define amdgpu_kernel void @test_fmin_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) #0 {
+ %val = call <8 x float> @llvm.minnum.v8f32(<8 x float> %a, <8 x float> %b)
+ store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32
+ ret void
+}
+
+; EG-LABEL: {{^}}test_fmin_v16f32:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT1:T[0-9]+]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT2:T[0-9]+]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT3:T[0-9]+]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT4:T[0-9]+]]
+; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].X
+; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].Y
+; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].Z
+; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].W
+; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].X
+; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].Y
+; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].Z
+; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].W
+; EG-DAG: MIN_DX10 {{.*}}[[OUT3]].X
+; EG-DAG: MIN_DX10 {{.*}}[[OUT3]].Y
+; EG-DAG: MIN_DX10 {{.*}}[[OUT3]].Z
+; EG-DAG: MIN_DX10 {{.*}}[[OUT3]].W
+; EG-DAG: MIN_DX10 {{.*}}[[OUT4]].X
+; EG-DAG: MIN_DX10 {{.*}}[[OUT4]].Y
+; EG-DAG: MIN_DX10 {{.*}}[[OUT4]].Z
+; EG-DAG: MIN_DX10 {{.*}}[[OUT4]].W
+define amdgpu_kernel void @test_fmin_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) #0 {
+ %val = call <16 x float> @llvm.minnum.v16f32(<16 x float> %a, <16 x float> %b)
+ store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64
+ ret void
+}
+
+; EG-LABEL: {{^}}constant_fold_fmin_f32:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MIN_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
+define amdgpu_kernel void @constant_fold_fmin_f32(float addrspace(1)* %out) #0 {
+ %val = call float @llvm.minnum.f32(float 1.0, float 2.0)
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-LABEL: {{^}}constant_fold_fmin_f32_nan_nan:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MIN_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
+; EG: 2143289344({{nan|1\.#QNAN0e\+00}})
+define amdgpu_kernel void @constant_fold_fmin_f32_nan_nan(float addrspace(1)* %out) #0 {
+ %val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000)
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-LABEL: {{^}}constant_fold_fmin_f32_val_nan:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MIN_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
+define amdgpu_kernel void @constant_fold_fmin_f32_val_nan(float addrspace(1)* %out) #0 {
+ %val = call float @llvm.minnum.f32(float 1.0, float 0x7FF8000000000000)
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-LABEL: {{^}}constant_fold_fmin_f32_nan_val:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MIN_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
+define amdgpu_kernel void @constant_fold_fmin_f32_nan_val(float addrspace(1)* %out) #0 {
+ %val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 1.0)
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-LABEL: {{^}}constant_fold_fmin_f32_p0_p0:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MIN_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
+define amdgpu_kernel void @constant_fold_fmin_f32_p0_p0(float addrspace(1)* %out) #0 {
+ %val = call float @llvm.minnum.f32(float 0.0, float 0.0)
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-LABEL: {{^}}constant_fold_fmin_f32_p0_n0:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MIN_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
+define amdgpu_kernel void @constant_fold_fmin_f32_p0_n0(float addrspace(1)* %out) #0 {
+ %val = call float @llvm.minnum.f32(float 0.0, float -0.0)
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-LABEL: {{^}}constant_fold_fmin_f32_n0_p0:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MIN_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
+define amdgpu_kernel void @constant_fold_fmin_f32_n0_p0(float addrspace(1)* %out) #0 {
+ %val = call float @llvm.minnum.f32(float -0.0, float 0.0)
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-LABEL: {{^}}constant_fold_fmin_f32_n0_n0:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MIN_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
+define amdgpu_kernel void @constant_fold_fmin_f32_n0_n0(float addrspace(1)* %out) #0 {
+ %val = call float @llvm.minnum.f32(float -0.0, float -0.0)
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-LABEL: {{^}}fmin_var_immediate_f32:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
+define amdgpu_kernel void @fmin_var_immediate_f32(float addrspace(1)* %out, float %a) #0 {
+ %val = call float @llvm.minnum.f32(float %a, float 2.0)
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-LABEL: {{^}}fmin_immediate_var_f32:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
+define amdgpu_kernel void @fmin_immediate_var_f32(float addrspace(1)* %out, float %a) #0 {
+ %val = call float @llvm.minnum.f32(float 2.0, float %a)
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-LABEL: {{^}}fmin_var_literal_f32:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
+define amdgpu_kernel void @fmin_var_literal_f32(float addrspace(1)* %out, float %a) #0 {
+ %val = call float @llvm.minnum.f32(float %a, float 99.0)
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-LABEL: {{^}}fmin_literal_var_f32:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
+define amdgpu_kernel void @fmin_literal_var_f32(float addrspace(1)* %out, float %a) #0 {
+ %val = call float @llvm.minnum.f32(float 99.0, float %a)
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+declare float @llvm.minnum.f32(float, float) #1
+declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) #1
+declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) #1
+declare <8 x float> @llvm.minnum.v8f32(<8 x float>, <8 x float>) #1
+declare <16 x float> @llvm.minnum.v16f32(<16 x float>, <16 x float>) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll b/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
index 4d08651dcb4c..e14d4019c184 100644
--- a/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
+++ b/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
@@ -1,6 +1,6 @@
-; XUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -mattr=+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s
+; XUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -mattr=+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,VI-DENORM %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,VI-FLUSH %s
; Make sure (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c)) doesn't
diff --git a/test/CodeGen/AMDGPU/fneg-combines.ll b/test/CodeGen/AMDGPU/fneg-combines.ll
index fd3d4f053e95..c065227012f6 100644
--- a/test/CodeGen/AMDGPU/fneg-combines.ll
+++ b/test/CodeGen/AMDGPU/fneg-combines.ll
@@ -1725,6 +1725,26 @@ define amdgpu_kernel void @v_fneg_nearbyint_f32(float addrspace(1)* %out, float
}
; --------------------------------------------------------------------------------
+; fcanonicalize tests
+; --------------------------------------------------------------------------------
+
+; GCN-LABEL: {{^}}v_fneg_canonicalize_f32:
+; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
+; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], -1.0, [[A]]
+; GCN: buffer_store_dword [[RESULT]]
+define amdgpu_kernel void @v_fneg_canonicalize_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %tid.ext = sext i32 %tid to i64
+ %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
+ %a = load volatile float, float addrspace(1)* %a.gep
+ %trunc = call float @llvm.canonicalize.f32(float %a)
+ %fneg = fsub float -0.0, %trunc
+ store float %fneg, float addrspace(1)* %out.gep
+ ret void
+}
+
+; --------------------------------------------------------------------------------
; vintrp tests
; --------------------------------------------------------------------------------
@@ -2117,6 +2137,7 @@ declare float @llvm.trunc.f32(float) #1
declare float @llvm.round.f32(float) #1
declare float @llvm.rint.f32(float) #1
declare float @llvm.nearbyint.f32(float) #1
+declare float @llvm.canonicalize.f32(float) #1
declare float @llvm.minnum.f32(float, float) #1
declare float @llvm.maxnum.f32(float, float) #1
diff --git a/test/CodeGen/AMDGPU/function-args.ll b/test/CodeGen/AMDGPU/function-args.ll
index 48d94465c131..71541b295537 100644
--- a/test/CodeGen/AMDGPU/function-args.ll
+++ b/test/CodeGen/AMDGPU/function-args.ll
@@ -739,6 +739,45 @@ define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1,
ret void
}
+; Make sure v3 isn't a wasted register because of v3 types being promoted to v4
+; GCN-LABEL: {{^}}void_func_v3f32_wasted_reg:
+; GCN: s_waitcnt
+; GCN: ds_write_b32 v{{[0-9]+}}, v0
+; GCN-NEXT: ds_write_b32 v{{[0-9]+}}, v1
+; GCN-NEXT: ds_write_b32 v{{[0-9]+}}, v2
+; GCN-NEXT: ds_write_b32 v{{[0-9]+}}, v3
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_setpc_b64
+define void @void_func_v3f32_wasted_reg(<3 x float> %arg0, i32 %arg1) #0 {
+ %arg0.0 = extractelement <3 x float> %arg0, i32 0
+ %arg0.1 = extractelement <3 x float> %arg0, i32 1
+ %arg0.2 = extractelement <3 x float> %arg0, i32 2
+ store volatile float %arg0.0, float addrspace(3)* undef
+ store volatile float %arg0.1, float addrspace(3)* undef
+ store volatile float %arg0.2, float addrspace(3)* undef
+ store volatile i32 %arg1, i32 addrspace(3)* undef
+ ret void
+}
+
+; GCN-LABEL: {{^}}void_func_v3i32_wasted_reg:
+; GCN: s_waitcnt
+; GCN: ds_write_b32 v{{[0-9]+}}, v0
+; GCN-NEXT: ds_write_b32 v{{[0-9]+}}, v1
+; GCN-NEXT: ds_write_b32 v{{[0-9]+}}, v2
+; GCN-NEXT: ds_write_b32 v{{[0-9]+}}, v3
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_setpc_b64
+define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) #0 {
+ %arg0.0 = extractelement <3 x i32> %arg0, i32 0
+ %arg0.1 = extractelement <3 x i32> %arg0, i32 1
+ %arg0.2 = extractelement <3 x i32> %arg0, i32 2
+ store volatile i32 %arg0.0, i32 addrspace(3)* undef
+ store volatile i32 %arg0.1, i32 addrspace(3)* undef
+ store volatile i32 %arg0.2, i32 addrspace(3)* undef
+ store volatile i32 %arg1, i32 addrspace(3)* undef
+ ret void
+}
+
; Check there is no crash.
; GCN-LABEL: {{^}}void_func_v16i8:
define void @void_func_v16i8(<16 x i8> %arg0) #0 {
diff --git a/test/CodeGen/AMDGPU/function-returns.ll b/test/CodeGen/AMDGPU/function-returns.ll
index 32ecc417feda..20208b188d78 100644
--- a/test/CodeGen/AMDGPU/function-returns.ll
+++ b/test/CodeGen/AMDGPU/function-returns.ll
@@ -531,4 +531,43 @@ define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 {
ret { i32, <32 x i32> }%val
}
+; Make sure the last struct component is returned in v3, not v4.
+; GCN-LABEL: {{^}}v3i32_struct_func_void_wasted_reg:
+; GCN: ds_read_b32 v0,
+; GCN: ds_read_b32 v1,
+; GCN: ds_read_b32 v2,
+; GCN: ds_read_b32 v3,
+define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 {
+ %load0 = load volatile i32, i32 addrspace(3)* undef
+ %load1 = load volatile i32, i32 addrspace(3)* undef
+ %load2 = load volatile i32, i32 addrspace(3)* undef
+ %load3 = load volatile i32, i32 addrspace(3)* undef
+
+ %insert.0 = insertelement <3 x i32> undef, i32 %load0, i32 0
+ %insert.1 = insertelement <3 x i32> %insert.0, i32 %load1, i32 1
+ %insert.2 = insertelement <3 x i32> %insert.1, i32 %load2, i32 2
+ %insert.3 = insertvalue { <3 x i32>, i32 } undef, <3 x i32> %insert.2, 0
+ %insert.4 = insertvalue { <3 x i32>, i32 } %insert.3, i32 %load3, 1
+ ret { <3 x i32>, i32 } %insert.4
+}
+
+; GCN-LABEL: {{^}}v3f32_struct_func_void_wasted_reg:
+; GCN: ds_read_b32 v0,
+; GCN: ds_read_b32 v1,
+; GCN: ds_read_b32 v2,
+; GCN: ds_read_b32 v3,
+define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 {
+ %load0 = load volatile float, float addrspace(3)* undef
+ %load1 = load volatile float, float addrspace(3)* undef
+ %load2 = load volatile float, float addrspace(3)* undef
+ %load3 = load volatile i32, i32 addrspace(3)* undef
+
+ %insert.0 = insertelement <3 x float> undef, float %load0, i32 0
+ %insert.1 = insertelement <3 x float> %insert.0, float %load1, i32 1
+ %insert.2 = insertelement <3 x float> %insert.1, float %load2, i32 2
+ %insert.3 = insertvalue { <3 x float>, i32 } undef, <3 x float> %insert.2, 0
+ %insert.4 = insertvalue { <3 x float>, i32 } %insert.3, i32 %load3, 1
+ ret { <3 x float>, i32 } %insert.4
+}
+
attributes #0 = { nounwind }
diff --git a/test/CodeGen/AMDGPU/kernel-args.ll b/test/CodeGen/AMDGPU/kernel-args.ll
index 9492b710d13e..9d1f582f4a88 100644
--- a/test/CodeGen/AMDGPU/kernel-args.ll
+++ b/test/CodeGen/AMDGPU/kernel-args.ll
@@ -1,19 +1,28 @@
; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap -enable-var-scope --check-prefixes=SI,GCN,MESA-GCN,FUNC %s
; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=VI,GCN,MESA-VI,MESA-GCN,FUNC %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=VI,GCN,HSA-VI,FUNC %s
-; RUN: llc < %s -march=r600 -mcpu=redwood -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefix=EG --check-prefix=FUNC %s
-; RUN: llc < %s -march=r600 -mcpu=cayman -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap -enable-var-scope --check-prefix=EG --check-prefix=FUNC %s
+; RUN: llc < %s -march=r600 -mcpu=redwood -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=EG,EGCM,FUNC %s
+; RUN: llc < %s -march=r600 -mcpu=cayman -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap -enable-var-scope --check-prefixes=CM,EGCM,FUNC %s
; FUNC-LABEL: {{^}}i8_arg:
; HSA-VI: kernarg_segment_byte_size = 12
; HSA-VI: kernarg_segment_alignment = 4
-; EG: AND_INT {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+
; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; MESA-VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
; MESA-GCN: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xff
; HSA-VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x8
; HSA-VI: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xff
+
+
+; EG: LSHR T0.X, KC0[2].Y, literal.x,
+; EG-NEXT: MOV * T1.X, KC0[2].Z,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+
+; CM: LSHR * T0.X, KC0[2].Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: MOV * T1.X, KC0[2].Z,
define amdgpu_kernel void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
%ext = zext i8 %in to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
@@ -23,12 +32,21 @@ define amdgpu_kernel void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) noun
; FUNC-LABEL: {{^}}i8_zext_arg:
; HSA-VI: kernarg_segment_byte_size = 12
; HSA-VI: kernarg_segment_alignment = 4
-; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
; HSA-VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x8
; HSA-VI: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xff
+
+
+; EG: BFE_INT T0.X, T0.X, 0.0, literal.x,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 2(2.802597e-45)
+
+; CM: BFE_INT * T0.X, T0.X, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
define amdgpu_kernel void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
%ext = zext i8 %in to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
@@ -38,7 +56,6 @@ define amdgpu_kernel void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zero
; FUNC-LABEL: {{^}}i8_sext_arg:
; HSA-VI: kernarg_segment_byte_size = 12
; HSA-VI: kernarg_segment_alignment = 4
-; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
@@ -46,6 +63,16 @@ define amdgpu_kernel void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zero
; HSA-VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x8
; HSA-VI: s_sext_i32_i8 s{{[0-9]+}}, [[VAL]]
; HSA-VI: flat_store_dword
+
+
+; EG: BFE_INT T0.X, T0.X, 0.0, literal.x,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 2(2.802597e-45)
+
+; CM: BFE_INT * T0.X, T0.X, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
define amdgpu_kernel void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
%ext = sext i8 %in to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
@@ -56,7 +83,6 @@ define amdgpu_kernel void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 sign
; HSA-VI: kernarg_segment_byte_size = 12
; HSA-VI: kernarg_segment_alignment = 4
-; EG: AND_INT {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; MESA-VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
@@ -65,6 +91,15 @@ define amdgpu_kernel void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 sign
; HSA-VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x8
; HSA-VI: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xffff{{$}}
; HSA-VI: flat_store_dword
+
+
+; EG: LSHR T0.X, KC0[2].Y, literal.x,
+; EG-NEXT: MOV * T1.X, KC0[2].Z,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+
+; CM: LSHR * T0.X, KC0[2].Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: MOV * T1.X, KC0[2].Z,
define amdgpu_kernel void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
%ext = zext i16 %in to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
@@ -75,13 +110,21 @@ define amdgpu_kernel void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) no
; HSA-VI: kernarg_segment_byte_size = 12
; HSA-VI: kernarg_segment_alignment = 4
-; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
; HSA-VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x8
; HSA-VI: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xffff{{$}}
; HSA-VI: flat_store_dword
+
+; EG: BFE_INT T0.X, T0.X, 0.0, literal.x,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
+; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45)
+
+; CM: BFE_INT * T0.X, T0.X, 0.0, literal.x,
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
define amdgpu_kernel void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
%ext = zext i16 %in to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
@@ -92,7 +135,6 @@ define amdgpu_kernel void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 ze
; HSA-VI: kernarg_segment_byte_size = 12
; HSA-VI: kernarg_segment_alignment = 4
-; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
@@ -100,6 +142,15 @@ define amdgpu_kernel void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 ze
; HSA-VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x8
; HSA-VI: s_sext_i32_i16 s{{[0-9]+}}, [[VAL]]
; HSA-VI: flat_store_dword
+
+; EG: BFE_INT T0.X, T0.X, 0.0, literal.x,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
+; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45)
+
+; CM: BFE_INT * T0.X, T0.X, 0.0, literal.x,
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
define amdgpu_kernel void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
%ext = sext i16 %in to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
@@ -110,7 +161,7 @@ define amdgpu_kernel void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 si
; HSA-VI: kernarg_segment_byte_size = 12
; HSA-VI: kernarg_segment_alignment = 4
-; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
+; EGCM: T{{[0-9]\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
; HSA-VI: s_load_dword s{{[0-9]}}, s[4:5], 0x8
@@ -123,7 +174,7 @@ entry:
; FUNC-LABEL: {{^}}f32_arg:
; HSA-VI: kernarg_segment_byte_size = 12
; HSA-VI: kernarg_segment_alignment = 4
-; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
+; EGCM: T{{[0-9]\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x8
@@ -137,8 +188,8 @@ entry:
; HSA-VI: kernarg_segment_byte_size = 12
; HSA-VI: kernarg_segment_alignment = 4
-; EG: VTX_READ_8
-; EG: VTX_READ_8
+; EGCM: VTX_READ_8
+; EGCM: VTX_READ_8
; GCN: s_load_dword s
; GCN-NOT: {{buffer|flat|global}}_load_
@@ -152,8 +203,8 @@ entry:
; HSA-VI: kernarg_segment_byte_size = 12
; HSA-VI: kernarg_segment_alignment = 4
-; EG: VTX_READ_16
-; EG: VTX_READ_16
+; EGCM: VTX_READ_16
+; EGCM: VTX_READ_16
; SI: s_load_dword s{{[0-9]+}}, s[0:1], 0xb
; MESA-VI: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
@@ -168,8 +219,8 @@ entry:
; HSA-VI: kernarg_segment_byte_size = 16
; HSA-VI: kernarg_segment_alignment = 4
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
; MESA-VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x2c
; HSA-VI: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x8
@@ -183,8 +234,8 @@ entry:
; HSA-VI: kernarg_segment_byte_size = 16
; HSA-VI: kernarg_segment_alignment = 4
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
; MESA-VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x2c
; HSA-VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[4:5], 0x8
@@ -198,9 +249,9 @@ entry:
; HSA-VI: kernarg_segment_byte_size = 12
; HSA-VI: kernarg_segment_alignment = 4
-; EG-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 40
-; EG-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 41
-; EG-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 42
+; EGCM-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 40
+; EGCM-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 41
+; EGCM-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 42
; SI: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
@@ -216,9 +267,9 @@ entry:
; HSA-VI: kernarg_segment_byte_size = 16
; HSA-VI: kernarg_segment_alignment = 4
-; EG-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 44
-; EG-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 46
-; EG-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 48
+; EGCM-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 44
+; EGCM-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 46
+; EGCM-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 48
; SI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
@@ -233,9 +284,9 @@ entry:
; FUNC-LABEL: {{^}}v3i32_arg:
; HSA-VI: kernarg_segment_byte_size = 32
; HSA-VI: kernarg_segment_alignment = 4
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
; MESA-VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x34
; HSA-VI: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x10
@@ -248,9 +299,9 @@ entry:
; FUNC-LABEL: {{^}}v3f32_arg:
; HSA-VI: kernarg_segment_byte_size = 32
; HSA-VI: kernarg_segment_alignment = 4
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
; MESA-VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x34
; HSA-VI: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x10
@@ -263,10 +314,10 @@ entry:
; FUNC-LABEL: {{^}}v4i8_arg:
; HSA-VI: kernarg_segment_byte_size = 12
; HSA-VI: kernarg_segment_alignment = 4
-; EG: VTX_READ_8
-; EG: VTX_READ_8
-; EG: VTX_READ_8
-; EG: VTX_READ_8
+; EGCM: VTX_READ_8
+; EGCM: VTX_READ_8
+; EGCM: VTX_READ_8
+; EGCM: VTX_READ_8
; GCN-DAG: s_load_dwordx2 s
; GCN-DAG: s_load_dword s
@@ -279,10 +330,10 @@ entry:
; FUNC-LABEL: {{^}}v4i16_arg:
; HSA-VI: kernarg_segment_byte_size = 16
; HSA-VI: kernarg_segment_alignment = 4
-; EG: VTX_READ_16
-; EG: VTX_READ_16
-; EG: VTX_READ_16
-; EG: VTX_READ_16
+; EGCM: VTX_READ_16
+; EGCM: VTX_READ_16
+; EGCM: VTX_READ_16
+; EGCM: VTX_READ_16
; SI-DAG: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0xb
; SI-DAG: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x9
@@ -305,10 +356,10 @@ entry:
; FUNC-LABEL: {{^}}v4i32_arg:
; HSA-VI: kernarg_segment_byte_size = 32
; HSA-VI: kernarg_segment_alignment = 4
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
; MESA-VI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x34
@@ -322,10 +373,10 @@ entry:
; FUNC-LABEL: {{^}}v4f32_arg:
; HSA-VI: kernarg_segment_byte_size = 32
; HSA-VI: kernarg_segment_alignment = 4
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
; MESA-VI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x34
; HSA-VI: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x10
@@ -339,14 +390,14 @@ entry:
; FUNC-LABEL: {{^}}v8i8_arg:
; HSA-VI: kernarg_segment_byte_size = 16
; HSA-VI: kernarg_segment_alignment = 4
-; EG: VTX_READ_8
-; EG: VTX_READ_8
-; EG: VTX_READ_8
-; EG: VTX_READ_8
-; EG: VTX_READ_8
-; EG: VTX_READ_8
-; EG: VTX_READ_8
-; EG: VTX_READ_8
+; EGCM: VTX_READ_8
+; EGCM: VTX_READ_8
+; EGCM: VTX_READ_8
+; EGCM: VTX_READ_8
+; EGCM: VTX_READ_8
+; EGCM: VTX_READ_8
+; EGCM: VTX_READ_8
+; EGCM: VTX_READ_8
; SI-NOT: {{buffer|flat|global}}_load
; SI: s_load_dwordx2 s
@@ -367,14 +418,14 @@ entry:
; FUNC-LABEL: {{^}}v8i16_arg:
; HSA-VI: kernarg_segment_byte_size = 32
; HSA-VI: kernarg_segment_alignment = 4
-; EG: VTX_READ_16
-; EG: VTX_READ_16
-; EG: VTX_READ_16
-; EG: VTX_READ_16
-; EG: VTX_READ_16
-; EG: VTX_READ_16
-; EG: VTX_READ_16
-; EG: VTX_READ_16
+; EGCM: VTX_READ_16
+; EGCM: VTX_READ_16
+; EGCM: VTX_READ_16
+; EGCM: VTX_READ_16
+; EGCM: VTX_READ_16
+; EGCM: VTX_READ_16
+; EGCM: VTX_READ_16
+; EGCM: VTX_READ_16
; SI: s_load_dwordx4
; SI-NEXT: s_load_dwordx2
@@ -393,14 +444,14 @@ entry:
; FUNC-LABEL: {{^}}v8i32_arg:
; HSA-VI: kernarg_segment_byte_size = 64
; HSA-VI: kernarg_segment_alignment = 5
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
; SI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x11
; MESA-VI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x44
@@ -414,14 +465,14 @@ entry:
; FUNC-LABEL: {{^}}v8f32_arg:
; HSA-VI: kernarg_segment_byte_size = 64
; HSA-VI: kernarg_segment_alignment = 5
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
; SI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x11
define amdgpu_kernel void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind {
entry:
@@ -434,22 +485,22 @@ entry:
; FUNC-LABEL: {{^}}v16i8_arg:
; HSA-VI: kernarg_segment_byte_size = 32
; HSA-VI: kernarg_segment_alignment = 4
-; EG: VTX_READ_8
-; EG: VTX_READ_8
-; EG: VTX_READ_8
-; EG: VTX_READ_8
-; EG: VTX_READ_8
-; EG: VTX_READ_8
-; EG: VTX_READ_8
-; EG: VTX_READ_8
-; EG: VTX_READ_8
-; EG: VTX_READ_8
-; EG: VTX_READ_8
-; EG: VTX_READ_8
-; EG: VTX_READ_8
-; EG: VTX_READ_8
-; EG: VTX_READ_8
-; EG: VTX_READ_8
+; EGCM: VTX_READ_8
+; EGCM: VTX_READ_8
+; EGCM: VTX_READ_8
+; EGCM: VTX_READ_8
+; EGCM: VTX_READ_8
+; EGCM: VTX_READ_8
+; EGCM: VTX_READ_8
+; EGCM: VTX_READ_8
+; EGCM: VTX_READ_8
+; EGCM: VTX_READ_8
+; EGCM: VTX_READ_8
+; EGCM: VTX_READ_8
+; EGCM: VTX_READ_8
+; EGCM: VTX_READ_8
+; EGCM: VTX_READ_8
+; EGCM: VTX_READ_8
; SI: s_load_dwordx4 s
; SI-NEXT: s_load_dwordx2 s
@@ -470,23 +521,23 @@ entry:
; FUNC-LABEL: {{^}}v16i16_arg:
; HSA-VI: kernarg_segment_byte_size = 64
; HSA-VI: kernarg_segment_alignment = 5
-; EG: VTX_READ_16
-; EG: VTX_READ_16
-; EG: VTX_READ_16
-; EG: VTX_READ_16
-; EG: VTX_READ_16
-
-; EG: VTX_READ_16
-; EG: VTX_READ_16
-; EG: VTX_READ_16
-; EG: VTX_READ_16
-; EG: VTX_READ_16
-; EG: VTX_READ_16
-; EG: VTX_READ_16
-; EG: VTX_READ_16
-; EG: VTX_READ_16
-; EG: VTX_READ_16
-; EG: VTX_READ_16
+; EGCM: VTX_READ_16
+; EGCM: VTX_READ_16
+; EGCM: VTX_READ_16
+; EGCM: VTX_READ_16
+; EGCM: VTX_READ_16
+
+; EGCM: VTX_READ_16
+; EGCM: VTX_READ_16
+; EGCM: VTX_READ_16
+; EGCM: VTX_READ_16
+; EGCM: VTX_READ_16
+; EGCM: VTX_READ_16
+; EGCM: VTX_READ_16
+; EGCM: VTX_READ_16
+; EGCM: VTX_READ_16
+; EGCM: VTX_READ_16
+; EGCM: VTX_READ_16
; SI: s_load_dwordx8 s
; SI-NEXT: s_load_dwordx2 s
@@ -505,22 +556,22 @@ entry:
; FUNC-LABEL: {{^}}v16i32_arg:
; HSA-VI: kernarg_segment_byte_size = 128
; HSA-VI: kernarg_segment_alignment = 6
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
; SI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x19
; MESA-VI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x64
; HSA-VI: s_load_dwordx16 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x40
@@ -533,22 +584,22 @@ entry:
; FUNC-LABEL: {{^}}v16f32_arg:
; HSA-VI: kernarg_segment_byte_size = 128
; HSA-VI: kernarg_segment_alignment = 6
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
-; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
; SI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x19
; MESA-VI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x64
; HSA-VI: s_load_dwordx16 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x40
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
index 6d2de108829d..cdfe9b460a01 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
@@ -480,5 +480,65 @@ define amdgpu_kernel void @test_export_vm_i32() #0 {
ret void
}
+; GCN-LABEL: {{^}}test_if_export_f32:
+; GCN: s_cbranch_execz
+; GCN: exp
+define amdgpu_ps void @test_if_export_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
+ %cc = icmp eq i32 %flag, 0
+ br i1 %cc, label %end, label %exp
+
+exp:
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 false)
+ br label %end
+
+end:
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_if_export_vm_f32:
+; GCN: s_cbranch_execz
+; GCN: exp
+define amdgpu_ps void @test_if_export_vm_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
+ %cc = icmp eq i32 %flag, 0
+ br i1 %cc, label %end, label %exp
+
+exp:
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 true)
+ br label %end
+
+end:
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_if_export_done_f32:
+; GCN: s_cbranch_execz
+; GCN: exp
+define amdgpu_ps void @test_if_export_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
+ %cc = icmp eq i32 %flag, 0
+ br i1 %cc, label %end, label %exp
+
+exp:
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 false)
+ br label %end
+
+end:
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_if_export_vm_done_f32:
+; GCN: s_cbranch_execz
+; GCN: exp
+define amdgpu_ps void @test_if_export_vm_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
+ %cc = icmp eq i32 %flag, 0
+ br i1 %cc, label %end, label %exp
+
+exp:
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
+ br label %end
+
+end:
+ ret void
+}
+
attributes #0 = { nounwind }
attributes #1 = { nounwind inaccessiblememonly }
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll
index 65ab3e04237b..7efb1850a277 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll
@@ -1,10 +1,10 @@
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX906
-declare float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %c)
+declare float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %c, i1 %clamp)
-; GFX906-LABEL: {{^}}test_llvm_amdgcn_fdot2
-; GFX906: v_dot2_f32_f16
-define amdgpu_kernel void @test_llvm_amdgcn_fdot2(
+; GFX906-LABEL: {{^}}test_llvm_amdgcn_fdot2_clamp
+; GFX906: v_dot2_f32_f16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
+define amdgpu_kernel void @test_llvm_amdgcn_fdot2_clamp(
float addrspace(1)* %r,
<2 x half> addrspace(1)* %a,
<2 x half> addrspace(1)* %b,
@@ -13,7 +13,23 @@ entry:
%a.val = load <2 x half>, <2 x half> addrspace(1)* %a
%b.val = load <2 x half>, <2 x half> addrspace(1)* %b
%c.val = load float, float addrspace(1)* %c
- %r.val = call float @llvm.amdgcn.fdot2(<2 x half> %a.val, <2 x half> %b.val, float %c.val)
+ %r.val = call float @llvm.amdgcn.fdot2(<2 x half> %a.val, <2 x half> %b.val, float %c.val, i1 1)
+ store float %r.val, float addrspace(1)* %r
+ ret void
+}
+
+; GFX906-LABEL: {{^}}test_llvm_amdgcn_fdot2_no_clamp
+; GFX906: v_dot2_f32_f16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
+define amdgpu_kernel void @test_llvm_amdgcn_fdot2_no_clamp(
+ float addrspace(1)* %r,
+ <2 x half> addrspace(1)* %a,
+ <2 x half> addrspace(1)* %b,
+ float addrspace(1)* %c) {
+entry:
+ %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
+ %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
+ %c.val = load float, float addrspace(1)* %c
+ %r.val = call float @llvm.amdgcn.fdot2(<2 x half> %a.val, <2 x half> %b.val, float %c.val, i1 0)
store float %r.val, float addrspace(1)* %r
ret void
}
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ltolz.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ltolz.ll
new file mode 100644
index 000000000000..2d66a0be0690
--- /dev/null
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ltolz.ll
@@ -0,0 +1,113 @@
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
+
+
+; GCN-LABEL: {{^}}sample_l_1d:
+; GCN: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_l_2d:
+; GCN: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %s, float %t, float -0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_l_1d:
+; GCN: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float -2.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_l_2d:
+; GCN: image_sample_c_lz v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_l_o_1d:
+; GCN: image_sample_lz_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %lod) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_l_o_2d:
+; GCN: image_sample_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_l_o_1d:
+; GCN: image_sample_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %lod) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_l_o_2d:
+; GCN: image_sample_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_l_2d:
+; GCN: image_gather4_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 15, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_c_l_2d:
+; GCN: image_gather4_c_lz v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_l_o_2d:
+; GCN: image_gather4_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @gather4_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_c_l_o_2d:
+; GCN: image_gather4_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @gather4_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.sdot2.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.sdot2.ll
index 0d8f28bbef16..f1894cc14cc3 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.sdot2.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.sdot2.ll
@@ -1,10 +1,10 @@
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX906
-declare i32 @llvm.amdgcn.sdot2(<2 x i16> %a, <2 x i16> %b, i32 %c)
+declare i32 @llvm.amdgcn.sdot2(<2 x i16> %a, <2 x i16> %b, i32 %c, i1 %clamp)
-; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot2
-; GFX906: v_dot2_i32_i16
-define amdgpu_kernel void @test_llvm_amdgcn_sdot2(
+; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot2_clamp
+; GFX906: v_dot2_i32_i16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
+define amdgpu_kernel void @test_llvm_amdgcn_sdot2_clamp(
i32 addrspace(1)* %r,
<2 x i16> addrspace(1)* %a,
<2 x i16> addrspace(1)* %b,
@@ -13,7 +13,23 @@ entry:
%a.val = load <2 x i16>, <2 x i16> addrspace(1)* %a
%b.val = load <2 x i16>, <2 x i16> addrspace(1)* %b
%c.val = load i32, i32 addrspace(1)* %c
- %r.val = call i32 @llvm.amdgcn.sdot2(<2 x i16> %a.val, <2 x i16> %b.val, i32 %c.val)
+ %r.val = call i32 @llvm.amdgcn.sdot2(<2 x i16> %a.val, <2 x i16> %b.val, i32 %c.val, i1 1)
+ store i32 %r.val, i32 addrspace(1)* %r
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot2_no_clamp
+; GFX906: v_dot2_i32_i16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
+define amdgpu_kernel void @test_llvm_amdgcn_sdot2_no_clamp(
+ i32 addrspace(1)* %r,
+ <2 x i16> addrspace(1)* %a,
+ <2 x i16> addrspace(1)* %b,
+ i32 addrspace(1)* %c) {
+entry:
+ %a.val = load <2 x i16>, <2 x i16> addrspace(1)* %a
+ %b.val = load <2 x i16>, <2 x i16> addrspace(1)* %b
+ %c.val = load i32, i32 addrspace(1)* %c
+ %r.val = call i32 @llvm.amdgcn.sdot2(<2 x i16> %a.val, <2 x i16> %b.val, i32 %c.val, i1 0)
store i32 %r.val, i32 addrspace(1)* %r
ret void
}
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll
index 8b664e6f9a4c..2651200a344e 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll
@@ -1,10 +1,10 @@
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX906
-declare i32 @llvm.amdgcn.sdot4(i32 %a, i32 %b, i32 %c)
+declare i32 @llvm.amdgcn.sdot4(i32 %a, i32 %b, i32 %c, i1 %clamp)
-; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot4
-; GFX906: v_dot4_i32_i8
-define amdgpu_kernel void @test_llvm_amdgcn_sdot4(
+; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot4_clamp
+; GFX906: v_dot4_i32_i8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
+define amdgpu_kernel void @test_llvm_amdgcn_sdot4_clamp(
i32 addrspace(1)* %r,
<4 x i8> addrspace(1)* %a,
<4 x i8> addrspace(1)* %b,
@@ -15,7 +15,25 @@ entry:
%a.val.cast = bitcast <4 x i8> %a.val to i32
%b.val.cast = bitcast <4 x i8> %b.val to i32
%c.val = load i32, i32 addrspace(1)* %c
- %r.val = call i32 @llvm.amdgcn.sdot4(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val)
+ %r.val = call i32 @llvm.amdgcn.sdot4(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val, i1 1)
+ store i32 %r.val, i32 addrspace(1)* %r
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot4_no_clamp
+; GFX906: v_dot4_i32_i8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
+define amdgpu_kernel void @test_llvm_amdgcn_sdot4_no_clamp(
+ i32 addrspace(1)* %r,
+ <4 x i8> addrspace(1)* %a,
+ <4 x i8> addrspace(1)* %b,
+ i32 addrspace(1)* %c) {
+entry:
+ %a.val = load <4 x i8>, <4 x i8> addrspace(1)* %a
+ %b.val = load <4 x i8>, <4 x i8> addrspace(1)* %b
+ %a.val.cast = bitcast <4 x i8> %a.val to i32
+ %b.val.cast = bitcast <4 x i8> %b.val to i32
+ %c.val = load i32, i32 addrspace(1)* %c
+ %r.val = call i32 @llvm.amdgcn.sdot4(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val, i1 0)
store i32 %r.val, i32 addrspace(1)* %r
ret void
}
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.sdot8.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.sdot8.ll
index e2466eae5394..456421c4984a 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.sdot8.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.sdot8.ll
@@ -1,10 +1,10 @@
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX906
-declare i32 @llvm.amdgcn.sdot8(i32 %a, i32 %b, i32 %c)
+declare i32 @llvm.amdgcn.sdot8(i32 %a, i32 %b, i32 %c, i1 %clamp)
-; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot8
-; GFX906: v_dot8_i32_i4
-define amdgpu_kernel void @test_llvm_amdgcn_sdot8(
+; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot8_clamp
+; GFX906: v_dot8_i32_i4 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
+define amdgpu_kernel void @test_llvm_amdgcn_sdot8_clamp(
i32 addrspace(1)* %r,
<8 x i4> addrspace(1)* %a,
<8 x i4> addrspace(1)* %b,
@@ -15,7 +15,25 @@ entry:
%a.val.cast = bitcast <8 x i4> %a.val to i32
%b.val.cast = bitcast <8 x i4> %b.val to i32
%c.val = load i32, i32 addrspace(1)* %c
- %r.val = call i32 @llvm.amdgcn.sdot8(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val)
+ %r.val = call i32 @llvm.amdgcn.sdot8(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val, i1 1)
+ store i32 %r.val, i32 addrspace(1)* %r
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot8_no_clamp
+; GFX906: v_dot8_i32_i4 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
+define amdgpu_kernel void @test_llvm_amdgcn_sdot8_no_clamp(
+ i32 addrspace(1)* %r,
+ <8 x i4> addrspace(1)* %a,
+ <8 x i4> addrspace(1)* %b,
+ i32 addrspace(1)* %c) {
+entry:
+ %a.val = load <8 x i4>, <8 x i4> addrspace(1)* %a
+ %b.val = load <8 x i4>, <8 x i4> addrspace(1)* %b
+ %a.val.cast = bitcast <8 x i4> %a.val to i32
+ %b.val.cast = bitcast <8 x i4> %b.val to i32
+ %c.val = load i32, i32 addrspace(1)* %c
+ %r.val = call i32 @llvm.amdgcn.sdot8(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val, i1 0)
store i32 %r.val, i32 addrspace(1)* %r
ret void
}
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.ll
index 594f76048790..4f8cd6f682e6 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.ll
@@ -136,6 +136,21 @@ body:
ret void
}
+; GCN-LABEL: {{^}}if_sendmsg:
+; GCN: s_cbranch_execz
+; GCN: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
+define amdgpu_gs void @if_sendmsg(i32 %flag) #0 {
+ %cc = icmp eq i32 %flag, 0
+ br i1 %cc, label %sendmsg, label %end
+
+sendmsg:
+ call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
+ br label %end
+
+end:
+ ret void
+}
+
declare void @llvm.amdgcn.s.sendmsg(i32, i32) #0
declare void @llvm.amdgcn.s.sendmsghalt(i32, i32) #0
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll
index b2912cb23343..18ca71d33bcc 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll
@@ -1,10 +1,10 @@
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX906
-declare i32 @llvm.amdgcn.udot2(<2 x i16> %a, <2 x i16> %b, i32 %c)
+declare i32 @llvm.amdgcn.udot2(<2 x i16> %a, <2 x i16> %b, i32 %c, i1 %clamp)
-; GCN-LABEL: {{^}}test_llvm_amdgcn_udot2
-; GFX906: v_dot2_u32_u16
-define amdgpu_kernel void @test_llvm_amdgcn_udot2(
+; GCN-LABEL: {{^}}test_llvm_amdgcn_udot2_clamp
+; GFX906: v_dot2_u32_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
+define amdgpu_kernel void @test_llvm_amdgcn_udot2_clamp(
i32 addrspace(1)* %r,
<2 x i16> addrspace(1)* %a,
<2 x i16> addrspace(1)* %b,
@@ -13,7 +13,23 @@ entry:
%a.val = load <2 x i16>, <2 x i16> addrspace(1)* %a
%b.val = load <2 x i16>, <2 x i16> addrspace(1)* %b
%c.val = load i32, i32 addrspace(1)* %c
- %r.val = call i32 @llvm.amdgcn.udot2(<2 x i16> %a.val, <2 x i16> %b.val, i32 %c.val)
+ %r.val = call i32 @llvm.amdgcn.udot2(<2 x i16> %a.val, <2 x i16> %b.val, i32 %c.val, i1 1)
+ store i32 %r.val, i32 addrspace(1)* %r
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_llvm_amdgcn_udot2_no_clamp
+; GFX906: v_dot2_u32_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
+define amdgpu_kernel void @test_llvm_amdgcn_udot2_no_clamp(
+ i32 addrspace(1)* %r,
+ <2 x i16> addrspace(1)* %a,
+ <2 x i16> addrspace(1)* %b,
+ i32 addrspace(1)* %c) {
+entry:
+ %a.val = load <2 x i16>, <2 x i16> addrspace(1)* %a
+ %b.val = load <2 x i16>, <2 x i16> addrspace(1)* %b
+ %c.val = load i32, i32 addrspace(1)* %c
+ %r.val = call i32 @llvm.amdgcn.udot2(<2 x i16> %a.val, <2 x i16> %b.val, i32 %c.val, i1 0)
store i32 %r.val, i32 addrspace(1)* %r
ret void
}
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.udot4.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.udot4.ll
index 5ce060de7003..73d6a9ce968b 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.udot4.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.udot4.ll
@@ -1,10 +1,10 @@
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX906
-declare i32 @llvm.amdgcn.udot4(i32 %a, i32 %b, i32 %c)
+declare i32 @llvm.amdgcn.udot4(i32 %a, i32 %b, i32 %c, i1 %clamp)
-; GCN-LABEL: {{^}}test_llvm_amdgcn_udot4
-; GFX906: v_dot4_u32_u8
-define amdgpu_kernel void @test_llvm_amdgcn_udot4(
+; GCN-LABEL: {{^}}test_llvm_amdgcn_udot4_clamp
+; GFX906: v_dot4_u32_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
+define amdgpu_kernel void @test_llvm_amdgcn_udot4_clamp(
i32 addrspace(1)* %r,
<4 x i8> addrspace(1)* %a,
<4 x i8> addrspace(1)* %b,
@@ -15,7 +15,25 @@ entry:
%a.val.cast = bitcast <4 x i8> %a.val to i32
%b.val.cast = bitcast <4 x i8> %b.val to i32
%c.val = load i32, i32 addrspace(1)* %c
- %r.val = call i32 @llvm.amdgcn.udot4(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val)
+ %r.val = call i32 @llvm.amdgcn.udot4(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val, i1 1)
+ store i32 %r.val, i32 addrspace(1)* %r
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_llvm_amdgcn_udot4_no_clamp
+; GFX906: v_dot4_u32_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
+define amdgpu_kernel void @test_llvm_amdgcn_udot4_no_clamp(
+ i32 addrspace(1)* %r,
+ <4 x i8> addrspace(1)* %a,
+ <4 x i8> addrspace(1)* %b,
+ i32 addrspace(1)* %c) {
+entry:
+ %a.val = load <4 x i8>, <4 x i8> addrspace(1)* %a
+ %b.val = load <4 x i8>, <4 x i8> addrspace(1)* %b
+ %a.val.cast = bitcast <4 x i8> %a.val to i32
+ %b.val.cast = bitcast <4 x i8> %b.val to i32
+ %c.val = load i32, i32 addrspace(1)* %c
+ %r.val = call i32 @llvm.amdgcn.udot4(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val, i1 0)
store i32 %r.val, i32 addrspace(1)* %r
ret void
}
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.udot8.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.udot8.ll
index 2599305bc8e0..c2f80cac8f7f 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.udot8.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.udot8.ll
@@ -1,10 +1,10 @@
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX906
-declare i32 @llvm.amdgcn.udot8(i32 %a, i32 %b, i32 %c)
+declare i32 @llvm.amdgcn.udot8(i32 %a, i32 %b, i32 %c, i1 %clamp)
-; GCN-LABEL: {{^}}test_llvm_amdgcn_udot8
-; GFX906: v_dot8_u32_u4
-define amdgpu_kernel void @test_llvm_amdgcn_udot8(
+; GCN-LABEL: {{^}}test_llvm_amdgcn_udot8_clamp
+; GFX906: v_dot8_u32_u4 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
+define amdgpu_kernel void @test_llvm_amdgcn_udot8_clamp(
i32 addrspace(1)* %r,
<8 x i4> addrspace(1)* %a,
<8 x i4> addrspace(1)* %b,
@@ -15,7 +15,25 @@ entry:
%a.val.cast = bitcast <8 x i4> %a.val to i32
%b.val.cast = bitcast <8 x i4> %b.val to i32
%c.val = load i32, i32 addrspace(1)* %c
- %r.val = call i32 @llvm.amdgcn.udot8(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val)
+ %r.val = call i32 @llvm.amdgcn.udot8(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val, i1 1)
+ store i32 %r.val, i32 addrspace(1)* %r
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_llvm_amdgcn_udot8_no_clamp
+; GFX906: v_dot8_u32_u4 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
+define amdgpu_kernel void @test_llvm_amdgcn_udot8_no_clamp(
+ i32 addrspace(1)* %r,
+ <8 x i4> addrspace(1)* %a,
+ <8 x i4> addrspace(1)* %b,
+ i32 addrspace(1)* %c) {
+entry:
+ %a.val = load <8 x i4>, <8 x i4> addrspace(1)* %a
+ %b.val = load <8 x i4>, <8 x i4> addrspace(1)* %b
+ %a.val.cast = bitcast <8 x i4> %a.val to i32
+ %b.val.cast = bitcast <8 x i4> %b.val to i32
+ %c.val = load i32, i32 addrspace(1)* %c
+ %r.val = call i32 @llvm.amdgcn.udot8(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val, i1 0)
store i32 %r.val, i32 addrspace(1)* %r
ret void
}
diff --git a/test/CodeGen/AMDGPU/lower-kernargs.ll b/test/CodeGen/AMDGPU/lower-kernargs.ll
index fb903cfd8e97..630aa4a96bfb 100644
--- a/test/CodeGen/AMDGPU/lower-kernargs.ll
+++ b/test/CodeGen/AMDGPU/lower-kernargs.ll
@@ -98,7 +98,7 @@ define amdgpu_kernel void @kern_zeroext_i8(i8 zeroext %arg) #0 {
; MESA-NEXT: [[KERN_ZEROEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
-; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !range !1, !invariant.load !0
+; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; MESA-NEXT: store i8 [[TMP2]], i8 addrspace(1)* undef, align 1
; MESA-NEXT: ret void
@@ -121,7 +121,7 @@ define amdgpu_kernel void @kern_zeroext_i16(i16 zeroext %arg) #0 {
; MESA-NEXT: [[KERN_ZEROEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I16_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
-; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !range !2, !invariant.load !0
+; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; MESA-NEXT: store i16 [[TMP2]], i16 addrspace(1)* undef, align 1
; MESA-NEXT: ret void
@@ -144,7 +144,7 @@ define amdgpu_kernel void @kern_signext_i8(i8 signext %arg) #0 {
; MESA-NEXT: [[KERN_SIGNEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_SIGNEXT_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
-; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !range !3, !invariant.load !0
+; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; MESA-NEXT: store i8 [[TMP2]], i8 addrspace(1)* undef, align 1
; MESA-NEXT: ret void
@@ -167,7 +167,7 @@ define amdgpu_kernel void @kern_signext_i16(i16 signext %arg) #0 {
; MESA-NEXT: [[KERN_SIGNEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_SIGNEXT_I16_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
-; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !range !4, !invariant.load !0
+; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; MESA-NEXT: store i16 [[TMP2]], i16 addrspace(1)* undef, align 1
; MESA-NEXT: ret void
@@ -1160,7 +1160,7 @@ define amdgpu_kernel void @kern_global_ptr_dereferencable(i8 addrspace(1)* deref
; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
-; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !dereferenceable !5
+; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !dereferenceable !1
; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
; MESA-NEXT: ret void
;
@@ -1181,7 +1181,7 @@ define amdgpu_kernel void @kern_global_ptr_dereferencable_or_null(i8 addrspace(1
; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
-; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !dereferenceable_or_null !6
+; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !dereferenceable_or_null !2
; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
; MESA-NEXT: ret void
;
@@ -1223,7 +1223,7 @@ define amdgpu_kernel void @kern_align32_global_ptr(i8 addrspace(1)* align 1024 %
; MESA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
-; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !align !7
+; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !align !3
; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
; MESA-NEXT: ret void
;
@@ -1432,17 +1432,7 @@ attributes #0 = { nounwind "target-cpu"="kaveri" }
attributes #1 = { nounwind "target-cpu"="kaveri" "amdgpu-implicitarg-num-bytes"="40" }
attributes #2 = { nounwind "target-cpu"="tahiti" }
-; HSA: 0 = !{}
-; HSA: !1 = !{i64 42}
-; HSA: !2 = !{i64 128}
-; HSA: !3 = !{i64 1024}
-
-
-; MESA: !0 = !{}
-; MESA: !1 = !{i32 0, i32 256}
-; MESA: !2 = !{i32 0, i32 65536}
-; MESA: !3 = !{i32 -128, i32 128}
-; MESA: !4 = !{i32 -32768, i32 32768}
-; MESA: !5 = !{i64 42}
-; MESA: !6 = !{i64 128}
-; MESA: !7 = !{i64 1024}
+; GCN: 0 = !{}
+; GCN: !1 = !{i64 42}
+; GCN: !2 = !{i64 128}
+; GCN: !3 = !{i64 1024}
diff --git a/test/CodeGen/AMDGPU/mad-mix-lo.ll b/test/CodeGen/AMDGPU/mad-mix-lo.ll
index 848e8830a1a4..ed7b67f7e6a2 100644
--- a/test/CodeGen/AMDGPU/mad-mix-lo.ll
+++ b/test/CodeGen/AMDGPU/mad-mix-lo.ll
@@ -112,12 +112,12 @@ define <3 x half> @v_mad_mix_v3f32(<3 x half> %src0, <3 x half> %src1, <3 x half
; GCN-LABEL: {{^}}v_mad_mix_v4f32:
; GCN: s_waitcnt
-; GFX9-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
-; GFX9-NEXT: v_mad_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1]
-; GFX9-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; GFX9-NEXT: v_mad_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; GFX9-NEXT: v_mov_b32_e32 v0, v6
-; GFX9-NEXT: v_mov_b32_e32 v1, v7
+; GFX9-NEXT: v_mad_mixlo_f16 v6, v1, v3, v5 op_sel_hi:[1,1,1]
+; GFX9-NEXT: v_mad_mixlo_f16 v7, v0, v2, v4 op_sel_hi:[1,1,1]
+; GFX9-NEXT: v_mad_mixhi_f16 v7, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GFX9-NEXT: v_mad_mixhi_f16 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GFX9-NEXT: v_mov_b32_e32 v0, v7
+; GFX9-NEXT: v_mov_b32_e32 v1, v6
; GFX9-NEXT: s_setpc_b64
define <4 x half> @v_mad_mix_v4f32(<4 x half> %src0, <4 x half> %src1, <4 x half> %src2) #0 {
%src0.ext = fpext <4 x half> %src0 to <4 x float>
@@ -169,11 +169,11 @@ define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %s
; GCN-LABEL: {{^}}v_mad_mix_v4f32_clamp_postcvt:
; GCN: s_waitcnt
; GFX9-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
-; GFX9-NEXT: v_mad_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1] clamp
; GFX9-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GFX9-NEXT: v_mad_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GFX9-NEXT: v_mad_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
+; GFX9-NEXT: v_mad_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; GFX9-NEXT: v_mov_b32_e32 v0, v6
-; GFX9-NEXT: v_mov_b32_e32 v1, v7
+; GFX9-NEXT: v_mov_b32_e32 v1, v2
; GFX9-NEXT: s_setpc_b64
define <4 x half> @v_mad_mix_v4f32_clamp_postcvt(<4 x half> %src0, <4 x half> %src1, <4 x half> %src2) #0 {
%src0.ext = fpext <4 x half> %src0 to <4 x float>
@@ -267,10 +267,11 @@ define <3 x half> @v_mad_mix_v3f32_clamp_precvt(<3 x half> %src0, <3 x half> %sr
}
; GCN-LABEL: {{^}}v_mad_mix_v4f32_clamp_precvt:
-; GFX9: v_mad_mix_f32 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GFX9: v_mad_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp
-; GFX9: v_mad_mix_f32 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GFX9: v_mad_mix_f32 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; GFX9: v_mad_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
+; GFX9: v_mad_mix_f32 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GFX9: v_mad_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp
+
; GFX9: v_cvt_f16_f32
; GFX9: v_cvt_f16_f32
; GFX9: v_cvt_f16_f32
diff --git a/test/CodeGen/AMDGPU/mad-mix.ll b/test/CodeGen/AMDGPU/mad-mix.ll
index 6f56be1a8a23..b68a43ecb8c0 100644
--- a/test/CodeGen/AMDGPU/mad-mix.ll
+++ b/test/CodeGen/AMDGPU/mad-mix.ll
@@ -54,13 +54,13 @@ define float @v_mad_mix_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %
}
; GCN-LABEL: {{^}}v_mad_mix_v2f32:
-; GFX900: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mad_mix_f32 v1, v0, v3, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; GFX900-NEXT: v_mad_mix_f32 v0, v0, v3, v2 op_sel_hi:[1,1,1]
+; GFX900: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
+; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX906: v_mov_b32_e32 v3, v1
-; GFX906-NEXT: v_fma_mix_f32 v1, v0, v3, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; GFX906-NEXT: v_fma_mix_f32 v0, v0, v3, v2 op_sel_hi:[1,1,1]
+; GFX906: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
+; GFX906-NEXT: v_mov_b32_e32 v1, v3
; CIVI: v_mac_f32
define <2 x float> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
@@ -73,14 +73,14 @@ define <2 x float> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x hal
; GCN-LABEL: {{^}}v_mad_mix_v2f32_shuffle:
; GCN: s_waitcnt
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mad_mix_f32 v1, v0, v3, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1]
-; GFX900-NEXT: v_mad_mix_f32 v0, v0, v3, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1]
+; GFX900: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1]
+; GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1]
+; GFX900-NEXT: v_mov_b32_e32 v0, v3
; GFX900-NEXT: s_setpc_b64
-; GFX906-NEXT: v_mov_b32_e32 v3, v1
-; GFX906-NEXT: v_fma_mix_f32 v1, v0, v3, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1]
-; GFX906-NEXT: v_fma_mix_f32 v0, v0, v3, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1]
+; GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1]
+; GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1]
+; GFX906-NEXT: v_mov_b32_e32 v0, v3
; GFX906-NEXT: s_setpc_b64
; CIVI: v_mac_f32
@@ -274,13 +274,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imm63(half %src0, half %src1) #0 {
}
; GCN-LABEL: {{^}}v_mad_mix_v2f32_f32imm1:
-; GFX9: v_mov_b32_e32 v2, v1
; GFX9: v_mov_b32_e32 v3, 1.0
-; GFX900: v_mad_mix_f32 v1, v0, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
-; GFX900: v_mad_mix_f32 v0, v0, v2, v3 op_sel_hi:[1,1,0] ; encoding
+; GFX900: v_mad_mix_f32 v2, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
+; GFX900: v_mad_mix_f32 v0, v0, v1, v3 op_sel_hi:[1,1,0] ; encoding
+; GFX900: v_mov_b32_e32 v1, v2
-; GFX906: v_fma_mix_f32 v1, v0, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
-; GFX906: v_fma_mix_f32 v0, v0, v2, v3 op_sel_hi:[1,1,0] ; encoding
+; GFX906: v_fma_mix_f32 v2, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
+; GFX906: v_fma_mix_f32 v0, v0, v1, v3 op_sel_hi:[1,1,0] ; encoding
+; GFX906: v_mov_b32_e32 v1, v2
define <2 x float> @v_mad_mix_v2f32_f32imm1(<2 x half> %src0, <2 x half> %src1) #0 {
%src0.ext = fpext <2 x half> %src0 to <2 x float>
%src1.ext = fpext <2 x half> %src1 to <2 x float>
@@ -289,13 +290,15 @@ define <2 x float> @v_mad_mix_v2f32_f32imm1(<2 x half> %src0, <2 x half> %src1)
}
; GCN-LABEL: {{^}}v_mad_mix_v2f32_cvtf16imminv2pi:
-; GFX9: v_mov_b32_e32 v2, v1
; GFX9: v_mov_b32_e32 v3, 0x3e230000
-; GFX900: v_mad_mix_f32 v1, v0, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
-; GFX900: v_mad_mix_f32 v0, v0, v2, v3 op_sel_hi:[1,1,0] ; encoding
-; GFX906: v_fma_mix_f32 v1, v0, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
-; GFX906: v_fma_mix_f32 v0, v0, v2, v3 op_sel_hi:[1,1,0] ; encoding
+; GFX900: v_mad_mix_f32 v2, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
+; GFX900: v_mad_mix_f32 v0, v0, v1, v3 op_sel_hi:[1,1,0] ; encoding
+; GFX900: v_mov_b32_e32 v1, v2
+
+; GFX906: v_fma_mix_f32 v2, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
+; GFX906: v_fma_mix_f32 v0, v0, v1, v3 op_sel_hi:[1,1,0] ; encoding
+; GFX906: v_mov_b32_e32 v1, v2
define <2 x float> @v_mad_mix_v2f32_cvtf16imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 {
%src0.ext = fpext <2 x half> %src0 to <2 x float>
%src1.ext = fpext <2 x half> %src1 to <2 x float>
@@ -305,14 +308,15 @@ define <2 x float> @v_mad_mix_v2f32_cvtf16imminv2pi(<2 x half> %src0, <2 x half>
}
; GCN-LABEL: {{^}}v_mad_mix_v2f32_f32imminv2pi:
-; GFX9: v_mov_b32_e32 v2, v1
; GFX9: v_mov_b32_e32 v3, 0.15915494
-; GFX900: v_mad_mix_f32 v1, v0, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
-; GFX900: v_mad_mix_f32 v0, v0, v2, v3 op_sel_hi:[1,1,0] ; encoding
+; GFX900: v_mad_mix_f32 v2, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
+; GFX900: v_mad_mix_f32 v0, v0, v1, v3 op_sel_hi:[1,1,0] ; encoding
+; GFX900: v_mov_b32_e32 v1, v2
-; GFX906: v_fma_mix_f32 v1, v0, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
-; GFX906: v_fma_mix_f32 v0, v0, v2, v3 op_sel_hi:[1,1,0] ; encoding
+; GFX906: v_fma_mix_f32 v2, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
+; GFX906: v_fma_mix_f32 v0, v0, v1, v3 op_sel_hi:[1,1,0] ; encoding
+; GFX906: v_mov_b32_e32 v1, v2
define <2 x float> @v_mad_mix_v2f32_f32imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 {
%src0.ext = fpext <2 x half> %src0 to <2 x float>
%src1.ext = fpext <2 x half> %src1 to <2 x float>
diff --git a/test/CodeGen/AMDGPU/mul.i16.ll b/test/CodeGen/AMDGPU/mul.i16.ll
index 678fc3d1daf3..d8274105b823 100644
--- a/test/CodeGen/AMDGPU/mul.i16.ll
+++ b/test/CodeGen/AMDGPU/mul.i16.ll
@@ -90,8 +90,8 @@ define <3 x i16> @v_mul_v3i16(<3 x i16> %a, <3 x i16> %b) {
; VI: v_or_b32_e32
; GFX9: s_waitcnt
-; GFX9-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v2
+; GFX9-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX9-NEXT: s_setpc_b64
define <4 x i16> @v_mul_v4i16(<4 x i16> %a, <4 x i16> %b) {
%r.val = mul <4 x i16> %a, %b
diff --git a/test/CodeGen/AMDGPU/r600.extract-lowbits.ll b/test/CodeGen/AMDGPU/r600.extract-lowbits.ll
index bd02008096f0..71af6a9a4f51 100644
--- a/test/CodeGen/AMDGPU/r600.extract-lowbits.ll
+++ b/test/CodeGen/AMDGPU/r600.extract-lowbits.ll
@@ -1,5 +1,6 @@
-; RUN: llc -march=r600 -mtriple=r600-- -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=EG %s
-; RUN: llc -march=r600 -mtriple=r600-- -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=CM %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=r600-- -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG %s
+; RUN: llc -mtriple=r600-- -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM %s
; Loosely based on test/CodeGen/{X86,AArch64}/extract-lowbits.ll,
; but with all 64-bit tests, and tests with loads dropped.
@@ -15,11 +16,28 @@
; Pattern a. 32-bit
; ---------------------------------------------------------------------------- ;
-; R600-LABEL: bzhi32_a0:
-; EG: MEM_RAT_CACHELESS STORE_RAW [[RET:T[0-1]+\.[XYZW]]]
-; CM: MEM_RAT_CACHELESS STORE_DWORD [[RET:T[0-1]+\.[XYZW]]]
-; R600: BFE_UINT {{\*?}} [[RET]], KC0[2].Y, 0.0, KC0[2].Z
define amdgpu_kernel void @bzhi32_a0(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
+; EG-LABEL: bzhi32_a0:
+; EG: ; %bb.0:
+; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: ALU clause starting at 4:
+; EG-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
+;
+; CM-LABEL: bzhi32_a0:
+; CM: ; %bb.0:
+; CM-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+; CM-NEXT: ALU clause starting at 4:
+; CM-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
%onebit = shl i32 1, %numlowbits
%mask = add nsw i32 %onebit, -1
%masked = and i32 %mask, %val
@@ -27,11 +45,44 @@ define amdgpu_kernel void @bzhi32_a0(i32 %val, i32 %numlowbits, i32 addrspace(1)
ret void
}
-; R600-LABEL: bzhi32_a1_indexzext:
-; EG: MEM_RAT_CACHELESS STORE_RAW [[RET:T[0-1]+\.[XYZW]]]
-; CM: MEM_RAT_CACHELESS STORE_DWORD [[RET:T[0-1]+\.[XYZW]]]
-; R600: BFE_UINT {{\*?}} [[RET]], KC0[2].Y, 0.0, KC0[2].Z
define amdgpu_kernel void @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits, i32 addrspace(1)* %out) {
+; EG-LABEL: bzhi32_a1_indexzext:
+; EG: ; %bb.0:
+; EG-NEXT: ALU 0, @8, KC0[], KC1[]
+; EG-NEXT: TEX 0 @6
+; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_8 T0.X, T0.X, 40, #3
+; EG-NEXT: ALU clause starting at 8:
+; EG-NEXT: MOV * T0.X, 0.0,
+; EG-NEXT: ALU clause starting at 9:
+; EG-NEXT: BFE_INT * T0.W, T0.X, 0.0, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_UINT T0.X, KC0[2].Y, 0.0, PV.W,
+; EG-NEXT: LSHR * T1.X, KC0[2].W, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; CM-LABEL: bzhi32_a1_indexzext:
+; CM: ; %bb.0:
+; CM-NEXT: ALU 0, @8, KC0[], KC1[]
+; CM-NEXT: TEX 0 @6
+; CM-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+; CM-NEXT: Fetch clause starting at 6:
+; CM-NEXT: VTX_READ_8 T0.X, T0.X, 40, #3
+; CM-NEXT: ALU clause starting at 8:
+; CM-NEXT: MOV * T0.X, 0.0,
+; CM-NEXT: ALU clause starting at 9:
+; CM-NEXT: BFE_INT * T0.W, T0.X, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_UINT * T0.X, KC0[2].Y, 0.0, PV.W,
+; CM-NEXT: LSHR * T1.X, KC0[2].W, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%conv = zext i8 %numlowbits to i32
%onebit = shl i32 1, %conv
%mask = add nsw i32 %onebit, -1
@@ -40,11 +91,28 @@ define amdgpu_kernel void @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits,
ret void
}
-; R600-LABEL: bzhi32_a4_commutative:
-; EG: MEM_RAT_CACHELESS STORE_RAW [[RET:T[0-1]+\.[XYZW]]]
-; CM: MEM_RAT_CACHELESS STORE_DWORD [[RET:T[0-1]+\.[XYZW]]]
-; R600: BFE_UINT {{\*?}} [[RET]], KC0[2].Y, 0.0, KC0[2].Z
define amdgpu_kernel void @bzhi32_a4_commutative(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
+; EG-LABEL: bzhi32_a4_commutative:
+; EG: ; %bb.0:
+; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: ALU clause starting at 4:
+; EG-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
+;
+; CM-LABEL: bzhi32_a4_commutative:
+; CM: ; %bb.0:
+; CM-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+; CM-NEXT: ALU clause starting at 4:
+; CM-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
%onebit = shl i32 1, %numlowbits
%mask = add nsw i32 %onebit, -1
%masked = and i32 %val, %mask ; swapped order
@@ -56,11 +124,28 @@ define amdgpu_kernel void @bzhi32_a4_commutative(i32 %val, i32 %numlowbits, i32
; Pattern b. 32-bit
; ---------------------------------------------------------------------------- ;
-; R600-LABEL: bzhi32_b0:
-; EG: MEM_RAT_CACHELESS STORE_RAW [[RET:T[0-1]+\.[XYZW]]]
-; CM: MEM_RAT_CACHELESS STORE_DWORD [[RET:T[0-1]+\.[XYZW]]]
-; R600: BFE_UINT {{\*?}} [[RET]], KC0[2].Y, 0.0, KC0[2].Z
define amdgpu_kernel void @bzhi32_b0(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
+; EG-LABEL: bzhi32_b0:
+; EG: ; %bb.0:
+; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: ALU clause starting at 4:
+; EG-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
+;
+; CM-LABEL: bzhi32_b0:
+; CM: ; %bb.0:
+; CM-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+; CM-NEXT: ALU clause starting at 4:
+; CM-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
%notmask = shl i32 -1, %numlowbits
%mask = xor i32 %notmask, -1
%masked = and i32 %mask, %val
@@ -68,11 +153,44 @@ define amdgpu_kernel void @bzhi32_b0(i32 %val, i32 %numlowbits, i32 addrspace(1)
ret void
}
-; R600-LABEL: bzhi32_b1_indexzext:
-; EG: MEM_RAT_CACHELESS STORE_RAW [[RET:T[0-1]+\.[XYZW]]]
-; CM: MEM_RAT_CACHELESS STORE_DWORD [[RET:T[0-1]+\.[XYZW]]]
-; R600: BFE_UINT {{\*?}} [[RET]], KC0[2].Y, 0.0, KC0[2].Z
define amdgpu_kernel void @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits, i32 addrspace(1)* %out) {
+; EG-LABEL: bzhi32_b1_indexzext:
+; EG: ; %bb.0:
+; EG-NEXT: ALU 0, @8, KC0[], KC1[]
+; EG-NEXT: TEX 0 @6
+; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_8 T0.X, T0.X, 40, #3
+; EG-NEXT: ALU clause starting at 8:
+; EG-NEXT: MOV * T0.X, 0.0,
+; EG-NEXT: ALU clause starting at 9:
+; EG-NEXT: BFE_INT * T0.W, T0.X, 0.0, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_UINT T0.X, KC0[2].Y, 0.0, PV.W,
+; EG-NEXT: LSHR * T1.X, KC0[2].W, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; CM-LABEL: bzhi32_b1_indexzext:
+; CM: ; %bb.0:
+; CM-NEXT: ALU 0, @8, KC0[], KC1[]
+; CM-NEXT: TEX 0 @6
+; CM-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+; CM-NEXT: Fetch clause starting at 6:
+; CM-NEXT: VTX_READ_8 T0.X, T0.X, 40, #3
+; CM-NEXT: ALU clause starting at 8:
+; CM-NEXT: MOV * T0.X, 0.0,
+; CM-NEXT: ALU clause starting at 9:
+; CM-NEXT: BFE_INT * T0.W, T0.X, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_UINT * T0.X, KC0[2].Y, 0.0, PV.W,
+; CM-NEXT: LSHR * T1.X, KC0[2].W, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%conv = zext i8 %numlowbits to i32
%notmask = shl i32 -1, %conv
%mask = xor i32 %notmask, -1
@@ -81,11 +199,28 @@ define amdgpu_kernel void @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits,
ret void
}
-; R600-LABEL: bzhi32_b4_commutative:
-; EG: MEM_RAT_CACHELESS STORE_RAW [[RET:T[0-1]+\.[XYZW]]]
-; CM: MEM_RAT_CACHELESS STORE_DWORD [[RET:T[0-1]+\.[XYZW]]]
-; R600: BFE_UINT {{\*?}} [[RET]], KC0[2].Y, 0.0, KC0[2].Z
define amdgpu_kernel void @bzhi32_b4_commutative(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
+; EG-LABEL: bzhi32_b4_commutative:
+; EG: ; %bb.0:
+; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: ALU clause starting at 4:
+; EG-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
+;
+; CM-LABEL: bzhi32_b4_commutative:
+; CM: ; %bb.0:
+; CM-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+; CM-NEXT: ALU clause starting at 4:
+; CM-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
%notmask = shl i32 -1, %numlowbits
%mask = xor i32 %notmask, -1
%masked = and i32 %val, %mask ; swapped order
@@ -97,11 +232,28 @@ define amdgpu_kernel void @bzhi32_b4_commutative(i32 %val, i32 %numlowbits, i32
; Pattern c. 32-bit
; ---------------------------------------------------------------------------- ;
-; R600-LABEL: bzhi32_c0:
-; EG: MEM_RAT_CACHELESS STORE_RAW [[RET:T[0-1]+\.[XYZW]]]
-; CM: MEM_RAT_CACHELESS STORE_DWORD [[RET:T[0-1]+\.[XYZW]]]
-; R600: BFE_UINT {{\*?}} [[RET]], KC0[2].Y, 0.0, KC0[2].Z
define amdgpu_kernel void @bzhi32_c0(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
+; EG-LABEL: bzhi32_c0:
+; EG: ; %bb.0:
+; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: ALU clause starting at 4:
+; EG-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
+;
+; CM-LABEL: bzhi32_c0:
+; CM: ; %bb.0:
+; CM-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+; CM-NEXT: ALU clause starting at 4:
+; CM-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
%numhighbits = sub i32 32, %numlowbits
%mask = lshr i32 -1, %numhighbits
%masked = and i32 %mask, %val
@@ -109,17 +261,52 @@ define amdgpu_kernel void @bzhi32_c0(i32 %val, i32 %numlowbits, i32 addrspace(1)
ret void
}
-; R600-LABEL: bzhi32_c1_indexzext:
-; EG: MEM_RAT_CACHELESS STORE_RAW [[RET:T[0-1]+\.[XYZW]]]
-; CM: MEM_RAT_CACHELESS STORE_DWORD [[RET:T[0-1]+\.[XYZW]]]
-; R600: SUB_INT {{\*?}} [[SUBR:T[0-9]+]].[[SUBC:[XYZW]]], literal.x, KC0[2].Z
-; R600-NEXT: 32
-; R600-NEXT: AND_INT {{\*?}} {{T[0-9]+}}.[[AND1C:[XYZW]]], {{T[0-9]+|PV}}.[[SUBC]], literal.x
-; R600-NEXT: 255
-; R600: LSHR {{\*?}} {{T[0-9]}}.[[LSHRC:[XYZW]]], literal.x, {{T[0-9]+|PV}}.[[AND1C]]
-; R600-NEXT: -1
-; R600-NEXT: AND_INT {{[* ]*}}[[RET]], {{T[0-9]+|PV}}.[[LSHRC]], KC0[2].Y
define amdgpu_kernel void @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits, i32 addrspace(1)* %out) {
+; EG-LABEL: bzhi32_c1_indexzext:
+; EG: ; %bb.0:
+; EG-NEXT: ALU 0, @8, KC0[], KC1[]
+; EG-NEXT: TEX 0 @6
+; EG-NEXT: ALU 8, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_8 T0.X, T0.X, 40, #3
+; EG-NEXT: ALU clause starting at 8:
+; EG-NEXT: MOV * T0.X, 0.0,
+; EG-NEXT: ALU clause starting at 9:
+; EG-NEXT: SUB_INT * T0.W, literal.x, T0.X,
+; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
+; EG-NEXT: LSHR * T0.W, literal.x, PV.W,
+; EG-NEXT: -1(nan), 0(0.000000e+00)
+; EG-NEXT: AND_INT T0.X, PV.W, KC0[2].Y,
+; EG-NEXT: LSHR * T1.X, KC0[2].W, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; CM-LABEL: bzhi32_c1_indexzext:
+; CM: ; %bb.0:
+; CM-NEXT: ALU 0, @8, KC0[], KC1[]
+; CM-NEXT: TEX 0 @6
+; CM-NEXT: ALU 8, @9, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+; CM-NEXT: Fetch clause starting at 6:
+; CM-NEXT: VTX_READ_8 T0.X, T0.X, 40, #3
+; CM-NEXT: ALU clause starting at 8:
+; CM-NEXT: MOV * T0.X, 0.0,
+; CM-NEXT: ALU clause starting at 9:
+; CM-NEXT: SUB_INT * T0.W, literal.x, T0.X,
+; CM-NEXT: 32(4.484155e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; CM-NEXT: 255(3.573311e-43), 0(0.000000e+00)
+; CM-NEXT: LSHR * T0.W, literal.x, PV.W,
+; CM-NEXT: -1(nan), 0(0.000000e+00)
+; CM-NEXT: AND_INT * T0.X, PV.W, KC0[2].Y,
+; CM-NEXT: LSHR * T1.X, KC0[2].W, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%numhighbits = sub i8 32, %numlowbits
%sh_prom = zext i8 %numhighbits to i32
%mask = lshr i32 -1, %sh_prom
@@ -128,11 +315,28 @@ define amdgpu_kernel void @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits, i32 add
ret void
}
-; R600-LABEL: bzhi32_c4_commutative:
-; EG: MEM_RAT_CACHELESS STORE_RAW [[RET:T[0-1]+\.[XYZW]]]
-; CM: MEM_RAT_CACHELESS STORE_DWORD [[RET:T[0-1]+\.[XYZW]]]
-; R600: BFE_UINT {{\*?}} [[RET]], KC0[2].Y, 0.0, KC0[2].Z
define amdgpu_kernel void @bzhi32_c4_commutative(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
+; EG-LABEL: bzhi32_c4_commutative:
+; EG: ; %bb.0:
+; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: ALU clause starting at 4:
+; EG-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
+;
+; CM-LABEL: bzhi32_c4_commutative:
+; CM: ; %bb.0:
+; CM-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+; CM-NEXT: ALU clause starting at 4:
+; CM-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
%numhighbits = sub i32 32, %numlowbits
%mask = lshr i32 -1, %numhighbits
%masked = and i32 %val, %mask ; swapped order
@@ -144,11 +348,28 @@ define amdgpu_kernel void @bzhi32_c4_commutative(i32 %val, i32 %numlowbits, i32
; Pattern d. 32-bit.
; ---------------------------------------------------------------------------- ;
-; R600-LABEL: bzhi32_d0:
-; EG: MEM_RAT_CACHELESS STORE_RAW [[RET:T[0-1]+\.[XYZW]]]
-; CM: MEM_RAT_CACHELESS STORE_DWORD [[RET:T[0-1]+\.[XYZW]]]
-; R600: BFE_UINT {{\*?}} [[RET]], KC0[2].Y, 0.0, KC0[2].Z
define amdgpu_kernel void @bzhi32_d0(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
+; EG-LABEL: bzhi32_d0:
+; EG: ; %bb.0:
+; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: ALU clause starting at 4:
+; EG-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
+;
+; CM-LABEL: bzhi32_d0:
+; CM: ; %bb.0:
+; CM-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+; CM-NEXT: ALU clause starting at 4:
+; CM-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
%numhighbits = sub i32 32, %numlowbits
%highbitscleared = shl i32 %val, %numhighbits
%masked = lshr i32 %highbitscleared, %numhighbits
@@ -156,16 +377,50 @@ define amdgpu_kernel void @bzhi32_d0(i32 %val, i32 %numlowbits, i32 addrspace(1)
ret void
}
-; R600-LABEL: bzhi32_d1_indexzext:
-; EG: MEM_RAT_CACHELESS STORE_RAW [[RET:T[0-1]+\.[XYZW]]]
-; CM: MEM_RAT_CACHELESS STORE_DWORD [[RET:T[0-1]+\.[XYZW]]]
-; R600: SUB_INT {{\*?}} [[SUBR:T[0-9]+]].[[SUBC:[XYZW]]], literal.x, KC0[2].Z
-; R600-NEXT: 32
-; R600-NEXT: AND_INT {{\*?}} [[AND:T[0-9]+\.[XYZW]]], {{T[0-9]+|PV}}.[[SUBC]], literal.x
-; R600-NEXT: 255
-; R600: LSHL {{\*?}} {{T[0-9]}}.[[LSHLC:[XYZW]]], KC0[2].Y, {{T[0-9]+|PV}}.[[AND1C]]
-; R600: LSHR {{[* ]*}}[[RET]], {{T[0-9]+|PV}}.[[LSHLC]], [[AND]]
define amdgpu_kernel void @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits, i32 addrspace(1)* %out) {
+; EG-LABEL: bzhi32_d1_indexzext:
+; EG: ; %bb.0:
+; EG-NEXT: ALU 0, @8, KC0[], KC1[]
+; EG-NEXT: TEX 0 @6
+; EG-NEXT: ALU 7, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_8 T0.X, T0.X, 40, #3
+; EG-NEXT: ALU clause starting at 8:
+; EG-NEXT: MOV * T0.X, 0.0,
+; EG-NEXT: ALU clause starting at 9:
+; EG-NEXT: SUB_INT * T0.W, literal.x, T0.X,
+; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
+; EG-NEXT: LSHL * T1.W, KC0[2].Y, PV.W,
+; EG-NEXT: LSHR T0.X, PV.W, T0.W,
+; EG-NEXT: LSHR * T1.X, KC0[2].W, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; CM-LABEL: bzhi32_d1_indexzext:
+; CM: ; %bb.0:
+; CM-NEXT: ALU 0, @8, KC0[], KC1[]
+; CM-NEXT: TEX 0 @6
+; CM-NEXT: ALU 7, @9, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+; CM-NEXT: Fetch clause starting at 6:
+; CM-NEXT: VTX_READ_8 T0.X, T0.X, 40, #3
+; CM-NEXT: ALU clause starting at 8:
+; CM-NEXT: MOV * T0.X, 0.0,
+; CM-NEXT: ALU clause starting at 9:
+; CM-NEXT: SUB_INT * T0.W, literal.x, T0.X,
+; CM-NEXT: 32(4.484155e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; CM-NEXT: 255(3.573311e-43), 0(0.000000e+00)
+; CM-NEXT: LSHL * T1.W, KC0[2].Y, PV.W,
+; CM-NEXT: LSHR * T0.X, PV.W, T0.W,
+; CM-NEXT: LSHR * T1.X, KC0[2].W, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%numhighbits = sub i8 32, %numlowbits
%sh_prom = zext i8 %numhighbits to i32
%highbitscleared = shl i32 %val, %sh_prom
diff --git a/test/CodeGen/AMDGPU/skip-if-dead.ll b/test/CodeGen/AMDGPU/skip-if-dead.ll
index 49c171e03de2..42a28b952739 100644
--- a/test/CodeGen/AMDGPU/skip-if-dead.ll
+++ b/test/CodeGen/AMDGPU/skip-if-dead.ll
@@ -72,10 +72,18 @@ define amdgpu_ps void @test_kill_depth_var_x2(float %x, float %y) #0 {
; CHECK-LABEL: {{^}}test_kill_depth_var_x2_instructions:
; CHECK-NEXT: ; %bb.0:
; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
+; CHECK-NEXT: s_cbranch_execnz BB6_2
; CHECK-NEXT: ; %bb.1:
+; CHECK-NEXT: exp
+; CHECK-NEXT: s_endpgm
+; CHECK-NEXT: BB6_2:
; CHECK: v_mov_b32_e64 v7, -1
; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7
-; CHECK-NEXT: ; %bb.2:
+; CHECK-NEXT: s_cbranch_execnz BB6_4
+; CHECK-NEXT: ; %bb.3:
+; CHECK-NEXT: exp
+; CHECK-NEXT: s_endpgm
+; CHECK-NEXT: BB6_4:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) #0 {
call void @llvm.AMDGPU.kill(float %x)
diff --git a/test/CodeGen/AMDGPU/store-global.ll b/test/CodeGen/AMDGPU/store-global.ll
index a40e6b2683e5..8f8df884502b 100644
--- a/test/CodeGen/AMDGPU/store-global.ll
+++ b/test/CodeGen/AMDGPU/store-global.ll
@@ -24,23 +24,12 @@ entry:
; EG: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X
; EG-NOT: MEM_RAT MSKOR
-; IG 0: Get the byte index and truncate the value
-; EG: AND_INT * T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x
-; EG: LSHL T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x
-; EG: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], KC0[2].Z, literal.y
-; EG-NEXT: 3(4.203895e-45), 255(3.573311e-43)
-
-
-; IG 1: Truncate the calculated the shift amount for the mask
-
-; IG 2: Shift the value and the mask
-; EG: LSHL T[[RW_GPR]].X, PS, PV.[[SHIFT_CHAN]]
-; EG: LSHL * T[[RW_GPR]].W, literal.x, PV.[[SHIFT_CHAN]]
-; EG-NEXT: 255
-; IG 3: Initialize the Y and Z channels to zero
-; XXX: An optimal scheduler should merge this into one of the prevous IGs.
-; EG: MOV T[[RW_GPR]].Y, 0.0
-; EG: MOV * T[[RW_GPR]].Z, 0.0
+; EG: VTX_READ_8
+; EG: AND_INT
+; EG: AND_INT
+; EG: LSHL
+; EG: LSHL
+; EG: LSHL
; SIVI: buffer_store_byte
; GFX9: global_store_byte
@@ -55,26 +44,13 @@ entry:
; EG: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X
; EG-NOT: MEM_RAT MSKOR
-; IG 0: Get the byte index and truncate the value
-
-
-; EG: AND_INT * T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x
-; EG-NEXT: 3(4.203895e-45),
-
-; EG: LSHL T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x
-; EG: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], KC0[2].Z, literal.y
-
-; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41)
-; IG 1: Truncate the calculated the shift amount for the mask
+; EG: VTX_READ_16
+; EG: AND_INT
+; EG: AND_INT
+; EG: LSHL
+; EG: LSHL
+; EG: LSHL
-; IG 2: Shift the value and the mask
-; EG: LSHL T[[RW_GPR]].X, PS, PV.[[SHIFT_CHAN]]
-; EG: LSHL * T[[RW_GPR]].W, literal.x, PV.[[SHIFT_CHAN]]
-; EG-NEXT: 65535
-; IG 3: Initialize the Y and Z channels to zero
-; XXX: An optimal scheduler should merge this into one of the prevous IGs.
-; EG: MOV T[[RW_GPR]].Y, 0.0
-; EG: MOV * T[[RW_GPR]].Z, 0.0
; SIVI: buffer_store_short
; GFX9: global_store_short
diff --git a/test/CodeGen/AMDGPU/store-private.ll b/test/CodeGen/AMDGPU/store-private.ll
index f9fc75023d4f..840dc509d28c 100644
--- a/test/CodeGen/AMDGPU/store-private.ll
+++ b/test/CodeGen/AMDGPU/store-private.ll
@@ -32,7 +32,9 @@ entry:
; EG: AND_INT * T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x
; EG: LSHL * T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x
; EG-NEXT: 3(4.203895e-45)
-; EG: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], KC0[2].Z, literal.x
+
+
+; EG: LSHL * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], literal.x, PV.W
; EG-NEXT: 255(3.573311e-43)
; EG: NOT_INT
@@ -57,12 +59,12 @@ entry:
; EG: MOVA_INT * AR.x (MASKED)
; EG: MOV [[OLD:T[0-9]\.[XYZW]]], {{.*}}AR.x
+; EG: VTX_READ_16
+
; IG 0: Get the byte index and truncate the value
; EG: AND_INT * T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x
; EG: LSHL * T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x
; EG-NEXT: 3(4.203895e-45)
-; EG: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], KC0[2].Z, literal.x
-; EG-NEXT: 65535(9.183409e-41)
; EG: NOT_INT
; EG: AND_INT {{[\* ]*}}[[CLR_CHAN:T[0-9]\.[XYZW]]], {{.*}}[[OLD]]
diff --git a/test/CodeGen/AMDGPU/zero_extend.ll b/test/CodeGen/AMDGPU/zero_extend.ll
index ee9bbb67c0e6..2f365cb503e1 100644
--- a/test/CodeGen/AMDGPU/zero_extend.ll
+++ b/test/CodeGen/AMDGPU/zero_extend.ll
@@ -51,11 +51,11 @@ define amdgpu_kernel void @s_cmp_zext_i1_to_i64(i64 addrspace(1)* %out, i32 %a,
; GCN: s_load_dword [[A:s[0-9]+]]
; GCN: s_load_dword [[B:s[0-9]+]]
-; SI: v_mov_b32_e32 [[V_A:v[0-9]+]], [[A]]
-; SI: v_cmp_eq_u32_e32 vcc, [[B]], [[V_A]]
-
-; VI: v_mov_b32_e32 [[V_B:v[0-9]+]], [[B]]
-; VI: v_cmp_eq_u32_e32 vcc, [[A]], [[V_B]]
+; GCN: s_mov_b32 [[MASK:s[0-9]+]], 0xffff{{$}}
+; GCN-DAG: s_and_b32 [[MASK_A:s[0-9]+]], [[A]], [[MASK]]
+; GCN-DAG: s_and_b32 [[MASK_B:s[0-9]+]], [[B]], [[MASK]]
+; GCN: v_mov_b32_e32 [[V_B:v[0-9]+]], [[B]]
+; GCN: v_cmp_eq_u32_e32 vcc, [[MASK_A]], [[V_B]]
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN: buffer_store_short [[RESULT]]
diff --git a/test/CodeGen/ARM/aggregate-padding.ll b/test/CodeGen/ARM/aggregate-padding.ll
index bc46a9cdf913..ae7ab90fcd2f 100644
--- a/test/CodeGen/ARM/aggregate-padding.ll
+++ b/test/CodeGen/ARM/aggregate-padding.ll
@@ -99,3 +99,19 @@ define i16 @test_i16_forced_stack([8 x double], double, i32, i32, [3 x i16] %arg
%sum = add i16 %val0, %val2
ret i16 %sum
}
+
+; [2 x <4 x i32>] should be aligned only on a 64-bit boundary and contiguous.
+; None of the two <4 x i32> elements should introduce any padding to 128 bits.
+define i32 @test_4xi32_64bit_aligned_and_contiguous([8 x double], float, [2 x <4 x i32>] %arg) nounwind {
+; CHECK-LABEL: test_4xi32_64bit_aligned_and_contiguous:
+; CHECK-DAG: ldr [[VAL0_0:r[0-9]+]], [sp, #8]
+; CHECK-DAG: ldr [[VAL1_0:r[0-9]+]], [sp, #24]
+; CHECK: add r0, [[VAL0_0]], [[VAL1_0]]
+
+ %val0 = extractvalue [2 x <4 x i32>] %arg, 0
+ %val0_0 = extractelement <4 x i32> %val0, i32 0
+ %val1 = extractvalue [2 x <4 x i32>] %arg, 1
+ %val1_0 = extractelement <4 x i32> %val1, i32 0
+ %sum = add i32 %val0_0, %val1_0
+ ret i32 %sum
+}
diff --git a/test/CodeGen/ARM/inline-asm-operand-implicit-cast.ll b/test/CodeGen/ARM/inline-asm-operand-implicit-cast.ll
index 8bb671e10fbf..45bdb124e032 100644
--- a/test/CodeGen/ARM/inline-asm-operand-implicit-cast.ll
+++ b/test/CodeGen/ARM/inline-asm-operand-implicit-cast.ll
@@ -17,6 +17,42 @@ define arm_aapcscc double @zerobits_double_soft() #0 {
ret double %1
}
+; Check support for returning a float in GPR with matching float input with
+; soft float ABI
+define arm_aapcscc float @flt_gpr_matching_in_op_soft(float %f) #0 {
+; CHECK-LABEL: flt_gpr_matching_in_op_soft
+; CHECK: mov r0, r0
+ %1 = call float asm "mov $0, $1", "=&r,0"(float %f)
+ ret float %1
+}
+
+; Check support for returning a double in GPR with matching double input with
+; soft float ABI
+define arm_aapcscc double @dbl_gpr_matching_in_op_soft(double %d) #0 {
+; CHECK-LABEL: dbl_gpr_matching_in_op_soft
+; CHECK: mov r1, r0
+ %1 = call double asm "mov ${0:R}, ${1:Q}", "=&r,0"(double %d)
+ ret double %1
+}
+
+; Check support for returning a float in specific GPR with matching float input
+; with soft float ABI
+define arm_aapcscc float @flt_gpr_matching_spec_reg_in_op_soft(float %f) #0 {
+; CHECK-LABEL: flt_gpr_matching_spec_reg_in_op_soft
+; CHECK: mov r3, r3
+ %1 = call float asm "mov $0, $1", "=&{r3},0"(float %f)
+ ret float %1
+}
+
+; Check support for returning a double in specific GPR with matching double
+; input with soft float ABI
+define arm_aapcscc double @dbl_gpr_matching_spec_reg_in_op_soft(double %d) #0 {
+; CHECK-LABEL: dbl_gpr_matching_spec_reg_in_op_soft
+; CHECK: mov r3, r2
+ %1 = call double asm "mov ${0:R}, ${1:Q}", "=&{r2},0"(double %d)
+ ret double %1
+}
+
attributes #0 = { nounwind "target-features"="+d16,+vfp2,+vfp3,-fp-only-sp" "use-soft-float"="true" }
@@ -39,4 +75,48 @@ define double @zerobits_double_hard() #1 {
ret double %1
}
+; Check support for returning a float in GPR with matching float input with
+; hard float ABI
+define float @flt_gpr_matching_in_op_hard(float %f) #1 {
+; CHECK-LABEL: flt_gpr_matching_in_op_hard
+; CHECK: vmov r0, s0
+; CHECK: mov r0, r0
+; CHECK: vmov s0, r0
+ %1 = call float asm "mov $0, $1", "=&r,0"(float %f)
+ ret float %1
+}
+
+; Check support for returning a double in GPR with matching double input with
+; hard float ABI
+define double @dbl_gpr_matching_in_op_hard(double %d) #1 {
+; CHECK-LABEL: dbl_gpr_matching_in_op_hard
+; CHECK: vmov r0, r1, d0
+; CHECK: mov r1, r0
+; CHECK: vmov d0, r0, r1
+ %1 = call double asm "mov ${0:R}, ${1:Q}", "=&r,0"(double %d)
+ ret double %1
+}
+
+; Check support for returning a float in specific GPR with matching float
+; input with hard float ABI
+define float @flt_gpr_matching_spec_reg_in_op_hard(float %f) #1 {
+; CHECK-LABEL: flt_gpr_matching_spec_reg_in_op_hard
+; CHECK: vmov r3, s0
+; CHECK: mov r3, r3
+; CHECK: vmov s0, r3
+ %1 = call float asm "mov $0, $1", "=&{r3},0"(float %f)
+ ret float %1
+}
+
+; Check support for returning a double in specific GPR with matching double
+; input with hard float ABI
+define double @dbl_gpr_matching_spec_reg_in_op_hard(double %d) #1 {
+; CHECK-LABEL: dbl_gpr_matching_spec_reg_in_op_hard
+; CHECK: vmov r2, r3, d0
+; CHECK: mov r3, r2
+; CHECK: vmov d0, r2, r3
+ %1 = call double asm "mov ${0:R}, ${1:Q}", "=&{r2},0"(double %d)
+ ret double %1
+}
+
attributes #1 = { nounwind "target-features"="+d16,+vfp2,+vfp3,-fp-only-sp" "use-soft-float"="false" }
diff --git a/test/CodeGen/ARM/inlineasm-64bit.ll b/test/CodeGen/ARM/inlineasm-64bit.ll
index 8e747c5eb650..62c71ab375c8 100644
--- a/test/CodeGen/ARM/inlineasm-64bit.ll
+++ b/test/CodeGen/ARM/inlineasm-64bit.ll
@@ -104,3 +104,11 @@ define i64 @tied_64bit_lookback_test(i64 %in) nounwind {
%res = extractvalue {i64, i32, i64} %vars, 2
ret i64 %res
}
+
+; Check access to low and high part with a specific register pair constraint
+define i64 @low_high_specific_reg_pair(i64 %in) nounwind {
+; CHECK-LABEL: low_high_specific_reg_pair
+; CHECK: mov r3, r2
+ %res = call i64 asm "mov ${0:R}, ${1:Q}", "=&{r2},0"(i64 %in)
+ ret i64 %res
+}
diff --git a/test/CodeGen/ARM/machine-cse-cmp.ll b/test/CodeGen/ARM/machine-cse-cmp.ll
index 10e56a346a2a..49dbb03135f5 100644
--- a/test/CodeGen/ARM/machine-cse-cmp.ll
+++ b/test/CodeGen/ARM/machine-cse-cmp.ll
@@ -65,8 +65,8 @@ if.end:
%s2 = sub nsw i32 %s, %size
%s3 = sub nsw i32 %sub, %s2
; CHECK: sub [[R1:r[0-9]+]], [[R2:r[0-9]+]], r2
-; CHECK: sub [[R3:r[0-9]+]], [[R1]], r2
-; CHECK: sub [[R4:r[0-9]+]], [[R1]], [[R3]]
+; CHECK: sub [[R3:r[0-9]+]], r2, [[R1]]
+; CHECK: add [[R4:r[0-9]+]], [[R1]], [[R3]]
; CHECK-NOT: sub
; CHECK: str
store i32 %s3, i32* %offset, align 4
diff --git a/test/CodeGen/Hexagon/bit-cmp0.mir b/test/CodeGen/Hexagon/bit-cmp0.mir
new file mode 100644
index 000000000000..e4a2514f0030
--- /dev/null
+++ b/test/CodeGen/Hexagon/bit-cmp0.mir
@@ -0,0 +1,154 @@
+# RUN: llc -march=hexagon -run-pass hexagon-bit-simplify -o - %s | FileCheck %s
+
+--- |
+ @g0 = global i32 0, align 4
+
+ define i32 @f0() { ret i32 0 }
+ define i32 @f1() { ret i32 0 }
+ define i32 @f2() { ret i32 0 }
+ define i32 @f3() { ret i32 0 }
+ define i32 @f4() { ret i32 0 }
+ define i32 @f5() { ret i32 0 }
+ define i32 @f6() { ret i32 0 }
+ define i32 @f7() { ret i32 0 }
+...
+
+# Case 0: is-zero with known zero register
+# CHECK-LABEL: name: f0
+# CHECK: %[[R00:[0-9]+]]:intregs = A2_tfrsi 1
+# CHECK: $r0 = COPY %[[R00]]
+# CHECK: PS_jmpret $r31, implicit-def dead $pc, implicit $r0
+
+name: f0
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %0:intregs = A2_tfrsi 0
+ %2:intregs = A4_rcmpeqi killed %0, 0
+ $r0 = COPY %2
+ PS_jmpret $r31, implicit-def dead $pc, implicit $r0
+...
+
+# Case 1: is-zero with known non-zero register
+# CHECK-LABEL: name: f1
+# CHECK: %[[R10:[0-9]+]]:intregs = A2_tfrsi 0
+# CHECK: $r0 = COPY %[[R10]]
+# CHECK: PS_jmpret $r31, implicit-def dead $pc, implicit $r0
+
+name: f1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %0:intregs = A2_tfrsi 128
+ %2:intregs = A4_rcmpeqi killed %0, 0
+ $r0 = COPY %2
+ PS_jmpret $r31, implicit-def dead $pc, implicit $r0
+...
+
+# Case 2: is-not-zero with known zero register
+# CHECK-LABEL: name: f2
+# CHECK: %[[R20:[0-9]+]]:intregs = A2_tfrsi 0
+# CHECK: $r0 = COPY %[[R20]]
+# CHECK: PS_jmpret $r31, implicit-def dead $pc, implicit $r0
+
+name: f2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %0:intregs = A2_tfrsi 0
+ %2:intregs = A4_rcmpneqi killed %0, 0
+ $r0 = COPY %2
+ PS_jmpret $r31, implicit-def dead $pc, implicit $r0
+...
+
+# Case 3: is-not-zero with known non-zero register
+# CHECK-LABEL: name: f3
+# CHECK: %[[R30:[0-9]+]]:intregs = A2_tfrsi 1
+# CHECK: $r0 = COPY %[[R30]]
+# CHECK: PS_jmpret $r31, implicit-def dead $pc, implicit $r0
+
+name: f3
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %0:intregs = A2_tfrsi 1024
+ %2:intregs = A4_rcmpneqi killed %0, 0
+ $r0 = COPY %2
+ PS_jmpret $r31, implicit-def dead $pc, implicit $r0
+...
+
+
+# Case 4: is-zero with mux(p, 1, 0)
+# CHECK-LABEL: name: f4
+# CHECK: %[[R40:[0-9]+]]:predregs = COPY $p0
+# CHECK: %[[R41:[0-9]+]]:intregs = C2_muxii %[[R40]], 0, 1
+# CHECK: $r0 = COPY %[[R41]]
+# CHECK: PS_jmpret $r31, implicit-def dead $pc, implicit $r0
+
+name: f4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $p0
+ %0:predregs = COPY $p0
+ %1:intregs = C2_muxii %0, 1, 0
+ %2:intregs = A4_rcmpeqi killed %1, 0
+ $r0 = COPY %2
+ PS_jmpret $r31, implicit-def dead $pc, implicit $r0
+...
+
+# Case 5: is-zero with mux(p, 0, 1)
+# CHECK-LABEL: name: f5
+# CHECK: %[[R50:[0-9]+]]:predregs = COPY $p0
+# CHECK: %[[R51:[0-9]+]]:intregs = C2_muxii %[[R50]], 1, 0
+# CHECK: $r0 = COPY %[[R51]]
+# CHECK: PS_jmpret $r31, implicit-def dead $pc, implicit $r0
+
+name: f5
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $p0
+ %0:predregs = COPY $p0
+ %1:intregs = C2_muxii %0, 0, 1
+ %2:intregs = A4_rcmpeqi killed %1, 0
+ $r0 = COPY %2
+ PS_jmpret $r31, implicit-def dead $pc, implicit $r0
+...
+
+# Case 6: is-not-zero with mux(p, 1, 2)
+# CHECK-LABEL: name: f6
+# CHECK: %[[R60:[0-9]+]]:intregs = A2_tfrsi 1
+# CHECK: $r0 = COPY %[[R60]]
+# CHECK: PS_jmpret $r31, implicit-def dead $pc, implicit $r0
+
+name: f6
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $p0
+ %0:predregs = COPY $p0
+ %1:intregs = C2_muxii %0, 1, 2
+ %2:intregs = A4_rcmpneqi killed %1, 0
+ $r0 = COPY %2
+ PS_jmpret $r31, implicit-def dead $pc, implicit $r0
+...
+
+# Case 7: is-not-zero with mux(p, @g0, 2)
+# CHECK-LABEL: name: f7
+# CHECK: %[[R70:[0-9]+]]:intregs = A2_tfrsi 1
+# CHECK: $r0 = COPY %[[R70]]
+# CHECK: PS_jmpret $r31, implicit-def dead $pc, implicit $r0
+
+name: f7
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $p0
+ %0:predregs = COPY $p0
+ %1:intregs = C2_muxii %0, @g0, 2
+ %2:intregs = A4_rcmpneqi killed %1, 0
+ $r0 = COPY %2
+ PS_jmpret $r31, implicit-def dead $pc, implicit $r0
+...
+
diff --git a/test/CodeGen/Mips/GlobalISel/instruction-select/gloal_address.mir b/test/CodeGen/Mips/GlobalISel/instruction-select/gloal_address.mir
new file mode 100644
index 000000000000..2c9bf5a827d4
--- /dev/null
+++ b/test/CodeGen/Mips/GlobalISel/instruction-select/gloal_address.mir
@@ -0,0 +1,46 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -O0 -mtriple=mipsel-linux-gnu -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=MIPS32
+--- |
+
+ @.str = private unnamed_addr constant [11 x i8] c"hello %d \0A\00"
+
+ define void @main() {entry: ret void}
+ declare i32 @printf(i8*, ...)
+
+...
+---
+name: main
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1.entry:
+ ; MIPS32-LABEL: name: main
+ ; MIPS32: [[LUi:%[0-9]+]]:gpr32 = LUi target-flags(mips-abs-hi) @.str
+ ; MIPS32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu [[LUi]], target-flags(mips-abs-lo) @.str
+ ; MIPS32: [[LUi1:%[0-9]+]]:gpr32 = LUi 18838
+ ; MIPS32: [[ORi:%[0-9]+]]:gpr32 = ORi [[LUi1]], 722
+ ; MIPS32: [[LUi2:%[0-9]+]]:gpr32 = LUi 0
+ ; MIPS32: [[ORi1:%[0-9]+]]:gpr32 = ORi [[LUi2]], 0
+ ; MIPS32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+ ; MIPS32: $a0 = COPY [[ADDiu]]
+ ; MIPS32: $a1 = COPY [[ORi]]
+ ; MIPS32: JAL @printf, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit-def $v0
+ ; MIPS32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+ ; MIPS32: $v0 = COPY [[ORi1]]
+ ; MIPS32: RetRA implicit $v0
+ %2:gprb(p0) = G_GLOBAL_VALUE @.str
+ %1:gprb(p0) = COPY %2(p0)
+ %3:gprb(s32) = G_CONSTANT i32 1234567890
+ %4:gprb(s32) = G_CONSTANT i32 0
+ ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+ $a0 = COPY %1(p0)
+ $a1 = COPY %3(s32)
+ JAL @printf, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit-def $v0
+ %0:gprb(s32) = COPY $v0
+ ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+ $v0 = COPY %4(s32)
+ RetRA implicit $v0
+
+...
diff --git a/test/CodeGen/Mips/GlobalISel/irtranslator/global_address.ll b/test/CodeGen/Mips/GlobalISel/irtranslator/global_address.ll
new file mode 100644
index 000000000000..a96028645f01
--- /dev/null
+++ b/test/CodeGen/Mips/GlobalISel/irtranslator/global_address.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -O0 -mtriple=mipsel-linux-gnu -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=MIPS32
+
+@.str = private unnamed_addr constant [11 x i8] c"hello %d \0A\00"
+
+define i32 @main() {
+ ; MIPS32-LABEL: name: main
+ ; MIPS32: bb.1.entry:
+ ; MIPS32: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @.str
+ ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY [[GV]](p0)
+ ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1234567890
+ ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; MIPS32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+ ; MIPS32: $a0 = COPY [[COPY]](p0)
+ ; MIPS32: $a1 = COPY [[C]](s32)
+ ; MIPS32: JAL @printf, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit-def $v0
+ ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $v0
+ ; MIPS32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+ ; MIPS32: $v0 = COPY [[C1]](s32)
+ ; MIPS32: RetRA implicit $v0
+entry:
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 signext 1234567890)
+ ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/GlobalISel/legalizer/global_address.mir b/test/CodeGen/Mips/GlobalISel/legalizer/global_address.mir
new file mode 100644
index 000000000000..9fcc82164b1e
--- /dev/null
+++ b/test/CodeGen/Mips/GlobalISel/legalizer/global_address.mir
@@ -0,0 +1,43 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -O0 -mtriple=mipsel-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=MIPS32
+--- |
+
+ @.str = private unnamed_addr constant [11 x i8] c"hello %d \0A\00"
+
+ define void @main() {entry: ret void}
+ declare i32 @printf(i8*, ...)
+
+...
+---
+name: main
+alignment: 2
+tracksRegLiveness: true
+body: |
+ bb.1.entry:
+ ; MIPS32-LABEL: name: main
+ ; MIPS32: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @.str
+ ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY [[GV]](p0)
+ ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1234567890
+ ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; MIPS32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+ ; MIPS32: $a0 = COPY [[COPY]](p0)
+ ; MIPS32: $a1 = COPY [[C]](s32)
+ ; MIPS32: JAL @printf, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit-def $v0
+ ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $v0
+ ; MIPS32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+ ; MIPS32: $v0 = COPY [[C1]](s32)
+ ; MIPS32: RetRA implicit $v0
+ %2:_(p0) = G_GLOBAL_VALUE @.str
+ %1:_(p0) = COPY %2(p0)
+ %3:_(s32) = G_CONSTANT i32 1234567890
+ %4:_(s32) = G_CONSTANT i32 0
+ ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+ $a0 = COPY %1(p0)
+ $a1 = COPY %3(s32)
+ JAL @printf, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit-def $v0
+ %0:_(s32) = COPY $v0
+ ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+ $v0 = COPY %4(s32)
+ RetRA implicit $v0
+
+...
diff --git a/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address.ll b/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address.ll
new file mode 100644
index 000000000000..ec98a3643596
--- /dev/null
+++ b/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O0 -mtriple=mipsel-linux-gnu -global-isel -verify-machineinstrs %s -o -| FileCheck %s -check-prefixes=MIPS32
+
+@.str = private unnamed_addr constant [11 x i8] c"hello %d \0A\00"
+
+define i32 @main() {
+; MIPS32-LABEL: main:
+; MIPS32: # %bb.0: # %entry
+; MIPS32-NEXT: addiu $sp, $sp, -24
+; MIPS32-NEXT: .cfi_def_cfa_offset 24
+; MIPS32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: .cfi_offset 31, -4
+; MIPS32-NEXT: lui $1, %hi($.str)
+; MIPS32-NEXT: addiu $4, $1, %lo($.str)
+; MIPS32-NEXT: lui $1, 18838
+; MIPS32-NEXT: ori $5, $1, 722
+; MIPS32-NEXT: lui $1, 0
+; MIPS32-NEXT: ori $2, $1, 0
+; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT: jal printf
+; MIPS32-NEXT: nop
+; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: move $2, $1
+; MIPS32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: addiu $sp, $sp, 24
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: nop
+entry:
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 signext 1234567890)
+ ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
+
diff --git a/test/CodeGen/Mips/GlobalISel/regbankselect/global_address.mir b/test/CodeGen/Mips/GlobalISel/regbankselect/global_address.mir
new file mode 100644
index 000000000000..11815516f64e
--- /dev/null
+++ b/test/CodeGen/Mips/GlobalISel/regbankselect/global_address.mir
@@ -0,0 +1,44 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -O0 -mtriple=mipsel-linux-gnu -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=MIPS32
+--- |
+
+ @.str = private unnamed_addr constant [11 x i8] c"hello %d \0A\00"
+
+ define void @main() {entry: ret void}
+ declare i32 @printf(i8*, ...)
+
+...
+---
+name: main
+alignment: 2
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.1.entry:
+ ; MIPS32-LABEL: name: main
+ ; MIPS32: [[GV:%[0-9]+]]:gprb(p0) = G_GLOBAL_VALUE @.str
+ ; MIPS32: [[COPY:%[0-9]+]]:gprb(p0) = COPY [[GV]](p0)
+ ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1234567890
+ ; MIPS32: [[C1:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 0
+ ; MIPS32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+ ; MIPS32: $a0 = COPY [[COPY]](p0)
+ ; MIPS32: $a1 = COPY [[C]](s32)
+ ; MIPS32: JAL @printf, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit-def $v0
+ ; MIPS32: [[COPY1:%[0-9]+]]:gprb(s32) = COPY $v0
+ ; MIPS32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+ ; MIPS32: $v0 = COPY [[C1]](s32)
+ ; MIPS32: RetRA implicit $v0
+ %2:_(p0) = G_GLOBAL_VALUE @.str
+ %1:_(p0) = COPY %2(p0)
+ %3:_(s32) = G_CONSTANT i32 1234567890
+ %4:_(s32) = G_CONSTANT i32 0
+ ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+ $a0 = COPY %1(p0)
+ $a1 = COPY %3(s32)
+ JAL @printf, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit-def $v0
+ %0:_(s32) = COPY $v0
+ ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+ $v0 = COPY %4(s32)
+ RetRA implicit $v0
+
+...
diff --git a/test/CodeGen/Mips/const-mult.ll b/test/CodeGen/Mips/const-mult.ll
index dc4f2f9c862b..cbb3c91299fa 100644
--- a/test/CodeGen/Mips/const-mult.ll
+++ b/test/CodeGen/Mips/const-mult.ll
@@ -312,20 +312,20 @@ define i32 @mul22224078_32(i32 %a) {
; MIPS32-NEXT: sll $2, $4, 4
; MIPS32-NEXT: subu $1, $2, $1
; MIPS32-NEXT: sll $2, $4, 6
-; MIPS32-NEXT: subu $1, $2, $1
+; MIPS32-NEXT: subu $1, $1, $2
; MIPS32-NEXT: sll $2, $4, 8
-; MIPS32-NEXT: subu $1, $2, $1
+; MIPS32-NEXT: addu $1, $2, $1
; MIPS32-NEXT: sll $2, $4, 10
-; MIPS32-NEXT: subu $1, $2, $1
+; MIPS32-NEXT: subu $1, $1, $2
; MIPS32-NEXT: sll $2, $4, 13
-; MIPS32-NEXT: subu $1, $2, $1
+; MIPS32-NEXT: addu $1, $2, $1
; MIPS32-NEXT: sll $2, $4, 16
-; MIPS32-NEXT: subu $1, $2, $1
+; MIPS32-NEXT: subu $1, $1, $2
; MIPS32-NEXT: sll $2, $4, 24
; MIPS32-NEXT: sll $3, $4, 22
; MIPS32-NEXT: sll $5, $4, 20
; MIPS32-NEXT: sll $4, $4, 18
-; MIPS32-NEXT: subu $1, $4, $1
+; MIPS32-NEXT: addu $1, $4, $1
; MIPS32-NEXT: addu $1, $5, $1
; MIPS32-NEXT: addu $1, $3, $1
; MIPS32-NEXT: jr $ra
@@ -338,20 +338,20 @@ define i32 @mul22224078_32(i32 %a) {
; MIPS64-NEXT: sll $3, $1, 4
; MIPS64-NEXT: subu $2, $3, $2
; MIPS64-NEXT: sll $3, $1, 6
-; MIPS64-NEXT: subu $2, $3, $2
+; MIPS64-NEXT: subu $2, $2, $3
; MIPS64-NEXT: sll $3, $1, 8
-; MIPS64-NEXT: subu $2, $3, $2
+; MIPS64-NEXT: addu $2, $3, $2
; MIPS64-NEXT: sll $3, $1, 10
-; MIPS64-NEXT: subu $2, $3, $2
+; MIPS64-NEXT: subu $2, $2, $3
; MIPS64-NEXT: sll $3, $1, 13
-; MIPS64-NEXT: subu $2, $3, $2
+; MIPS64-NEXT: addu $2, $3, $2
; MIPS64-NEXT: sll $3, $1, 16
-; MIPS64-NEXT: subu $2, $3, $2
+; MIPS64-NEXT: subu $2, $2, $3
; MIPS64-NEXT: sll $3, $1, 24
; MIPS64-NEXT: sll $4, $1, 22
; MIPS64-NEXT: sll $5, $1, 20
; MIPS64-NEXT: sll $1, $1, 18
-; MIPS64-NEXT: subu $1, $1, $2
+; MIPS64-NEXT: addu $1, $1, $2
; MIPS64-NEXT: addu $1, $5, $1
; MIPS64-NEXT: addu $1, $4, $1
; MIPS64-NEXT: jr $ra
@@ -373,20 +373,20 @@ define i64 @mul22224078_64(i64 %a) {
; MIPS32-NEXT: sll $4, $5, 4
; MIPS32-NEXT: subu $3, $4, $3
; MIPS32-NEXT: sll $4, $5, 6
-; MIPS32-NEXT: subu $3, $4, $3
+; MIPS32-NEXT: subu $3, $3, $4
; MIPS32-NEXT: sll $4, $5, 8
-; MIPS32-NEXT: subu $3, $4, $3
+; MIPS32-NEXT: addu $3, $4, $3
; MIPS32-NEXT: sll $4, $5, 10
-; MIPS32-NEXT: subu $3, $4, $3
+; MIPS32-NEXT: subu $3, $3, $4
; MIPS32-NEXT: sll $4, $5, 13
-; MIPS32-NEXT: subu $3, $4, $3
+; MIPS32-NEXT: addu $3, $4, $3
; MIPS32-NEXT: sll $4, $5, 16
-; MIPS32-NEXT: subu $3, $4, $3
+; MIPS32-NEXT: subu $3, $3, $4
; MIPS32-NEXT: sll $4, $5, 24
; MIPS32-NEXT: sll $6, $5, 22
; MIPS32-NEXT: sll $7, $5, 20
; MIPS32-NEXT: sll $5, $5, 18
-; MIPS32-NEXT: subu $3, $5, $3
+; MIPS32-NEXT: addu $3, $5, $3
; MIPS32-NEXT: addu $3, $7, $3
; MIPS32-NEXT: addu $3, $6, $3
; MIPS32-NEXT: addu $3, $4, $3
@@ -399,20 +399,20 @@ define i64 @mul22224078_64(i64 %a) {
; MIPS64-NEXT: dsll $2, $4, 4
; MIPS64-NEXT: dsubu $1, $2, $1
; MIPS64-NEXT: dsll $2, $4, 6
-; MIPS64-NEXT: dsubu $1, $2, $1
+; MIPS64-NEXT: dsubu $1, $1, $2
; MIPS64-NEXT: dsll $2, $4, 8
-; MIPS64-NEXT: dsubu $1, $2, $1
+; MIPS64-NEXT: daddu $1, $2, $1
; MIPS64-NEXT: dsll $2, $4, 10
-; MIPS64-NEXT: dsubu $1, $2, $1
+; MIPS64-NEXT: dsubu $1, $1, $2
; MIPS64-NEXT: dsll $2, $4, 13
-; MIPS64-NEXT: dsubu $1, $2, $1
+; MIPS64-NEXT: daddu $1, $2, $1
; MIPS64-NEXT: dsll $2, $4, 16
-; MIPS64-NEXT: dsubu $1, $2, $1
+; MIPS64-NEXT: dsubu $1, $1, $2
; MIPS64-NEXT: dsll $2, $4, 24
; MIPS64-NEXT: dsll $3, $4, 22
; MIPS64-NEXT: dsll $5, $4, 20
; MIPS64-NEXT: dsll $4, $4, 18
-; MIPS64-NEXT: dsubu $1, $4, $1
+; MIPS64-NEXT: daddu $1, $4, $1
; MIPS64-NEXT: daddu $1, $5, $1
; MIPS64-NEXT: daddu $1, $3, $1
; MIPS64-NEXT: jr $ra
diff --git a/test/CodeGen/PowerPC/signbit-shift.ll b/test/CodeGen/PowerPC/signbit-shift.ll
index 758758781448..41d250e924e2 100644
--- a/test/CodeGen/PowerPC/signbit-shift.ll
+++ b/test/CodeGen/PowerPC/signbit-shift.ll
@@ -243,8 +243,8 @@ define <4 x i32> @sub_lshr_not_vec_splat(<4 x i32> %x) {
define i32 @sub_lshr(i32 %x, i32 %y) {
; CHECK-LABEL: sub_lshr:
; CHECK: # %bb.0:
-; CHECK-NEXT: srwi 3, 3, 31
-; CHECK-NEXT: subf 3, 3, 4
+; CHECK-NEXT: srawi 3, 3, 31
+; CHECK-NEXT: add 3, 4, 3
; CHECK-NEXT: blr
%sh = lshr i32 %x, 31
%r = sub i32 %y, %sh
@@ -257,8 +257,8 @@ define <4 x i32> @sub_lshr_vec(<4 x i32> %x, <4 x i32> %y) {
; CHECK-NEXT: vspltisw 4, -16
; CHECK-NEXT: vspltisw 5, 15
; CHECK-NEXT: vsubuwm 4, 5, 4
-; CHECK-NEXT: vsrw 2, 2, 4
-; CHECK-NEXT: vsubuwm 2, 3, 2
+; CHECK-NEXT: vsraw 2, 2, 4
+; CHECK-NEXT: vadduwm 2, 3, 2
; CHECK-NEXT: blr
%sh = lshr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
%r = sub <4 x i32> %y, %sh
@@ -268,8 +268,8 @@ define <4 x i32> @sub_lshr_vec(<4 x i32> %x, <4 x i32> %y) {
define i32 @sub_const_op_lshr(i32 %x) {
; CHECK-LABEL: sub_const_op_lshr:
; CHECK: # %bb.0:
-; CHECK-NEXT: srwi 3, 3, 31
-; CHECK-NEXT: subfic 3, 3, 43
+; CHECK-NEXT: srawi 3, 3, 31
+; CHECK-NEXT: addi 3, 3, 43
; CHECK-NEXT: blr
%sh = lshr i32 %x, 31
%r = sub i32 43, %sh
@@ -284,9 +284,9 @@ define <4 x i32> @sub_const_op_lshr_vec(<4 x i32> %x) {
; CHECK-NEXT: addis 3, 2, .LCPI21_0@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI21_0@toc@l
; CHECK-NEXT: vsubuwm 3, 4, 3
-; CHECK-NEXT: vsrw 2, 2, 3
+; CHECK-NEXT: vsraw 2, 2, 3
; CHECK-NEXT: lvx 3, 0, 3
-; CHECK-NEXT: vsubuwm 2, 3, 2
+; CHECK-NEXT: vadduwm 2, 2, 3
; CHECK-NEXT: blr
%sh = lshr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
%r = sub <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %sh
diff --git a/test/CodeGen/RISCV/tail-calls.ll b/test/CodeGen/RISCV/tail-calls.ll
index 2279e8c37792..4d7db01d1fb7 100644
--- a/test/CodeGen/RISCV/tail-calls.ll
+++ b/test/CodeGen/RISCV/tail-calls.ll
@@ -106,7 +106,7 @@ entry:
tail call void @callee_irq()
ret void
}
-attributes #0 = { "interrupt" }
+attributes #0 = { "interrupt"="machine" }
; Byval parameters hand the function a pointer directly into the stack area
; we want to reuse during a tail call. Do not tail call optimize functions with
diff --git a/test/CodeGen/SystemZ/shift-12.ll b/test/CodeGen/SystemZ/shift-12.ll
index 4ebc42b44a47..53d3d5362dfd 100644
--- a/test/CodeGen/SystemZ/shift-12.ll
+++ b/test/CodeGen/SystemZ/shift-12.ll
@@ -104,3 +104,15 @@ define i32 @f10(i32 %a, i32 %sh) {
%reuse = add i32 %and, %shift
ret i32 %reuse
}
+
+; Test that AND is not removed for i128 (which calls __ashlti3)
+define i128 @f11(i128 %a, i32 %sh) {
+; CHECK-LABEL: f11:
+; CHECK: risbg %r4, %r4, 57, 191, 0
+; CHECK: brasl %r14, __ashlti3@PLT
+ %and = and i32 %sh, 127
+ %ext = zext i32 %and to i128
+ %shift = shl i128 %a, %ext
+ ret i128 %shift
+}
+
diff --git a/test/CodeGen/SystemZ/vec-cmp-cmp-logic-select.ll b/test/CodeGen/SystemZ/vec-cmp-cmp-logic-select.ll
index 8714a5a5703c..9ed7ca217973 100644
--- a/test/CodeGen/SystemZ/vec-cmp-cmp-logic-select.ll
+++ b/test/CodeGen/SystemZ/vec-cmp-cmp-logic-select.ll
@@ -688,8 +688,8 @@ define <8 x float> @fun30(<8 x float> %val1, <8 x float> %val2, <8 x double> %va
; CHECK-NEXT: vpkg %v6, %v6, %v7
; CHECK-NEXT: vpkg %v4, %v4, %v5
; CHECK-NEXT: vn %v5, %v16, %v6
-; CHECK-NEXT: vsel %v24, %v3, %v2, %v5
-; CHECK-NEXT: vldeb %v17, %v17
+; CHECK-DAG: vsel %v24, %v3, %v2, %v5
+; CHECK-DAG: vldeb %v17, %v17
; CHECK-NEXT: vldeb %v18, %v18
; CHECK-NEXT: vfchdb %v17, %v18, %v17
; CHECK-NEXT: vmrhf %v18, %v30, %v30
diff --git a/test/CodeGen/X86/atom-fixup-lea2.ll b/test/CodeGen/X86/atom-fixup-lea2.ll
index 9b0b472be0f3..b8a0369a45f4 100644
--- a/test/CodeGen/X86/atom-fixup-lea2.ll
+++ b/test/CodeGen/X86/atom-fixup-lea2.ll
@@ -1,5 +1,4 @@
; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s
-; RUN: llc < %s -mcpu=goldmont -mtriple=i686-linux | FileCheck %s
; CHECK:%bb.5
; CHECK-NEXT:leal
diff --git a/test/CodeGen/X86/combine-sdiv.ll b/test/CodeGen/X86/combine-sdiv.ll
index cc99d71009c6..7f0573c6175c 100644
--- a/test/CodeGen/X86/combine-sdiv.ll
+++ b/test/CodeGen/X86/combine-sdiv.ll
@@ -285,43 +285,23 @@ define <16 x i8> @combine_vec_sdiv_by_pow2b_v16i8(<16 x i8> %x) {
; SSE-LABEL: combine_vec_sdiv_by_pow2b_v16i8:
; SSE: # %bb.0:
; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: pxor %xmm2, %xmm2
-; SSE-NEXT: pcmpgtb %xmm0, %xmm2
-; SSE-NEXT: movdqa %xmm2, %xmm3
-; SSE-NEXT: psrlw $4, %xmm3
-; SSE-NEXT: pand {{.*}}(%rip), %xmm3
-; SSE-NEXT: movdqa {{.*#+}} xmm0 = [49408,32992,24736,57408,49408,32992,24736,57408]
-; SSE-NEXT: pblendvb %xmm0, %xmm3, %xmm2
-; SSE-NEXT: movdqa %xmm2, %xmm3
-; SSE-NEXT: psrlw $2, %xmm3
-; SSE-NEXT: pand {{.*}}(%rip), %xmm3
-; SSE-NEXT: paddb %xmm0, %xmm0
-; SSE-NEXT: pblendvb %xmm0, %xmm3, %xmm2
-; SSE-NEXT: movdqa %xmm2, %xmm3
-; SSE-NEXT: psrlw $1, %xmm3
-; SSE-NEXT: pand {{.*}}(%rip), %xmm3
-; SSE-NEXT: paddb %xmm0, %xmm0
-; SSE-NEXT: pblendvb %xmm0, %xmm3, %xmm2
-; SSE-NEXT: paddb %xmm1, %xmm2
-; SSE-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
-; SSE-NEXT: movdqa %xmm3, %xmm4
-; SSE-NEXT: psraw $4, %xmm4
-; SSE-NEXT: movdqa {{.*#+}} xmm5 = [16384,32800,41056,8384,16384,32800,41056,8384]
-; SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm5[8],xmm0[9],xmm5[9],xmm0[10],xmm5[10],xmm0[11],xmm5[11],xmm0[12],xmm5[12],xmm0[13],xmm5[13],xmm0[14],xmm5[14],xmm0[15],xmm5[15]
-; SSE-NEXT: pblendvb %xmm0, %xmm4, %xmm3
-; SSE-NEXT: movdqa %xmm3, %xmm4
-; SSE-NEXT: psraw $2, %xmm4
-; SSE-NEXT: paddw %xmm0, %xmm0
-; SSE-NEXT: pblendvb %xmm0, %xmm4, %xmm3
-; SSE-NEXT: movdqa %xmm3, %xmm4
-; SSE-NEXT: psraw $1, %xmm4
-; SSE-NEXT: paddw %xmm0, %xmm0
-; SSE-NEXT: pblendvb %xmm0, %xmm4, %xmm3
+; SSE-NEXT: pxor %xmm0, %xmm0
+; SSE-NEXT: pcmpgtb %xmm1, %xmm0
+; SSE-NEXT: pmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,4,2,16,8,32,64,2]
+; SSE-NEXT: pmullw %xmm2, %xmm3
; SSE-NEXT: psrlw $8, %xmm3
-; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; SSE-NEXT: pmullw %xmm2, %xmm0
+; SSE-NEXT: psrlw $8, %xmm0
+; SSE-NEXT: packuswb %xmm0, %xmm3
+; SSE-NEXT: paddb %xmm1, %xmm3
+; SSE-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm3[8],xmm2[9],xmm3[9],xmm2[10],xmm3[10],xmm2[11],xmm3[11],xmm2[12],xmm3[12],xmm2[13],xmm3[13],xmm2[14],xmm3[14],xmm2[15],xmm3[15]
; SSE-NEXT: movdqa %xmm2, %xmm4
; SSE-NEXT: psraw $4, %xmm4
-; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7]
+; SSE-NEXT: movdqa {{.*#+}} xmm5 = [16384,32800,41056,8384,16384,32800,41056,8384]
+; SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm5[8],xmm0[9],xmm5[9],xmm0[10],xmm5[10],xmm0[11],xmm5[11],xmm0[12],xmm5[12],xmm0[13],xmm5[13],xmm0[14],xmm5[14],xmm0[15],xmm5[15]
; SSE-NEXT: pblendvb %xmm0, %xmm4, %xmm2
; SSE-NEXT: movdqa %xmm2, %xmm4
; SSE-NEXT: psraw $2, %xmm4
@@ -332,9 +312,23 @@ define <16 x i8> @combine_vec_sdiv_by_pow2b_v16i8(<16 x i8> %x) {
; SSE-NEXT: paddw %xmm0, %xmm0
; SSE-NEXT: pblendvb %xmm0, %xmm4, %xmm2
; SSE-NEXT: psrlw $8, %xmm2
-; SSE-NEXT: packuswb %xmm3, %xmm2
+; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE-NEXT: movdqa %xmm3, %xmm4
+; SSE-NEXT: psraw $4, %xmm4
+; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7]
+; SSE-NEXT: pblendvb %xmm0, %xmm4, %xmm3
+; SSE-NEXT: movdqa %xmm3, %xmm4
+; SSE-NEXT: psraw $2, %xmm4
+; SSE-NEXT: paddw %xmm0, %xmm0
+; SSE-NEXT: pblendvb %xmm0, %xmm4, %xmm3
+; SSE-NEXT: movdqa %xmm3, %xmm4
+; SSE-NEXT: psraw $1, %xmm4
+; SSE-NEXT: paddw %xmm0, %xmm0
+; SSE-NEXT: pblendvb %xmm0, %xmm4, %xmm3
+; SSE-NEXT: psrlw $8, %xmm3
+; SSE-NEXT: packuswb %xmm2, %xmm3
; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255]
-; SSE-NEXT: pblendvb %xmm0, %xmm2, %xmm1
+; SSE-NEXT: pblendvb %xmm0, %xmm3, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
@@ -342,18 +336,15 @@ define <16 x i8> @combine_vec_sdiv_by_pow2b_v16i8(<16 x i8> %x) {
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm1
-; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm2
-; AVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [49408,32992,24736,57408,49408,32992,24736,57408]
-; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpsrlw $2, %xmm1, %xmm2
-; AVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
-; AVX1-NEXT: vpaddb %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm2
-; AVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
-; AVX1-NEXT: vpaddb %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1,4,2,16,8,32,64,2]
+; AVX1-NEXT: vpmullw %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
+; AVX1-NEXT: vpmullw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX1-NEXT: vpsraw $4, %xmm2, %xmm3
@@ -387,18 +378,11 @@ define <16 x i8> @combine_vec_sdiv_by_pow2b_v16i8(<16 x i8> %x) {
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm1
-; AVX2-NEXT: vpsrlw $4, %xmm1, %xmm2
-; AVX2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [49408,32992,24736,57408,49408,32992,24736,57408]
-; AVX2-NEXT: vpblendvb %xmm3, %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpsrlw $2, %xmm1, %xmm2
-; AVX2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
-; AVX2-NEXT: vpaddb %xmm3, %xmm3, %xmm3
-; AVX2-NEXT: vpblendvb %xmm3, %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpsrlw $1, %xmm1, %xmm2
-; AVX2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
-; AVX2-NEXT: vpaddb %xmm3, %xmm3, %xmm3
-; AVX2-NEXT: vpblendvb %xmm3, %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
+; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpaddb %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX2-NEXT: vpsraw $4, %xmm2, %xmm3
@@ -426,6 +410,7 @@ define <16 x i8> @combine_vec_sdiv_by_pow2b_v16i8(<16 x i8> %x) {
; AVX2-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255]
; AVX2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: combine_vec_sdiv_by_pow2b_v16i8:
@@ -481,18 +466,7 @@ define <8 x i16> @combine_vec_sdiv_by_pow2b_v8i16(<8 x i16> %x) {
; SSE: # %bb.0:
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psraw $15, %xmm1
-; SSE-NEXT: movdqa %xmm1, %xmm2
-; SSE-NEXT: psrlw $8, %xmm2
-; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3,4,5,6,7]
-; SSE-NEXT: movdqa %xmm2, %xmm1
-; SSE-NEXT: psrlw $4, %xmm1
-; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3,4],xmm2[5,6],xmm1[7]
-; SSE-NEXT: movdqa %xmm1, %xmm2
-; SSE-NEXT: psrlw $2, %xmm2
-; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0],xmm2[1,2],xmm1[3,4],xmm2[5,6,7]
-; SSE-NEXT: movdqa %xmm2, %xmm1
-; SSE-NEXT: psrlw $1, %xmm1
-; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2],xmm2[3],xmm1[4,5],xmm2[6],xmm1[7]
+; SSE-NEXT: pmulhuw {{.*}}(%rip), %xmm1
; SSE-NEXT: paddw %xmm0, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm2
; SSE-NEXT: psraw $4, %xmm2
@@ -510,14 +484,7 @@ define <8 x i16> @combine_vec_sdiv_by_pow2b_v8i16(<8 x i16> %x) {
; AVX1-LABEL: combine_vec_sdiv_by_pow2b_v8i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsraw $15, %xmm0, %xmm1
-; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm2
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
-; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm2
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4],xmm1[5,6],xmm2[7]
-; AVX1-NEXT: vpsrlw $2, %xmm1, %xmm2
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2],xmm1[3,4],xmm2[5,6,7]
-; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm2
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2],xmm1[3],xmm2[4,5],xmm1[6],xmm2[7]
+; AVX1-NEXT: vpmulhuw {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpsraw $4, %xmm1, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[3],xmm1[4],xmm2[5,6],xmm1[7]
@@ -531,10 +498,7 @@ define <8 x i16> @combine_vec_sdiv_by_pow2b_v8i16(<8 x i16> %x) {
; AVX2-LABEL: combine_vec_sdiv_by_pow2b_v8i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsraw $15, %xmm0, %xmm1
-; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
-; AVX2-NEXT: vpsrlvd {{.*}}(%rip), %ymm1, %ymm1
-; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
-; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
+; AVX2-NEXT: vpmulhuw {{.*}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vpmovsxwd %xmm1, %ymm1
; AVX2-NEXT: vpsravd {{.*}}(%rip), %ymm1, %ymm1
@@ -547,9 +511,7 @@ define <8 x i16> @combine_vec_sdiv_by_pow2b_v8i16(<8 x i16> %x) {
; AVX512F-LABEL: combine_vec_sdiv_by_pow2b_v8i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsraw $15, %xmm0, %xmm1
-; AVX512F-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
-; AVX512F-NEXT: vpsrlvd {{.*}}(%rip), %ymm1, %ymm1
-; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
+; AVX512F-NEXT: vpmulhuw {{.*}}(%rip), %xmm1, %xmm1
; AVX512F-NEXT: vpaddw %xmm1, %xmm0, %xmm1
; AVX512F-NEXT: vpmovsxwd %xmm1, %ymm1
; AVX512F-NEXT: vpsravd {{.*}}(%rip), %ymm1, %ymm1
@@ -583,70 +545,44 @@ define <16 x i16> @combine_vec_sdiv_by_pow2b_v16i16(<16 x i16> %x) {
; SSE-LABEL: combine_vec_sdiv_by_pow2b_v16i16:
; SSE: # %bb.0:
; SSE-NEXT: movdqa %xmm0, %xmm2
+; SSE-NEXT: psraw $15, %xmm2
+; SSE-NEXT: movdqa {{.*#+}} xmm3 = [1,4,2,16,8,32,64,2]
+; SSE-NEXT: pmulhuw %xmm3, %xmm2
+; SSE-NEXT: paddw %xmm0, %xmm2
+; SSE-NEXT: movdqa %xmm2, %xmm4
+; SSE-NEXT: psraw $4, %xmm4
+; SSE-NEXT: pblendw {{.*#+}} xmm4 = xmm2[0,1,2],xmm4[3],xmm2[4],xmm4[5,6],xmm2[7]
+; SSE-NEXT: movdqa %xmm4, %xmm5
+; SSE-NEXT: psraw $2, %xmm5
+; SSE-NEXT: pblendw {{.*#+}} xmm5 = xmm4[0],xmm5[1],xmm4[2,3],xmm5[4],xmm4[5],xmm5[6],xmm4[7]
+; SSE-NEXT: movdqa %xmm5, %xmm2
+; SSE-NEXT: psraw $1, %xmm2
+; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm5[0,1],xmm2[2],xmm5[3],xmm2[4,5],xmm5[6],xmm2[7]
+; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3,4,5,6,7]
+; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: psraw $15, %xmm0
-; SSE-NEXT: movdqa %xmm0, %xmm3
-; SSE-NEXT: psrlw $8, %xmm3
-; SSE-NEXT: pblendw {{.*#+}} xmm3 = xmm0[0],xmm3[1,2,3,4,5,6,7]
-; SSE-NEXT: movdqa %xmm3, %xmm0
-; SSE-NEXT: psrlw $4, %xmm0
-; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm3[0],xmm0[1,2,3,4],xmm3[5,6],xmm0[7]
-; SSE-NEXT: movdqa %xmm0, %xmm3
-; SSE-NEXT: psrlw $2, %xmm3
-; SSE-NEXT: pblendw {{.*#+}} xmm3 = xmm0[0],xmm3[1,2],xmm0[3,4],xmm3[5,6,7]
-; SSE-NEXT: movdqa %xmm3, %xmm0
-; SSE-NEXT: psrlw $1, %xmm0
-; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm3[0,1],xmm0[2],xmm3[3],xmm0[4,5],xmm3[6],xmm0[7]
-; SSE-NEXT: paddw %xmm2, %xmm0
+; SSE-NEXT: pmulhuw %xmm3, %xmm0
+; SSE-NEXT: paddw %xmm1, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm3
; SSE-NEXT: psraw $4, %xmm3
; SSE-NEXT: pblendw {{.*#+}} xmm3 = xmm0[0,1,2],xmm3[3],xmm0[4],xmm3[5,6],xmm0[7]
-; SSE-NEXT: movdqa %xmm3, %xmm4
-; SSE-NEXT: psraw $2, %xmm4
-; SSE-NEXT: pblendw {{.*#+}} xmm4 = xmm3[0],xmm4[1],xmm3[2,3],xmm4[4],xmm3[5],xmm4[6],xmm3[7]
-; SSE-NEXT: movdqa %xmm4, %xmm0
-; SSE-NEXT: psraw $1, %xmm0
-; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm4[0,1],xmm0[2],xmm4[3],xmm0[4,5],xmm4[6],xmm0[7]
-; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3,4,5,6,7]
-; SSE-NEXT: movdqa %xmm1, %xmm2
-; SSE-NEXT: psraw $15, %xmm2
-; SSE-NEXT: movdqa %xmm2, %xmm3
-; SSE-NEXT: psrlw $8, %xmm3
-; SSE-NEXT: pblendw {{.*#+}} xmm3 = xmm2[0],xmm3[1,2,3,4,5,6,7]
-; SSE-NEXT: movdqa %xmm3, %xmm2
-; SSE-NEXT: psrlw $4, %xmm2
-; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3,4],xmm3[5,6],xmm2[7]
-; SSE-NEXT: movdqa %xmm2, %xmm3
-; SSE-NEXT: psrlw $2, %xmm3
-; SSE-NEXT: pblendw {{.*#+}} xmm3 = xmm2[0],xmm3[1,2],xmm2[3,4],xmm3[5,6,7]
-; SSE-NEXT: movdqa %xmm3, %xmm2
-; SSE-NEXT: psrlw $1, %xmm2
-; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2],xmm3[3],xmm2[4,5],xmm3[6],xmm2[7]
-; SSE-NEXT: paddw %xmm1, %xmm2
-; SSE-NEXT: movdqa %xmm2, %xmm3
-; SSE-NEXT: psraw $4, %xmm3
-; SSE-NEXT: pblendw {{.*#+}} xmm3 = xmm2[0,1,2],xmm3[3],xmm2[4],xmm3[5,6],xmm2[7]
-; SSE-NEXT: movdqa %xmm3, %xmm4
-; SSE-NEXT: psraw $2, %xmm4
-; SSE-NEXT: pblendw {{.*#+}} xmm4 = xmm3[0],xmm4[1],xmm3[2,3],xmm4[4],xmm3[5],xmm4[6],xmm3[7]
-; SSE-NEXT: movdqa %xmm4, %xmm2
-; SSE-NEXT: psraw $1, %xmm2
-; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm4[0,1],xmm2[2],xmm4[3],xmm2[4,5],xmm4[6],xmm2[7]
-; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3,4,5,6,7]
-; SSE-NEXT: movdqa %xmm2, %xmm1
+; SSE-NEXT: movdqa %xmm3, %xmm0
+; SSE-NEXT: psraw $2, %xmm0
+; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm3[0],xmm0[1],xmm3[2,3],xmm0[4],xmm3[5],xmm0[6],xmm3[7]
+; SSE-NEXT: movdqa %xmm0, %xmm3
+; SSE-NEXT: psraw $1, %xmm3
+; SSE-NEXT: pblendw {{.*#+}} xmm3 = xmm0[0,1],xmm3[2],xmm0[3],xmm3[4,5],xmm0[6],xmm3[7]
+; SSE-NEXT: pblendw {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3,4,5,6,7]
+; SSE-NEXT: movdqa %xmm2, %xmm0
+; SSE-NEXT: movdqa %xmm3, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: combine_vec_sdiv_by_pow2b_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpsraw $15, %xmm1, %xmm2
-; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm3
-; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3,4,5,6,7]
-; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm3
-; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3,4],xmm2[5,6],xmm3[7]
-; AVX1-NEXT: vpsrlw $2, %xmm2, %xmm3
-; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2],xmm2[3,4],xmm3[5,6,7]
-; AVX1-NEXT: vpsrlw $1, %xmm2, %xmm3
-; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2],xmm2[3],xmm3[4,5],xmm2[6],xmm3[7]
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1,4,2,16,8,32,64,2]
+; AVX1-NEXT: vpmulhuw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpaddw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsraw $4, %xmm1, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[3],xmm1[4],xmm2[5,6],xmm1[7]
@@ -655,14 +591,7 @@ define <16 x i16> @combine_vec_sdiv_by_pow2b_v16i16(<16 x i16> %x) {
; AVX1-NEXT: vpsraw $1, %xmm1, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2],xmm1[3],xmm2[4,5],xmm1[6],xmm2[7]
; AVX1-NEXT: vpsraw $15, %xmm0, %xmm2
-; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm3
-; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3,4,5,6,7]
-; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm3
-; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3,4],xmm2[5,6],xmm3[7]
-; AVX1-NEXT: vpsrlw $2, %xmm2, %xmm3
-; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2],xmm2[3,4],xmm3[5,6,7]
-; AVX1-NEXT: vpsrlw $1, %xmm2, %xmm3
-; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2],xmm2[3],xmm3[4,5],xmm2[6],xmm3[7]
+; AVX1-NEXT: vpmulhuw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpaddw %xmm2, %xmm0, %xmm2
; AVX1-NEXT: vpsraw $4, %xmm2, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[3],xmm2[4],xmm3[5,6],xmm2[7]
@@ -680,26 +609,17 @@ define <16 x i16> @combine_vec_sdiv_by_pow2b_v16i16(<16 x i16> %x) {
; AVX2-LABEL: combine_vec_sdiv_by_pow2b_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [16,14,15,12,13,11,10,15,16,14,15,12,13,11,10,15]
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,1,4,3,5,6,1,0,2,1,4,3,5,6,1]
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm2[4],ymm1[4],ymm2[5],ymm1[5],ymm2[6],ymm1[6],ymm2[7],ymm1[7],ymm2[12],ymm1[12],ymm2[13],ymm1[13],ymm2[14],ymm1[14],ymm2[15],ymm1[15]
; AVX2-NEXT: vpsraw $15, %ymm0, %ymm4
+; AVX2-NEXT: vpmulhuw {{.*}}(%rip), %ymm4, %ymm4
+; AVX2-NEXT: vpaddw %ymm4, %ymm0, %ymm4
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm5 = ymm1[4],ymm4[4],ymm1[5],ymm4[5],ymm1[6],ymm4[6],ymm1[7],ymm4[7],ymm1[12],ymm4[12],ymm1[13],ymm4[13],ymm1[14],ymm4[14],ymm1[15],ymm4[15]
-; AVX2-NEXT: vpsrlvd %ymm3, %ymm5, %ymm3
+; AVX2-NEXT: vpsravd %ymm3, %ymm5, %ymm3
; AVX2-NEXT: vpsrld $16, %ymm3, %ymm3
; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm2 = ymm2[0],ymm1[0],ymm2[1],ymm1[1],ymm2[2],ymm1[2],ymm2[3],ymm1[3],ymm2[8],ymm1[8],ymm2[9],ymm1[9],ymm2[10],ymm1[10],ymm2[11],ymm1[11]
-; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm4 = ymm1[0],ymm4[0],ymm1[1],ymm4[1],ymm1[2],ymm4[2],ymm1[3],ymm4[3],ymm1[8],ymm4[8],ymm1[9],ymm4[9],ymm1[10],ymm4[10],ymm1[11],ymm4[11]
-; AVX2-NEXT: vpsrlvd %ymm2, %ymm4, %ymm2
-; AVX2-NEXT: vpsrld $16, %ymm2, %ymm2
-; AVX2-NEXT: vpackusdw %ymm3, %ymm2, %ymm2
-; AVX2-NEXT: vpaddw %ymm2, %ymm0, %ymm2
-; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm4 = [0,2,1,4,3,5,6,1,0,2,1,4,3,5,6,1]
-; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm5 = ymm4[4],ymm1[4],ymm4[5],ymm1[5],ymm4[6],ymm1[6],ymm4[7],ymm1[7],ymm4[12],ymm1[12],ymm4[13],ymm1[13],ymm4[14],ymm1[14],ymm4[15],ymm1[15]
-; AVX2-NEXT: vpsravd %ymm5, %ymm3, %ymm3
-; AVX2-NEXT: vpsrld $16, %ymm3, %ymm3
-; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm2 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
-; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm4[0],ymm1[0],ymm4[1],ymm1[1],ymm4[2],ymm1[2],ymm4[3],ymm1[3],ymm4[8],ymm1[8],ymm4[9],ymm1[9],ymm4[10],ymm1[10],ymm4[11],ymm1[11]
-; AVX2-NEXT: vpsravd %ymm1, %ymm2, %ymm1
+; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm4[0],ymm1[1],ymm4[1],ymm1[2],ymm4[2],ymm1[3],ymm4[3],ymm1[8],ymm4[8],ymm1[9],ymm4[9],ymm1[10],ymm4[10],ymm1[11],ymm4[11]
+; AVX2-NEXT: vpsravd %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpsrld $16, %ymm1, %ymm1
; AVX2-NEXT: vpackusdw %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
@@ -708,9 +628,7 @@ define <16 x i16> @combine_vec_sdiv_by_pow2b_v16i16(<16 x i16> %x) {
; AVX512F-LABEL: combine_vec_sdiv_by_pow2b_v16i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsraw $15, %ymm0, %ymm1
-; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
-; AVX512F-NEXT: vpsrlvd {{.*}}(%rip), %zmm1, %zmm1
-; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
+; AVX512F-NEXT: vpmulhuw {{.*}}(%rip), %ymm1, %ymm1
; AVX512F-NEXT: vpaddw %ymm1, %ymm0, %ymm1
; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
; AVX512F-NEXT: vpsravd {{.*}}(%rip), %zmm1, %zmm1
@@ -753,93 +671,50 @@ define <32 x i16> @combine_vec_sdiv_by_pow2b_v32i16(<32 x i16> %x) {
; SSE-NEXT: movdqa %xmm1, %xmm4
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psraw $15, %xmm0
-; SSE-NEXT: movdqa %xmm0, %xmm5
-; SSE-NEXT: psrlw $8, %xmm5
-; SSE-NEXT: pblendw {{.*#+}} xmm5 = xmm0[0],xmm5[1,2,3,4,5,6,7]
-; SSE-NEXT: movdqa %xmm5, %xmm0
-; SSE-NEXT: psrlw $4, %xmm0
-; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm5[0],xmm0[1,2,3,4],xmm5[5,6],xmm0[7]
-; SSE-NEXT: movdqa %xmm0, %xmm5
-; SSE-NEXT: psrlw $2, %xmm5
-; SSE-NEXT: pblendw {{.*#+}} xmm5 = xmm0[0],xmm5[1,2],xmm0[3,4],xmm5[5,6,7]
-; SSE-NEXT: movdqa %xmm5, %xmm0
-; SSE-NEXT: psrlw $1, %xmm0
-; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm5[0,1],xmm0[2],xmm5[3],xmm0[4,5],xmm5[6],xmm0[7]
+; SSE-NEXT: movdqa {{.*#+}} xmm5 = [1,4,2,16,8,32,64,2]
+; SSE-NEXT: pmulhuw %xmm5, %xmm0
; SSE-NEXT: paddw %xmm1, %xmm0
-; SSE-NEXT: movdqa %xmm0, %xmm5
-; SSE-NEXT: psraw $4, %xmm5
-; SSE-NEXT: pblendw {{.*#+}} xmm5 = xmm0[0,1,2],xmm5[3],xmm0[4],xmm5[5,6],xmm0[7]
-; SSE-NEXT: movdqa %xmm5, %xmm6
-; SSE-NEXT: psraw $2, %xmm6
-; SSE-NEXT: pblendw {{.*#+}} xmm6 = xmm5[0],xmm6[1],xmm5[2,3],xmm6[4],xmm5[5],xmm6[6],xmm5[7]
-; SSE-NEXT: movdqa %xmm6, %xmm0
+; SSE-NEXT: movdqa %xmm0, %xmm6
+; SSE-NEXT: psraw $4, %xmm6
+; SSE-NEXT: pblendw {{.*#+}} xmm6 = xmm0[0,1,2],xmm6[3],xmm0[4],xmm6[5,6],xmm0[7]
+; SSE-NEXT: movdqa %xmm6, %xmm7
+; SSE-NEXT: psraw $2, %xmm7
+; SSE-NEXT: pblendw {{.*#+}} xmm7 = xmm6[0],xmm7[1],xmm6[2,3],xmm7[4],xmm6[5],xmm7[6],xmm6[7]
+; SSE-NEXT: movdqa %xmm7, %xmm0
; SSE-NEXT: psraw $1, %xmm0
-; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm6[0,1],xmm0[2],xmm6[3],xmm0[4,5],xmm6[6],xmm0[7]
+; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm7[0,1],xmm0[2],xmm7[3],xmm0[4,5],xmm7[6],xmm0[7]
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
; SSE-NEXT: movdqa %xmm4, %xmm1
; SSE-NEXT: psraw $15, %xmm1
-; SSE-NEXT: movdqa %xmm1, %xmm5
-; SSE-NEXT: psrlw $8, %xmm5
-; SSE-NEXT: pblendw {{.*#+}} xmm5 = xmm1[0],xmm5[1,2,3,4,5,6,7]
-; SSE-NEXT: movdqa %xmm5, %xmm1
-; SSE-NEXT: psrlw $4, %xmm1
-; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm5[0],xmm1[1,2,3,4],xmm5[5,6],xmm1[7]
-; SSE-NEXT: movdqa %xmm1, %xmm5
-; SSE-NEXT: psrlw $2, %xmm5
-; SSE-NEXT: pblendw {{.*#+}} xmm5 = xmm1[0],xmm5[1,2],xmm1[3,4],xmm5[5,6,7]
-; SSE-NEXT: movdqa %xmm5, %xmm1
-; SSE-NEXT: psrlw $1, %xmm1
-; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm5[0,1],xmm1[2],xmm5[3],xmm1[4,5],xmm5[6],xmm1[7]
+; SSE-NEXT: pmulhuw %xmm5, %xmm1
; SSE-NEXT: paddw %xmm4, %xmm1
-; SSE-NEXT: movdqa %xmm1, %xmm5
-; SSE-NEXT: psraw $4, %xmm5
-; SSE-NEXT: pblendw {{.*#+}} xmm5 = xmm1[0,1,2],xmm5[3],xmm1[4],xmm5[5,6],xmm1[7]
-; SSE-NEXT: movdqa %xmm5, %xmm6
-; SSE-NEXT: psraw $2, %xmm6
-; SSE-NEXT: pblendw {{.*#+}} xmm6 = xmm5[0],xmm6[1],xmm5[2,3],xmm6[4],xmm5[5],xmm6[6],xmm5[7]
-; SSE-NEXT: movdqa %xmm6, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm6
+; SSE-NEXT: psraw $4, %xmm6
+; SSE-NEXT: pblendw {{.*#+}} xmm6 = xmm1[0,1,2],xmm6[3],xmm1[4],xmm6[5,6],xmm1[7]
+; SSE-NEXT: movdqa %xmm6, %xmm7
+; SSE-NEXT: psraw $2, %xmm7
+; SSE-NEXT: pblendw {{.*#+}} xmm7 = xmm6[0],xmm7[1],xmm6[2,3],xmm7[4],xmm6[5],xmm7[6],xmm6[7]
+; SSE-NEXT: movdqa %xmm7, %xmm1
; SSE-NEXT: psraw $1, %xmm1
-; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm6[0,1],xmm1[2],xmm6[3],xmm1[4,5],xmm6[6],xmm1[7]
+; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm7[0,1],xmm1[2],xmm7[3],xmm1[4,5],xmm7[6],xmm1[7]
; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm4[0],xmm1[1,2,3,4,5,6,7]
; SSE-NEXT: movdqa %xmm2, %xmm4
; SSE-NEXT: psraw $15, %xmm4
-; SSE-NEXT: movdqa %xmm4, %xmm5
-; SSE-NEXT: psrlw $8, %xmm5
-; SSE-NEXT: pblendw {{.*#+}} xmm5 = xmm4[0],xmm5[1,2,3,4,5,6,7]
-; SSE-NEXT: movdqa %xmm5, %xmm4
-; SSE-NEXT: psrlw $4, %xmm4
-; SSE-NEXT: pblendw {{.*#+}} xmm4 = xmm5[0],xmm4[1,2,3,4],xmm5[5,6],xmm4[7]
-; SSE-NEXT: movdqa %xmm4, %xmm5
-; SSE-NEXT: psrlw $2, %xmm5
-; SSE-NEXT: pblendw {{.*#+}} xmm5 = xmm4[0],xmm5[1,2],xmm4[3,4],xmm5[5,6,7]
-; SSE-NEXT: movdqa %xmm5, %xmm4
-; SSE-NEXT: psrlw $1, %xmm4
-; SSE-NEXT: pblendw {{.*#+}} xmm4 = xmm5[0,1],xmm4[2],xmm5[3],xmm4[4,5],xmm5[6],xmm4[7]
+; SSE-NEXT: pmulhuw %xmm5, %xmm4
; SSE-NEXT: paddw %xmm2, %xmm4
-; SSE-NEXT: movdqa %xmm4, %xmm5
-; SSE-NEXT: psraw $4, %xmm5
-; SSE-NEXT: pblendw {{.*#+}} xmm5 = xmm4[0,1,2],xmm5[3],xmm4[4],xmm5[5,6],xmm4[7]
-; SSE-NEXT: movdqa %xmm5, %xmm6
-; SSE-NEXT: psraw $2, %xmm6
-; SSE-NEXT: pblendw {{.*#+}} xmm6 = xmm5[0],xmm6[1],xmm5[2,3],xmm6[4],xmm5[5],xmm6[6],xmm5[7]
-; SSE-NEXT: movdqa %xmm6, %xmm4
+; SSE-NEXT: movdqa %xmm4, %xmm6
+; SSE-NEXT: psraw $4, %xmm6
+; SSE-NEXT: pblendw {{.*#+}} xmm6 = xmm4[0,1,2],xmm6[3],xmm4[4],xmm6[5,6],xmm4[7]
+; SSE-NEXT: movdqa %xmm6, %xmm7
+; SSE-NEXT: psraw $2, %xmm7
+; SSE-NEXT: pblendw {{.*#+}} xmm7 = xmm6[0],xmm7[1],xmm6[2,3],xmm7[4],xmm6[5],xmm7[6],xmm6[7]
+; SSE-NEXT: movdqa %xmm7, %xmm4
; SSE-NEXT: psraw $1, %xmm4
-; SSE-NEXT: pblendw {{.*#+}} xmm4 = xmm6[0,1],xmm4[2],xmm6[3],xmm4[4,5],xmm6[6],xmm4[7]
+; SSE-NEXT: pblendw {{.*#+}} xmm4 = xmm7[0,1],xmm4[2],xmm7[3],xmm4[4,5],xmm7[6],xmm4[7]
; SSE-NEXT: pblendw {{.*#+}} xmm4 = xmm2[0],xmm4[1,2,3,4,5,6,7]
; SSE-NEXT: movdqa %xmm3, %xmm2
; SSE-NEXT: psraw $15, %xmm2
-; SSE-NEXT: movdqa %xmm2, %xmm5
-; SSE-NEXT: psrlw $8, %xmm5
-; SSE-NEXT: pblendw {{.*#+}} xmm5 = xmm2[0],xmm5[1,2,3,4,5,6,7]
-; SSE-NEXT: movdqa %xmm5, %xmm2
-; SSE-NEXT: psrlw $4, %xmm2
-; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm5[0],xmm2[1,2,3,4],xmm5[5,6],xmm2[7]
-; SSE-NEXT: movdqa %xmm2, %xmm5
-; SSE-NEXT: psrlw $2, %xmm5
-; SSE-NEXT: pblendw {{.*#+}} xmm5 = xmm2[0],xmm5[1,2],xmm2[3,4],xmm5[5,6,7]
-; SSE-NEXT: movdqa %xmm5, %xmm2
-; SSE-NEXT: psrlw $1, %xmm2
-; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm5[0,1],xmm2[2],xmm5[3],xmm2[4,5],xmm5[6],xmm2[7]
+; SSE-NEXT: pmulhuw %xmm5, %xmm2
; SSE-NEXT: paddw %xmm3, %xmm2
; SSE-NEXT: movdqa %xmm2, %xmm5
; SSE-NEXT: psraw $4, %xmm5
@@ -857,54 +732,10 @@ define <32 x i16> @combine_vec_sdiv_by_pow2b_v32i16(<32 x i16> %x) {
;
; AVX1-LABEL: combine_vec_sdiv_by_pow2b_v32i16:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpsraw $15, %xmm2, %xmm3
-; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm4
-; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3,4,5,6,7]
-; AVX1-NEXT: vpsrlw $4, %xmm3, %xmm4
-; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3,4],xmm3[5,6],xmm4[7]
-; AVX1-NEXT: vpsrlw $2, %xmm3, %xmm4
-; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2],xmm3[3,4],xmm4[5,6,7]
-; AVX1-NEXT: vpsrlw $1, %xmm3, %xmm4
-; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1],xmm4[2],xmm3[3],xmm4[4,5],xmm3[6],xmm4[7]
-; AVX1-NEXT: vpaddw %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpsraw $4, %xmm2, %xmm3
-; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[3],xmm2[4],xmm3[5,6],xmm2[7]
-; AVX1-NEXT: vpsraw $2, %xmm2, %xmm3
-; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1],xmm2[2,3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
-; AVX1-NEXT: vpsraw $1, %xmm2, %xmm3
-; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2],xmm2[3],xmm3[4,5],xmm2[6],xmm3[7]
-; AVX1-NEXT: vpsraw $15, %xmm0, %xmm3
-; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm4
-; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3,4,5,6,7]
-; AVX1-NEXT: vpsrlw $4, %xmm3, %xmm4
-; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3,4],xmm3[5,6],xmm4[7]
-; AVX1-NEXT: vpsrlw $2, %xmm3, %xmm4
-; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2],xmm3[3,4],xmm4[5,6,7]
-; AVX1-NEXT: vpsrlw $1, %xmm3, %xmm4
-; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1],xmm4[2],xmm3[3],xmm4[4,5],xmm3[6],xmm4[7]
-; AVX1-NEXT: vpaddw %xmm3, %xmm0, %xmm3
-; AVX1-NEXT: vpsraw $4, %xmm3, %xmm4
-; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2],xmm4[3],xmm3[4],xmm4[5,6],xmm3[7]
-; AVX1-NEXT: vpsraw $2, %xmm3, %xmm4
-; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1],xmm3[2,3],xmm4[4],xmm3[5],xmm4[6],xmm3[7]
-; AVX1-NEXT: vpsraw $1, %xmm3, %xmm4
-; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1],xmm4[2],xmm3[3],xmm4[4,5],xmm3[6],xmm4[7]
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm3
-; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [0,65535,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,65535]
-; AVX1-NEXT: vandps %ymm2, %ymm3, %ymm3
-; AVX1-NEXT: vandnps %ymm0, %ymm2, %ymm0
-; AVX1-NEXT: vorps %ymm0, %ymm3, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpsraw $15, %xmm3, %xmm4
-; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm5
-; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0],xmm5[1,2,3,4,5,6,7]
-; AVX1-NEXT: vpsrlw $4, %xmm4, %xmm5
-; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0],xmm5[1,2,3,4],xmm4[5,6],xmm5[7]
-; AVX1-NEXT: vpsrlw $2, %xmm4, %xmm5
-; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0],xmm5[1,2],xmm4[3,4],xmm5[5,6,7]
-; AVX1-NEXT: vpsrlw $1, %xmm4, %xmm5
-; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0,1],xmm5[2],xmm4[3],xmm5[4,5],xmm4[6],xmm5[7]
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,4,2,16,8,32,64,2]
+; AVX1-NEXT: vpmulhuw %xmm2, %xmm4, %xmm4
; AVX1-NEXT: vpaddw %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpsraw $4, %xmm3, %xmm4
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2],xmm4[3],xmm3[4],xmm4[5,6],xmm3[7]
@@ -912,16 +743,9 @@ define <32 x i16> @combine_vec_sdiv_by_pow2b_v32i16(<32 x i16> %x) {
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1],xmm3[2,3],xmm4[4],xmm3[5],xmm4[6],xmm3[7]
; AVX1-NEXT: vpsraw $1, %xmm3, %xmm4
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1],xmm4[2],xmm3[3],xmm4[4,5],xmm3[6],xmm4[7]
-; AVX1-NEXT: vpsraw $15, %xmm1, %xmm4
-; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm5
-; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0],xmm5[1,2,3,4,5,6,7]
-; AVX1-NEXT: vpsrlw $4, %xmm4, %xmm5
-; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0],xmm5[1,2,3,4],xmm4[5,6],xmm5[7]
-; AVX1-NEXT: vpsrlw $2, %xmm4, %xmm5
-; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0],xmm5[1,2],xmm4[3,4],xmm5[5,6,7]
-; AVX1-NEXT: vpsrlw $1, %xmm4, %xmm5
-; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0,1],xmm5[2],xmm4[3],xmm5[4,5],xmm4[6],xmm5[7]
-; AVX1-NEXT: vpaddw %xmm4, %xmm1, %xmm4
+; AVX1-NEXT: vpsraw $15, %xmm0, %xmm4
+; AVX1-NEXT: vpmulhuw %xmm2, %xmm4, %xmm4
+; AVX1-NEXT: vpaddw %xmm4, %xmm0, %xmm4
; AVX1-NEXT: vpsraw $4, %xmm4, %xmm5
; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0,1,2],xmm5[3],xmm4[4],xmm5[5,6],xmm4[7]
; AVX1-NEXT: vpsraw $2, %xmm4, %xmm5
@@ -929,51 +753,62 @@ define <32 x i16> @combine_vec_sdiv_by_pow2b_v32i16(<32 x i16> %x) {
; AVX1-NEXT: vpsraw $1, %xmm4, %xmm5
; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0,1],xmm5[2],xmm4[3],xmm5[4,5],xmm4[6],xmm5[7]
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3
-; AVX1-NEXT: vandps %ymm2, %ymm3, %ymm3
-; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1
-; AVX1-NEXT: vorps %ymm1, %ymm3, %ymm1
+; AVX1-NEXT: vmovaps {{.*#+}} ymm4 = [0,65535,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,65535]
+; AVX1-NEXT: vandps %ymm4, %ymm3, %ymm3
+; AVX1-NEXT: vandnps %ymm0, %ymm4, %ymm0
+; AVX1-NEXT: vorps %ymm0, %ymm3, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpsraw $15, %xmm3, %xmm5
+; AVX1-NEXT: vpmulhuw %xmm2, %xmm5, %xmm5
+; AVX1-NEXT: vpaddw %xmm5, %xmm3, %xmm3
+; AVX1-NEXT: vpsraw $4, %xmm3, %xmm5
+; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2],xmm5[3],xmm3[4],xmm5[5,6],xmm3[7]
+; AVX1-NEXT: vpsraw $2, %xmm3, %xmm5
+; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0],xmm5[1],xmm3[2,3],xmm5[4],xmm3[5],xmm5[6],xmm3[7]
+; AVX1-NEXT: vpsraw $1, %xmm3, %xmm5
+; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1],xmm5[2],xmm3[3],xmm5[4,5],xmm3[6],xmm5[7]
+; AVX1-NEXT: vpsraw $15, %xmm1, %xmm5
+; AVX1-NEXT: vpmulhuw %xmm2, %xmm5, %xmm2
+; AVX1-NEXT: vpaddw %xmm2, %xmm1, %xmm2
+; AVX1-NEXT: vpsraw $4, %xmm2, %xmm5
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2],xmm5[3],xmm2[4],xmm5[5,6],xmm2[7]
+; AVX1-NEXT: vpsraw $2, %xmm2, %xmm5
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm5[1],xmm2[2,3],xmm5[4],xmm2[5],xmm5[6],xmm2[7]
+; AVX1-NEXT: vpsraw $1, %xmm2, %xmm5
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm5[2],xmm2[3],xmm5[4,5],xmm2[6],xmm5[7]
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; AVX1-NEXT: vandps %ymm4, %ymm2, %ymm2
+; AVX1-NEXT: vandnps %ymm1, %ymm4, %ymm1
+; AVX1-NEXT: vorps %ymm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_vec_sdiv_by_pow2b_v32i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [16,14,15,12,13,11,10,15,16,14,15,12,13,11,10,15]
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,2,1,4,3,5,6,1,0,2,1,4,3,5,6,1]
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm3[4],ymm2[4],ymm3[5],ymm2[5],ymm3[6],ymm2[6],ymm3[7],ymm2[7],ymm3[12],ymm2[12],ymm3[13],ymm2[13],ymm3[14],ymm2[14],ymm3[15],ymm2[15]
; AVX2-NEXT: vpsraw $15, %ymm0, %ymm5
-; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm6 = ymm2[4],ymm5[4],ymm2[5],ymm5[5],ymm2[6],ymm5[6],ymm2[7],ymm5[7],ymm2[12],ymm5[12],ymm2[13],ymm5[13],ymm2[14],ymm5[14],ymm2[15],ymm5[15]
-; AVX2-NEXT: vpsrlvd %ymm4, %ymm6, %ymm6
-; AVX2-NEXT: vpsrld $16, %ymm6, %ymm6
-; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm3 = ymm3[0],ymm2[0],ymm3[1],ymm2[1],ymm3[2],ymm2[2],ymm3[3],ymm2[3],ymm3[8],ymm2[8],ymm3[9],ymm2[9],ymm3[10],ymm2[10],ymm3[11],ymm2[11]
-; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm5 = ymm2[0],ymm5[0],ymm2[1],ymm5[1],ymm2[2],ymm5[2],ymm2[3],ymm5[3],ymm2[8],ymm5[8],ymm2[9],ymm5[9],ymm2[10],ymm5[10],ymm2[11],ymm5[11]
-; AVX2-NEXT: vpsrlvd %ymm3, %ymm5, %ymm5
-; AVX2-NEXT: vpsrld $16, %ymm5, %ymm5
-; AVX2-NEXT: vpackusdw %ymm6, %ymm5, %ymm5
+; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm6 = [1,4,2,16,8,32,64,2,1,4,2,16,8,32,64,2]
+; AVX2-NEXT: # ymm6 = mem[0,1,0,1]
+; AVX2-NEXT: vpmulhuw %ymm6, %ymm5, %ymm5
; AVX2-NEXT: vpaddw %ymm5, %ymm0, %ymm5
-; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm6 = ymm2[4],ymm5[4],ymm2[5],ymm5[5],ymm2[6],ymm5[6],ymm2[7],ymm5[7],ymm2[12],ymm5[12],ymm2[13],ymm5[13],ymm2[14],ymm5[14],ymm2[15],ymm5[15]
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm7 = [0,2,1,4,3,5,6,1,0,2,1,4,3,5,6,1]
-; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm8 = ymm7[4],ymm2[4],ymm7[5],ymm2[5],ymm7[6],ymm2[6],ymm7[7],ymm2[7],ymm7[12],ymm2[12],ymm7[13],ymm2[13],ymm7[14],ymm2[14],ymm7[15],ymm2[15]
-; AVX2-NEXT: vpsravd %ymm8, %ymm6, %ymm6
-; AVX2-NEXT: vpsrld $16, %ymm6, %ymm6
+; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm7 = ymm2[4],ymm5[4],ymm2[5],ymm5[5],ymm2[6],ymm5[6],ymm2[7],ymm5[7],ymm2[12],ymm5[12],ymm2[13],ymm5[13],ymm2[14],ymm5[14],ymm2[15],ymm5[15]
+; AVX2-NEXT: vpsravd %ymm4, %ymm7, %ymm7
+; AVX2-NEXT: vpsrld $16, %ymm7, %ymm7
+; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm3 = ymm3[0],ymm2[0],ymm3[1],ymm2[1],ymm3[2],ymm2[2],ymm3[3],ymm2[3],ymm3[8],ymm2[8],ymm3[9],ymm2[9],ymm3[10],ymm2[10],ymm3[11],ymm2[11]
; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm5 = ymm2[0],ymm5[0],ymm2[1],ymm5[1],ymm2[2],ymm5[2],ymm2[3],ymm5[3],ymm2[8],ymm5[8],ymm2[9],ymm5[9],ymm2[10],ymm5[10],ymm2[11],ymm5[11]
-; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm7 = ymm7[0],ymm2[0],ymm7[1],ymm2[1],ymm7[2],ymm2[2],ymm7[3],ymm2[3],ymm7[8],ymm2[8],ymm7[9],ymm2[9],ymm7[10],ymm2[10],ymm7[11],ymm2[11]
-; AVX2-NEXT: vpsravd %ymm7, %ymm5, %ymm5
+; AVX2-NEXT: vpsravd %ymm3, %ymm5, %ymm5
; AVX2-NEXT: vpsrld $16, %ymm5, %ymm5
-; AVX2-NEXT: vpackusdw %ymm6, %ymm5, %ymm5
+; AVX2-NEXT: vpackusdw %ymm7, %ymm5, %ymm5
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm5[1,2,3,4,5,6,7],ymm0[8],ymm5[9,10,11,12,13,14,15]
; AVX2-NEXT: vpsraw $15, %ymm1, %ymm5
+; AVX2-NEXT: vpmulhuw %ymm6, %ymm5, %ymm5
+; AVX2-NEXT: vpaddw %ymm5, %ymm1, %ymm5
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm6 = ymm2[4],ymm5[4],ymm2[5],ymm5[5],ymm2[6],ymm5[6],ymm2[7],ymm5[7],ymm2[12],ymm5[12],ymm2[13],ymm5[13],ymm2[14],ymm5[14],ymm2[15],ymm5[15]
-; AVX2-NEXT: vpsrlvd %ymm4, %ymm6, %ymm4
-; AVX2-NEXT: vpsrld $16, %ymm4, %ymm4
-; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm5 = ymm2[0],ymm5[0],ymm2[1],ymm5[1],ymm2[2],ymm5[2],ymm2[3],ymm5[3],ymm2[8],ymm5[8],ymm2[9],ymm5[9],ymm2[10],ymm5[10],ymm2[11],ymm5[11]
-; AVX2-NEXT: vpsrlvd %ymm3, %ymm5, %ymm3
-; AVX2-NEXT: vpsrld $16, %ymm3, %ymm3
-; AVX2-NEXT: vpackusdw %ymm4, %ymm3, %ymm3
-; AVX2-NEXT: vpaddw %ymm3, %ymm1, %ymm3
-; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm3[4],ymm2[5],ymm3[5],ymm2[6],ymm3[6],ymm2[7],ymm3[7],ymm2[12],ymm3[12],ymm2[13],ymm3[13],ymm2[14],ymm3[14],ymm2[15],ymm3[15]
-; AVX2-NEXT: vpsravd %ymm8, %ymm4, %ymm4
+; AVX2-NEXT: vpsravd %ymm4, %ymm6, %ymm4
; AVX2-NEXT: vpsrld $16, %ymm4, %ymm4
-; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm2 = ymm2[0],ymm3[0],ymm2[1],ymm3[1],ymm2[2],ymm3[2],ymm2[3],ymm3[3],ymm2[8],ymm3[8],ymm2[9],ymm3[9],ymm2[10],ymm3[10],ymm2[11],ymm3[11]
-; AVX2-NEXT: vpsravd %ymm7, %ymm2, %ymm2
+; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm2 = ymm2[0],ymm5[0],ymm2[1],ymm5[1],ymm2[2],ymm5[2],ymm2[3],ymm5[3],ymm2[8],ymm5[8],ymm2[9],ymm5[9],ymm2[10],ymm5[10],ymm2[11],ymm5[11]
+; AVX2-NEXT: vpsravd %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vpsrld $16, %ymm2, %ymm2
; AVX2-NEXT: vpackusdw %ymm4, %ymm2, %ymm2
; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3,4,5,6,7],ymm1[8],ymm2[9,10,11,12,13,14,15]
@@ -982,11 +817,9 @@ define <32 x i16> @combine_vec_sdiv_by_pow2b_v32i16(<32 x i16> %x) {
; AVX512F-LABEL: combine_vec_sdiv_by_pow2b_v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsraw $15, %ymm0, %ymm2
-; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
-; AVX512F-NEXT: vbroadcasti64x4 {{.*#+}} zmm3 = [16,14,15,12,13,11,10,15,16,14,15,12,13,11,10,15]
-; AVX512F-NEXT: # zmm3 = mem[0,1,2,3,0,1,2,3]
-; AVX512F-NEXT: vpsrlvd %zmm3, %zmm2, %zmm2
-; AVX512F-NEXT: vpmovdw %zmm2, %ymm2
+; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [1,4,2,16,8,32,64,2,1,4,2,16,8,32,64,2]
+; AVX512F-NEXT: # ymm3 = mem[0,1,0,1]
+; AVX512F-NEXT: vpmulhuw %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpaddw %ymm2, %ymm0, %ymm2
; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2
; AVX512F-NEXT: vbroadcasti64x4 {{.*#+}} zmm4 = [0,2,1,4,3,5,6,1,0,2,1,4,3,5,6,1]
@@ -995,9 +828,7 @@ define <32 x i16> @combine_vec_sdiv_by_pow2b_v32i16(<32 x i16> %x) {
; AVX512F-NEXT: vpmovdw %zmm2, %ymm2
; AVX512F-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3,4,5,6,7],ymm0[8],ymm2[9,10,11,12,13,14,15]
; AVX512F-NEXT: vpsraw $15, %ymm1, %ymm2
-; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
-; AVX512F-NEXT: vpsrlvd %zmm3, %zmm2, %zmm2
-; AVX512F-NEXT: vpmovdw %zmm2, %ymm2
+; AVX512F-NEXT: vpmulhuw %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpaddw %ymm2, %ymm1, %ymm2
; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2
; AVX512F-NEXT: vpsravd %zmm4, %zmm2, %zmm2
@@ -2021,43 +1852,22 @@ define <16 x i8> @non_splat_minus_one_divisor_1(<16 x i8> %A) {
; SSE: # %bb.0:
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: pxor %xmm2, %xmm2
-; SSE-NEXT: pxor %xmm3, %xmm3
-; SSE-NEXT: pcmpgtb %xmm0, %xmm3
-; SSE-NEXT: movdqa %xmm3, %xmm4
-; SSE-NEXT: psrlw $4, %xmm4
-; SSE-NEXT: pand {{.*}}(%rip), %xmm4
-; SSE-NEXT: movdqa {{.*#+}} xmm0 = [256,224,256,224,57600,57568,8416,8416]
-; SSE-NEXT: pblendvb %xmm0, %xmm4, %xmm3
-; SSE-NEXT: movdqa %xmm3, %xmm4
-; SSE-NEXT: psrlw $2, %xmm4
-; SSE-NEXT: pand {{.*}}(%rip), %xmm4
-; SSE-NEXT: paddb %xmm0, %xmm0
-; SSE-NEXT: pblendvb %xmm0, %xmm4, %xmm3
-; SSE-NEXT: movdqa %xmm3, %xmm4
-; SSE-NEXT: psrlw $1, %xmm4
-; SSE-NEXT: pand {{.*}}(%rip), %xmm4
-; SSE-NEXT: paddb %xmm0, %xmm0
-; SSE-NEXT: pblendvb %xmm0, %xmm4, %xmm3
-; SSE-NEXT: paddb %xmm1, %xmm3
-; SSE-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm3[8],xmm4[9],xmm3[9],xmm4[10],xmm3[10],xmm4[11],xmm3[11],xmm4[12],xmm3[12],xmm4[13],xmm3[13],xmm4[14],xmm3[14],xmm4[15],xmm3[15]
-; SSE-NEXT: movdqa %xmm4, %xmm5
-; SSE-NEXT: psraw $4, %xmm5
-; SSE-NEXT: movdqa {{.*#+}} xmm6 = [0,32,0,32,8192,8224,57376,57376]
-; SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm6[8],xmm0[9],xmm6[9],xmm0[10],xmm6[10],xmm0[11],xmm6[11],xmm0[12],xmm6[12],xmm0[13],xmm6[13],xmm0[14],xmm6[14],xmm0[15],xmm6[15]
-; SSE-NEXT: pblendvb %xmm0, %xmm5, %xmm4
-; SSE-NEXT: movdqa %xmm4, %xmm5
-; SSE-NEXT: psraw $2, %xmm5
-; SSE-NEXT: paddw %xmm0, %xmm0
-; SSE-NEXT: pblendvb %xmm0, %xmm5, %xmm4
-; SSE-NEXT: movdqa %xmm4, %xmm5
-; SSE-NEXT: psraw $1, %xmm5
-; SSE-NEXT: paddw %xmm0, %xmm0
-; SSE-NEXT: pblendvb %xmm0, %xmm5, %xmm4
+; SSE-NEXT: pxor %xmm0, %xmm0
+; SSE-NEXT: pcmpgtb %xmm1, %xmm0
+; SSE-NEXT: pmovzxbw {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; SSE-NEXT: pmullw {{.*}}(%rip), %xmm4
; SSE-NEXT: psrlw $8, %xmm4
-; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; SSE-NEXT: pmullw {{.*}}(%rip), %xmm0
+; SSE-NEXT: psrlw $8, %xmm0
+; SSE-NEXT: packuswb %xmm0, %xmm4
+; SSE-NEXT: paddb %xmm1, %xmm4
+; SSE-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
; SSE-NEXT: movdqa %xmm3, %xmm5
; SSE-NEXT: psraw $4, %xmm5
-; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3],xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7]
+; SSE-NEXT: movdqa {{.*#+}} xmm6 = [0,32,0,32,8192,8224,57376,57376]
+; SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm6[8],xmm0[9],xmm6[9],xmm0[10],xmm6[10],xmm0[11],xmm6[11],xmm0[12],xmm6[12],xmm0[13],xmm6[13],xmm0[14],xmm6[14],xmm0[15],xmm6[15]
; SSE-NEXT: pblendvb %xmm0, %xmm5, %xmm3
; SSE-NEXT: movdqa %xmm3, %xmm5
; SSE-NEXT: psraw $2, %xmm5
@@ -2068,9 +1878,23 @@ define <16 x i8> @non_splat_minus_one_divisor_1(<16 x i8> %A) {
; SSE-NEXT: paddw %xmm0, %xmm0
; SSE-NEXT: pblendvb %xmm0, %xmm5, %xmm3
; SSE-NEXT: psrlw $8, %xmm3
-; SSE-NEXT: packuswb %xmm4, %xmm3
+; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE-NEXT: movdqa %xmm4, %xmm5
+; SSE-NEXT: psraw $4, %xmm5
+; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3],xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7]
+; SSE-NEXT: pblendvb %xmm0, %xmm5, %xmm4
+; SSE-NEXT: movdqa %xmm4, %xmm5
+; SSE-NEXT: psraw $2, %xmm5
+; SSE-NEXT: paddw %xmm0, %xmm0
+; SSE-NEXT: pblendvb %xmm0, %xmm5, %xmm4
+; SSE-NEXT: movdqa %xmm4, %xmm5
+; SSE-NEXT: psraw $1, %xmm5
+; SSE-NEXT: paddw %xmm0, %xmm0
+; SSE-NEXT: pblendvb %xmm0, %xmm5, %xmm4
+; SSE-NEXT: psrlw $8, %xmm4
+; SSE-NEXT: packuswb %xmm3, %xmm4
; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,0,255,0,0,0,255,0,0,255,255,255,255,255,255,255]
-; SSE-NEXT: pblendvb %xmm0, %xmm3, %xmm1
+; SSE-NEXT: pblendvb %xmm0, %xmm4, %xmm1
; SSE-NEXT: psubb %xmm1, %xmm2
; SSE-NEXT: movaps {{.*#+}} xmm0 = [255,255,0,255,255,255,0,255,255,0,0,0,0,255,0,255]
; SSE-NEXT: pblendvb %xmm0, %xmm2, %xmm1
@@ -2081,18 +1905,14 @@ define <16 x i8> @non_splat_minus_one_divisor_1(<16 x i8> %A) {
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm3
-; AVX1-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [256,224,256,224,57600,57568,8416,8416]
-; AVX1-NEXT: vpblendvb %xmm4, %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpsrlw $2, %xmm2, %xmm3
-; AVX1-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
-; AVX1-NEXT: vpaddb %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpblendvb %xmm4, %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpsrlw $1, %xmm2, %xmm3
-; AVX1-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
-; AVX1-NEXT: vpaddb %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpblendvb %xmm4, %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm3, %xmm3
+; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
+; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm2, %xmm2
+; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
+; AVX1-NEXT: vpackuswb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm2
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
; AVX1-NEXT: vpsraw $4, %xmm3, %xmm4
@@ -2129,18 +1949,11 @@ define <16 x i8> @non_splat_minus_one_divisor_1(<16 x i8> %A) {
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm2
-; AVX2-NEXT: vpsrlw $4, %xmm2, %xmm3
-; AVX2-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [256,224,256,224,57600,57568,8416,8416]
-; AVX2-NEXT: vpblendvb %xmm4, %xmm3, %xmm2, %xmm2
-; AVX2-NEXT: vpsrlw $2, %xmm2, %xmm3
-; AVX2-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
-; AVX2-NEXT: vpaddb %xmm4, %xmm4, %xmm4
-; AVX2-NEXT: vpblendvb %xmm4, %xmm3, %xmm2, %xmm2
-; AVX2-NEXT: vpsrlw $1, %xmm2, %xmm3
-; AVX2-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
-; AVX2-NEXT: vpaddb %xmm4, %xmm4, %xmm4
-; AVX2-NEXT: vpblendvb %xmm4, %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
+; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm2, %ymm2
+; AVX2-NEXT: vpsrlw $8, %ymm2, %ymm2
+; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
+; AVX2-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm2
; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
; AVX2-NEXT: vpsraw $4, %xmm3, %xmm4
@@ -2171,6 +1984,7 @@ define <16 x i8> @non_splat_minus_one_divisor_1(<16 x i8> %A) {
; AVX2-NEXT: vpsubb %xmm0, %xmm1, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,0,255,255,255,0,255,255,0,0,0,0,255,0,255]
; AVX2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: non_splat_minus_one_divisor_1:
diff --git a/test/CodeGen/X86/combine-shl.ll b/test/CodeGen/X86/combine-shl.ll
index c037b0f0aa4b..1fc557f008fe 100644
--- a/test/CodeGen/X86/combine-shl.ll
+++ b/test/CodeGen/X86/combine-shl.ll
@@ -337,30 +337,7 @@ define <8 x i32> @combine_vec_shl_zext_lshr0(<8 x i16> %x) {
define <8 x i32> @combine_vec_shl_zext_lshr1(<8 x i16> %x) {
; SSE2-LABEL: combine_vec_shl_zext_lshr1:
; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535,65535,65535,65535,0]
-; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: pand %xmm1, %xmm2
-; SSE2-NEXT: psrlw $8, %xmm0
-; SSE2-NEXT: pandn %xmm0, %xmm1
-; SSE2-NEXT: por %xmm2, %xmm1
-; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [65535,65535,65535,0,0,0,0,65535]
-; SSE2-NEXT: movdqa %xmm1, %xmm2
-; SSE2-NEXT: pand %xmm0, %xmm2
-; SSE2-NEXT: psrlw $4, %xmm1
-; SSE2-NEXT: pandn %xmm1, %xmm0
-; SSE2-NEXT: por %xmm2, %xmm0
-; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,65535,65535,0,0,65535,65535,0]
-; SSE2-NEXT: movdqa %xmm1, %xmm2
-; SSE2-NEXT: pandn %xmm0, %xmm2
-; SSE2-NEXT: psrlw $2, %xmm0
-; SSE2-NEXT: pand %xmm1, %xmm0
-; SSE2-NEXT: por %xmm2, %xmm0
-; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,0,65535,0,65535,0,65535,0]
-; SSE2-NEXT: movdqa %xmm1, %xmm2
-; SSE2-NEXT: pandn %xmm0, %xmm2
-; SSE2-NEXT: psrlw $1, %xmm0
-; SSE2-NEXT: pand %xmm1, %xmm0
-; SSE2-NEXT: por %xmm2, %xmm0
+; SSE2-NEXT: pmulhuw {{.*}}(%rip), %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
@@ -385,18 +362,7 @@ define <8 x i32> @combine_vec_shl_zext_lshr1(<8 x i16> %x) {
;
; SSE41-LABEL: combine_vec_shl_zext_lshr1:
; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrlw $8, %xmm1
-; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,6],xmm1[7]
-; SSE41-NEXT: movdqa %xmm1, %xmm0
-; SSE41-NEXT: psrlw $4, %xmm0
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3,4,5,6],xmm1[7]
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrlw $2, %xmm1
-; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2],xmm0[3,4],xmm1[5,6],xmm0[7]
-; SSE41-NEXT: movdqa %xmm1, %xmm0
-; SSE41-NEXT: psrlw $1, %xmm0
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
+; SSE41-NEXT: pmulhuw {{.*}}(%rip), %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
@@ -406,13 +372,9 @@ define <8 x i32> @combine_vec_shl_zext_lshr1(<8 x i16> %x) {
;
; AVX-LABEL: combine_vec_shl_zext_lshr1:
; AVX: # %bb.0:
+; AVX-NEXT: vpmulhuw {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,3,4,5,6,7,8]
-; AVX-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
-; AVX-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; AVX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT: retq
%1 = lshr <8 x i16> %x, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
%2 = zext <8 x i16> %1 to <8 x i32>
diff --git a/test/CodeGen/X86/dagcombine-select.ll b/test/CodeGen/X86/dagcombine-select.ll
index 2b05154676e6..1ef6cfdfd40a 100644
--- a/test/CodeGen/X86/dagcombine-select.ll
+++ b/test/CodeGen/X86/dagcombine-select.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-unknown-unknown -verify-machineinstrs < %s | FileCheck -enable-var-scope %s
-; RUN: llc -mtriple=x86_64-unknown-unknown -verify-machineinstrs -mattr=+bmi < %s | FileCheck -check-prefix=BMI -enable-var-scope %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,NOBMI -enable-var-scope
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs -mattr=+bmi | FileCheck %s -check-prefixes=CHECK,BMI -enable-var-scope
define i32 @select_and1(i32 %x, i32 %y) {
; CHECK-LABEL: select_and1:
@@ -283,14 +283,14 @@ define double @frem_constant_sel_constants(i1 %cond) {
declare i64 @llvm.cttz.i64(i64, i1)
define i64 @cttz_64_eq_select(i64 %v) nounwind {
-; CHECK-LABEL: cttz_64_eq_select:
-; CHECK: # %bb.0:
-; CHECK-NEXT: bsfq %rdi, %rcx
-; CHECK-NEXT: movq $-1, %rax
-; CHECK-NEXT: cmovneq %rcx, %rax
-; CHECK-NEXT: addq $6, %rax
-; CHECK-NEXT: retq
-
+; NOBMI-LABEL: cttz_64_eq_select:
+; NOBMI: # %bb.0:
+; NOBMI-NEXT: bsfq %rdi, %rcx
+; NOBMI-NEXT: movq $-1, %rax
+; NOBMI-NEXT: cmovneq %rcx, %rax
+; NOBMI-NEXT: addq $6, %rax
+; NOBMI-NEXT: retq
+;
; BMI-LABEL: cttz_64_eq_select:
; BMI: # %bb.0:
; BMI-NEXT: tzcntq %rdi, %rcx
@@ -298,6 +298,7 @@ define i64 @cttz_64_eq_select(i64 %v) nounwind {
; BMI-NEXT: cmovaeq %rcx, %rax
; BMI-NEXT: addq $6, %rax
; BMI-NEXT: retq
+
%cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true)
%tobool = icmp eq i64 %v, 0
%.op = add nuw nsw i64 %cnt, 6
@@ -306,14 +307,14 @@ define i64 @cttz_64_eq_select(i64 %v) nounwind {
}
define i64 @cttz_64_ne_select(i64 %v) nounwind {
-; CHECK-LABEL: cttz_64_ne_select:
-; CHECK: # %bb.0:
-; CHECK-NEXT: bsfq %rdi, %rcx
-; CHECK-NEXT: movq $-1, %rax
-; CHECK-NEXT: cmovneq %rcx, %rax
-; CHECK-NEXT: addq $6, %rax
-; CHECK-NEXT: retq
-
+; NOBMI-LABEL: cttz_64_ne_select:
+; NOBMI: # %bb.0:
+; NOBMI-NEXT: bsfq %rdi, %rcx
+; NOBMI-NEXT: movq $-1, %rax
+; NOBMI-NEXT: cmovneq %rcx, %rax
+; NOBMI-NEXT: addq $6, %rax
+; NOBMI-NEXT: retq
+;
; BMI-LABEL: cttz_64_ne_select:
; BMI: # %bb.0:
; BMI-NEXT: tzcntq %rdi, %rcx
@@ -321,6 +322,7 @@ define i64 @cttz_64_ne_select(i64 %v) nounwind {
; BMI-NEXT: cmovaeq %rcx, %rax
; BMI-NEXT: addq $6, %rax
; BMI-NEXT: retq
+
%cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true)
%tobool = icmp ne i64 %v, 0
%.op = add nuw nsw i64 %cnt, 6
@@ -330,14 +332,14 @@ define i64 @cttz_64_ne_select(i64 %v) nounwind {
declare i32 @llvm.cttz.i32(i32, i1)
define i32 @cttz_32_eq_select(i32 %v) nounwind {
-; CHECK-LABEL: cttz_32_eq_select:
-; CHECK: # %bb.0:
-; CHECK-NEXT: bsfl %edi, %ecx
-; CHECK-NEXT: movl $-1, %eax
-; CHECK-NEXT: cmovnel %ecx, %eax
-; CHECK-NEXT: addl $6, %eax
-; CHECK-NEXT: retq
-
+; NOBMI-LABEL: cttz_32_eq_select:
+; NOBMI: # %bb.0:
+; NOBMI-NEXT: bsfl %edi, %ecx
+; NOBMI-NEXT: movl $-1, %eax
+; NOBMI-NEXT: cmovnel %ecx, %eax
+; NOBMI-NEXT: addl $6, %eax
+; NOBMI-NEXT: retq
+;
; BMI-LABEL: cttz_32_eq_select:
; BMI: # %bb.0:
; BMI-NEXT: tzcntl %edi, %ecx
@@ -345,6 +347,7 @@ define i32 @cttz_32_eq_select(i32 %v) nounwind {
; BMI-NEXT: cmovael %ecx, %eax
; BMI-NEXT: addl $6, %eax
; BMI-NEXT: retq
+
%cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
%tobool = icmp eq i32 %v, 0
%.op = add nuw nsw i32 %cnt, 6
@@ -353,14 +356,14 @@ define i32 @cttz_32_eq_select(i32 %v) nounwind {
}
define i32 @cttz_32_ne_select(i32 %v) nounwind {
-; CHECK-LABEL: cttz_32_ne_select:
-; CHECK: # %bb.0:
-; CHECK-NEXT: bsfl %edi, %ecx
-; CHECK-NEXT: movl $-1, %eax
-; CHECK-NEXT: cmovnel %ecx, %eax
-; CHECK-NEXT: addl $6, %eax
-; CHECK-NEXT: retq
-
+; NOBMI-LABEL: cttz_32_ne_select:
+; NOBMI: # %bb.0:
+; NOBMI-NEXT: bsfl %edi, %ecx
+; NOBMI-NEXT: movl $-1, %eax
+; NOBMI-NEXT: cmovnel %ecx, %eax
+; NOBMI-NEXT: addl $6, %eax
+; NOBMI-NEXT: retq
+;
; BMI-LABEL: cttz_32_ne_select:
; BMI: # %bb.0:
; BMI-NEXT: tzcntl %edi, %ecx
@@ -368,6 +371,7 @@ define i32 @cttz_32_ne_select(i32 %v) nounwind {
; BMI-NEXT: cmovael %ecx, %eax
; BMI-NEXT: addl $6, %eax
; BMI-NEXT: retq
+
%cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
%tobool = icmp ne i32 %v, 0
%.op = add nuw nsw i32 %cnt, 6
diff --git a/test/CodeGen/X86/fast-isel-fold-mem.ll b/test/CodeGen/X86/fast-isel-fold-mem.ll
index 5686484ef935..1c5171926c4b 100644
--- a/test/CodeGen/X86/fast-isel-fold-mem.ll
+++ b/test/CodeGen/X86/fast-isel-fold-mem.ll
@@ -1,10 +1,13 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=x86_64-apple-darwin | FileCheck %s
define i64 @fold_load(i64* %a, i64 %b) {
-; CHECK-LABEL: fold_load
-; CHECK: addq (%rdi), %rsi
-; CHECK-NEXT: movq %rsi, %rax
+; CHECK-LABEL: fold_load:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: addq (%rdi), %rsi
+; CHECK-NEXT: movq %rsi, %rax
+; CHECK-NEXT: retq
%1 = load i64, i64* %a, align 8
%2 = add i64 %1, %b
ret i64 %2
diff --git a/test/CodeGen/X86/fast-isel-select.ll b/test/CodeGen/X86/fast-isel-select.ll
index 7b3c99f13cca..cf459f85b33e 100644
--- a/test/CodeGen/X86/fast-isel-select.ll
+++ b/test/CodeGen/X86/fast-isel-select.ll
@@ -1,14 +1,23 @@
-; RUN: llc -mtriple x86_64-apple-darwin -O0 -o - < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -O0 | FileCheck %s
+
; Make sure we only use the less significant bit of the value that feeds the
; select. Otherwise, we may account for a non-zero value whereas the
; lsb is zero.
; <rdar://problem/15651765>
-; CHECK-LABEL: fastisel_select:
-; CHECK: subb {{%[a-z0-9]+}}, [[RES:%[a-z0-9]+]]
-; CHECK: testb $1, [[RES]]
-; CHECK: cmovnel %edi, %esi
define i32 @fastisel_select(i1 %exchSub2211_, i1 %trunc_8766) {
+; CHECK-LABEL: fastisel_select:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movb %sil, %al
+; CHECK-NEXT: movb %dil, %cl
+; CHECK-NEXT: xorl %esi, %esi
+; CHECK-NEXT: subb %al, %cl
+; CHECK-NEXT: testb $1, %cl
+; CHECK-NEXT: movl $1204476887, %edi ## imm = 0x47CADBD7
+; CHECK-NEXT: cmovnel %edi, %esi
+; CHECK-NEXT: movl %esi, %eax
+; CHECK-NEXT: retq
%shuffleInternal15257_8932 = sub i1 %exchSub2211_, %trunc_8766
%counter_diff1345 = select i1 %shuffleInternal15257_8932, i32 1204476887, i32 0
ret i32 %counter_diff1345
diff --git a/test/CodeGen/X86/fast-isel-sext-zext.ll b/test/CodeGen/X86/fast-isel-sext-zext.ll
index 5e54c98b0d14..82ed6c72ebca 100644
--- a/test/CodeGen/X86/fast-isel-sext-zext.ll
+++ b/test/CodeGen/X86/fast-isel-sext-zext.ll
@@ -9,7 +9,6 @@ define i8 @test1(i8 %x) nounwind {
; X32-NEXT: andb $1, %al
; X32-NEXT: negb %al
; X32-NEXT: retl
-; X32-NEXT: ## -- End function
;
; X64-LABEL: test1:
; X64: ## %bb.0:
@@ -17,7 +16,6 @@ define i8 @test1(i8 %x) nounwind {
; X64-NEXT: negb %dil
; X64-NEXT: movl %edi, %eax
; X64-NEXT: retq
-; X64-NEXT: ## -- End function
%z = trunc i8 %x to i1
%u = sext i1 %z to i8
ret i8 %u
@@ -32,7 +30,6 @@ define i16 @test2(i16 %x) nounwind {
; X32-NEXT: movsbl %al, %eax
; X32-NEXT: ## kill: def $ax killed $ax killed $eax
; X32-NEXT: retl
-; X32-NEXT: ## -- End function
;
; X64-LABEL: test2:
; X64: ## %bb.0:
@@ -41,7 +38,6 @@ define i16 @test2(i16 %x) nounwind {
; X64-NEXT: movsbl %dil, %eax
; X64-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
-; X64-NEXT: ## -- End function
%z = trunc i16 %x to i1
%u = sext i1 %z to i16
ret i16 %u
@@ -55,7 +51,6 @@ define i32 @test3(i32 %x) nounwind {
; X32-NEXT: negb %al
; X32-NEXT: movsbl %al, %eax
; X32-NEXT: retl
-; X32-NEXT: ## -- End function
;
; X64-LABEL: test3:
; X64: ## %bb.0:
@@ -63,7 +58,6 @@ define i32 @test3(i32 %x) nounwind {
; X64-NEXT: negb %dil
; X64-NEXT: movsbl %dil, %eax
; X64-NEXT: retq
-; X64-NEXT: ## -- End function
%z = trunc i32 %x to i1
%u = sext i1 %z to i32
ret i32 %u
@@ -77,7 +71,6 @@ define i32 @test4(i32 %x) nounwind {
; X32-NEXT: negb %al
; X32-NEXT: movsbl %al, %eax
; X32-NEXT: retl
-; X32-NEXT: ## -- End function
;
; X64-LABEL: test4:
; X64: ## %bb.0:
@@ -85,7 +78,6 @@ define i32 @test4(i32 %x) nounwind {
; X64-NEXT: negb %dil
; X64-NEXT: movsbl %dil, %eax
; X64-NEXT: retq
-; X64-NEXT: ## -- End function
%z = trunc i32 %x to i1
%u = sext i1 %z to i32
ret i32 %u
@@ -97,14 +89,12 @@ define i8 @test5(i8 %x) nounwind {
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $1, %al
; X32-NEXT: retl
-; X32-NEXT: ## -- End function
;
; X64-LABEL: test5:
; X64: ## %bb.0:
; X64-NEXT: andb $1, %dil
; X64-NEXT: movl %edi, %eax
; X64-NEXT: retq
-; X64-NEXT: ## -- End function
%z = trunc i8 %x to i1
%u = zext i1 %z to i8
ret i8 %u
@@ -118,7 +108,6 @@ define i16 @test6(i16 %x) nounwind {
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: ## kill: def $ax killed $ax killed $eax
; X32-NEXT: retl
-; X32-NEXT: ## -- End function
;
; X64-LABEL: test6:
; X64: ## %bb.0:
@@ -126,7 +115,6 @@ define i16 @test6(i16 %x) nounwind {
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
-; X64-NEXT: ## -- End function
%z = trunc i16 %x to i1
%u = zext i1 %z to i16
ret i16 %u
@@ -139,14 +127,12 @@ define i32 @test7(i32 %x) nounwind {
; X32-NEXT: andb $1, %al
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: retl
-; X32-NEXT: ## -- End function
;
; X64-LABEL: test7:
; X64: ## %bb.0:
; X64-NEXT: andb $1, %dil
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: retq
-; X64-NEXT: ## -- End function
%z = trunc i32 %x to i1
%u = zext i1 %z to i32
ret i32 %u
@@ -159,14 +145,12 @@ define i32 @test8(i32 %x) nounwind {
; X32-NEXT: andb $1, %al
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: retl
-; X32-NEXT: ## -- End function
;
; X64-LABEL: test8:
; X64: ## %bb.0:
; X64-NEXT: andb $1, %dil
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: retq
-; X64-NEXT: ## -- End function
%z = trunc i32 %x to i1
%u = zext i1 %z to i32
ret i32 %u
@@ -178,14 +162,12 @@ define i16 @test9(i8 %x) nounwind {
; X32-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: ## kill: def $ax killed $ax killed $eax
; X32-NEXT: retl
-; X32-NEXT: ## -- End function
;
; X64-LABEL: test9:
; X64: ## %bb.0:
; X64-NEXT: movsbl %dil, %eax
; X64-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
-; X64-NEXT: ## -- End function
%u = sext i8 %x to i16
ret i16 %u
}
@@ -195,13 +177,11 @@ define i32 @test10(i8 %x) nounwind {
; X32: ## %bb.0:
; X32-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: retl
-; X32-NEXT: ## -- End function
;
; X64-LABEL: test10:
; X64: ## %bb.0:
; X64-NEXT: movsbl %dil, %eax
; X64-NEXT: retq
-; X64-NEXT: ## -- End function
%u = sext i8 %x to i32
ret i32 %u
}
@@ -213,13 +193,11 @@ define i64 @test11(i8 %x) nounwind {
; X32-NEXT: movl %eax, %edx
; X32-NEXT: sarl $31, %edx
; X32-NEXT: retl
-; X32-NEXT: ## -- End function
;
; X64-LABEL: test11:
; X64: ## %bb.0:
; X64-NEXT: movsbq %dil, %rax
; X64-NEXT: retq
-; X64-NEXT: ## -- End function
%u = sext i8 %x to i64
ret i64 %u
}
@@ -230,14 +208,12 @@ define i16 @test12(i8 %x) nounwind {
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: ## kill: def $ax killed $ax killed $eax
; X32-NEXT: retl
-; X32-NEXT: ## -- End function
;
; X64-LABEL: test12:
; X64: ## %bb.0:
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
-; X64-NEXT: ## -- End function
%u = zext i8 %x to i16
ret i16 %u
}
@@ -247,13 +223,11 @@ define i32 @test13(i8 %x) nounwind {
; X32: ## %bb.0:
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: retl
-; X32-NEXT: ## -- End function
;
; X64-LABEL: test13:
; X64: ## %bb.0:
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: retq
-; X64-NEXT: ## -- End function
%u = zext i8 %x to i32
ret i32 %u
}
@@ -264,13 +238,11 @@ define i64 @test14(i8 %x) nounwind {
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: retl
-; X32-NEXT: ## -- End function
;
; X64-LABEL: test14:
; X64: ## %bb.0:
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: retq
-; X64-NEXT: ## -- End function
%u = zext i8 %x to i64
ret i64 %u
}
@@ -280,13 +252,11 @@ define i32 @test15(i16 %x) nounwind {
; X32: ## %bb.0:
; X32-NEXT: movswl {{[0-9]+}}(%esp), %eax
; X32-NEXT: retl
-; X32-NEXT: ## -- End function
;
; X64-LABEL: test15:
; X64: ## %bb.0:
; X64-NEXT: movswl %di, %eax
; X64-NEXT: retq
-; X64-NEXT: ## -- End function
%u = sext i16 %x to i32
ret i32 %u
}
@@ -298,13 +268,11 @@ define i64 @test16(i16 %x) nounwind {
; X32-NEXT: movl %eax, %edx
; X32-NEXT: sarl $31, %edx
; X32-NEXT: retl
-; X32-NEXT: ## -- End function
;
; X64-LABEL: test16:
; X64: ## %bb.0:
; X64-NEXT: movswq %di, %rax
; X64-NEXT: retq
-; X64-NEXT: ## -- End function
%u = sext i16 %x to i64
ret i64 %u
}
@@ -314,13 +282,11 @@ define i32 @test17(i16 %x) nounwind {
; X32: ## %bb.0:
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT: retl
-; X32-NEXT: ## -- End function
;
; X64-LABEL: test17:
; X64: ## %bb.0:
; X64-NEXT: movzwl %di, %eax
; X64-NEXT: retq
-; X64-NEXT: ## -- End function
%u = zext i16 %x to i32
ret i32 %u
}
@@ -331,13 +297,11 @@ define i64 @test18(i16 %x) nounwind {
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: retl
-; X32-NEXT: ## -- End function
;
; X64-LABEL: test18:
; X64: ## %bb.0:
; X64-NEXT: movzwl %di, %eax
; X64-NEXT: retq
-; X64-NEXT: ## -- End function
%u = zext i16 %x to i64
ret i64 %u
}
@@ -349,13 +313,11 @@ define i64 @test19(i32 %x) nounwind {
; X32-NEXT: movl %eax, %edx
; X32-NEXT: sarl $31, %edx
; X32-NEXT: retl
-; X32-NEXT: ## -- End function
;
; X64-LABEL: test19:
; X64: ## %bb.0:
; X64-NEXT: movslq %edi, %rax
; X64-NEXT: retq
-; X64-NEXT: ## -- End function
%u = sext i32 %x to i64
ret i64 %u
}
@@ -366,13 +328,11 @@ define i64 @test20(i32 %x) nounwind {
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: retl
-; X32-NEXT: ## -- End function
;
; X64-LABEL: test20:
; X64: ## %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: retq
-; X64-NEXT: ## -- End function
%u = zext i32 %x to i64
ret i64 %u
}
diff --git a/test/CodeGen/X86/flags-copy-lowering.mir b/test/CodeGen/X86/flags-copy-lowering.mir
index 54ce02aaca58..d5991754d40b 100644
--- a/test/CodeGen/X86/flags-copy-lowering.mir
+++ b/test/CodeGen/X86/flags-copy-lowering.mir
@@ -90,6 +90,18 @@
call void @foo()
ret i64 0
}
+
+ define i32 @test_existing_setcc(i64 %a, i64 %b) {
+ entry:
+ call void @foo()
+ ret i32 0
+ }
+
+ define i32 @test_existing_setcc_memory(i64 %a, i64 %b) {
+ entry:
+ call void @foo()
+ ret i32 0
+ }
...
---
name: test_branch
@@ -936,3 +948,110 @@ body: |
; CHECK: %8:gr64 = CMOVE64rr %0, %1, implicit killed $eflags
...
+---
+name: test_existing_setcc
+# CHECK-LABEL: name: test_existing_setcc
+liveins:
+ - { reg: '$rdi', virtual-reg: '%0' }
+ - { reg: '$rsi', virtual-reg: '%1' }
+body: |
+ bb.0:
+ successors: %bb.1, %bb.2, %bb.3
+ liveins: $rdi, $rsi
+
+ %0:gr64 = COPY $rdi
+ %1:gr64 = COPY $rsi
+ CMP64rr %0, %1, implicit-def $eflags
+ %2:gr8 = SETAr implicit $eflags
+ %3:gr8 = SETAEr implicit $eflags
+ %4:gr64 = COPY $eflags
+ ; CHECK: CMP64rr %0, %1, implicit-def $eflags
+ ; CHECK-NEXT: %[[A_REG:[^:]*]]:gr8 = SETAr implicit $eflags
+ ; CHECK-NEXT: %[[AE_REG:[^:]*]]:gr8 = SETAEr implicit $eflags
+ ; CHECK-NOT: COPY{{( killed)?}} $eflags
+
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+
+ $eflags = COPY %4
+ JA_1 %bb.1, implicit $eflags
+ JB_1 %bb.2, implicit $eflags
+ JMP_1 %bb.3
+ ; CHECK-NOT: $eflags =
+ ;
+ ; CHECK: TEST8rr %[[A_REG]], %[[A_REG]], implicit-def $eflags
+ ; CHECK-NEXT: JNE_1 %bb.1, implicit killed $eflags
+ ; CHECK-SAME: {{$[[:space:]]}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: {{.*$}}
+ ; CHECK-SAME: {{$[[:space:]]}}
+ ; CHECK-NEXT: TEST8rr %[[AE_REG]], %[[AE_REG]], implicit-def $eflags
+ ; CHECK-NEXT: JE_1 %bb.2, implicit killed $eflags
+ ; CHECK-NEXT: JMP_1 %bb.3
+
+ bb.1:
+ %5:gr32 = MOV32ri64 42
+ $eax = COPY %5
+ RET 0, $eax
+
+ bb.2:
+ %6:gr32 = MOV32ri64 43
+ $eax = COPY %6
+ RET 0, $eax
+
+ bb.3:
+ %7:gr32 = MOV32r0 implicit-def dead $eflags
+ $eax = COPY %7
+ RET 0, $eax
+
+...
+---
+name: test_existing_setcc_memory
+# CHECK-LABEL: name: test_existing_setcc_memory
+liveins:
+ - { reg: '$rdi', virtual-reg: '%0' }
+ - { reg: '$rsi', virtual-reg: '%1' }
+body: |
+ bb.0:
+ successors: %bb.1, %bb.2
+ liveins: $rdi, $rsi
+
+ %0:gr64 = COPY $rdi
+ %1:gr64 = COPY $rsi
+ CMP64rr %0, %1, implicit-def $eflags
+ SETEm %0, 1, $noreg, -16, $noreg, implicit $eflags
+ %2:gr64 = COPY $eflags
+ ; CHECK: CMP64rr %0, %1, implicit-def $eflags
+ ; We cannot reuse this SETE because it stores the flag directly to memory,
+ ; so we have two SETEs here. FIXME: It'd be great if something could fold
+ ; these automatically. If not, maybe we want to unfold SETcc instructions
+ ; writing to memory so we can reuse them.
+ ; CHECK-NEXT: SETEm {{.*}} implicit $eflags
+ ; CHECK-NEXT: %[[E_REG:[^:]*]]:gr8 = SETEr implicit $eflags
+ ; CHECK-NOT: COPY{{( killed)?}} $eflags
+
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+
+ $eflags = COPY %2
+ JE_1 %bb.1, implicit $eflags
+ JMP_1 %bb.2
+ ; CHECK-NOT: $eflags =
+ ;
+ ; CHECK: TEST8rr %[[E_REG]], %[[E_REG]], implicit-def $eflags
+ ; CHECK-NEXT: JNE_1 %bb.1, implicit killed $eflags
+ ; CHECK-NEXT: JMP_1 %bb.2
+
+ bb.1:
+ %3:gr32 = MOV32ri64 42
+ $eax = COPY %3
+ RET 0, $eax
+
+ bb.2:
+ %4:gr32 = MOV32ri64 43
+ $eax = COPY %4
+ RET 0, $eax
+
+...
diff --git a/test/CodeGen/X86/lea-opt.ll b/test/CodeGen/X86/lea-opt.ll
index b285a4ed5224..6899babf31de 100644
--- a/test/CodeGen/X86/lea-opt.ll
+++ b/test/CodeGen/X86/lea-opt.ll
@@ -307,3 +307,154 @@ sw.bb.2: ; preds = %entry
sw.epilog: ; preds = %sw.bb.2, %sw.bb.1, %entry
ret void
}
+
+define i32 @test5(i32 %x, i32 %y) #0 {
+; CHECK-LABEL: test5:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addl %esi, %esi
+; CHECK-NEXT: subl %esi, %edi
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: retq
+entry:
+ %mul = mul nsw i32 %y, -2
+ %add = add nsw i32 %mul, %x
+ ret i32 %add
+}
+
+define i32 @test6(i32 %x, i32 %y) #0 {
+; CHECK-LABEL: test6:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $esi killed $esi def $rsi
+; CHECK-NEXT: leal (%rsi,%rsi,2), %eax
+; CHECK-NEXT: subl %eax, %edi
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: retq
+entry:
+ %mul = mul nsw i32 %y, -3
+ %add = add nsw i32 %mul, %x
+ ret i32 %add
+}
+
+define i32 @test7(i32 %x, i32 %y) #0 {
+; CHECK-LABEL: test7:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: shll $2, %esi
+; CHECK-NEXT: subl %esi, %edi
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: retq
+entry:
+ %mul = mul nsw i32 %y, -4
+ %add = add nsw i32 %mul, %x
+ ret i32 %add
+}
+
+define i32 @test8(i32 %x, i32 %y) #0 {
+; CHECK-LABEL: test8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $esi killed $esi def $rsi
+; CHECK-NEXT: leal (,%rsi,4), %eax
+; CHECK-NEXT: subl %edi, %eax
+; CHECK-NEXT: retq
+entry:
+ %mul = shl nsw i32 %y, 2
+ %sub = sub nsw i32 %mul, %x
+ ret i32 %sub
+}
+
+
+define i32 @test9(i32 %x, i32 %y) #0 {
+; CHECK-LABEL: test9:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addl %esi, %esi
+; CHECK-NEXT: subl %esi, %edi
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: retq
+entry:
+ %mul = mul nsw i32 -2, %y
+ %add = add nsw i32 %x, %mul
+ ret i32 %add
+}
+
+define i32 @test10(i32 %x, i32 %y) #0 {
+; CHECK-LABEL: test10:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $esi killed $esi def $rsi
+; CHECK-NEXT: leal (%rsi,%rsi,2), %eax
+; CHECK-NEXT: subl %eax, %edi
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: retq
+entry:
+ %mul = mul nsw i32 -3, %y
+ %add = add nsw i32 %x, %mul
+ ret i32 %add
+}
+
+define i32 @test11(i32 %x, i32 %y) #0 {
+; CHECK-LABEL: test11:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: shll $2, %esi
+; CHECK-NEXT: subl %esi, %edi
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: retq
+entry:
+ %mul = mul nsw i32 -4, %y
+ %add = add nsw i32 %x, %mul
+ ret i32 %add
+}
+
+define i32 @test12(i32 %x, i32 %y) #0 {
+; CHECK-LABEL: test12:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $esi killed $esi def $rsi
+; CHECK-NEXT: leal (,%rsi,4), %eax
+; CHECK-NEXT: subl %edi, %eax
+; CHECK-NEXT: retq
+entry:
+ %mul = mul nsw i32 4, %y
+ %sub = sub nsw i32 %mul, %x
+ ret i32 %sub
+}
+
+define i64 @test13(i64 %x, i64 %y) #0 {
+; CHECK-LABEL: test13:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: shlq $2, %rsi
+; CHECK-NEXT: subq %rsi, %rdi
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: retq
+entry:
+ %mul = mul nsw i64 -4, %y
+ %add = add nsw i64 %x, %mul
+ ret i64 %add
+}
+
+define i32 @test14(i32 %x, i32 %y) #0 {
+; CHECK-LABEL: test14:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $esi killed $esi def $rsi
+; CHECK-NEXT: leal (,%rsi,4), %eax
+; CHECK-NEXT: subl %edi, %eax
+; CHECK-NEXT: retq
+entry:
+ %mul = mul nsw i32 4, %y
+ %sub = sub nsw i32 %mul, %x
+ ret i32 %sub
+}
+
+define zeroext i16 @test15(i16 zeroext %x, i16 zeroext %y) #0 {
+; CHECK-LABEL: test15:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: shll $3, %esi
+; CHECK-NEXT: subl %esi, %edi
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: retq
+entry:
+ %conv = zext i16 %x to i32
+ %conv1 = zext i16 %y to i32
+ %mul = mul nsw i32 -8, %conv1
+ %add = add nsw i32 %conv, %mul
+ %conv2 = trunc i32 %add to i16
+ ret i16 %conv2
+}
+
+attributes #0 = { norecurse nounwind optsize readnone uwtable}
diff --git a/test/CodeGen/X86/machine-outliner-tailcalls.ll b/test/CodeGen/X86/machine-outliner-tailcalls.ll
index 6f28354c386b..71ebade623cf 100644
--- a/test/CodeGen/X86/machine-outliner-tailcalls.ll
+++ b/test/CodeGen/X86/machine-outliner-tailcalls.ll
@@ -1,4 +1,4 @@
-; RUN: llc -enable-machine-outliner -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=x86_64-apple-darwin < %s | FileCheck %s
@x = common local_unnamed_addr global i32 0, align 4
diff --git a/test/CodeGen/X86/mul-constant-i16.ll b/test/CodeGen/X86/mul-constant-i16.ll
index 737bcc7c864d..cf367ecbb98e 100644
--- a/test/CodeGen/X86/mul-constant-i16.ll
+++ b/test/CodeGen/X86/mul-constant-i16.ll
@@ -766,6 +766,50 @@ define i16 @test_mul_by_520(i16 %x) {
ret i16 %mul
}
+define i16 @test_mul_by_neg10(i16 %x) {
+; X86-LABEL: test_mul_by_neg10:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addl %eax, %eax
+; X86-NEXT: leal (%eax,%eax,4), %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: retl
+;
+; X64-LABEL: test_mul_by_neg10:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: addl %edi, %edi
+; X64-NEXT: leal (%rdi,%rdi,4), %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+ %mul = mul nsw i16 %x, -10
+ ret i16 %mul
+}
+
+define i16 @test_mul_by_neg36(i16 %x) {
+; X86-LABEL: test_mul_by_neg36:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: shll $2, %eax
+; X86-NEXT: leal (%eax,%eax,8), %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: retl
+;
+; X64-LABEL: test_mul_by_neg36:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: shll $2, %edi
+; X64-NEXT: leal (%rdi,%rdi,8), %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+ %mul = mul nsw i16 %x, -36
+ ret i16 %mul
+}
+
; (x*9+42)*(x*5+2)
define i16 @test_mul_spec(i16 %x) nounwind {
; X86-LABEL: test_mul_spec:
diff --git a/test/CodeGen/X86/mul-constant-i32.ll b/test/CodeGen/X86/mul-constant-i32.ll
index 356d5a00abf6..04f867bb4e16 100644
--- a/test/CodeGen/X86/mul-constant-i32.ll
+++ b/test/CodeGen/X86/mul-constant-i32.ll
@@ -1997,6 +1997,118 @@ define i32 @test_mul_by_520(i32 %x) {
ret i32 %mul
}
+define i32 @test_mul_by_neg10(i32 %x) {
+; X86-LABEL: test_mul_by_neg10:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addl %eax, %eax
+; X86-NEXT: leal (%eax,%eax,4), %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: retl
+;
+; X64-HSW-LABEL: test_mul_by_neg10:
+; X64-HSW: # %bb.0:
+; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25]
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: negl %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [7:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_neg10:
+; X64-JAG: # %bb.0:
+; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-JAG-NEXT: addl %edi, %edi # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [2:1.00]
+; X64-JAG-NEXT: negl %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_neg10:
+; X86-NOOPT: # %bb.0:
+; X86-NOOPT-NEXT: imull $-10, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_neg10:
+; HSW-NOOPT: # %bb.0:
+; HSW-NOOPT-NEXT: imull $-10, %edi, %eax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_neg10:
+; JAG-NOOPT: # %bb.0:
+; JAG-NOOPT-NEXT: imull $-10, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_neg10:
+; X64-SLM: # %bb.0:
+; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-SLM-NEXT: addl %edi, %edi # sched: [1:0.50]
+; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: negl %eax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_neg10:
+; SLM-NOOPT: # %bb.0:
+; SLM-NOOPT-NEXT: imull $-10, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+ %mul = mul nsw i32 %x, -10
+ ret i32 %mul
+}
+
+define i32 @test_mul_by_neg36(i32 %x) {
+; X86-LABEL: test_mul_by_neg36:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: shll $2, %eax
+; X86-NEXT: leal (%eax,%eax,8), %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: retl
+;
+; X64-HSW-LABEL: test_mul_by_neg36:
+; X64-HSW: # %bb.0:
+; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-HSW-NEXT: shll $2, %edi # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: negl %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [7:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_neg36:
+; X64-JAG: # %bb.0:
+; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-JAG-NEXT: shll $2, %edi # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [2:1.00]
+; X64-JAG-NEXT: negl %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_neg36:
+; X86-NOOPT: # %bb.0:
+; X86-NOOPT-NEXT: imull $-36, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_neg36:
+; HSW-NOOPT: # %bb.0:
+; HSW-NOOPT-NEXT: imull $-36, %edi, %eax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_neg36:
+; JAG-NOOPT: # %bb.0:
+; JAG-NOOPT-NEXT: imull $-36, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_neg36:
+; X64-SLM: # %bb.0:
+; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-SLM-NEXT: shll $2, %edi # sched: [1:1.00]
+; X64-SLM-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: negl %eax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_neg36:
+; SLM-NOOPT: # %bb.0:
+; SLM-NOOPT-NEXT: imull $-36, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+ %mul = mul nsw i32 %x, -36
+ ret i32 %mul
+}
+
; (x*9+42)*(x*5+2)
define i32 @test_mul_spec(i32 %x) nounwind {
; X86-LABEL: test_mul_spec:
diff --git a/test/CodeGen/X86/mul-constant-i64.ll b/test/CodeGen/X86/mul-constant-i64.ll
index 332ad7f01299..761ca67ab31b 100644
--- a/test/CodeGen/X86/mul-constant-i64.ll
+++ b/test/CodeGen/X86/mul-constant-i64.ll
@@ -2107,6 +2107,144 @@ define i64 @test_mul_by_520(i64 %x) {
ret i64 %mul
}
+define i64 @test_mul_by_neg10(i64 %x) {
+; X86-LABEL: test_mul_by_neg10:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %esi, -8
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl $-10, %edx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %edx
+; X86-NEXT: subl %ecx, %edx
+; X86-NEXT: leal (%esi,%esi,4), %ecx
+; X86-NEXT: addl %ecx, %ecx
+; X86-NEXT: subl %ecx, %edx
+; X86-NEXT: popl %esi
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
+;
+; X64-HSW-LABEL: test_mul_by_neg10:
+; X64-HSW: # %bb.0:
+; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: negq %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [7:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_neg10:
+; X64-JAG: # %bb.0:
+; X64-JAG-NEXT: addq %rdi, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [2:1.00]
+; X64-JAG-NEXT: negq %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_neg10:
+; X86-NOOPT: # %bb.0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: movl $-10, %edx
+; X86-NOOPT-NEXT: movl %ecx, %eax
+; X86-NOOPT-NEXT: mull %edx
+; X86-NOOPT-NEXT: subl %ecx, %edx
+; X86-NOOPT-NEXT: imull $-10, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_neg10:
+; HSW-NOOPT: # %bb.0:
+; HSW-NOOPT-NEXT: imulq $-10, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_neg10:
+; JAG-NOOPT: # %bb.0:
+; JAG-NOOPT-NEXT: imulq $-10, %rdi, %rax # sched: [6:4.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_neg10:
+; X64-SLM: # %bb.0:
+; X64-SLM-NEXT: addq %rdi, %rdi # sched: [1:0.50]
+; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: negq %rax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_neg10:
+; SLM-NOOPT: # %bb.0:
+; SLM-NOOPT-NEXT: imulq $-10, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+ %mul = mul nsw i64 %x, -10
+ ret i64 %mul
+}
+
+define i64 @test_mul_by_neg36(i64 %x) {
+; X86-LABEL: test_mul_by_neg36:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %esi, -8
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl $-36, %edx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %edx
+; X86-NEXT: subl %ecx, %edx
+; X86-NEXT: leal (%esi,%esi,8), %ecx
+; X86-NEXT: shll $2, %ecx
+; X86-NEXT: subl %ecx, %edx
+; X86-NEXT: popl %esi
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
+;
+; X64-HSW-LABEL: test_mul_by_neg36:
+; X64-HSW: # %bb.0:
+; X64-HSW-NEXT: shlq $2, %rdi # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: negq %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [7:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_neg36:
+; X64-JAG: # %bb.0:
+; X64-JAG-NEXT: shlq $2, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [2:1.00]
+; X64-JAG-NEXT: negq %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_neg36:
+; X86-NOOPT: # %bb.0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: movl $-36, %edx
+; X86-NOOPT-NEXT: movl %ecx, %eax
+; X86-NOOPT-NEXT: mull %edx
+; X86-NOOPT-NEXT: subl %ecx, %edx
+; X86-NOOPT-NEXT: imull $-36, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_neg36:
+; HSW-NOOPT: # %bb.0:
+; HSW-NOOPT-NEXT: imulq $-36, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_neg36:
+; JAG-NOOPT: # %bb.0:
+; JAG-NOOPT-NEXT: imulq $-36, %rdi, %rax # sched: [6:4.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_neg36:
+; X64-SLM: # %bb.0:
+; X64-SLM-NEXT: shlq $2, %rdi # sched: [1:1.00]
+; X64-SLM-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: negq %rax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_neg36:
+; SLM-NOOPT: # %bb.0:
+; SLM-NOOPT-NEXT: imulq $-36, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+ %mul = mul nsw i64 %x, -36
+ ret i64 %mul
+}
+
; (x*9+42)*(x*5+2)
define i64 @test_mul_spec(i64 %x) nounwind {
; X86-LABEL: test_mul_spec:
diff --git a/test/CodeGen/X86/pku.ll b/test/CodeGen/X86/pku.ll
index 96ee97341749..6031bafb0972 100644
--- a/test/CodeGen/X86/pku.ll
+++ b/test/CodeGen/X86/pku.ll
@@ -26,17 +26,11 @@ define void @test_x86_wrpkru(i32 %src) {
}
define i32 @test_x86_rdpkru() {
-; X86-LABEL: test_x86_rdpkru:
-; X86: ## %bb.0:
-; X86-NEXT: xorl %ecx, %ecx ## encoding: [0x31,0xc9]
-; X86-NEXT: rdpkru ## encoding: [0x0f,0x01,0xee]
-; X86-NEXT: retl ## encoding: [0xc3]
-;
-; X64-LABEL: test_x86_rdpkru:
-; X64: ## %bb.0:
-; X64-NEXT: xorl %ecx, %ecx ## encoding: [0x31,0xc9]
-; X64-NEXT: rdpkru ## encoding: [0x0f,0x01,0xee]
-; X64-NEXT: retq ## encoding: [0xc3]
+; CHECK-LABEL: test_x86_rdpkru:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: xorl %ecx, %ecx ## encoding: [0x31,0xc9]
+; CHECK-NEXT: rdpkru ## encoding: [0x0f,0x01,0xee]
+; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call i32 @llvm.x86.rdpkru()
ret i32 %res
}
diff --git a/test/CodeGen/X86/pmaddubsw.ll b/test/CodeGen/X86/pmaddubsw.ll
new file mode 100644
index 000000000000..d44315af2c6b
--- /dev/null
+++ b/test/CodeGen/X86/pmaddubsw.ll
@@ -0,0 +1,553 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX256,AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX256,AVX512,AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX,AVX256,AVX512,AVX512BW
+
+; NOTE: We're testing with loads because ABI lowering creates a concat_vectors that extract_vector_elt creation can see through.
+; This would require the combine to recreate the concat_vectors.
+define <8 x i16> @pmaddubsw_128(<16 x i8>* %Aptr, <16 x i8>* %Bptr) {
+; SSE-LABEL: pmaddubsw_128:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa (%rsi), %xmm0
+; SSE-NEXT: pmaddubsw (%rdi), %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: pmaddubsw_128:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovdqa (%rsi), %xmm0
+; AVX-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0
+; AVX-NEXT: retq
+ %A = load <16 x i8>, <16 x i8>* %Aptr
+ %B = load <16 x i8>, <16 x i8>* %Bptr
+ %A_even = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %A_odd = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %B_even = shufflevector <16 x i8> %B, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %B_odd = shufflevector <16 x i8> %B, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %A_even_ext = sext <8 x i8> %A_even to <8 x i32>
+ %B_even_ext = zext <8 x i8> %B_even to <8 x i32>
+ %A_odd_ext = sext <8 x i8> %A_odd to <8 x i32>
+ %B_odd_ext = zext <8 x i8> %B_odd to <8 x i32>
+ %even_mul = mul <8 x i32> %A_even_ext, %B_even_ext
+ %odd_mul = mul <8 x i32> %A_odd_ext, %B_odd_ext
+ %add = add <8 x i32> %even_mul, %odd_mul
+ %cmp_max = icmp sgt <8 x i32> %add, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+ %max = select <8 x i1> %cmp_max, <8 x i32> %add, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+ %cmp_min = icmp slt <8 x i32> %max, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
+ %min = select <8 x i1> %cmp_min, <8 x i32> %max, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
+ %trunc = trunc <8 x i32> %min to <8 x i16>
+ ret <8 x i16> %trunc
+}
+
+define <16 x i16> @pmaddubsw_256(<32 x i8>* %Aptr, <32 x i8>* %Bptr) {
+; SSE-LABEL: pmaddubsw_256:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa (%rsi), %xmm0
+; SSE-NEXT: movdqa 16(%rsi), %xmm1
+; SSE-NEXT: pmaddubsw (%rdi), %xmm0
+; SSE-NEXT: pmaddubsw 16(%rdi), %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: pmaddubsw_256:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovdqa (%rdi), %ymm0
+; AVX1-NEXT: vmovdqa (%rsi), %ymm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpmaddubsw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmaddubsw %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX256-LABEL: pmaddubsw_256:
+; AVX256: # %bb.0:
+; AVX256-NEXT: vmovdqa (%rsi), %ymm0
+; AVX256-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0
+; AVX256-NEXT: retq
+ %A = load <32 x i8>, <32 x i8>* %Aptr
+ %B = load <32 x i8>, <32 x i8>* %Bptr
+ %A_even = shufflevector <32 x i8> %A, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+ %A_odd = shufflevector <32 x i8> %A, <32 x i8> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+ %B_even = shufflevector <32 x i8> %B, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+ %B_odd = shufflevector <32 x i8> %B, <32 x i8> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+ %A_even_ext = sext <16 x i8> %A_even to <16 x i32>
+ %B_even_ext = zext <16 x i8> %B_even to <16 x i32>
+ %A_odd_ext = sext <16 x i8> %A_odd to <16 x i32>
+ %B_odd_ext = zext <16 x i8> %B_odd to <16 x i32>
+ %even_mul = mul <16 x i32> %A_even_ext, %B_even_ext
+ %odd_mul = mul <16 x i32> %A_odd_ext, %B_odd_ext
+ %add = add <16 x i32> %even_mul, %odd_mul
+ %cmp_max = icmp sgt <16 x i32> %add, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+ %max = select <16 x i1> %cmp_max, <16 x i32> %add, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+ %cmp_min = icmp slt <16 x i32> %max, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
+ %min = select <16 x i1> %cmp_min, <16 x i32> %max, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
+ %trunc = trunc <16 x i32> %min to <16 x i16>
+ ret <16 x i16> %trunc
+}
+
+define <64 x i16> @pmaddubsw_512(<128 x i8>* %Aptr, <128 x i8>* %Bptr) {
+; SSE-LABEL: pmaddubsw_512:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa 112(%rdx), %xmm0
+; SSE-NEXT: movdqa 96(%rdx), %xmm1
+; SSE-NEXT: movdqa 80(%rdx), %xmm2
+; SSE-NEXT: movdqa 64(%rdx), %xmm3
+; SSE-NEXT: movdqa (%rdx), %xmm4
+; SSE-NEXT: movdqa 16(%rdx), %xmm5
+; SSE-NEXT: movdqa 32(%rdx), %xmm6
+; SSE-NEXT: movdqa 48(%rdx), %xmm7
+; SSE-NEXT: pmaddubsw (%rsi), %xmm4
+; SSE-NEXT: pmaddubsw 16(%rsi), %xmm5
+; SSE-NEXT: pmaddubsw 32(%rsi), %xmm6
+; SSE-NEXT: pmaddubsw 48(%rsi), %xmm7
+; SSE-NEXT: pmaddubsw 64(%rsi), %xmm3
+; SSE-NEXT: pmaddubsw 80(%rsi), %xmm2
+; SSE-NEXT: pmaddubsw 96(%rsi), %xmm1
+; SSE-NEXT: pmaddubsw 112(%rsi), %xmm0
+; SSE-NEXT: movdqa %xmm0, 112(%rdi)
+; SSE-NEXT: movdqa %xmm1, 96(%rdi)
+; SSE-NEXT: movdqa %xmm2, 80(%rdi)
+; SSE-NEXT: movdqa %xmm3, 64(%rdi)
+; SSE-NEXT: movdqa %xmm7, 48(%rdi)
+; SSE-NEXT: movdqa %xmm6, 32(%rdi)
+; SSE-NEXT: movdqa %xmm5, 16(%rdi)
+; SSE-NEXT: movdqa %xmm4, (%rdi)
+; SSE-NEXT: movq %rdi, %rax
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: pmaddubsw_512:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovdqa (%rdi), %ymm0
+; AVX1-NEXT: vmovdqa 32(%rdi), %ymm1
+; AVX1-NEXT: vmovdqa 64(%rdi), %ymm2
+; AVX1-NEXT: vmovdqa 96(%rdi), %ymm8
+; AVX1-NEXT: vmovdqa (%rsi), %ymm4
+; AVX1-NEXT: vmovdqa 32(%rsi), %ymm5
+; AVX1-NEXT: vmovdqa 64(%rsi), %ymm6
+; AVX1-NEXT: vmovdqa 96(%rsi), %ymm9
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm7
+; AVX1-NEXT: vpmaddubsw %xmm3, %xmm7, %xmm3
+; AVX1-NEXT: vpmaddubsw %xmm0, %xmm4, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm4
+; AVX1-NEXT: vpmaddubsw %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpmaddubsw %xmm1, %xmm5, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
+; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm4
+; AVX1-NEXT: vpmaddubsw %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpmaddubsw %xmm2, %xmm6, %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; AVX1-NEXT: vextractf128 $1, %ymm8, %xmm3
+; AVX1-NEXT: vextractf128 $1, %ymm9, %xmm4
+; AVX1-NEXT: vpmaddubsw %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpmaddubsw %xmm8, %xmm9, %xmm4
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: pmaddubsw_512:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vmovdqa (%rsi), %ymm0
+; AVX2-NEXT: vmovdqa 32(%rsi), %ymm1
+; AVX2-NEXT: vmovdqa 64(%rsi), %ymm2
+; AVX2-NEXT: vmovdqa 96(%rsi), %ymm3
+; AVX2-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0
+; AVX2-NEXT: vpmaddubsw 32(%rdi), %ymm1, %ymm1
+; AVX2-NEXT: vpmaddubsw 64(%rdi), %ymm2, %ymm2
+; AVX2-NEXT: vpmaddubsw 96(%rdi), %ymm3, %ymm3
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: pmaddubsw_512:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vmovdqa (%rsi), %ymm0
+; AVX512F-NEXT: vmovdqa 32(%rsi), %ymm1
+; AVX512F-NEXT: vmovdqa 64(%rsi), %ymm2
+; AVX512F-NEXT: vmovdqa 96(%rsi), %ymm3
+; AVX512F-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0
+; AVX512F-NEXT: vpmaddubsw 32(%rdi), %ymm1, %ymm1
+; AVX512F-NEXT: vpmaddubsw 64(%rdi), %ymm2, %ymm2
+; AVX512F-NEXT: vpmaddubsw 96(%rdi), %ymm3, %ymm3
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: pmaddubsw_512:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vmovdqa64 (%rsi), %zmm0
+; AVX512BW-NEXT: vmovdqa64 64(%rsi), %zmm1
+; AVX512BW-NEXT: vpmaddubsw (%rdi), %zmm0, %zmm0
+; AVX512BW-NEXT: vpmaddubsw 64(%rdi), %zmm1, %zmm1
+; AVX512BW-NEXT: retq
+ %A = load <128 x i8>, <128 x i8>* %Aptr
+ %B = load <128 x i8>, <128 x i8>* %Bptr
+ %A_even = shufflevector <128 x i8> %A, <128 x i8> undef, <64 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62, i32 64, i32 66, i32 68, i32 70, i32 72, i32 74, i32 76, i32 78, i32 80, i32 82, i32 84, i32 86, i32 88, i32 90, i32 92, i32 94, i32 96, i32 98, i32 100, i32 102, i32 104, i32 106, i32 108, i32 110, i32 112, i32 114, i32 116, i32 118, i32 120, i32 122, i32 124, i32 126>
+ %A_odd = shufflevector <128 x i8> %A, <128 x i8> undef, <64 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31, i32 33, i32 35, i32 37, i32 39, i32 41, i32 43, i32 45, i32 47, i32 49, i32 51, i32 53, i32 55, i32 57, i32 59, i32 61, i32 63, i32 65, i32 67, i32 69, i32 71, i32 73, i32 75, i32 77, i32 79, i32 81, i32 83, i32 85, i32 87, i32 89, i32 91, i32 93, i32 95, i32 97, i32 99, i32 101, i32 103, i32 105, i32 107, i32 109, i32 111, i32 113, i32 115, i32 117, i32 119, i32 121, i32 123, i32 125, i32 127>
+ %B_even = shufflevector <128 x i8> %B, <128 x i8> undef, <64 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62, i32 64, i32 66, i32 68, i32 70, i32 72, i32 74, i32 76, i32 78, i32 80, i32 82, i32 84, i32 86, i32 88, i32 90, i32 92, i32 94, i32 96, i32 98, i32 100, i32 102, i32 104, i32 106, i32 108, i32 110, i32 112, i32 114, i32 116, i32 118, i32 120, i32 122, i32 124, i32 126>
+ %B_odd = shufflevector <128 x i8> %B, <128 x i8> undef, <64 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31, i32 33, i32 35, i32 37, i32 39, i32 41, i32 43, i32 45, i32 47, i32 49, i32 51, i32 53, i32 55, i32 57, i32 59, i32 61, i32 63, i32 65, i32 67, i32 69, i32 71, i32 73, i32 75, i32 77, i32 79, i32 81, i32 83, i32 85, i32 87, i32 89, i32 91, i32 93, i32 95, i32 97, i32 99, i32 101, i32 103, i32 105, i32 107, i32 109, i32 111, i32 113, i32 115, i32 117, i32 119, i32 121, i32 123, i32 125, i32 127>
+ %A_even_ext = sext <64 x i8> %A_even to <64 x i32>
+ %B_even_ext = zext <64 x i8> %B_even to <64 x i32>
+ %A_odd_ext = sext <64 x i8> %A_odd to <64 x i32>
+ %B_odd_ext = zext <64 x i8> %B_odd to <64 x i32>
+ %even_mul = mul <64 x i32> %A_even_ext, %B_even_ext
+ %odd_mul = mul <64 x i32> %A_odd_ext, %B_odd_ext
+ %add = add <64 x i32> %even_mul, %odd_mul
+ %cmp_max = icmp sgt <64 x i32> %add, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+ %max = select <64 x i1> %cmp_max, <64 x i32> %add, <64 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+ %cmp_min = icmp slt <64 x i32> %max, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
+ %min = select <64 x i1> %cmp_min, <64 x i32> %max, <64 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
+ %trunc = trunc <64 x i32> %min to <64 x i16>
+ ret <64 x i16> %trunc
+}
+
+define <8 x i16> @pmaddubsw_swapped_indices(<16 x i8>* %Aptr, <16 x i8>* %Bptr) {
+; SSE-LABEL: pmaddubsw_swapped_indices:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa (%rsi), %xmm0
+; SSE-NEXT: pmaddubsw (%rdi), %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: pmaddubsw_swapped_indices:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovdqa (%rsi), %xmm0
+; AVX-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0
+; AVX-NEXT: retq
+ %A = load <16 x i8>, <16 x i8>* %Aptr
+ %B = load <16 x i8>, <16 x i8>* %Bptr
+ %A_even = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 1, i32 2, i32 5, i32 6, i32 9, i32 10, i32 13, i32 14> ;indices aren't all even
+ %A_odd = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 3, i32 4, i32 7, i32 8, i32 11, i32 12, i32 15> ;indices aren't all odd
+ %B_even = shufflevector <16 x i8> %B, <16 x i8> undef, <8 x i32> <i32 1, i32 2, i32 5, i32 6, i32 9, i32 10, i32 13, i32 14> ;same indices as A
+ %B_odd = shufflevector <16 x i8> %B, <16 x i8> undef, <8 x i32> <i32 0, i32 3, i32 4, i32 7, i32 8, i32 11, i32 12, i32 15> ;same indices as A
+ %A_even_ext = sext <8 x i8> %A_even to <8 x i32>
+ %B_even_ext = zext <8 x i8> %B_even to <8 x i32>
+ %A_odd_ext = sext <8 x i8> %A_odd to <8 x i32>
+ %B_odd_ext = zext <8 x i8> %B_odd to <8 x i32>
+ %even_mul = mul <8 x i32> %A_even_ext, %B_even_ext
+ %odd_mul = mul <8 x i32> %A_odd_ext, %B_odd_ext
+ %add = add <8 x i32> %even_mul, %odd_mul
+ %cmp_max = icmp sgt <8 x i32> %add, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+ %max = select <8 x i1> %cmp_max, <8 x i32> %add, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+ %cmp_min = icmp slt <8 x i32> %max, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
+ %min = select <8 x i1> %cmp_min, <8 x i32> %max, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
+ %trunc = trunc <8 x i32> %min to <8 x i16>
+ ret <8 x i16> %trunc
+}
+
+define <8 x i16> @pmaddubsw_swapped_extend(<16 x i8>* %Aptr, <16 x i8>* %Bptr) {
+; SSE-LABEL: pmaddubsw_swapped_extend:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa (%rdi), %xmm0
+; SSE-NEXT: pmaddubsw (%rsi), %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: pmaddubsw_swapped_extend:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovdqa (%rdi), %xmm0
+; AVX-NEXT: vpmaddubsw (%rsi), %xmm0, %xmm0
+; AVX-NEXT: retq
+ %A = load <16 x i8>, <16 x i8>* %Aptr
+ %B = load <16 x i8>, <16 x i8>* %Bptr
+ %A_even = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %A_odd = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %B_even = shufflevector <16 x i8> %B, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %B_odd = shufflevector <16 x i8> %B, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %A_even_ext = zext <8 x i8> %A_even to <8 x i32>
+ %B_even_ext = sext <8 x i8> %B_even to <8 x i32>
+ %A_odd_ext = zext <8 x i8> %A_odd to <8 x i32>
+ %B_odd_ext = sext <8 x i8> %B_odd to <8 x i32>
+ %even_mul = mul <8 x i32> %A_even_ext, %B_even_ext
+ %odd_mul = mul <8 x i32> %A_odd_ext, %B_odd_ext
+ %add = add <8 x i32> %even_mul, %odd_mul
+ %cmp_max = icmp sgt <8 x i32> %add, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+ %max = select <8 x i1> %cmp_max, <8 x i32> %add, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+ %cmp_min = icmp slt <8 x i32> %max, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
+ %min = select <8 x i1> %cmp_min, <8 x i32> %max, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
+ %trunc = trunc <8 x i32> %min to <8 x i16>
+ ret <8 x i16> %trunc
+}
+
+define <8 x i16> @pmaddubsw_commuted_mul(<16 x i8>* %Aptr, <16 x i8>* %Bptr) {
+; SSE-LABEL: pmaddubsw_commuted_mul:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa (%rsi), %xmm0
+; SSE-NEXT: pmaddubsw (%rdi), %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: pmaddubsw_commuted_mul:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovdqa (%rsi), %xmm0
+; AVX-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0
+; AVX-NEXT: retq
+ %A = load <16 x i8>, <16 x i8>* %Aptr
+ %B = load <16 x i8>, <16 x i8>* %Bptr
+ %A_even = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %A_odd = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %B_even = shufflevector <16 x i8> %B, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %B_odd = shufflevector <16 x i8> %B, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %A_even_ext = sext <8 x i8> %A_even to <8 x i32>
+ %B_even_ext = zext <8 x i8> %B_even to <8 x i32>
+ %A_odd_ext = sext <8 x i8> %A_odd to <8 x i32>
+ %B_odd_ext = zext <8 x i8> %B_odd to <8 x i32>
+ %even_mul = mul <8 x i32> %B_even_ext, %A_even_ext
+ %odd_mul = mul <8 x i32> %A_odd_ext, %B_odd_ext
+ %add = add <8 x i32> %even_mul, %odd_mul
+ %cmp_max = icmp sgt <8 x i32> %add, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+ %max = select <8 x i1> %cmp_max, <8 x i32> %add, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+ %cmp_min = icmp slt <8 x i32> %max, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
+ %min = select <8 x i1> %cmp_min, <8 x i32> %max, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
+ %trunc = trunc <8 x i32> %min to <8 x i16>
+ ret <8 x i16> %trunc
+}
+
+define <8 x i16> @pmaddubsw_bad_extend(<16 x i8>* %Aptr, <16 x i8>* %Bptr) {
+; SSE-LABEL: pmaddubsw_bad_extend:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa (%rdi), %xmm1
+; SSE-NEXT: movdqa (%rsi), %xmm0
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; SSE-NEXT: pand %xmm0, %xmm2
+; SSE-NEXT: movdqa %xmm1, %xmm3
+; SSE-NEXT: psllw $8, %xmm3
+; SSE-NEXT: psraw $8, %xmm3
+; SSE-NEXT: movdqa %xmm3, %xmm4
+; SSE-NEXT: pmulhw %xmm2, %xmm4
+; SSE-NEXT: pmullw %xmm2, %xmm3
+; SSE-NEXT: movdqa %xmm3, %xmm2
+; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
+; SSE-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
+; SSE-NEXT: psraw $8, %xmm0
+; SSE-NEXT: psrlw $8, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm4
+; SSE-NEXT: pmulhw %xmm0, %xmm4
+; SSE-NEXT: pmullw %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
+; SSE-NEXT: paddd %xmm2, %xmm0
+; SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
+; SSE-NEXT: paddd %xmm3, %xmm1
+; SSE-NEXT: packssdw %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: pmaddubsw_bad_extend:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovdqa (%rdi), %xmm0
+; AVX1-NEXT: vmovdqa (%rsi), %xmm1
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <8,10,12,14,u,u,u,u,u,u,u,u,u,u,u,u>
+; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm3
+; AVX1-NEXT: vpmovsxbd %xmm3, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = <0,2,4,6,u,u,u,u,u,u,u,u,u,u,u,u>
+; AVX1-NEXT: vpshufb %xmm4, %xmm0, %xmm5
+; AVX1-NEXT: vpmovsxbd %xmm5, %xmm5
+; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm2
+; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
+; AVX1-NEXT: vpmulld %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpshufb %xmm4, %xmm1, %xmm3
+; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
+; AVX1-NEXT: vpmulld %xmm3, %xmm5, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = <9,11,13,15,u,u,u,u,u,u,u,u,u,u,u,u>
+; AVX1-NEXT: vpshufb %xmm4, %xmm0, %xmm5
+; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm5 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = <1,3,5,7,u,u,u,u,u,u,u,u,u,u,u,u>
+; AVX1-NEXT: vpshufb %xmm6, %xmm0, %xmm0
+; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX1-NEXT: vpshufb %xmm4, %xmm1, %xmm4
+; AVX1-NEXT: vpmovsxbd %xmm4, %xmm4
+; AVX1-NEXT: vpmulld %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpaddd %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpshufb %xmm6, %xmm1, %xmm1
+; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
+; AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpaddd %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: pmaddubsw_bad_extend:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vmovdqa (%rdi), %xmm0
+; AVX2-NEXT: vmovdqa (%rsi), %xmm1
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm3
+; AVX2-NEXT: vpmovsxbd %xmm3, %ymm3
+; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm2
+; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero
+; AVX2-NEXT: vpmulld %ymm2, %ymm3, %ymm2
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u>
+; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
+; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
+; AVX2-NEXT: vpmovsxbd %xmm1, %ymm1
+; AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpaddd %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: pmaddubsw_bad_extend:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vmovdqa (%rdi), %xmm0
+; AVX512-NEXT: vmovdqa (%rsi), %xmm1
+; AVX512-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; AVX512-NEXT: vpshufb %xmm2, %xmm0, %xmm3
+; AVX512-NEXT: vpmovsxbd %xmm3, %ymm3
+; AVX512-NEXT: vpshufb %xmm2, %xmm1, %xmm2
+; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero
+; AVX512-NEXT: vpmulld %ymm2, %ymm3, %ymm2
+; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = <1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u>
+; AVX512-NEXT: vpshufb %xmm3, %xmm0, %xmm0
+; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; AVX512-NEXT: vpshufb %xmm3, %xmm1, %xmm1
+; AVX512-NEXT: vpmovsxbd %xmm1, %ymm1
+; AVX512-NEXT: vpmulld %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpaddd %ymm0, %ymm2, %ymm0
+; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294934528,4294934528,4294934528,4294934528,4294934528,4294934528,4294934528,4294934528]
+; AVX512-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
+; AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+ %A = load <16 x i8>, <16 x i8>* %Aptr
+ %B = load <16 x i8>, <16 x i8>* %Bptr
+ %A_even = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %A_odd = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %B_even = shufflevector <16 x i8> %B, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %B_odd = shufflevector <16 x i8> %B, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %A_even_ext = sext <8 x i8> %A_even to <8 x i32>
+ %B_even_ext = zext <8 x i8> %B_even to <8 x i32>
+ %A_odd_ext = zext <8 x i8> %A_odd to <8 x i32>
+ %B_odd_ext = sext <8 x i8> %B_odd to <8 x i32>
+ %even_mul = mul <8 x i32> %A_even_ext, %B_even_ext
+ %odd_mul = mul <8 x i32> %A_odd_ext, %B_odd_ext
+ %add = add <8 x i32> %even_mul, %odd_mul
+ %cmp_max = icmp sgt <8 x i32> %add, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+ %max = select <8 x i1> %cmp_max, <8 x i32> %add, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+ %cmp_min = icmp slt <8 x i32> %max, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
+ %min = select <8 x i1> %cmp_min, <8 x i32> %max, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
+ %trunc = trunc <8 x i32> %min to <8 x i16>
+ ret <8 x i16> %trunc
+}
+
+define <8 x i16> @pmaddubsw_bad_indices(<16 x i8>* %Aptr, <16 x i8>* %Bptr) {
+; SSE-LABEL: pmaddubsw_bad_indices:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa (%rdi), %xmm1
+; SSE-NEXT: movdqa (%rsi), %xmm0
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; SSE-NEXT: pand %xmm0, %xmm2
+; SSE-NEXT: movdqa %xmm1, %xmm3
+; SSE-NEXT: pshufb {{.*#+}} xmm3 = xmm3[u,1,u,2,u,5,u,6,u,9,u,10,u,13,u,14]
+; SSE-NEXT: psraw $8, %xmm3
+; SSE-NEXT: movdqa %xmm3, %xmm4
+; SSE-NEXT: pmulhw %xmm2, %xmm4
+; SSE-NEXT: pmullw %xmm2, %xmm3
+; SSE-NEXT: movdqa %xmm3, %xmm2
+; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
+; SSE-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
+; SSE-NEXT: psrlw $8, %xmm0
+; SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,0,u,3,u,4,u,7,u,8,u,11,u,12,u,15]
+; SSE-NEXT: psraw $8, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm4
+; SSE-NEXT: pmulhw %xmm0, %xmm4
+; SSE-NEXT: pmullw %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
+; SSE-NEXT: paddd %xmm2, %xmm0
+; SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
+; SSE-NEXT: paddd %xmm3, %xmm1
+; SSE-NEXT: packssdw %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: pmaddubsw_bad_indices:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovdqa (%rdi), %xmm0
+; AVX1-NEXT: vmovdqa (%rsi), %xmm1
+; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[9,10,13,14,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX1-NEXT: vpmovsxbd %xmm2, %xmm2
+; AVX1-NEXT: vpshufb {{.*#+}} xmm3 = xmm0[1,2,5,6,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX1-NEXT: vpmovsxbd %xmm3, %xmm3
+; AVX1-NEXT: vpshufb {{.*#+}} xmm4 = xmm1[8,10,12,14,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero
+; AVX1-NEXT: vpmulld %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpshufb {{.*#+}} xmm4 = xmm1[0,2,4,6,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero
+; AVX1-NEXT: vpmulld %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vpshufb {{.*#+}} xmm4 = xmm0[8,11,12,15,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX1-NEXT: vpmovsxbd %xmm4, %xmm4
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,3,4,7,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
+; AVX1-NEXT: vpshufb {{.*#+}} xmm5 = xmm1[9,11,13,15,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm5 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero
+; AVX1-NEXT: vpmulld %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vpaddd %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[1,3,5,7,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
+; AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpaddd %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: pmaddubsw_bad_indices:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vmovdqa (%rdi), %xmm0
+; AVX2-NEXT: vmovdqa (%rsi), %xmm1
+; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[1,2,5,6,9,10,13,14,u,u,u,u,u,u,u,u]
+; AVX2-NEXT: vpmovsxbd %xmm2, %ymm2
+; AVX2-NEXT: vpshufb {{.*#+}} xmm3 = xmm1[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero
+; AVX2-NEXT: vpmulld %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,3,4,7,8,11,12,15,u,u,u,u,u,u,u,u]
+; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
+; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u]
+; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
+; AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpaddd %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: pmaddubsw_bad_indices:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vmovdqa (%rdi), %xmm0
+; AVX512-NEXT: vmovdqa (%rsi), %xmm1
+; AVX512-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[1,2,5,6,9,10,13,14,u,u,u,u,u,u,u,u]
+; AVX512-NEXT: vpmovsxbd %xmm2, %ymm2
+; AVX512-NEXT: vpshufb {{.*#+}} xmm3 = xmm1[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero
+; AVX512-NEXT: vpmulld %ymm3, %ymm2, %ymm2
+; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,3,4,7,8,11,12,15,u,u,u,u,u,u,u,u]
+; AVX512-NEXT: vpmovsxbd %xmm0, %ymm0
+; AVX512-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u]
+; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
+; AVX512-NEXT: vpmulld %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpaddd %ymm0, %ymm2, %ymm0
+; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294934528,4294934528,4294934528,4294934528,4294934528,4294934528,4294934528,4294934528]
+; AVX512-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
+; AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+ %A = load <16 x i8>, <16 x i8>* %Aptr
+ %B = load <16 x i8>, <16 x i8>* %Bptr
+ %A_even = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 1, i32 2, i32 5, i32 6, i32 9, i32 10, i32 13, i32 14> ;indices aren't all even
+ %A_odd = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 3, i32 4, i32 7, i32 8, i32 11, i32 12, i32 15> ;indices aren't all odd
+ %B_even = shufflevector <16 x i8> %B, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> ;different than A
+ %B_odd = shufflevector <16 x i8> %B, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> ;different than A
+ %A_even_ext = sext <8 x i8> %A_even to <8 x i32>
+ %B_even_ext = zext <8 x i8> %B_even to <8 x i32>
+ %A_odd_ext = sext <8 x i8> %A_odd to <8 x i32>
+ %B_odd_ext = zext <8 x i8> %B_odd to <8 x i32>
+ %even_mul = mul <8 x i32> %A_even_ext, %B_even_ext
+ %odd_mul = mul <8 x i32> %A_odd_ext, %B_odd_ext
+ %add = add <8 x i32> %even_mul, %odd_mul
+ %cmp_max = icmp sgt <8 x i32> %add, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+ %max = select <8 x i1> %cmp_max, <8 x i32> %add, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+ %cmp_min = icmp slt <8 x i32> %max, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
+ %min = select <8 x i1> %cmp_min, <8 x i32> %max, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
+ %trunc = trunc <8 x i32> %min to <8 x i16>
+ ret <8 x i16> %trunc
+}
diff --git a/test/CodeGen/X86/rem.ll b/test/CodeGen/X86/rem.ll
index 672baa5c1bdc..5f2cc199bcf4 100644
--- a/test/CodeGen/X86/rem.ll
+++ b/test/CodeGen/X86/rem.ll
@@ -15,8 +15,8 @@ define i32 @test1(i32 %X) {
; CHECK-NEXT: addl %eax, %edx
; CHECK-NEXT: movl %edx, %eax
; CHECK-NEXT: shll $8, %eax
-; CHECK-NEXT: subl %edx, %eax
-; CHECK-NEXT: subl %eax, %ecx
+; CHECK-NEXT: subl %eax, %edx
+; CHECK-NEXT: addl %edx, %ecx
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: retl
%tmp1 = srem i32 %X, 255
@@ -48,8 +48,8 @@ define i32 @test3(i32 %X) {
; CHECK-NEXT: shrl $7, %edx
; CHECK-NEXT: movl %edx, %eax
; CHECK-NEXT: shll $8, %eax
-; CHECK-NEXT: subl %edx, %eax
-; CHECK-NEXT: subl %eax, %ecx
+; CHECK-NEXT: subl %eax, %edx
+; CHECK-NEXT: addl %edx, %ecx
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: retl
%tmp1 = urem i32 %X, 255
diff --git a/test/CodeGen/X86/rotate-extract-vector.ll b/test/CodeGen/X86/rotate-extract-vector.ll
index 6059a76259ba..e2679dded8b5 100644
--- a/test/CodeGen/X86/rotate-extract-vector.ll
+++ b/test/CodeGen/X86/rotate-extract-vector.ll
@@ -12,10 +12,10 @@
define <4 x i32> @vroll_v4i32_extract_shl(<4 x i32> %i) {
; CHECK-LABEL: vroll_v4i32_extract_shl:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpslld $3, %xmm0, %xmm1
-; CHECK-NEXT: vpslld $10, %xmm0, %xmm0
-; CHECK-NEXT: vpsrld $25, %xmm1, %xmm1
-; CHECK-NEXT: vpor %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: vpslld $3, %xmm0, %xmm0
+; CHECK-NEXT: vprold $7, %zmm0, %zmm0
+; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT: vzeroupper
; CHECK-NEXT: ret{{[l|q]}}
%lhs_mul = shl <4 x i32> %i, <i32 3, i32 3, i32 3, i32 3>
%rhs_mul = shl <4 x i32> %i, <i32 10, i32 10, i32 10, i32 10>
@@ -25,20 +25,12 @@ define <4 x i32> @vroll_v4i32_extract_shl(<4 x i32> %i) {
}
define <4 x i64> @vrolq_v4i64_extract_shrl(<4 x i64> %i) nounwind {
-; X86-LABEL: vrolq_v4i64_extract_shrl:
-; X86: # %bb.0:
-; X86-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; X86-NEXT: vprolq $24, %zmm0, %zmm0
-; X86-NEXT: vpand {{\.LCPI.*}}, %ymm0, %ymm0
-; X86-NEXT: retl
-;
-; X64-LABEL: vrolq_v4i64_extract_shrl:
-; X64: # %bb.0:
-; X64-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; X64-NEXT: vprolq $24, %zmm0, %zmm0
-; X64-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18446744073189457919,18446744073189457919,18446744073189457919,18446744073189457919]
-; X64-NEXT: vpand %ymm1, %ymm0, %ymm0
-; X64-NEXT: retq
+; CHECK-LABEL: vrolq_v4i64_extract_shrl:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpsrlq $5, %ymm0, %ymm0
+; CHECK-NEXT: vprolq $29, %zmm0, %zmm0
+; CHECK-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; CHECK-NEXT: ret{{[l|q]}}
%lhs_div = lshr <4 x i64> %i, <i64 40, i64 40, i64 40, i64 40>
%rhs_div = lshr <4 x i64> %i, <i64 5, i64 5, i64 5, i64 5>
%rhs_shift = shl <4 x i64> %rhs_div, <i64 29, i64 29, i64 29, i64 29>
@@ -49,12 +41,10 @@ define <4 x i64> @vrolq_v4i64_extract_shrl(<4 x i64> %i) nounwind {
define <8 x i32> @vroll_extract_mul(<8 x i32> %i) nounwind {
; CHECK-LABEL: vroll_extract_mul:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpbroadcastd {{.*#+}} ymm1 = [640,640,640,640,640,640,640,640]
-; CHECK-NEXT: vpmulld %ymm1, %ymm0, %ymm1
-; CHECK-NEXT: vpbroadcastd {{.*#+}} ymm2 = [10,10,10,10,10,10,10,10]
-; CHECK-NEXT: vpmulld %ymm2, %ymm0, %ymm0
-; CHECK-NEXT: vpsrld $26, %ymm0, %ymm0
-; CHECK-NEXT: vpor %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: vpbroadcastd {{.*#+}} ymm1 = [10,10,10,10,10,10,10,10]
+; CHECK-NEXT: vpmulld %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vprold $6, %zmm0, %zmm0
+; CHECK-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; CHECK-NEXT: ret{{[l|q]}}
%lhs_mul = mul <8 x i32> %i, <i32 640, i32 640, i32 640, i32 640, i32 640, i32 640, i32 640, i32 640>
%rhs_mul = mul <8 x i32> %i, <i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10>
@@ -66,7 +56,7 @@ define <8 x i32> @vroll_extract_mul(<8 x i32> %i) nounwind {
define <2 x i64> @vrolq_extract_udiv(<2 x i64> %i) nounwind {
; X86-LABEL: vrolq_extract_udiv:
; X86: # %bb.0:
-; X86-NEXT: subl $60, %esp
+; X86-NEXT: subl $44, %esp
; X86-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X86-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp)
; X86-NEXT: vmovss %xmm0, (%esp)
@@ -85,53 +75,27 @@ define <2 x i64> @vrolq_extract_udiv(<2 x i64> %i) nounwind {
; X86-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; X86-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
; X86-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
-; X86-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
-; X86-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
-; X86-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp)
-; X86-NEXT: vmovss %xmm0, (%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $384, {{[0-9]+}}(%esp) # imm = 0x180
-; X86-NEXT: calll __udivdi3
-; X86-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
-; X86-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp)
-; X86-NEXT: vextractps $2, %xmm0, (%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $384, {{[0-9]+}}(%esp) # imm = 0x180
-; X86-NEXT: vmovd %eax, %xmm0
-; X86-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0
-; X86-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
-; X86-NEXT: calll __udivdi3
-; X86-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
-; X86-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
-; X86-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
-; X86-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
-; X86-NEXT: vpsllq $57, %xmm1, %xmm1
-; X86-NEXT: vpor %xmm0, %xmm1, %xmm0
-; X86-NEXT: addl $60, %esp
+; X86-NEXT: vprolq $57, %zmm0, %zmm0
+; X86-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; X86-NEXT: addl $44, %esp
+; X86-NEXT: vzeroupper
; X86-NEXT: retl
;
; X64-LABEL: vrolq_extract_udiv:
; X64: # %bb.0:
; X64-NEXT: vpextrq $1, %xmm0, %rax
-; X64-NEXT: movabsq $-6148914691236517205, %rsi # imm = 0xAAAAAAAAAAAAAAAB
-; X64-NEXT: mulq %rsi
-; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movq %rdx, %rax
-; X64-NEXT: shrq %rax
-; X64-NEXT: vmovq %rax, %xmm1
+; X64-NEXT: movabsq $-6148914691236517205, %rcx # imm = 0xAAAAAAAAAAAAAAAB
+; X64-NEXT: mulq %rcx
+; X64-NEXT: shrq %rdx
+; X64-NEXT: vmovq %rdx, %xmm1
; X64-NEXT: vmovq %xmm0, %rax
-; X64-NEXT: mulq %rsi
-; X64-NEXT: movq %rdx, %rax
-; X64-NEXT: shrq %rax
-; X64-NEXT: vmovq %rax, %xmm0
+; X64-NEXT: mulq %rcx
+; X64-NEXT: shrq %rdx
+; X64-NEXT: vmovq %rdx, %xmm0
; X64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; X64-NEXT: shrq $8, %rcx
-; X64-NEXT: vmovq %rcx, %xmm1
-; X64-NEXT: shrq $8, %rdx
-; X64-NEXT: vmovq %rdx, %xmm2
-; X64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
-; X64-NEXT: vpsllq $57, %xmm0, %xmm0
-; X64-NEXT: vpor %xmm1, %xmm0, %xmm0
+; X64-NEXT: vprolq $57, %zmm0, %zmm0
+; X64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; X64-NEXT: vzeroupper
; X64-NEXT: retq
%lhs_div = udiv <2 x i64> %i, <i64 3, i64 3>
%rhs_div = udiv <2 x i64> %i, <i64 384, i64 384>
@@ -141,17 +105,23 @@ define <2 x i64> @vrolq_extract_udiv(<2 x i64> %i) nounwind {
}
define <4 x i32> @vrolw_extract_mul_with_mask(<4 x i32> %i) nounwind {
-; CHECK-LABEL: vrolw_extract_mul_with_mask:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1152,1152,1152,1152]
-; CHECK-NEXT: vpmulld %xmm1, %xmm0, %xmm1
-; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm2 = [9,9,9,9]
-; CHECK-NEXT: vpmulld %xmm2, %xmm0, %xmm0
-; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm2 = [160,160,160,160]
-; CHECK-NEXT: vpand %xmm2, %xmm1, %xmm1
-; CHECK-NEXT: vpsrld $25, %xmm0, %xmm0
-; CHECK-NEXT: vpor %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: ret{{[l|q]}}
+; X86-LABEL: vrolw_extract_mul_with_mask:
+; X86: # %bb.0:
+; X86-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9]
+; X86-NEXT: vpmulld %xmm1, %xmm0, %xmm0
+; X86-NEXT: vprold $7, %zmm0, %zmm0
+; X86-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-NEXT: vzeroupper
+; X86-NEXT: retl
+;
+; X64-LABEL: vrolw_extract_mul_with_mask:
+; X64: # %bb.0:
+; X64-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9]
+; X64-NEXT: vpmulld %xmm1, %xmm0, %xmm0
+; X64-NEXT: vprold $7, %zmm0, %zmm0
+; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
%lhs_mul = mul <4 x i32> %i, <i32 1152, i32 1152, i32 1152, i32 1152>
%rhs_mul = mul <4 x i32> %i, <i32 9, i32 9, i32 9, i32 9>
%lhs_and = and <4 x i32> %lhs_mul, <i32 160, i32 160, i32 160, i32 160>
diff --git a/test/CodeGen/X86/rotate-extract.ll b/test/CodeGen/X86/rotate-extract.ll
index 6ce3db13e954..a1babd1d3cc3 100644
--- a/test/CodeGen/X86/rotate-extract.ll
+++ b/test/CodeGen/X86/rotate-extract.ll
@@ -24,9 +24,7 @@ define i64 @rolq_extract_shl(i64 %i) nounwind {
; X64-LABEL: rolq_extract_shl:
; X64: # %bb.0:
; X64-NEXT: leaq (,%rdi,8), %rax
-; X64-NEXT: shlq $10, %rdi
-; X64-NEXT: shrq $57, %rax
-; X64-NEXT: orq %rdi, %rax
+; X64-NEXT: rolq $7, %rax
; X64-NEXT: retq
%lhs_mul = shl i64 %i, 3
%rhs_mul = shl i64 %i, 10
@@ -39,16 +37,17 @@ define i16 @rolw_extract_shrl(i16 %i) nounwind {
; X86-LABEL: rolw_extract_shrl:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: rolw $9, %ax
-; X86-NEXT: andl $61951, %eax # imm = 0xF1FF
+; X86-NEXT: shrl $3, %eax
+; X86-NEXT: rolw $12, %ax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: rolw_extract_shrl:
; X64: # %bb.0:
-; X64-NEXT: rolw $9, %di
-; X64-NEXT: andl $61951, %edi # imm = 0xF1FF
-; X64-NEXT: movl %edi, %eax
+; X64-NEXT: movzwl %di, %eax
+; X64-NEXT: shrl $3, %eax
+; X64-NEXT: rolw $12, %ax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%lhs_div = lshr i16 %i, 7
%rhs_div = lshr i16 %i, 3
@@ -60,22 +59,16 @@ define i16 @rolw_extract_shrl(i16 %i) nounwind {
define i32 @roll_extract_mul(i32 %i) nounwind {
; X86-LABEL: roll_extract_mul:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: leal (%ecx,%ecx,8), %eax
-; X86-NEXT: shll $7, %ecx
-; X86-NEXT: leal (%ecx,%ecx,8), %ecx
-; X86-NEXT: shrl $25, %eax
-; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,8), %eax
+; X86-NEXT: roll $7, %eax
; X86-NEXT: retl
;
; X64-LABEL: roll_extract_mul:
; X64: # %bb.0:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: leal (%rdi,%rdi,8), %eax
-; X64-NEXT: shll $7, %edi
-; X64-NEXT: leal (%rdi,%rdi,8), %ecx
-; X64-NEXT: shrl $25, %eax
-; X64-NEXT: orl %ecx, %eax
+; X64-NEXT: roll $7, %eax
; X64-NEXT: retq
%lhs_mul = mul i32 %i, 9
%rhs_mul = mul i32 %i, 1152
@@ -89,11 +82,8 @@ define i8 @rolb_extract_udiv(i8 %i) nounwind {
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: imull $171, %eax, %eax
-; X86-NEXT: movb %ah, %cl
-; X86-NEXT: shlb $3, %cl
-; X86-NEXT: andb $-16, %cl
-; X86-NEXT: shrl $13, %eax
-; X86-NEXT: orb %cl, %al
+; X86-NEXT: shrl $9, %eax
+; X86-NEXT: rolb $4, %al
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
@@ -101,12 +91,8 @@ define i8 @rolb_extract_udiv(i8 %i) nounwind {
; X64: # %bb.0:
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: imull $171, %eax, %eax
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: shrl $8, %ecx
-; X64-NEXT: shlb $3, %cl
-; X64-NEXT: andb $-16, %cl
-; X64-NEXT: shrl $13, %eax
-; X64-NEXT: orb %cl, %al
+; X64-NEXT: shrl $9, %eax
+; X64-NEXT: rolb $4, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%lhs_div = udiv i8 %i, 3
@@ -139,12 +125,8 @@ define i64 @rolq_extract_mul_with_mask(i64 %i) nounwind {
; X64-LABEL: rolq_extract_mul_with_mask:
; X64: # %bb.0:
; X64-NEXT: leaq (%rdi,%rdi,8), %rax
-; X64-NEXT: # kill: def $edi killed $edi killed $rdi def $rdi
-; X64-NEXT: shll $7, %edi
-; X64-NEXT: leal (%rdi,%rdi,8), %ecx
-; X64-NEXT: movzbl %cl, %ecx
-; X64-NEXT: shrq $57, %rax
-; X64-NEXT: orq %rcx, %rax
+; X64-NEXT: rolq $7, %rax
+; X64-NEXT: movzbl %al, %eax
; X64-NEXT: retq
%lhs_mul = mul i64 %i, 1152
%rhs_mul = mul i64 %i, 9
diff --git a/test/CodeGen/X86/signbit-shift.ll b/test/CodeGen/X86/signbit-shift.ll
index cee647931bcb..1579a77a2e9b 100644
--- a/test/CodeGen/X86/signbit-shift.ll
+++ b/test/CodeGen/X86/signbit-shift.ll
@@ -156,9 +156,9 @@ define i32 @sext_ifneg(i32 %x) {
define i32 @add_sext_ifneg(i32 %x) {
; CHECK-LABEL: add_sext_ifneg:
; CHECK: # %bb.0:
-; CHECK-NEXT: shrl $31, %edi
-; CHECK-NEXT: movl $42, %eax
-; CHECK-NEXT: subl %edi, %eax
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: sarl $31, %edi
+; CHECK-NEXT: leal 42(%rdi), %eax
; CHECK-NEXT: retq
%c = icmp slt i32 %x, 0
%e = sext i1 %c to i32
@@ -169,9 +169,9 @@ define i32 @add_sext_ifneg(i32 %x) {
define i32 @sel_ifneg_fval_bigger(i32 %x) {
; CHECK-LABEL: sel_ifneg_fval_bigger:
; CHECK: # %bb.0:
-; CHECK-NEXT: shrl $31, %edi
-; CHECK-NEXT: movl $42, %eax
-; CHECK-NEXT: subl %edi, %eax
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: sarl $31, %edi
+; CHECK-NEXT: leal 42(%rdi), %eax
; CHECK-NEXT: retq
%c = icmp slt i32 %x, 0
%r = select i1 %c, i32 41, i32 42
@@ -231,9 +231,10 @@ define <4 x i32> @sub_lshr_not_vec_splat(<4 x i32> %x) {
define i32 @sub_lshr(i32 %x, i32 %y) {
; CHECK-LABEL: sub_lshr:
; CHECK: # %bb.0:
-; CHECK-NEXT: shrl $31, %edi
-; CHECK-NEXT: subl %edi, %esi
-; CHECK-NEXT: movl %esi, %eax
+; CHECK-NEXT: # kill: def $esi killed $esi def $rsi
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: sarl $31, %edi
+; CHECK-NEXT: leal (%rdi,%rsi), %eax
; CHECK-NEXT: retq
%sh = lshr i32 %x, 31
%r = sub i32 %y, %sh
@@ -243,9 +244,8 @@ define i32 @sub_lshr(i32 %x, i32 %y) {
define <4 x i32> @sub_lshr_vec(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: sub_lshr_vec:
; CHECK: # %bb.0:
-; CHECK-NEXT: psrld $31, %xmm0
-; CHECK-NEXT: psubd %xmm0, %xmm1
-; CHECK-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NEXT: psrad $31, %xmm0
+; CHECK-NEXT: paddd %xmm1, %xmm0
; CHECK-NEXT: retq
%sh = lshr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
%r = sub <4 x i32> %y, %sh
@@ -255,9 +255,9 @@ define <4 x i32> @sub_lshr_vec(<4 x i32> %x, <4 x i32> %y) {
define i32 @sub_const_op_lshr(i32 %x) {
; CHECK-LABEL: sub_const_op_lshr:
; CHECK: # %bb.0:
-; CHECK-NEXT: shrl $31, %edi
-; CHECK-NEXT: xorl $43, %edi
-; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: sarl $31, %edi
+; CHECK-NEXT: leal 43(%rdi), %eax
; CHECK-NEXT: retq
%sh = lshr i32 %x, 31
%r = sub i32 43, %sh
@@ -267,10 +267,8 @@ define i32 @sub_const_op_lshr(i32 %x) {
define <4 x i32> @sub_const_op_lshr_vec(<4 x i32> %x) {
; CHECK-LABEL: sub_const_op_lshr_vec:
; CHECK: # %bb.0:
-; CHECK-NEXT: psrld $31, %xmm0
-; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [42,42,42,42]
-; CHECK-NEXT: psubd %xmm0, %xmm1
-; CHECK-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NEXT: psrad $31, %xmm0
+; CHECK-NEXT: paddd {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
%sh = lshr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
%r = sub <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %sh
diff --git a/test/CodeGen/X86/speculative-load-hardening.ll b/test/CodeGen/X86/speculative-load-hardening.ll
index 45b9c2f29807..55f7949c0da0 100644
--- a/test/CodeGen/X86/speculative-load-hardening.ll
+++ b/test/CodeGen/X86/speculative-load-hardening.ll
@@ -8,7 +8,7 @@ declare void @leak(i32 %v1, i32 %v2)
declare void @sink(i32)
-define i32 @test_trivial_entry_load(i32* %ptr) nounwind {
+define i32 @test_trivial_entry_load(i32* %ptr) {
; X64-LABEL: test_trivial_entry_load:
; X64: # %bb.0: # %entry
; X64-NEXT: movq %rsp, %rcx
@@ -29,12 +29,18 @@ entry:
ret i32 %v
}
-define void @test_basic_conditions(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %ptr2, i32** %ptr3) nounwind {
+define void @test_basic_conditions(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %ptr2, i32** %ptr3) {
; X64-LABEL: test_basic_conditions:
; X64: # %bb.0: # %entry
; X64-NEXT: pushq %r15
+; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: pushq %r14
+; X64-NEXT: .cfi_def_cfa_offset 24
; X64-NEXT: pushq %rbx
+; X64-NEXT: .cfi_def_cfa_offset 32
+; X64-NEXT: .cfi_offset %rbx, -32
+; X64-NEXT: .cfi_offset %r14, -24
+; X64-NEXT: .cfi_offset %r15, -16
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq $-1, %rbx
; X64-NEXT: sarq $63, %rax
@@ -50,10 +56,14 @@ define void @test_basic_conditions(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %ptr
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: popq %rbx
+; X64-NEXT: .cfi_def_cfa_offset 24
; X64-NEXT: popq %r14
+; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: popq %r15
+; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
; X64-NEXT: .LBB1_4: # %then2
+; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: movq %r8, %r15
; X64-NEXT: cmovneq %rbx, %rax
; X64-NEXT: testl %edx, %edx
@@ -90,19 +100,21 @@ define void @test_basic_conditions(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %ptr
;
; X64-LFENCE-LABEL: test_basic_conditions:
; X64-LFENCE: # %bb.0: # %entry
+; X64-LFENCE-NEXT: pushq %r14
+; X64-LFENCE-NEXT: .cfi_def_cfa_offset 16
+; X64-LFENCE-NEXT: pushq %rbx
+; X64-LFENCE-NEXT: .cfi_def_cfa_offset 24
+; X64-LFENCE-NEXT: pushq %rax
+; X64-LFENCE-NEXT: .cfi_def_cfa_offset 32
+; X64-LFENCE-NEXT: .cfi_offset %rbx, -24
+; X64-LFENCE-NEXT: .cfi_offset %r14, -16
; X64-LFENCE-NEXT: testl %edi, %edi
; X64-LFENCE-NEXT: jne .LBB1_6
; X64-LFENCE-NEXT: # %bb.1: # %then1
; X64-LFENCE-NEXT: lfence
; X64-LFENCE-NEXT: testl %esi, %esi
-; X64-LFENCE-NEXT: je .LBB1_2
-; X64-LFENCE-NEXT: .LBB1_6: # %exit
-; X64-LFENCE-NEXT: lfence
-; X64-LFENCE-NEXT: retq
-; X64-LFENCE-NEXT: .LBB1_2: # %then2
-; X64-LFENCE-NEXT: pushq %r14
-; X64-LFENCE-NEXT: pushq %rbx
-; X64-LFENCE-NEXT: pushq %rax
+; X64-LFENCE-NEXT: jne .LBB1_6
+; X64-LFENCE-NEXT: # %bb.2: # %then2
; X64-LFENCE-NEXT: movq %r8, %rbx
; X64-LFENCE-NEXT: lfence
; X64-LFENCE-NEXT: testl %edx, %edx
@@ -126,10 +138,14 @@ define void @test_basic_conditions(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %ptr
; X64-LFENCE-NEXT: .LBB1_5: # %merge
; X64-LFENCE-NEXT: movslq (%r14), %rax
; X64-LFENCE-NEXT: movl $0, (%rbx,%rax,4)
+; X64-LFENCE-NEXT: .LBB1_6: # %exit
+; X64-LFENCE-NEXT: lfence
; X64-LFENCE-NEXT: addq $8, %rsp
+; X64-LFENCE-NEXT: .cfi_def_cfa_offset 24
; X64-LFENCE-NEXT: popq %rbx
+; X64-LFENCE-NEXT: .cfi_def_cfa_offset 16
; X64-LFENCE-NEXT: popq %r14
-; X64-LFENCE-NEXT: lfence
+; X64-LFENCE-NEXT: .cfi_def_cfa_offset 8
; X64-LFENCE-NEXT: retq
entry:
%a.cmp = icmp eq i32 %a, 0
@@ -465,12 +481,18 @@ declare i8* @__cxa_allocate_exception(i64) local_unnamed_addr
declare void @__cxa_throw(i8*, i8*, i8*) local_unnamed_addr
-define void @test_basic_eh(i32 %a, i32* %ptr1, i32* %ptr2) nounwind personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+define void @test_basic_eh(i32 %a, i32* %ptr1, i32* %ptr2) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
; X64-LABEL: test_basic_eh:
; X64: # %bb.0: # %entry
; X64-NEXT: pushq %rbp
+; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: pushq %r14
+; X64-NEXT: .cfi_def_cfa_offset 24
; X64-NEXT: pushq %rbx
+; X64-NEXT: .cfi_def_cfa_offset 32
+; X64-NEXT: .cfi_offset %rbx, -32
+; X64-NEXT: .cfi_offset %r14, -24
+; X64-NEXT: .cfi_offset %rbp, -16
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movq $-1, %rcx
; X64-NEXT: sarq $63, %rax
@@ -507,10 +529,14 @@ define void @test_basic_eh(i32 %a, i32* %ptr1, i32* %ptr2) nounwind personality
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-NEXT: popq %rbx
+; X64-NEXT: .cfi_def_cfa_offset 24
; X64-NEXT: popq %r14
+; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: popq %rbp
+; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
; X64-NEXT: .LBB4_4: # %lpad
+; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: .Ltmp2:
; X64-NEXT: movq %rsp, %rcx
; X64-NEXT: sarq $63, %rcx
@@ -529,8 +555,14 @@ define void @test_basic_eh(i32 %a, i32* %ptr1, i32* %ptr2) nounwind personality
; X64-LFENCE-LABEL: test_basic_eh:
; X64-LFENCE: # %bb.0: # %entry
; X64-LFENCE-NEXT: pushq %rbp
+; X64-LFENCE-NEXT: .cfi_def_cfa_offset 16
; X64-LFENCE-NEXT: pushq %r14
+; X64-LFENCE-NEXT: .cfi_def_cfa_offset 24
; X64-LFENCE-NEXT: pushq %rbx
+; X64-LFENCE-NEXT: .cfi_def_cfa_offset 32
+; X64-LFENCE-NEXT: .cfi_offset %rbx, -32
+; X64-LFENCE-NEXT: .cfi_offset %r14, -24
+; X64-LFENCE-NEXT: .cfi_offset %rbp, -16
; X64-LFENCE-NEXT: cmpl $41, %edi
; X64-LFENCE-NEXT: jg .LBB4_2
; X64-LFENCE-NEXT: # %bb.1: # %thrower
@@ -551,10 +583,14 @@ define void @test_basic_eh(i32 %a, i32* %ptr1, i32* %ptr2) nounwind personality
; X64-LFENCE-NEXT: .LBB4_2: # %exit
; X64-LFENCE-NEXT: lfence
; X64-LFENCE-NEXT: popq %rbx
+; X64-LFENCE-NEXT: .cfi_def_cfa_offset 24
; X64-LFENCE-NEXT: popq %r14
+; X64-LFENCE-NEXT: .cfi_def_cfa_offset 16
; X64-LFENCE-NEXT: popq %rbp
+; X64-LFENCE-NEXT: .cfi_def_cfa_offset 8
; X64-LFENCE-NEXT: retq
; X64-LFENCE-NEXT: .LBB4_3: # %lpad
+; X64-LFENCE-NEXT: .cfi_def_cfa_offset 32
; X64-LFENCE-NEXT: .Ltmp2:
; X64-LFENCE-NEXT: movl (%rax), %eax
; X64-LFENCE-NEXT: addl (%rbx), %eax
diff --git a/test/CodeGen/X86/vector-idiv-sdiv-128.ll b/test/CodeGen/X86/vector-idiv-sdiv-128.ll
index 2416a177228e..3f251dd8d62c 100644
--- a/test/CodeGen/X86/vector-idiv-sdiv-128.ll
+++ b/test/CodeGen/X86/vector-idiv-sdiv-128.ll
@@ -301,9 +301,9 @@ define <2 x i64> @test_rem7_2i64(<2 x i64> %a) nounwind {
; SSE2-NEXT: sarq %rdx
; SSE2-NEXT: addq %rax, %rdx
; SSE2-NEXT: leaq (,%rdx,8), %rax
-; SSE2-NEXT: subq %rdx, %rax
-; SSE2-NEXT: subq %rax, %rcx
-; SSE2-NEXT: movq %rcx, %xmm1
+; SSE2-NEXT: subq %rax, %rdx
+; SSE2-NEXT: addq %rcx, %rdx
+; SSE2-NEXT: movq %rdx, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE2-NEXT: movq %xmm0, %rcx
; SSE2-NEXT: movq %rcx, %rax
@@ -313,9 +313,9 @@ define <2 x i64> @test_rem7_2i64(<2 x i64> %a) nounwind {
; SSE2-NEXT: sarq %rdx
; SSE2-NEXT: addq %rax, %rdx
; SSE2-NEXT: leaq (,%rdx,8), %rax
-; SSE2-NEXT: subq %rdx, %rax
-; SSE2-NEXT: subq %rax, %rcx
-; SSE2-NEXT: movq %rcx, %xmm0
+; SSE2-NEXT: subq %rax, %rdx
+; SSE2-NEXT: addq %rcx, %rdx
+; SSE2-NEXT: movq %rdx, %xmm0
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: retq
@@ -331,9 +331,9 @@ define <2 x i64> @test_rem7_2i64(<2 x i64> %a) nounwind {
; SSE41-NEXT: sarq %rdx
; SSE41-NEXT: addq %rax, %rdx
; SSE41-NEXT: leaq (,%rdx,8), %rax
-; SSE41-NEXT: subq %rdx, %rax
-; SSE41-NEXT: subq %rax, %rcx
-; SSE41-NEXT: movq %rcx, %xmm1
+; SSE41-NEXT: subq %rax, %rdx
+; SSE41-NEXT: addq %rcx, %rdx
+; SSE41-NEXT: movq %rdx, %xmm1
; SSE41-NEXT: movq %xmm0, %rcx
; SSE41-NEXT: movq %rcx, %rax
; SSE41-NEXT: imulq %rsi
@@ -342,9 +342,9 @@ define <2 x i64> @test_rem7_2i64(<2 x i64> %a) nounwind {
; SSE41-NEXT: sarq %rdx
; SSE41-NEXT: addq %rax, %rdx
; SSE41-NEXT: leaq (,%rdx,8), %rax
-; SSE41-NEXT: subq %rdx, %rax
-; SSE41-NEXT: subq %rax, %rcx
-; SSE41-NEXT: movq %rcx, %xmm0
+; SSE41-NEXT: subq %rax, %rdx
+; SSE41-NEXT: addq %rcx, %rdx
+; SSE41-NEXT: movq %rdx, %xmm0
; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT: retq
;
@@ -359,9 +359,9 @@ define <2 x i64> @test_rem7_2i64(<2 x i64> %a) nounwind {
; AVX-NEXT: sarq %rdx
; AVX-NEXT: addq %rax, %rdx
; AVX-NEXT: leaq (,%rdx,8), %rax
-; AVX-NEXT: subq %rdx, %rax
-; AVX-NEXT: subq %rax, %rcx
-; AVX-NEXT: vmovq %rcx, %xmm1
+; AVX-NEXT: subq %rax, %rdx
+; AVX-NEXT: addq %rcx, %rdx
+; AVX-NEXT: vmovq %rdx, %xmm1
; AVX-NEXT: vmovq %xmm0, %rcx
; AVX-NEXT: movq %rcx, %rax
; AVX-NEXT: imulq %rsi
@@ -370,9 +370,9 @@ define <2 x i64> @test_rem7_2i64(<2 x i64> %a) nounwind {
; AVX-NEXT: sarq %rdx
; AVX-NEXT: addq %rax, %rdx
; AVX-NEXT: leaq (,%rdx,8), %rax
-; AVX-NEXT: subq %rdx, %rax
-; AVX-NEXT: subq %rax, %rcx
-; AVX-NEXT: vmovq %rcx, %xmm0
+; AVX-NEXT: subq %rax, %rdx
+; AVX-NEXT: addq %rcx, %rdx
+; AVX-NEXT: vmovq %rdx, %xmm0
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
%res = srem <2 x i64> %a, <i64 7, i64 7>
diff --git a/test/CodeGen/X86/vector-idiv-sdiv-256.ll b/test/CodeGen/X86/vector-idiv-sdiv-256.ll
index c112e84fbf73..5df4d09e9715 100644
--- a/test/CodeGen/X86/vector-idiv-sdiv-256.ll
+++ b/test/CodeGen/X86/vector-idiv-sdiv-256.ll
@@ -263,9 +263,9 @@ define <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind {
; AVX1-NEXT: sarq %rdx
; AVX1-NEXT: addq %rax, %rdx
; AVX1-NEXT: leaq (,%rdx,8), %rax
-; AVX1-NEXT: subq %rdx, %rax
-; AVX1-NEXT: subq %rax, %rcx
-; AVX1-NEXT: vmovq %rcx, %xmm2
+; AVX1-NEXT: subq %rax, %rdx
+; AVX1-NEXT: addq %rcx, %rdx
+; AVX1-NEXT: vmovq %rdx, %xmm2
; AVX1-NEXT: vmovq %xmm1, %rcx
; AVX1-NEXT: movq %rcx, %rax
; AVX1-NEXT: imulq %rsi
@@ -274,9 +274,9 @@ define <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind {
; AVX1-NEXT: sarq %rdx
; AVX1-NEXT: addq %rax, %rdx
; AVX1-NEXT: leaq (,%rdx,8), %rax
-; AVX1-NEXT: subq %rdx, %rax
-; AVX1-NEXT: subq %rax, %rcx
-; AVX1-NEXT: vmovq %rcx, %xmm1
+; AVX1-NEXT: subq %rax, %rdx
+; AVX1-NEXT: addq %rcx, %rdx
+; AVX1-NEXT: vmovq %rdx, %xmm1
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX1-NEXT: vpextrq $1, %xmm0, %rcx
; AVX1-NEXT: movq %rcx, %rax
@@ -286,9 +286,9 @@ define <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind {
; AVX1-NEXT: sarq %rdx
; AVX1-NEXT: addq %rax, %rdx
; AVX1-NEXT: leaq (,%rdx,8), %rax
-; AVX1-NEXT: subq %rdx, %rax
-; AVX1-NEXT: subq %rax, %rcx
-; AVX1-NEXT: vmovq %rcx, %xmm2
+; AVX1-NEXT: subq %rax, %rdx
+; AVX1-NEXT: addq %rcx, %rdx
+; AVX1-NEXT: vmovq %rdx, %xmm2
; AVX1-NEXT: vmovq %xmm0, %rcx
; AVX1-NEXT: movq %rcx, %rax
; AVX1-NEXT: imulq %rsi
@@ -297,9 +297,9 @@ define <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind {
; AVX1-NEXT: sarq %rdx
; AVX1-NEXT: addq %rax, %rdx
; AVX1-NEXT: leaq (,%rdx,8), %rax
-; AVX1-NEXT: subq %rdx, %rax
-; AVX1-NEXT: subq %rax, %rcx
-; AVX1-NEXT: vmovq %rcx, %xmm0
+; AVX1-NEXT: subq %rax, %rdx
+; AVX1-NEXT: addq %rcx, %rdx
+; AVX1-NEXT: vmovq %rdx, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
@@ -316,9 +316,9 @@ define <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind {
; AVX2-NEXT: sarq %rdx
; AVX2-NEXT: addq %rax, %rdx
; AVX2-NEXT: leaq (,%rdx,8), %rax
-; AVX2-NEXT: subq %rdx, %rax
-; AVX2-NEXT: subq %rax, %rcx
-; AVX2-NEXT: vmovq %rcx, %xmm2
+; AVX2-NEXT: subq %rax, %rdx
+; AVX2-NEXT: addq %rcx, %rdx
+; AVX2-NEXT: vmovq %rdx, %xmm2
; AVX2-NEXT: vmovq %xmm1, %rcx
; AVX2-NEXT: movq %rcx, %rax
; AVX2-NEXT: imulq %rsi
@@ -327,9 +327,9 @@ define <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind {
; AVX2-NEXT: sarq %rdx
; AVX2-NEXT: addq %rax, %rdx
; AVX2-NEXT: leaq (,%rdx,8), %rax
-; AVX2-NEXT: subq %rdx, %rax
-; AVX2-NEXT: subq %rax, %rcx
-; AVX2-NEXT: vmovq %rcx, %xmm1
+; AVX2-NEXT: subq %rax, %rdx
+; AVX2-NEXT: addq %rcx, %rdx
+; AVX2-NEXT: vmovq %rdx, %xmm1
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX2-NEXT: vpextrq $1, %xmm0, %rcx
; AVX2-NEXT: movq %rcx, %rax
@@ -339,9 +339,9 @@ define <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind {
; AVX2-NEXT: sarq %rdx
; AVX2-NEXT: addq %rax, %rdx
; AVX2-NEXT: leaq (,%rdx,8), %rax
-; AVX2-NEXT: subq %rdx, %rax
-; AVX2-NEXT: subq %rax, %rcx
-; AVX2-NEXT: vmovq %rcx, %xmm2
+; AVX2-NEXT: subq %rax, %rdx
+; AVX2-NEXT: addq %rcx, %rdx
+; AVX2-NEXT: vmovq %rdx, %xmm2
; AVX2-NEXT: vmovq %xmm0, %rcx
; AVX2-NEXT: movq %rcx, %rax
; AVX2-NEXT: imulq %rsi
@@ -350,9 +350,9 @@ define <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind {
; AVX2-NEXT: sarq %rdx
; AVX2-NEXT: addq %rax, %rdx
; AVX2-NEXT: leaq (,%rdx,8), %rax
-; AVX2-NEXT: subq %rdx, %rax
-; AVX2-NEXT: subq %rax, %rcx
-; AVX2-NEXT: vmovq %rcx, %xmm0
+; AVX2-NEXT: subq %rax, %rdx
+; AVX2-NEXT: addq %rcx, %rdx
+; AVX2-NEXT: vmovq %rdx, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
diff --git a/test/CodeGen/X86/vector-idiv-sdiv-512.ll b/test/CodeGen/X86/vector-idiv-sdiv-512.ll
index 310e1fc7057a..893c7d1bbd7b 100644
--- a/test/CodeGen/X86/vector-idiv-sdiv-512.ll
+++ b/test/CodeGen/X86/vector-idiv-sdiv-512.ll
@@ -214,9 +214,9 @@ define <8 x i64> @test_rem7_8i64(<8 x i64> %a) nounwind {
; AVX-NEXT: sarq %rdx
; AVX-NEXT: addq %rax, %rdx
; AVX-NEXT: leaq (,%rdx,8), %rax
-; AVX-NEXT: subq %rdx, %rax
-; AVX-NEXT: subq %rax, %rcx
-; AVX-NEXT: vmovq %rcx, %xmm2
+; AVX-NEXT: subq %rax, %rdx
+; AVX-NEXT: addq %rcx, %rdx
+; AVX-NEXT: vmovq %rdx, %xmm2
; AVX-NEXT: vmovq %xmm1, %rcx
; AVX-NEXT: movq %rcx, %rax
; AVX-NEXT: imulq %rsi
@@ -225,9 +225,9 @@ define <8 x i64> @test_rem7_8i64(<8 x i64> %a) nounwind {
; AVX-NEXT: sarq %rdx
; AVX-NEXT: addq %rax, %rdx
; AVX-NEXT: leaq (,%rdx,8), %rax
-; AVX-NEXT: subq %rdx, %rax
-; AVX-NEXT: subq %rax, %rcx
-; AVX-NEXT: vmovq %rcx, %xmm1
+; AVX-NEXT: subq %rax, %rdx
+; AVX-NEXT: addq %rcx, %rdx
+; AVX-NEXT: vmovq %rdx, %xmm1
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX-NEXT: vextracti32x4 $2, %zmm0, %xmm2
; AVX-NEXT: vpextrq $1, %xmm2, %rcx
@@ -238,9 +238,9 @@ define <8 x i64> @test_rem7_8i64(<8 x i64> %a) nounwind {
; AVX-NEXT: sarq %rdx
; AVX-NEXT: addq %rax, %rdx
; AVX-NEXT: leaq (,%rdx,8), %rax
-; AVX-NEXT: subq %rdx, %rax
-; AVX-NEXT: subq %rax, %rcx
-; AVX-NEXT: vmovq %rcx, %xmm3
+; AVX-NEXT: subq %rax, %rdx
+; AVX-NEXT: addq %rcx, %rdx
+; AVX-NEXT: vmovq %rdx, %xmm3
; AVX-NEXT: vmovq %xmm2, %rcx
; AVX-NEXT: movq %rcx, %rax
; AVX-NEXT: imulq %rsi
@@ -249,9 +249,9 @@ define <8 x i64> @test_rem7_8i64(<8 x i64> %a) nounwind {
; AVX-NEXT: sarq %rdx
; AVX-NEXT: addq %rax, %rdx
; AVX-NEXT: leaq (,%rdx,8), %rax
-; AVX-NEXT: subq %rdx, %rax
-; AVX-NEXT: subq %rax, %rcx
-; AVX-NEXT: vmovq %rcx, %xmm2
+; AVX-NEXT: subq %rax, %rdx
+; AVX-NEXT: addq %rcx, %rdx
+; AVX-NEXT: vmovq %rdx, %xmm2
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
; AVX-NEXT: vextracti128 $1, %ymm0, %xmm2
@@ -263,9 +263,9 @@ define <8 x i64> @test_rem7_8i64(<8 x i64> %a) nounwind {
; AVX-NEXT: sarq %rdx
; AVX-NEXT: addq %rax, %rdx
; AVX-NEXT: leaq (,%rdx,8), %rax
-; AVX-NEXT: subq %rdx, %rax
-; AVX-NEXT: subq %rax, %rcx
-; AVX-NEXT: vmovq %rcx, %xmm3
+; AVX-NEXT: subq %rax, %rdx
+; AVX-NEXT: addq %rcx, %rdx
+; AVX-NEXT: vmovq %rdx, %xmm3
; AVX-NEXT: vmovq %xmm2, %rcx
; AVX-NEXT: movq %rcx, %rax
; AVX-NEXT: imulq %rsi
@@ -274,9 +274,9 @@ define <8 x i64> @test_rem7_8i64(<8 x i64> %a) nounwind {
; AVX-NEXT: sarq %rdx
; AVX-NEXT: addq %rax, %rdx
; AVX-NEXT: leaq (,%rdx,8), %rax
-; AVX-NEXT: subq %rdx, %rax
-; AVX-NEXT: subq %rax, %rcx
-; AVX-NEXT: vmovq %rcx, %xmm2
+; AVX-NEXT: subq %rax, %rdx
+; AVX-NEXT: addq %rcx, %rdx
+; AVX-NEXT: vmovq %rdx, %xmm2
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX-NEXT: vpextrq $1, %xmm0, %rcx
; AVX-NEXT: movq %rcx, %rax
@@ -286,9 +286,9 @@ define <8 x i64> @test_rem7_8i64(<8 x i64> %a) nounwind {
; AVX-NEXT: sarq %rdx
; AVX-NEXT: addq %rax, %rdx
; AVX-NEXT: leaq (,%rdx,8), %rax
-; AVX-NEXT: subq %rdx, %rax
-; AVX-NEXT: subq %rax, %rcx
-; AVX-NEXT: vmovq %rcx, %xmm3
+; AVX-NEXT: subq %rax, %rdx
+; AVX-NEXT: addq %rcx, %rdx
+; AVX-NEXT: vmovq %rdx, %xmm3
; AVX-NEXT: vmovq %xmm0, %rcx
; AVX-NEXT: movq %rcx, %rax
; AVX-NEXT: imulq %rsi
@@ -297,9 +297,9 @@ define <8 x i64> @test_rem7_8i64(<8 x i64> %a) nounwind {
; AVX-NEXT: sarq %rdx
; AVX-NEXT: addq %rax, %rdx
; AVX-NEXT: leaq (,%rdx,8), %rax
-; AVX-NEXT: subq %rdx, %rax
-; AVX-NEXT: subq %rax, %rcx
-; AVX-NEXT: vmovq %rcx, %xmm0
+; AVX-NEXT: subq %rax, %rdx
+; AVX-NEXT: addq %rcx, %rdx
+; AVX-NEXT: vmovq %rdx, %xmm0
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; AVX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
diff --git a/test/CodeGen/X86/vector-idiv-udiv-128.ll b/test/CodeGen/X86/vector-idiv-udiv-128.ll
index c991a905c054..598782ddd639 100644
--- a/test/CodeGen/X86/vector-idiv-udiv-128.ll
+++ b/test/CodeGen/X86/vector-idiv-udiv-128.ll
@@ -278,9 +278,9 @@ define <2 x i64> @test_rem7_2i64(<2 x i64> %a) nounwind {
; SSE2-NEXT: addq %rdx, %rax
; SSE2-NEXT: shrq $2, %rax
; SSE2-NEXT: leaq (,%rax,8), %rdx
-; SSE2-NEXT: subq %rax, %rdx
-; SSE2-NEXT: subq %rdx, %rcx
-; SSE2-NEXT: movq %rcx, %xmm1
+; SSE2-NEXT: subq %rdx, %rax
+; SSE2-NEXT: addq %rcx, %rax
+; SSE2-NEXT: movq %rax, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE2-NEXT: movq %xmm0, %rcx
; SSE2-NEXT: movq %rcx, %rax
@@ -291,9 +291,9 @@ define <2 x i64> @test_rem7_2i64(<2 x i64> %a) nounwind {
; SSE2-NEXT: addq %rdx, %rax
; SSE2-NEXT: shrq $2, %rax
; SSE2-NEXT: leaq (,%rax,8), %rdx
-; SSE2-NEXT: subq %rax, %rdx
-; SSE2-NEXT: subq %rdx, %rcx
-; SSE2-NEXT: movq %rcx, %xmm0
+; SSE2-NEXT: subq %rdx, %rax
+; SSE2-NEXT: addq %rcx, %rax
+; SSE2-NEXT: movq %rax, %xmm0
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: retq
@@ -310,9 +310,9 @@ define <2 x i64> @test_rem7_2i64(<2 x i64> %a) nounwind {
; SSE41-NEXT: addq %rdx, %rax
; SSE41-NEXT: shrq $2, %rax
; SSE41-NEXT: leaq (,%rax,8), %rdx
-; SSE41-NEXT: subq %rax, %rdx
-; SSE41-NEXT: subq %rdx, %rcx
-; SSE41-NEXT: movq %rcx, %xmm1
+; SSE41-NEXT: subq %rdx, %rax
+; SSE41-NEXT: addq %rcx, %rax
+; SSE41-NEXT: movq %rax, %xmm1
; SSE41-NEXT: movq %xmm0, %rcx
; SSE41-NEXT: movq %rcx, %rax
; SSE41-NEXT: mulq %rsi
@@ -322,9 +322,9 @@ define <2 x i64> @test_rem7_2i64(<2 x i64> %a) nounwind {
; SSE41-NEXT: addq %rdx, %rax
; SSE41-NEXT: shrq $2, %rax
; SSE41-NEXT: leaq (,%rax,8), %rdx
-; SSE41-NEXT: subq %rax, %rdx
-; SSE41-NEXT: subq %rdx, %rcx
-; SSE41-NEXT: movq %rcx, %xmm0
+; SSE41-NEXT: subq %rdx, %rax
+; SSE41-NEXT: addq %rcx, %rax
+; SSE41-NEXT: movq %rax, %xmm0
; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT: retq
;
@@ -340,9 +340,9 @@ define <2 x i64> @test_rem7_2i64(<2 x i64> %a) nounwind {
; AVX-NEXT: addq %rdx, %rax
; AVX-NEXT: shrq $2, %rax
; AVX-NEXT: leaq (,%rax,8), %rdx
-; AVX-NEXT: subq %rax, %rdx
-; AVX-NEXT: subq %rdx, %rcx
-; AVX-NEXT: vmovq %rcx, %xmm1
+; AVX-NEXT: subq %rdx, %rax
+; AVX-NEXT: addq %rcx, %rax
+; AVX-NEXT: vmovq %rax, %xmm1
; AVX-NEXT: vmovq %xmm0, %rcx
; AVX-NEXT: movq %rcx, %rax
; AVX-NEXT: mulq %rsi
@@ -352,9 +352,9 @@ define <2 x i64> @test_rem7_2i64(<2 x i64> %a) nounwind {
; AVX-NEXT: addq %rdx, %rax
; AVX-NEXT: shrq $2, %rax
; AVX-NEXT: leaq (,%rax,8), %rdx
-; AVX-NEXT: subq %rax, %rdx
-; AVX-NEXT: subq %rdx, %rcx
-; AVX-NEXT: vmovq %rcx, %xmm0
+; AVX-NEXT: subq %rdx, %rax
+; AVX-NEXT: addq %rcx, %rax
+; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
%res = urem <2 x i64> %a, <i64 7, i64 7>
diff --git a/test/CodeGen/X86/vector-idiv-udiv-256.ll b/test/CodeGen/X86/vector-idiv-udiv-256.ll
index 81d93984e261..377ff5ea77af 100644
--- a/test/CodeGen/X86/vector-idiv-udiv-256.ll
+++ b/test/CodeGen/X86/vector-idiv-udiv-256.ll
@@ -264,9 +264,9 @@ define <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind {
; AVX1-NEXT: addq %rdx, %rax
; AVX1-NEXT: shrq $2, %rax
; AVX1-NEXT: leaq (,%rax,8), %rdx
-; AVX1-NEXT: subq %rax, %rdx
-; AVX1-NEXT: subq %rdx, %rcx
-; AVX1-NEXT: vmovq %rcx, %xmm2
+; AVX1-NEXT: subq %rdx, %rax
+; AVX1-NEXT: addq %rcx, %rax
+; AVX1-NEXT: vmovq %rax, %xmm2
; AVX1-NEXT: vmovq %xmm1, %rcx
; AVX1-NEXT: movq %rcx, %rax
; AVX1-NEXT: mulq %rsi
@@ -276,9 +276,9 @@ define <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind {
; AVX1-NEXT: addq %rdx, %rax
; AVX1-NEXT: shrq $2, %rax
; AVX1-NEXT: leaq (,%rax,8), %rdx
-; AVX1-NEXT: subq %rax, %rdx
-; AVX1-NEXT: subq %rdx, %rcx
-; AVX1-NEXT: vmovq %rcx, %xmm1
+; AVX1-NEXT: subq %rdx, %rax
+; AVX1-NEXT: addq %rcx, %rax
+; AVX1-NEXT: vmovq %rax, %xmm1
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX1-NEXT: vpextrq $1, %xmm0, %rcx
; AVX1-NEXT: movq %rcx, %rax
@@ -289,9 +289,9 @@ define <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind {
; AVX1-NEXT: addq %rdx, %rax
; AVX1-NEXT: shrq $2, %rax
; AVX1-NEXT: leaq (,%rax,8), %rdx
-; AVX1-NEXT: subq %rax, %rdx
-; AVX1-NEXT: subq %rdx, %rcx
-; AVX1-NEXT: vmovq %rcx, %xmm2
+; AVX1-NEXT: subq %rdx, %rax
+; AVX1-NEXT: addq %rcx, %rax
+; AVX1-NEXT: vmovq %rax, %xmm2
; AVX1-NEXT: vmovq %xmm0, %rcx
; AVX1-NEXT: movq %rcx, %rax
; AVX1-NEXT: mulq %rsi
@@ -301,9 +301,9 @@ define <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind {
; AVX1-NEXT: addq %rdx, %rax
; AVX1-NEXT: shrq $2, %rax
; AVX1-NEXT: leaq (,%rax,8), %rdx
-; AVX1-NEXT: subq %rax, %rdx
-; AVX1-NEXT: subq %rdx, %rcx
-; AVX1-NEXT: vmovq %rcx, %xmm0
+; AVX1-NEXT: subq %rdx, %rax
+; AVX1-NEXT: addq %rcx, %rax
+; AVX1-NEXT: vmovq %rax, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
@@ -321,9 +321,9 @@ define <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind {
; AVX2-NEXT: addq %rdx, %rax
; AVX2-NEXT: shrq $2, %rax
; AVX2-NEXT: leaq (,%rax,8), %rdx
-; AVX2-NEXT: subq %rax, %rdx
-; AVX2-NEXT: subq %rdx, %rcx
-; AVX2-NEXT: vmovq %rcx, %xmm2
+; AVX2-NEXT: subq %rdx, %rax
+; AVX2-NEXT: addq %rcx, %rax
+; AVX2-NEXT: vmovq %rax, %xmm2
; AVX2-NEXT: vmovq %xmm1, %rcx
; AVX2-NEXT: movq %rcx, %rax
; AVX2-NEXT: mulq %rsi
@@ -333,9 +333,9 @@ define <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind {
; AVX2-NEXT: addq %rdx, %rax
; AVX2-NEXT: shrq $2, %rax
; AVX2-NEXT: leaq (,%rax,8), %rdx
-; AVX2-NEXT: subq %rax, %rdx
-; AVX2-NEXT: subq %rdx, %rcx
-; AVX2-NEXT: vmovq %rcx, %xmm1
+; AVX2-NEXT: subq %rdx, %rax
+; AVX2-NEXT: addq %rcx, %rax
+; AVX2-NEXT: vmovq %rax, %xmm1
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX2-NEXT: vpextrq $1, %xmm0, %rcx
; AVX2-NEXT: movq %rcx, %rax
@@ -346,9 +346,9 @@ define <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind {
; AVX2-NEXT: addq %rdx, %rax
; AVX2-NEXT: shrq $2, %rax
; AVX2-NEXT: leaq (,%rax,8), %rdx
-; AVX2-NEXT: subq %rax, %rdx
-; AVX2-NEXT: subq %rdx, %rcx
-; AVX2-NEXT: vmovq %rcx, %xmm2
+; AVX2-NEXT: subq %rdx, %rax
+; AVX2-NEXT: addq %rcx, %rax
+; AVX2-NEXT: vmovq %rax, %xmm2
; AVX2-NEXT: vmovq %xmm0, %rcx
; AVX2-NEXT: movq %rcx, %rax
; AVX2-NEXT: mulq %rsi
@@ -358,9 +358,9 @@ define <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind {
; AVX2-NEXT: addq %rdx, %rax
; AVX2-NEXT: shrq $2, %rax
; AVX2-NEXT: leaq (,%rax,8), %rdx
-; AVX2-NEXT: subq %rax, %rdx
-; AVX2-NEXT: subq %rdx, %rcx
-; AVX2-NEXT: vmovq %rcx, %xmm0
+; AVX2-NEXT: subq %rdx, %rax
+; AVX2-NEXT: addq %rcx, %rax
+; AVX2-NEXT: vmovq %rax, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
diff --git a/test/CodeGen/X86/vector-idiv-udiv-512.ll b/test/CodeGen/X86/vector-idiv-udiv-512.ll
index 1288f5a5d5be..22c359cb7e98 100644
--- a/test/CodeGen/X86/vector-idiv-udiv-512.ll
+++ b/test/CodeGen/X86/vector-idiv-udiv-512.ll
@@ -218,9 +218,9 @@ define <8 x i64> @test_rem7_8i64(<8 x i64> %a) nounwind {
; AVX-NEXT: addq %rdx, %rax
; AVX-NEXT: shrq $2, %rax
; AVX-NEXT: leaq (,%rax,8), %rdx
-; AVX-NEXT: subq %rax, %rdx
-; AVX-NEXT: subq %rdx, %rcx
-; AVX-NEXT: vmovq %rcx, %xmm2
+; AVX-NEXT: subq %rdx, %rax
+; AVX-NEXT: addq %rcx, %rax
+; AVX-NEXT: vmovq %rax, %xmm2
; AVX-NEXT: vmovq %xmm1, %rcx
; AVX-NEXT: movq %rcx, %rax
; AVX-NEXT: mulq %rsi
@@ -230,9 +230,9 @@ define <8 x i64> @test_rem7_8i64(<8 x i64> %a) nounwind {
; AVX-NEXT: addq %rdx, %rax
; AVX-NEXT: shrq $2, %rax
; AVX-NEXT: leaq (,%rax,8), %rdx
-; AVX-NEXT: subq %rax, %rdx
-; AVX-NEXT: subq %rdx, %rcx
-; AVX-NEXT: vmovq %rcx, %xmm1
+; AVX-NEXT: subq %rdx, %rax
+; AVX-NEXT: addq %rcx, %rax
+; AVX-NEXT: vmovq %rax, %xmm1
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX-NEXT: vextracti32x4 $2, %zmm0, %xmm2
; AVX-NEXT: vpextrq $1, %xmm2, %rcx
@@ -244,9 +244,9 @@ define <8 x i64> @test_rem7_8i64(<8 x i64> %a) nounwind {
; AVX-NEXT: addq %rdx, %rax
; AVX-NEXT: shrq $2, %rax
; AVX-NEXT: leaq (,%rax,8), %rdx
-; AVX-NEXT: subq %rax, %rdx
-; AVX-NEXT: subq %rdx, %rcx
-; AVX-NEXT: vmovq %rcx, %xmm3
+; AVX-NEXT: subq %rdx, %rax
+; AVX-NEXT: addq %rcx, %rax
+; AVX-NEXT: vmovq %rax, %xmm3
; AVX-NEXT: vmovq %xmm2, %rcx
; AVX-NEXT: movq %rcx, %rax
; AVX-NEXT: mulq %rsi
@@ -256,9 +256,9 @@ define <8 x i64> @test_rem7_8i64(<8 x i64> %a) nounwind {
; AVX-NEXT: addq %rdx, %rax
; AVX-NEXT: shrq $2, %rax
; AVX-NEXT: leaq (,%rax,8), %rdx
-; AVX-NEXT: subq %rax, %rdx
-; AVX-NEXT: subq %rdx, %rcx
-; AVX-NEXT: vmovq %rcx, %xmm2
+; AVX-NEXT: subq %rdx, %rax
+; AVX-NEXT: addq %rcx, %rax
+; AVX-NEXT: vmovq %rax, %xmm2
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
; AVX-NEXT: vextracti128 $1, %ymm0, %xmm2
@@ -271,9 +271,9 @@ define <8 x i64> @test_rem7_8i64(<8 x i64> %a) nounwind {
; AVX-NEXT: addq %rdx, %rax
; AVX-NEXT: shrq $2, %rax
; AVX-NEXT: leaq (,%rax,8), %rdx
-; AVX-NEXT: subq %rax, %rdx
-; AVX-NEXT: subq %rdx, %rcx
-; AVX-NEXT: vmovq %rcx, %xmm3
+; AVX-NEXT: subq %rdx, %rax
+; AVX-NEXT: addq %rcx, %rax
+; AVX-NEXT: vmovq %rax, %xmm3
; AVX-NEXT: vmovq %xmm2, %rcx
; AVX-NEXT: movq %rcx, %rax
; AVX-NEXT: mulq %rsi
@@ -283,9 +283,9 @@ define <8 x i64> @test_rem7_8i64(<8 x i64> %a) nounwind {
; AVX-NEXT: addq %rdx, %rax
; AVX-NEXT: shrq $2, %rax
; AVX-NEXT: leaq (,%rax,8), %rdx
-; AVX-NEXT: subq %rax, %rdx
-; AVX-NEXT: subq %rdx, %rcx
-; AVX-NEXT: vmovq %rcx, %xmm2
+; AVX-NEXT: subq %rdx, %rax
+; AVX-NEXT: addq %rcx, %rax
+; AVX-NEXT: vmovq %rax, %xmm2
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX-NEXT: vpextrq $1, %xmm0, %rcx
; AVX-NEXT: movq %rcx, %rax
@@ -296,9 +296,9 @@ define <8 x i64> @test_rem7_8i64(<8 x i64> %a) nounwind {
; AVX-NEXT: addq %rdx, %rax
; AVX-NEXT: shrq $2, %rax
; AVX-NEXT: leaq (,%rax,8), %rdx
-; AVX-NEXT: subq %rax, %rdx
-; AVX-NEXT: subq %rdx, %rcx
-; AVX-NEXT: vmovq %rcx, %xmm3
+; AVX-NEXT: subq %rdx, %rax
+; AVX-NEXT: addq %rcx, %rax
+; AVX-NEXT: vmovq %rax, %xmm3
; AVX-NEXT: vmovq %xmm0, %rcx
; AVX-NEXT: movq %rcx, %rax
; AVX-NEXT: mulq %rsi
@@ -308,9 +308,9 @@ define <8 x i64> @test_rem7_8i64(<8 x i64> %a) nounwind {
; AVX-NEXT: addq %rdx, %rax
; AVX-NEXT: shrq $2, %rax
; AVX-NEXT: leaq (,%rax,8), %rdx
-; AVX-NEXT: subq %rax, %rdx
-; AVX-NEXT: subq %rdx, %rcx
-; AVX-NEXT: vmovq %rcx, %xmm0
+; AVX-NEXT: subq %rdx, %rax
+; AVX-NEXT: addq %rcx, %rax
+; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; AVX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
diff --git a/test/CodeGen/X86/vector-shift-lshr-128.ll b/test/CodeGen/X86/vector-shift-lshr-128.ll
index 90a0c6f291b2..b50680ff56ee 100644
--- a/test/CodeGen/X86/vector-shift-lshr-128.ll
+++ b/test/CodeGen/X86/vector-shift-lshr-128.ll
@@ -1008,36 +1008,16 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
;
; SSE41-LABEL: constant_shift_v8i16:
; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrlw $4, %xmm1
-; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
-; SSE41-NEXT: movdqa %xmm1, %xmm2
-; SSE41-NEXT: psrlw $2, %xmm2
-; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
-; SSE41-NEXT: movdqa %xmm2, %xmm0
-; SSE41-NEXT: psrlw $1, %xmm0
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3],xmm2[4],xmm0[5],xmm2[6],xmm0[7]
+; SSE41-NEXT: movdqa {{.*#+}} xmm1 = <u,32768,16384,8192,4096,2048,1024,512>
+; SSE41-NEXT: pmulhuw %xmm0, %xmm1
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: constant_shift_v8i16:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
-; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: constant_shift_v8i16:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX2-NEXT: vpsrlvd {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
-; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
+; AVX-LABEL: constant_shift_v8i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmulhuw {{.*}}(%rip), %xmm0, %xmm1
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
+; AVX-NEXT: retq
;
; XOP-LABEL: constant_shift_v8i16:
; XOP: # %bb.0:
@@ -1046,11 +1026,8 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
;
; AVX512DQ-LABEL: constant_shift_v8i16:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX512DQ-NEXT: vpsrlvd {{.*}}(%rip), %ymm0, %ymm0
-; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: vpmulhuw {{.*}}(%rip), %xmm0, %xmm1
+; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: constant_shift_v8i16:
@@ -1064,10 +1041,8 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
;
; AVX512DQVL-LABEL: constant_shift_v8i16:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX512DQVL-NEXT: vpsrlvd {{.*}}(%rip), %ymm0, %ymm0
-; AVX512DQVL-NEXT: vpmovdw %ymm0, %xmm0
-; AVX512DQVL-NEXT: vzeroupper
+; AVX512DQVL-NEXT: vpmulhuw {{.*}}(%rip), %xmm0, %xmm1
+; AVX512DQVL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: constant_shift_v8i16:
diff --git a/test/CodeGen/X86/vector-shift-lshr-256.ll b/test/CodeGen/X86/vector-shift-lshr-256.ll
index f0f0bb8a8819..3ca714d7f830 100644
--- a/test/CodeGen/X86/vector-shift-lshr-256.ll
+++ b/test/CodeGen/X86/vector-shift-lshr-256.ll
@@ -1025,21 +1025,11 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) nounwind {
define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
; AVX1-LABEL: constant_shift_v16i16:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
-; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm2
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
-; AVX1-NEXT: vpsrlw $2, %xmm1, %xmm2
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
-; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm2
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm2
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
-; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm2
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
-; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm2
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vpmulhuw {{.*}}(%rip), %xmm0, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpmulhuw {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: constant_shift_v16i16:
@@ -1102,21 +1092,11 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
;
; X32-AVX1-LABEL: constant_shift_v16i16:
; X32-AVX1: # %bb.0:
-; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; X32-AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
-; X32-AVX1-NEXT: vpsrlw $4, %xmm1, %xmm2
-; X32-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
-; X32-AVX1-NEXT: vpsrlw $2, %xmm1, %xmm2
-; X32-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
-; X32-AVX1-NEXT: vpsrlw $1, %xmm1, %xmm2
-; X32-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
-; X32-AVX1-NEXT: vpsrlw $4, %xmm0, %xmm2
-; X32-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
-; X32-AVX1-NEXT: vpsrlw $2, %xmm0, %xmm2
-; X32-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
-; X32-AVX1-NEXT: vpsrlw $1, %xmm0, %xmm2
-; X32-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
-; X32-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X32-AVX1-NEXT: vpmulhuw {{\.LCPI.*}}, %xmm0, %xmm1
+; X32-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
+; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; X32-AVX1-NEXT: vpmulhuw {{\.LCPI.*}}, %xmm0, %xmm0
+; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X32-AVX1-NEXT: retl
;
; X32-AVX2-LABEL: constant_shift_v16i16:
diff --git a/test/CodeGen/X86/win_coreclr_chkstk.ll b/test/CodeGen/X86/win_coreclr_chkstk.ll
index 8934535d6f52..24f2b2be4308 100644
--- a/test/CodeGen/X86/win_coreclr_chkstk.ll
+++ b/test/CodeGen/X86/win_coreclr_chkstk.ll
@@ -10,8 +10,6 @@ entry:
; WIN_X64-LABEL:main4k:
; WIN_X64: # %bb.0:
; WIN_X64: movl $4096, %eax
-; WIN_X64: movq %rcx, 8(%rsp)
-; WIN_X64: movq %rdx, 16(%rsp)
; WIN_X64: xorq %rcx, %rcx
; WIN_X64: movq %rsp, %rdx
; WIN_X64: subq %rax, %rdx
@@ -27,8 +25,6 @@ entry:
; WIN_X64: cmpq %rcx, %rdx
; WIN_X64: jne .LBB0_2
; WIN_X64:.LBB0_3:
-; WIN_X64: movq 8(%rsp), %rcx
-; WIN_X64: movq 16(%rsp), %rdx
; WIN_X64: subq %rax, %rsp
; WIN_X64: xorl %eax, %eax
; WIN_X64: addq $4096, %rsp
@@ -45,7 +41,6 @@ entry:
define i32 @main4k_frame() nounwind "no-frame-pointer-elim"="true" {
entry:
; WIN_X64-LABEL:main4k_frame:
-; WIN_X64: movq %rcx, 16(%rsp)
; WIN_X64: movq %gs:16, %rcx
; LINUX-LABEL:main4k_frame:
; LINUX-NOT: movq %gs:16, %rcx
@@ -58,7 +53,6 @@ entry:
; Case with INT args
define i32 @main4k_intargs(i32 %x, i32 %y) nounwind {
entry:
-; WIN_X64: movq %rcx, 8(%rsp)
; WIN_X64: movq %gs:16, %rcx
; LINUX-NOT: movq %gs:16, %rcx
; LINUX: retq
@@ -71,7 +65,6 @@ entry:
; Case with FP regs
define i32 @main4k_fpargs(double %x, double %y) nounwind {
entry:
-; WIN_X64: movq %rcx, 8(%rsp)
; WIN_X64: movq %gs:16, %rcx
; LINUX-NOT: movq %gs:16, %rcx
; LINUX: retq
diff --git a/test/CodeGen/X86/win_coreclr_chkstk_liveins.mir b/test/CodeGen/X86/win_coreclr_chkstk_liveins.mir
new file mode 100644
index 000000000000..8da5f895063f
--- /dev/null
+++ b/test/CodeGen/X86/win_coreclr_chkstk_liveins.mir
@@ -0,0 +1,24 @@
+# RUN: llc -verify-machineinstrs %s -run-pass prologepilog -mtriple=x86_64-pc-win32-coreclr -o - | FileCheck %s
+...
+---
+name: main4k
+# CHECK-LABEL: name: main4k
+
+alignment: 4
+tracksRegLiveness: true
+frameInfo:
+ maxAlignment: 8
+stack:
+ - { id: 0, size: 4096, alignment: 1, stack-id: 0 }
+body: |
+ bb.0.entry:
+ $eax = IMPLICIT_DEF
+ RET 0, killed $eax
+
+ ; CHECK: bb.1.entry:
+ ; CHECK: liveins: $rdx
+ ; CHECK: bb.2.entry:
+ ; CHECK: liveins: $rcx, $rdx
+ ; CHECK: bb.3.entry:
+ ; CHECK: liveins: $rax
+...
diff --git a/test/DebugInfo/PDB/pdb-invalid-type.test b/test/DebugInfo/PDB/pdb-invalid-type.test
new file mode 100644
index 000000000000..33ea8d90452f
--- /dev/null
+++ b/test/DebugInfo/PDB/pdb-invalid-type.test
@@ -0,0 +1,15 @@
+# RUN: llvm-pdbutil yaml2pdb -pdb=%t1.pdb %p/Inputs/one-symbol.yaml
+# RUN: llvm-pdbutil yaml2pdb -pdb=%t2.pdb %s
+# RUN: not llvm-pdbutil merge -pdb=%t.pdb %t1.pdb %t2.pdb 2>&1 | FileCheck %s
+
+# CHECK: CodeView Error: The CodeView record is corrupted.
+
+---
+TpiStream:
+ Records:
+ # uint32_t* [Index: 0x1000]
+ - Kind: LF_POINTER
+ Pointer:
+ ReferentType: 4097
+ Attrs: 32778
+...
diff --git a/test/DebugInfo/PDB/using-namespace.test b/test/DebugInfo/PDB/using-namespace.test
new file mode 100644
index 000000000000..77c37826c8fc
--- /dev/null
+++ b/test/DebugInfo/PDB/using-namespace.test
@@ -0,0 +1,51 @@
+# RUN: yaml2obj < %s > %t.obj
+# RUN: llvm-readobj -codeview %t.obj | FileCheck %s
+
+# CHECK: Kind: S_UNAMESPACE (0x1124)
+# CHECK-NEXT: Namespace: __vc_attributes
+
+--- !COFF
+header:
+ Machine: IMAGE_FILE_MACHINE_AMD64
+ Characteristics: [ ]
+sections:
+ - Name: '.debug$S'
+ Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_DISCARDABLE, IMAGE_SCN_MEM_READ ]
+ Alignment: 1
+ Subsections:
+ - !Symbols
+ Records:
+ - Kind: S_OBJNAME
+ ObjNameSym:
+ Signature: 0
+ ObjectName: 'SimpleFunction.obj'
+ - Kind: S_COMPILE3
+ Compile3Sym:
+ Flags: [ SecurityChecks, HotPatch ]
+ Machine: X64
+ FrontendMajor: 19
+ FrontendMinor: 14
+ FrontendBuild: 26433
+ FrontendQFE: 0
+ BackendMajor: 19
+ BackendMinor: 14
+ BackendBuild: 26433
+ BackendQFE: 0
+ Version: 'Microsoft (R) Optimizing Compiler'
+ - Kind: S_UNAMESPACE
+ UsingNamespaceSym:
+ Namespace: __vc_attributes
+ - Kind: S_UNAMESPACE
+ UsingNamespaceSym:
+ Namespace: helper_attributes
+ - Kind: S_UNAMESPACE
+ UsingNamespaceSym:
+ Namespace: atl
+ - Kind: S_UNAMESPACE
+ UsingNamespaceSym:
+ Namespace: std
+ - !StringTable
+ Strings:
+ - 'SimpleFunction.c'
+symbols:
+...
diff --git a/test/DebugInfo/RISCV/lit.local.cfg b/test/DebugInfo/RISCV/lit.local.cfg
new file mode 100644
index 000000000000..c63820126f8c
--- /dev/null
+++ b/test/DebugInfo/RISCV/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'RISCV' in config.root.targets:
+ config.unsupported = True
diff --git a/test/DebugInfo/RISCV/relax-debug-line.ll b/test/DebugInfo/RISCV/relax-debug-line.ll
new file mode 100644
index 000000000000..814b253fadff
--- /dev/null
+++ b/test/DebugInfo/RISCV/relax-debug-line.ll
@@ -0,0 +1,75 @@
+; RUN: llc -filetype=obj -mtriple=riscv32 -mattr=+relax %s -o - \
+; RUN: | llvm-readobj -r | FileCheck -check-prefix=RELAX %s
+;
+; RELAX: .rela.debug_line {
+; RELAX: R_RISCV_ADD16
+; RELAX: R_RISCV_SUB16
+source_filename = "line.c"
+
+; Function Attrs: noinline nounwind optnone
+define i32 @init() !dbg !7 {
+entry:
+ ret i32 0, !dbg !11
+}
+
+; Function Attrs: noinline nounwind optnone
+define i32 @foo(i32 signext %value) !dbg !12 {
+entry:
+ %value.addr = alloca i32, align 4
+ store i32 %value, i32* %value.addr, align 4
+ call void @llvm.dbg.declare(metadata i32* %value.addr, metadata !15, metadata !DIExpression()), !dbg !16
+ %0 = load i32, i32* %value.addr, align 4, !dbg !17
+ ret i32 %0, !dbg !18
+}
+
+; Function Attrs: nounwind readnone speculatable
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
+
+; Function Attrs: noinline nounwind optnone
+define i32 @bar() !dbg !19 {
+entry:
+ %result = alloca i32, align 4
+ %v = alloca i32, align 4
+ call void @llvm.dbg.declare(metadata i32* %result, metadata !20, metadata !DIExpression()), !dbg !21
+ call void @llvm.dbg.declare(metadata i32* %v, metadata !22, metadata !DIExpression()), !dbg !23
+ %call = call i32 @init(), !dbg !24
+ store i32 %call, i32* %v, align 4, !dbg !23
+ %0 = load i32, i32* %v, align 4, !dbg !25
+ %call1 = call i32 @foo(i32 signext %0), !dbg !26
+ store i32 %call1, i32* %result, align 4, !dbg !27
+ %1 = load i32, i32* %result, align 4, !dbg !28
+ ret i32 %1, !dbg !29
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "line.c", directory: "./")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 4}
+!7 = distinct !DISubprogram(name: "init", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 2, isOptimized: false, unit: !0, retainedNodes: !2)
+!8 = !DISubroutineType(types: !9)
+!9 = !{!10}
+!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!11 = !DILocation(line: 3, column: 3, scope: !7)
+!12 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 6, type: !13, isLocal: false, isDefinition: true, scopeLine: 7, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
+!13 = !DISubroutineType(types: !14)
+!14 = !{!10, !10}
+!15 = !DILocalVariable(name: "value", arg: 1, scope: !12, file: !1, line: 6, type: !10)
+!16 = !DILocation(line: 6, column: 13, scope: !12)
+!17 = !DILocation(line: 8, column: 10, scope: !12)
+!18 = !DILocation(line: 8, column: 3, scope: !12)
+!19 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 11, type: !8, isLocal: false, isDefinition: true, scopeLine: 12, isOptimized: false, unit: !0, retainedNodes: !2)
+!20 = !DILocalVariable(name: "result", scope: !19, file: !1, line: 13, type: !10)
+!21 = !DILocation(line: 13, column: 7, scope: !19)
+!22 = !DILocalVariable(name: "v", scope: !19, file: !1, line: 14, type: !10)
+!23 = !DILocation(line: 14, column: 7, scope: !19)
+!24 = !DILocation(line: 14, column: 11, scope: !19)
+!25 = !DILocation(line: 16, column: 16, scope: !19)
+!26 = !DILocation(line: 16, column: 12, scope: !19)
+!27 = !DILocation(line: 16, column: 10, scope: !19)
+!28 = !DILocation(line: 18, column: 10, scope: !19)
+!29 = !DILocation(line: 18, column: 3, scope: !19)
diff --git a/test/DebugInfo/X86/accel-tables-dwarf5.ll b/test/DebugInfo/X86/accel-tables-dwarf5.ll
index 462a3bd58046..eef44e7b11c0 100644
--- a/test/DebugInfo/X86/accel-tables-dwarf5.ll
+++ b/test/DebugInfo/X86/accel-tables-dwarf5.ll
@@ -15,8 +15,11 @@
; type units. Change this once DWARF v5 type units are implemented.
; RUN: llc -mtriple=x86_64-pc-linux -filetype=obj -generate-type-units -debugger-tune=lldb < %s \
; RUN: | llvm-readobj -sections - | FileCheck --check-prefix=NONE %s
+
+; Debug types are ignored for non-ELF targets which means it shouldn't affect
+; accelerator table generation.
; RUN: llc -mtriple=x86_64-apple-darwin12 -generate-type-units -filetype=obj < %s \
-; RUN: | llvm-readobj -sections - | FileCheck --check-prefix=NONE %s
+; RUN: | llvm-readobj -sections - | FileCheck --check-prefix=DEBUG_NAMES %s
; NONE-NOT: apple_names
; NONE-NOT: debug_names
diff --git a/test/DebugInfo/X86/accel-tables.ll b/test/DebugInfo/X86/accel-tables.ll
index e4f1508029e2..813b66f1aeb6 100644
--- a/test/DebugInfo/X86/accel-tables.ll
+++ b/test/DebugInfo/X86/accel-tables.ll
@@ -12,12 +12,15 @@
; RUN: llc -mtriple=x86_64-pc-linux -filetype=obj -debugger-tune=lldb < %s \
; RUN: | llvm-readobj -sections - | FileCheck --check-prefix=DEBUG_NAMES %s
-; Neither target has accelerator tables if type units are enabled, as DWARF v4
-; type units are not compatible with accelerator tables.
+; No accelerator tables if type units are enabled, as DWARF v4 type units are
+; not compatible with accelerator tables.
; RUN: llc -mtriple=x86_64-pc-linux -filetype=obj -generate-type-units -debugger-tune=lldb < %s \
; RUN: | llvm-readobj -sections - | FileCheck --check-prefix=NONE %s
+
+; Debug types are ignored for non-ELF targets which means it shouldn't affect
+; accelerator table generation.
; RUN: llc -mtriple=x86_64-apple-darwin12 -generate-type-units -filetype=obj < %s \
-; RUN: | llvm-readobj -sections - | FileCheck --check-prefix=NONE %s
+; RUN: | llvm-readobj -sections - | FileCheck --check-prefix=APPLE %s
; APPLE-NOT: debug_names
; APPLE: apple_names
diff --git a/test/DebugInfo/X86/debug_addr.ll b/test/DebugInfo/X86/debug_addr.ll
new file mode 100644
index 000000000000..cf6b241b1de5
--- /dev/null
+++ b/test/DebugInfo/X86/debug_addr.ll
@@ -0,0 +1,79 @@
+; RUN: llc -split-dwarf-file=test.dwo -dwarf-version=4 %s -mtriple=i386-unknown-linux-gnu -filetype=obj -o - | \
+; RUN: llvm-dwarfdump -v - | FileCheck %s -check-prefix=DWARF4
+
+; RUN: llc -split-dwarf-file=test.dwo -dwarf-version=5 %s -mtriple=i386-unknown-linux-gnu -filetype=obj -o - | \
+; RUN: llvm-dwarfdump -v - | FileCheck %s -check-prefix=DWARF5
+
+; Source:
+; void foo() {
+; }
+;
+; void bar() {
+; }
+
+; DWARF4: .debug_info contents:
+; DWARF4: Compile Unit:{{.*}}version = 0x0004
+; DWARF4-NOT: Compile Unit
+; DWARF4: DW_TAG_compile_unit
+; DWARF4-NOT: DW_TAG_{{.*}}
+; DWARF4: DW_AT_GNU_dwo_name{{.*}}test.dwo
+; DWARF4: DW_AT_GNU_addr_base{{.*}}0x00000000
+; DWARF4: .debug_addr contents:
+; DWARF4-NEXT: 0x00000000: Addr Section: length = 0x00000000, version = 0x0004, addr_size = 0x04, seg_size = 0x00
+; DWARF4-NEXT: Addrs: [
+; DWARF4-NEXT: 0x00000000
+; DWARF4-NEXT: 0x00000010
+; DWARF4-NEXT: ]
+
+; DWARF5: .debug_info contents:
+; DWARF5: Compile Unit:{{.*}}version = 0x0005
+; DWARF5-NOT: Compile Unit
+; DWARF5: DW_TAG_compile_unit
+; DWARF5-NOT: DW_TAG_{{.*}}
+; DWARF5: DW_AT_GNU_dwo_name{{.*}}test.dwo
+; DWARF5: DW_AT_GNU_addr_base{{.*}}0x00000000
+; DWARF5: .debug_addr contents:
+; DWARF5-NEXT: 0x00000000: Addr Section: length = 0x0000000c, version = 0x0005, addr_size = 0x04, seg_size = 0x00
+; DWARF5-NEXT: Addrs: [
+; DWARF5-NEXT: 0x00000000
+; DWARF5-NEXT: 0x00000010
+; DWARF5-NEXT: ]
+
+; ModuleID = './test.c'
+source_filename = "./test.c"
+target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+target triple = "i386-unknown-linux-gnu"
+
+; Function Attrs: noinline nounwind optnone
+define void @foo() #0 !dbg !8 {
+entry:
+ ret void, !dbg !12
+}
+
+; Function Attrs: noinline nounwind optnone
+define void @bar() #0 !dbg !13 {
+entry:
+ ret void, !dbg !14
+}
+
+attributes #0 = { noinline nounwind optnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5, !6}
+!llvm.ident = !{!7}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 6.0.1", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "test.c", directory: "/tmp")
+!2 = !{}
+!3 = !{i32 1, !"NumRegisterParameters", i32 0}
+!4 = !{i32 2, !"Dwarf Version", i32 5}
+!5 = !{i32 2, !"Debug Info Version", i32 3}
+!6 = !{i32 1, !"wchar_size", i32 4}
+!7 = !{!"clang version 6.0.1"}
+!8 = distinct !DISubprogram(name: "foo", scope: !9, file: !9, line: 1, type: !10, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, unit: !0)
+!9 = !DIFile(filename: "./test.c", directory: "/tmp")
+!10 = !DISubroutineType(types: !11)
+!11 = !{null}
+!12 = !DILocation(line: 2, column: 3, scope: !8)
+!13 = distinct !DISubprogram(name: "bar", scope: !9, file: !9, line: 5, type: !10, isLocal: false, isDefinition: true, scopeLine: 5, isOptimized: false, unit: !0)
+!14 = !DILocation(line: 6, column: 3, scope: !13)
diff --git a/test/Demangle/ms-cxx11.test b/test/Demangle/ms-cxx11.test
new file mode 100644
index 000000000000..b648cc3d6e33
--- /dev/null
+++ b/test/Demangle/ms-cxx11.test
@@ -0,0 +1,148 @@
+; These tests are based on clang/test/CodeGenCXX/mangle-ms-cxx11.cpp
+
+; RUN: llvm-undname < %s | FileCheck %s
+
+; CHECK-NOT: Invalid mangled name
+
+?a@FTypeWithQuals@@3U?$S@$$A8@@BAHXZ@1@A
+; CHECK: struct FTypeWithQuals::S<int __cdecl(void) const> FTypeWithQuals::a
+
+?b@FTypeWithQuals@@3U?$S@$$A8@@CAHXZ@1@A
+; CHECK: struct FTypeWithQuals::S<int __cdecl(void) volatile> FTypeWithQuals::b
+
+?c@FTypeWithQuals@@3U?$S@$$A8@@IAAHXZ@1@A
+; CHECK: struct FTypeWithQuals::S<int __cdecl(void) __restrict> FTypeWithQuals::c
+
+?d@FTypeWithQuals@@3U?$S@$$A8@@GBAHXZ@1@A
+; CHECK: struct FTypeWithQuals::S<int __cdecl(void) const &> FTypeWithQuals::d
+
+?e@FTypeWithQuals@@3U?$S@$$A8@@GCAHXZ@1@A
+; CHECK: struct FTypeWithQuals::S<int __cdecl(void) volatile &> FTypeWithQuals::e
+
+?f@FTypeWithQuals@@3U?$S@$$A8@@IGAAHXZ@1@A
+; CHECK: struct FTypeWithQuals::S<int __cdecl(void) __restrict &> FTypeWithQuals::f
+
+?g@FTypeWithQuals@@3U?$S@$$A8@@HBAHXZ@1@A
+; CHECK: struct FTypeWithQuals::S<int __cdecl(void) const &&> FTypeWithQuals::g
+
+?h@FTypeWithQuals@@3U?$S@$$A8@@HCAHXZ@1@A
+; CHECK: struct FTypeWithQuals::S<int __cdecl(void) volatile &&> FTypeWithQuals::h
+
+?i@FTypeWithQuals@@3U?$S@$$A8@@IHAAHXZ@1@A
+; CHECK: struct FTypeWithQuals::S<int __cdecl(void) __restrict &&> FTypeWithQuals::i
+
+?j@FTypeWithQuals@@3U?$S@$$A6AHXZ@1@A
+; CHECK: struct FTypeWithQuals::S<int __cdecl(void)> FTypeWithQuals::j
+
+?k@FTypeWithQuals@@3U?$S@$$A8@@GAAHXZ@1@A
+; CHECK: struct FTypeWithQuals::S<int __cdecl(void) &> FTypeWithQuals::k
+
+?l@FTypeWithQuals@@3U?$S@$$A8@@HAAHXZ@1@A
+; CHECK: struct FTypeWithQuals::S<int __cdecl(void) &&> FTypeWithQuals::l
+
+?Char16Var@@3_SA
+; CHECK: char16_t Char16Var
+
+?Char32Var@@3_UA
+; CHECK: char32_t Char32Var
+
+?LRef@@YAXAAH@Z
+; CHECK: void __cdecl LRef(int &)
+
+?RRef@@YAH$$QAH@Z
+; CHECK: int __cdecl RRef(int &&)
+
+?Null@@YAX$$T@Z
+; CHECK: void __cdecl Null(std::nullptr_t)
+
+?fun@PR18022@@YA?AU<unnamed-type-a>@1@U21@0@Z
+; CHECK: struct PR18022::<unnamed-type-a> __cdecl PR18022::fun(struct PR18022::<unnamed-type-a>, struct PR18022::<unnamed-type-a>)
+
+; First, we have the static local variable of type "<lambda_1>" inside of "define_lambda".
+; decltype(lambda), where lambda = [] { static int local=42; return 42; };
+?lambda@?1??define_lambda@@YAHXZ@4V<lambda_1>@?0??1@YAHXZ@A
+; CHECK: class `int __cdecl define_lambda(void)'::`1'::<lambda_1> `int __cdecl define_lambda(void)'::`2'::lambda
+
+; Next, we have the "operator()" for "<lambda_1>" which is inside of "define_lambda".
+??R<lambda_1>@?0??define_lambda@@YAHXZ@QBE@XZ
+; CHECK: __thiscall `int __cdecl define_lambda(void)'::`1'::<lambda_1>::operator()(void) const
+
+; Finally, we have the local which is inside of "<lambda_1>" which is inside of "define_lambda".
+?local@?2???R<lambda_1>@?0??define_lambda@@YAHXZ@QBE@XZ@4HA
+; CHECK: __thiscall `int __cdecl define_lambda(void)'::`1'::<lambda_1>::operator()(void) const
+
+??$use_lambda_arg@V<lambda_1>@?0??call_with_lambda_arg1@@YAXXZ@@@YAXV<lambda_1>@?0??call_with_lambda_arg1@@YAXXZ@@Z
+; CHECK: void __cdecl use_lambda_arg<class `void __cdecl call_with_lambda_arg1(void)'::`1'::<lambda_1>>(class `void __cdecl call_with_lambda_arg1(void)'::`1'::<lambda_1>)
+
+?foo@A@PR19361@@QIGAEXXZ
+; CHECK: void __thiscall PR19361::A::foo(void) __restrict &
+
+?foo@A@PR19361@@QIHAEXXZ
+; CHECK: void __thiscall PR19361::A::foo(void) __restrict &&
+
+??__K_deg@@YAHO@Z
+; CHECK: int __cdecl operator ""_deg(long double)
+
+??$templ_fun_with_pack@$S@@YAXXZ
+; CHECK: void __cdecl templ_fun_with_pack<>(void)
+
+??$templ_fun_with_ty_pack@$$$V@@YAXXZ
+; CHECK: void __cdecl templ_fun_with_ty_pack<>(void)
+??$templ_fun_with_ty_pack@$$V@@YAXXZ
+; CHECK: void __cdecl templ_fun_with_ty_pack<>(void)
+
+??$f@$$YAliasA@PR20047@@@PR20047@@YAXXZ
+; CHECK: void __cdecl PR20047::f<PR20047::AliasA>(void)
+
+?f@UnnamedType@@YAXAAU<unnamed-type-TD>@A@1@@Z
+; CHECK: void __cdecl UnnamedType::f(struct UnnamedType::A::<unnamed-type-TD> &)
+
+?f@UnnamedType@@YAXPAW4<unnamed-type-e>@?$B@H@1@@Z
+; CHECK: void __cdecl UnnamedType::f(enum UnnamedType::B<int>::<unnamed-type-e> *)
+
+??$f@W4<unnamed-type-E>@?1??g@PR24651@@YAXXZ@@PR24651@@YAXW4<unnamed-type-E>@?1??g@0@YAXXZ@@Z
+; We have a back-referencing problem here, we print `void __cdecl <unnamed-type-E>::g(void)`
+; for the second occurrence of g.
+; FIXME: void __cdecl PR24651::f<enum `void __cdecl PR24651::g(void)'::`2'::<unnamed-type-E>>(enum `void __cdecl PR24651::g(void)'::`2'::<unnamed-type-E>)
+
+??$f@T<unnamed-type-$S1>@PR18204@@@PR18204@@YAHPAT<unnamed-type-$S1>@0@@Z
+; FIXME: int __cdecl PR18204::f<union PR18204::<unnamed-type-$S1>>(union PR18204::<unnamed-type-$S1> *)
+
+??R<lambda_0>@?0??PR26105@@YAHXZ@QBE@H@Z
+; CHECK: __thiscall `int __cdecl PR26105(void)'::`1'::<lambda_0>::operator()(int) const
+
+??R<lambda_1>@?0???R<lambda_0>@?0??PR26105@@YAHXZ@QBE@H@Z@QBE@H@Z
+; CHECK: __thiscall `__thiscall `int __cdecl PR26105(void)'::`1'::<lambda_0>::operator()(int) const'::`1'::<lambda_1>::operator()(int) const
+
+?unaligned_foo1@@YAPFAHXZ
+; CHECK: int __unaligned * __cdecl unaligned_foo1(void)
+
+?unaligned_foo2@@YAPFAPFAHXZ
+; CHECK: int __unaligned *__unaligned * __cdecl unaligned_foo2(void)
+
+?unaligned_foo3@@YAHXZ
+; CHECK: int __cdecl unaligned_foo3(void)
+
+?unaligned_foo4@@YAXPFAH@Z
+; CHECK: void __cdecl unaligned_foo4(int __unaligned *)
+
+?unaligned_foo5@@YAXPIFAH@Z
+; CHECK: void __cdecl unaligned_foo5(int __unaligned *__restrict)
+
+??$unaligned_foo6@PAH@@YAPAHPAH@Z
+; CHECK: int * __cdecl unaligned_foo6<int *>(int *)
+
+??$unaligned_foo6@PFAH@@YAPFAHPFAH@Z
+; CHECK: int __unaligned * __cdecl unaligned_foo6<int __unaligned *>(int __unaligned *)
+
+?unaligned_foo8@unaligned_foo8_S@@QFCEXXZ
+; CHECK: void __thiscall unaligned_foo8_S::unaligned_foo8(void) volatile __unaligned
+
+??R<lambda_1>@x@A@PR31197@@QBE@XZ
+; CHECK: __thiscall PR31197::A::x::<lambda_1>::operator()(void) const
+
+?white@?1???R<lambda_1>@x@A@PR31197@@QBE@XZ@4HA
+; CHECK: int `__thiscall PR31197::A::x::<lambda_1>::operator()(void) const'::`2'::white
+
+?f@@YAXW4<unnamed-enum-enumerator>@@@Z
+; CHECK: void __cdecl f(enum <unnamed-enum-enumerator>)
diff --git a/test/Demangle/ms-mangle.test b/test/Demangle/ms-mangle.test
index a5d0c70ee314..9a2f780f9334 100644
--- a/test/Demangle/ms-mangle.test
+++ b/test/Demangle/ms-mangle.test
@@ -265,18 +265,18 @@
?s6@PR13182@@3PBQBDB
; CHECK: char const *const *PR13182::s6
-; FIXME: We don't properly support static locals in functions yet.
+; FIXME: We don't properly support extern "C" functions yet.
; ?local@?1??extern_c_func@@9@4HA
; FIXME: int `extern_c_func'::`2'::local
; ?local@?1??extern_c_func@@9@4HA
; FIXME: int `extern_c_func'::`2'::local
-; ?v@?1??f@@YAHXZ@4U<unnamed-type-v>@?1??1@YAHXZ@A
-; FIXME: struct `int __cdecl f(void)'::`2'::<unnamed-type-v> `int __cdecl f(void)'::`2'::v
+?v@?1??f@@YAHXZ@4U<unnamed-type-v>@?1??1@YAHXZ@A
+; CHECK: struct `int __cdecl f(void)'::`2'::<unnamed-type-v> `int __cdecl f(void)'::`2'::v
-; ?v@?1???$f@H@@YAHXZ@4U<unnamed-type-v>@?1???$f@H@@YAHXZ@A
-; FIXME: struct `int __cdecl f<int>(void)'::`2'::<unnamed-type-v> `int __cdecl f<int>(void)'::`2'::v
+?v@?1???$f@H@@YAHXZ@4U<unnamed-type-v>@?1???$f@H@@YAHXZ@A
+; CHECK: struct `int __cdecl f<int>(void)'::`2'::<unnamed-type-v> `int __cdecl f<int>(void)'::`2'::v
??2OverloadedNewDelete@@SAPAXI@Z
; CHECK: static void * __cdecl OverloadedNewDelete::operator new(unsigned int)
@@ -335,8 +335,8 @@
; ?overloaded_fn@@$$J0YAXXZ
; FIXME-EXTERNC: extern \"C\" void __cdecl overloaded_fn(void)
-; ?f@UnnamedType@@YAXQAPAU<unnamed-type-T1>@S@1@@Z
-; FIXME: void __cdecl UnnamedType::f(struct UnnamedType::S::<unnamed-type-T1> ** const)
+?f@UnnamedType@@YAXQAPAU<unnamed-type-T1>@S@1@@Z
+; CHECK: void __cdecl UnnamedType::f(struct UnnamedType::S::<unnamed-type-T1> **const)
?f@UnnamedType@@YAXUT2@S@1@@Z
; CHECK: void __cdecl UnnamedType::f(struct UnnamedType::S::T2)
diff --git a/test/Demangle/ms-nested-scopes.test b/test/Demangle/ms-nested-scopes.test
new file mode 100644
index 000000000000..952b138630cc
--- /dev/null
+++ b/test/Demangle/ms-nested-scopes.test
@@ -0,0 +1,146 @@
+; RUN: llvm-undname < %s | FileCheck %s
+
+; CHECK-NOT: Invalid mangled name
+
+; Test demangling of function local scope discriminator IDs.
+?M@?@??L@@YAHXZ@4HA
+; CHECK: int `int __cdecl L(void)'::`0'::M
+
+?M@?0??L@@YAHXZ@4HA
+; CHECK: int `int __cdecl L(void)'::`1'::M
+
+?M@?1??L@@YAHXZ@4HA
+; CHECK: int `int __cdecl L(void)'::`2'::M
+
+?M@?2??L@@YAHXZ@4HA
+; CHECK: int `int __cdecl L(void)'::`3'::M
+
+?M@?3??L@@YAHXZ@4HA
+; CHECK: int `int __cdecl L(void)'::`4'::M
+
+?M@?4??L@@YAHXZ@4HA
+; CHECK: int `int __cdecl L(void)'::`5'::M
+
+?M@?5??L@@YAHXZ@4HA
+; CHECK: int `int __cdecl L(void)'::`6'::M
+
+?M@?6??L@@YAHXZ@4HA
+; CHECK: int `int __cdecl L(void)'::`7'::M
+
+?M@?7??L@@YAHXZ@4HA
+; CHECK: int `int __cdecl L(void)'::`8'::M
+
+?M@?8??L@@YAHXZ@4HA
+; CHECK: int `int __cdecl L(void)'::`9'::M
+
+?M@?9??L@@YAHXZ@4HA
+; CHECK: int `int __cdecl L(void)'::`10'::M
+
+?M@?L@??L@@YAHXZ@4HA
+; CHECK: int `int __cdecl L(void)'::`11'::M
+
+?M@?M@??L@@YAHXZ@4HA
+; CHECK: int `int __cdecl L(void)'::`12'::M
+
+?M@?N@??L@@YAHXZ@4HA
+; CHECK: int `int __cdecl L(void)'::`13'::M
+
+?M@?O@??L@@YAHXZ@4HA
+; CHECK: int `int __cdecl L(void)'::`14'::M
+
+?M@?P@??L@@YAHXZ@4HA
+; CHECK: int `int __cdecl L(void)'::`15'::M
+
+?M@?BA@??L@@YAHXZ@4HA
+; CHECK: int `int __cdecl L(void)'::`16'::M
+
+?M@?BB@??L@@YAHXZ@4HA
+; CHECK: int `int __cdecl L(void)'::`17'::M
+
+?j@?1??L@@YAHXZ@4UJ@@A
+; CHECK: struct J `int __cdecl L(void)'::`2'::j
+
+; Test demangling of name back-references
+?NN@0XX@@3HA
+; CHECK: int XX::NN::NN
+
+?MM@0NN@XX@@3HA
+; CHECK: int XX::NN::MM::MM
+
+?NN@MM@0XX@@3HA
+; CHECK: int XX::NN::MM::NN
+
+?OO@0NN@01XX@@3HA
+; CHECK: int XX::NN::OO::NN::OO::OO
+
+?NN@OO@010XX@@3HA
+; CHECK: int XX::NN::OO::NN::OO::NN
+
+; Test demangling of name back-references combined with function local scopes.
+?M@?1??0@YAHXZ@4HA
+; CHECK: int `int __cdecl M(void)'::`2'::M
+
+?L@?2??M@0?2??0@YAHXZ@QEAAHXZ@4HA
+; CHECK: int `int __cdecl `int __cdecl L(void)'::`3'::L::M(void)'::`3'::L
+
+?M@?2??0L@?2??1@YAHXZ@QEAAHXZ@4HA
+; CHECK: int `int __cdecl `int __cdecl L(void)'::`3'::L::M(void)'::`3'::M
+
+; Function local scopes of template functions
+?M@?1???$L@H@@YAHXZ@4HA
+; CHECK: int `int __cdecl L<int>(void)'::`2'::M
+
+; And member functions of template classes
+?SN@?$NS@H@NS@@QEAAHXZ
+; CHECK: int __cdecl NS::NS<int>::SN(void)
+
+?NS@?1??SN@?$NS@H@0@QEAAHXZ@4HA
+; CHECK: int `int __cdecl NS::NS<int>::SN(void)'::`2'::NS
+
+?SN@?1??0?$NS@H@NS@@QEAAHXZ@4HA
+; CHECK: int `int __cdecl NS::NS<int>::SN(void)'::`2'::SN
+
+?NS@?1??SN@?$NS@H@10@QEAAHXZ@4HA
+; CHECK: int `int __cdecl NS::SN::NS<int>::SN(void)'::`2'::NS
+
+?SN@?1??0?$NS@H@0NS@@QEAAHXZ@4HA
+; CHECK: int `int __cdecl NS::SN::NS<int>::SN(void)'::`2'::SN
+
+; Make sure instantiated templates participate in back-referencing.
+; In the next 3 examples there should be 3 back-references:
+; 0 = X (right most name)
+; 1 = C<int> (second from right)
+; 2 = C (third from right)
+; Make sure all 3 work as expected by having the 4th component take each value
+; from 0-2 and confirming it is the right component.
+?X@?$C@H@C@0@2HB
+; CHECK: static int const X::C::C<int>::X
+
+?X@?$C@H@C@1@2HB
+; CHECK: static int const C<int>::C::C<int>::X
+
+?X@?$C@H@C@2@2HB
+; CHECK: static int const C::C::C<int>::X
+
+; Putting everything together.
+
+; namespace A { namespace B { namespace C { namespace B { namespace C {
+; template<typename T>
+; struct C {
+; int B() {
+; static C<int> C;
+; static int B = 7;
+; static int A = 7;
+; return C.B() + B + A;
+; }
+; };
+; } } } } }
+
+?C@?1??B@?$C@H@0101A@@QEAAHXZ@4U201013@A
+; CHECK: struct A::B::C::B::C::C<int> `int __cdecl A::B::C::B::C::C<int>::B(void)'::`2'::C
+
+?B@?1??0?$C@H@C@020A@@QEAAHXZ@4HA
+; CHECK: int `int __cdecl A::B::C::B::C::C<int>::B(void)'::`2'::B
+
+?A@?1??B@?$C@H@C@1310@QEAAHXZ@4HA
+; CHECK: int `int __cdecl A::B::C::B::C::C<int>::B(void)'::`2'::A
diff --git a/test/Demangle/ms-return-qualifiers.test b/test/Demangle/ms-return-qualifiers.test
new file mode 100644
index 000000000000..7fedf6c03a11
--- /dev/null
+++ b/test/Demangle/ms-return-qualifiers.test
@@ -0,0 +1,184 @@
+; These tests are based on clang/test/CodeGenCXX/mangle-ms-return-qualifiers.cpp
+
+; RUN: llvm-undname < %s | FileCheck %s
+
+; CHECK-NOT: Invalid mangled name
+
+?a1@@YAXXZ
+; CHECK: void __cdecl a1(void)
+
+?a2@@YAHXZ
+; CHECK: int __cdecl a2(void)
+
+?a3@@YA?BHXZ
+; CHECK: int const __cdecl a3(void)
+
+?a4@@YA?CHXZ
+; CHECK: int volatile __cdecl a4(void)
+
+?a5@@YA?DHXZ
+; CHECK: int const volatile __cdecl a5(void)
+
+?a6@@YAMXZ
+; CHECK: float __cdecl a6(void)
+
+?b1@@YAPAHXZ
+; CHECK: int * __cdecl b1(void)
+
+?b2@@YAPBDXZ
+; CHECK: char const * __cdecl b2(void)
+
+?b3@@YAPAMXZ
+; CHECK: float * __cdecl b3(void)
+
+?b4@@YAPBMXZ
+; CHECK: float const * __cdecl b4(void)
+
+?b5@@YAPCMXZ
+; CHECK: float volatile * __cdecl b5(void)
+
+?b6@@YAPDMXZ
+; CHECK: float const volatile * __cdecl b6(void)
+
+?b7@@YAAAMXZ
+; CHECK: float & __cdecl b7(void)
+
+?b8@@YAABMXZ
+; CHECK: float const & __cdecl b8(void)
+
+?b9@@YAACMXZ
+; CHECK: float volatile & __cdecl b9(void)
+
+?b10@@YAADMXZ
+; CHECK: float const volatile & __cdecl b10(void)
+
+?b11@@YAPAPBDXZ
+; CHECK: char const ** __cdecl b11(void)
+
+?c1@@YA?AVA@@XZ
+; CHECK: class A __cdecl c1(void)
+
+?c2@@YA?BVA@@XZ
+; CHECK: class A const __cdecl c2(void)
+
+?c3@@YA?CVA@@XZ
+; CHECK: class A volatile __cdecl c3(void)
+
+?c4@@YA?DVA@@XZ
+; CHECK: class A const volatile __cdecl c4(void)
+
+?c5@@YAPBVA@@XZ
+; CHECK: class A const * __cdecl c5(void)
+
+?c6@@YAPCVA@@XZ
+; CHECK: class A volatile * __cdecl c6(void)
+
+?c7@@YAPDVA@@XZ
+; CHECK: class A const volatile * __cdecl c7(void)
+
+?c8@@YAAAVA@@XZ
+; CHECK: class A & __cdecl c8(void)
+
+?c9@@YAABVA@@XZ
+; CHECK: class A const & __cdecl c9(void)
+
+?c10@@YAACVA@@XZ
+; CHECK: class A volatile & __cdecl c10(void)
+
+?c11@@YAADVA@@XZ
+; CHECK: class A const volatile & __cdecl c11(void)
+
+?d1@@YA?AV?$B@H@@XZ
+; CHECK: class B<int> __cdecl d1(void)
+
+?d2@@YA?AV?$B@PBD@@XZ
+; CHECK: class B<char const *> __cdecl d2(void)
+
+?d3@@YA?AV?$B@VA@@@@XZ
+; CHECK: class B<class A> __cdecl d3(void)
+
+?d4@@YAPAV?$B@VA@@@@XZ
+; CHECK: class B<class A> * __cdecl d4(void)
+
+?d5@@YAPBV?$B@VA@@@@XZ
+; CHECK: class B<class A> const * __cdecl d5(void)
+
+?d6@@YAPCV?$B@VA@@@@XZ
+; CHECK: class B<class A> volatile * __cdecl d6(void)
+
+?d7@@YAPDV?$B@VA@@@@XZ
+; CHECK: class B<class A> const volatile * __cdecl d7(void)
+
+?d8@@YAAAV?$B@VA@@@@XZ
+; CHECK: class B<class A> & __cdecl d8(void)
+
+?d9@@YAABV?$B@VA@@@@XZ
+; CHECK: class B<class A> const & __cdecl d9(void)
+
+?d10@@YAACV?$B@VA@@@@XZ
+; CHECK: class B<class A> volatile & __cdecl d10(void)
+
+?d11@@YAADV?$B@VA@@@@XZ
+; CHECK: class B<class A> const volatile & __cdecl d11(void)
+
+?e1@@YA?AW4Enum@@XZ
+; CHECK: Enum __cdecl e1(void)
+
+?e2@@YA?BW4Enum@@XZ
+; CHECK: Enum const __cdecl e2(void)
+
+?e3@@YAPAW4Enum@@XZ
+; CHECK: Enum * __cdecl e3(void)
+
+?e4@@YAAAW4Enum@@XZ
+; CHECK: Enum & __cdecl e4(void)
+
+?f1@@YA?AUS@@XZ
+; CHECK: struct S __cdecl f1(void)
+
+?f2@@YA?BUS@@XZ
+; CHECK: struct S const __cdecl f2(void)
+
+?f3@@YAPAUS@@XZ
+; CHECK: struct S * __cdecl f3(void)
+
+?f4@@YAPBUS@@XZ
+; CHECK: struct S const * __cdecl f4(void)
+
+?f5@@YAPDUS@@XZ
+; CHECK: struct S const volatile * __cdecl f5(void)
+
+?f6@@YAAAUS@@XZ
+; CHECK: struct S & __cdecl f6(void)
+
+?f7@@YAQAUS@@XZ
+; CHECK: struct S *const __cdecl f7(void)
+
+?f8@@YAPQS@@HXZ
+; CHECK: int S::* __cdecl f8(void)
+
+?f9@@YAQQS@@HXZ
+; CHECK: int S::*const __cdecl f9(void)
+
+; We print __restrict twice here.
+?f10@@YAPIQS@@HXZ
+; FIXME: int S::* __restrict __cdecl f10(void)
+
+; We print __restrict twice here.
+?f11@@YAQIQS@@HXZ
+; FIXME: int S::* __restrict __cdecl f11(void)
+
+?g1@@YAP6AHH@ZXZ
+; CHECK: int (__cdecl * __cdecl g1(void))(int)
+
+?g2@@YAQ6AHH@ZXZ
+; CHECK: int (__cdecl *const __cdecl g2(void))(int)
+
+?g3@@YAPAP6AHH@ZXZ
+; CHECK: int (__cdecl ** __cdecl g3(void))(int)
+
+?g4@@YAPBQ6AHH@ZXZ
+; CHECK: int (__cdecl *const * __cdecl g4(void))(int)
+
+?h1@@YAAIAHXZ
+; CHECK: int &__restrict __cdecl h1(void)
diff --git a/test/Demangle/ms-template-callback.test b/test/Demangle/ms-template-callback.test
new file mode 100644
index 000000000000..88c4493d2bb6
--- /dev/null
+++ b/test/Demangle/ms-template-callback.test
@@ -0,0 +1,53 @@
+; These tests are based on clang/test/CodeGenCXX/mangle-ms-template-callback.cpp
+
+; RUN: llvm-undname < %s | FileCheck %s
+
+; CHECK-NOT: Invalid mangled name
+
+?callback_void@@3V?$C@$$A6AXXZ@@A
+; CHECK: class C<void __cdecl(void)> callback_void
+
+?callback_void_volatile@@3V?$C@$$A6AXXZ@@C
+; CHECK: class C<void __cdecl(void)> volatile callback_void_volatile
+
+?callback_int@@3V?$C@$$A6AHXZ@@A
+; CHECK: C<int __cdecl(void)> callback_int
+
+?callback_Type@@3V?$C@$$A6A?AVType@@XZ@@A
+; CHECK: C<class Type __cdecl(void)> callback_Type
+
+?callback_void_int@@3V?$C@$$A6AXH@Z@@A
+; CHECK: C<void __cdecl(int)> callback_void_int
+
+?callback_int_int@@3V?$C@$$A6AHH@Z@@A
+; CHECK: C<int __cdecl(int)> callback_int_int
+
+?callback_void_Type@@3V?$C@$$A6AXVType@@@Z@@A
+; CHECK: C<void __cdecl(class Type)> callback_void_Type
+
+?foo@@YAXV?$C@$$A6AXXZ@@@Z
+; CHECK: void __cdecl foo(class C<void __cdecl(void)>)
+
+?function@@YAXV?$C@$$A6AXXZ@@@Z
+; CHECK: void __cdecl function(class C<void __cdecl(void)>)
+
+?function_pointer@@YAXV?$C@P6AXXZ@@@Z
+; CHECK: void __cdecl function_pointer(class C<void (__cdecl *)(void)>)
+
+?member_pointer@@YAXV?$C@P8Z@@AEXXZ@@@Z
+; CHECK: void __cdecl member_pointer(class C<void (__thiscall Z::*)(void)>)
+
+??$bar@P6AHH@Z@@YAXP6AHH@Z@Z
+; CHECK: void __cdecl bar<int (__cdecl *)(int)>(int (__cdecl *)(int))
+
+??$WrapFnPtr@$1?VoidFn@@YAXXZ@@YAXXZ
+; CHECK: void __cdecl WrapFnPtr<&void __cdecl VoidFn(void)>(void)
+
+??$WrapFnRef@$1?VoidFn@@YAXXZ@@YAXXZ
+; CHECK: void __cdecl WrapFnRef<&void __cdecl VoidFn(void)>(void)
+
+??$WrapFnPtr@$1?VoidStaticMethod@Thing@@SAXXZ@@YAXXZ
+; CHECK: void __cdecl WrapFnPtr<&static void __cdecl Thing::VoidStaticMethod(void)>(void)
+
+??$WrapFnRef@$1?VoidStaticMethod@Thing@@SAXXZ@@YAXXZ
+; CHECK: void __cdecl WrapFnRef<&static void __cdecl Thing::VoidStaticMethod(void)>(void)
diff --git a/test/Instrumentation/InstrProfiling/linkage.ll b/test/Instrumentation/InstrProfiling/linkage.ll
index 6cbc88f34519..97537579b176 100644
--- a/test/Instrumentation/InstrProfiling/linkage.ll
+++ b/test/Instrumentation/InstrProfiling/linkage.ll
@@ -1,13 +1,13 @@
;; Check that runtime symbols get appropriate linkage.
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.10.0 -instrprof -S | FileCheck %s --check-prefix=OTHER --check-prefix=COMMON
-; RUN: opt < %s -mtriple=x86_64-unknown-linux -instrprof -S | FileCheck %s --check-prefix=LINUX --check-prefix=COMMON
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.10.0 -passes=instrprof -S | FileCheck %s --check-prefix=OTHER --check-prefix=COMMON
-; RUN: opt < %s -mtriple=x86_64-unknown-linux -passes=instrprof -S | FileCheck %s --check-prefix=LINUX --check-prefix=COMMON
-; RUN: opt < %s -mtriple=x86_64-pc-win32-coff -instrprof -S | FileCheck %s --check-prefix=COFF
-; RUN: opt < %s -mtriple=x86_64-pc-win32-coff -passes=instrprof -S | FileCheck %s --check-prefix=COFF
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.10.0 -instrprof -S | FileCheck %s --check-prefixes=COMMON,MACHO
+; RUN: opt < %s -mtriple=x86_64-unknown-linux -instrprof -S | FileCheck %s --check-prefixes=COMMON,LINUX
+; RUN: opt < %s -mtriple=x86_64-pc-win32-coff -instrprof -S | FileCheck %s --check-prefixes=COMMON,COFF
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.10.0 -passes=instrprof -S | FileCheck %s --check-prefixes=COMMON,MACHO
+; RUN: opt < %s -mtriple=x86_64-unknown-linux -passes=instrprof -S | FileCheck %s --check-prefixes=COMMON,LINUX
+; RUN: opt < %s -mtriple=x86_64-pc-win32-coff -passes=instrprof -S | FileCheck %s --check-prefixes=COMMON,COFF
-; OTHER: @__llvm_profile_runtime = external global i32
+; MACHO: @__llvm_profile_runtime = external global i32
; LINUX-NOT: @__llvm_profile_runtime = external global i32
@__profn_foo = hidden constant [3 x i8] c"foo"
@@ -32,18 +32,15 @@ define weak void @foo_weak() {
; COMMON: @"__profc_linkage.ll:foo_internal" = internal global
; COMMON: @"__profd_linkage.ll:foo_internal" = internal global
-; COFF: @"__profc_linkage.ll:foo_internal" = internal global
-; COFF: @"__profd_linkage.ll:foo_internal" = internal global
define internal void @foo_internal() {
call void @llvm.instrprof.increment(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @"__profn_linkage.ll:foo_internal", i32 0, i32 0), i64 0, i32 1, i32 0)
ret void
}
; COMMON: @__profc_foo_inline = linkonce_odr hidden global
+; COFF-SAME: section ".lprfc", align 8
; COMMON: @__profd_foo_inline = linkonce_odr hidden global
-; FIXME: Should we put a comdat here?
-; COFF: @__profc_foo_inline = linkonce_odr hidden global {{.*}}section ".lprfc", align 8
-; COFF: @__profd_foo_inline = linkonce_odr hidden global {{.*}}section ".lprfd", align 8
+; COFF-SAME: section ".lprfd", align 8
define linkonce_odr void @foo_inline() {
call void @llvm.instrprof.increment(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @__profn_foo_inline, i32 0, i32 0), i64 0, i32 1, i32 0)
ret void
@@ -51,8 +48,8 @@ define linkonce_odr void @foo_inline() {
; LINUX: @__profc_foo_extern = linkonce_odr hidden global {{.*}}section "__llvm_prf_cnts", comdat($__profv_foo_extern), align 8
; LINUX: @__profd_foo_extern = linkonce_odr hidden global {{.*}}section "__llvm_prf_data", comdat($__profv_foo_extern), align 8
-; OTHER: @__profc_foo_extern = linkonce_odr hidden global
-; OTHER: @__profd_foo_extern = linkonce_odr hidden global
+; MACHO: @__profc_foo_extern = linkonce_odr hidden global
+; MACHO: @__profd_foo_extern = linkonce_odr hidden global
; COFF: @__profc_foo_extern = linkonce_odr hidden global {{.*}}section ".lprfc", comdat, align 8
; COFF: @__profd_foo_extern = linkonce_odr hidden global {{.*}}section ".lprfd", comdat($__profc_foo_extern), align 8
define available_externally void @foo_extern() {
@@ -62,10 +59,10 @@ define available_externally void @foo_extern() {
declare void @llvm.instrprof.increment(i8*, i64, i32, i32)
-; OTHER: define linkonce_odr hidden i32 @__llvm_profile_runtime_user() {{.*}} {
-; OTHER: %[[REG:.*]] = load i32, i32* @__llvm_profile_runtime
-; OTHER: ret i32 %[[REG]]
-; OTHER: }
+; MACHO: define linkonce_odr hidden i32 @__llvm_profile_runtime_user() {{.*}} {
+; MACHO: %[[REG:.*]] = load i32, i32* @__llvm_profile_runtime
+; MACHO: ret i32 %[[REG]]
+; MACHO: }
; COFF: define linkonce_odr hidden i32 @__llvm_profile_runtime_user() {{.*}} comdat {
; LINUX-NOT: define linkonce_odr hidden i32 @__llvm_profile_runtime_user() {{.*}} {
; LINUX-NOT: %[[REG:.*]] = load i32, i32* @__llvm_profile_runtime
diff --git a/test/Instrumentation/InstrProfiling/platform.ll b/test/Instrumentation/InstrProfiling/platform.ll
index c0c711054ff1..dbdde08b8152 100644
--- a/test/Instrumentation/InstrProfiling/platform.ll
+++ b/test/Instrumentation/InstrProfiling/platform.ll
@@ -2,27 +2,29 @@
; RUN: opt < %s -mtriple=x86_64-apple-macosx10.10.0 -instrprof -S | FileCheck %s -check-prefix=MACHO
; RUN: opt < %s -mtriple=x86_64-apple-macosx10.10.0 -passes=instrprof -S | FileCheck %s -check-prefix=MACHO
-; RUN: opt < %s -mtriple=x86_64-unknown-linux -instrprof -S | FileCheck %s -check-prefix=LINUX
-; RUN: opt < %s -mtriple=x86_64-unknown-linux -passes=instrprof -S | FileCheck %s -check-prefix=LINUX
-; RUN: opt < %s -mtriple=x86_64-unknown-freebsd -instrprof -S | FileCheck %s -check-prefix=FREEBSD
-; RUN: opt < %s -mtriple=x86_64-unknown-freebsd -passes=instrprof -S | FileCheck %s -check-prefix=FREEBSD
-; RUN: opt < %s -mtriple=x86_64-scei-ps4 -instrprof -S | FileCheck %s -check-prefix=PS4
-; RUN: opt < %s -mtriple=x86_64-scei-ps4 -passes=instrprof -S | FileCheck %s -check-prefix=PS4
-; RUN: opt < %s -mtriple=x86_64-pc-solaris -instrprof -S | FileCheck %s -check-prefix=SOLARIS
-; RUN: opt < %s -mtriple=x86_64-pc-solaris -passes=instrprof -S | FileCheck %s -check-prefix=SOLARIS
+; RUN: opt < %s -mtriple=x86_64-unknown-linux -instrprof -S | FileCheck %s -check-prefixes=LINUX,ELF
+; RUN: opt < %s -mtriple=x86_64-unknown-linux -passes=instrprof -S | FileCheck %s -check-prefixes=LINUX,ELF
+; RUN: opt < %s -mtriple=x86_64-unknown-freebsd -instrprof -S | FileCheck %s -check-prefixes=FREEBSD,ELF
+; RUN: opt < %s -mtriple=x86_64-unknown-freebsd -passes=instrprof -S | FileCheck %s -check-prefixes=FREEBSD,ELF
+; RUN: opt < %s -mtriple=x86_64-scei-ps4 -instrprof -S | FileCheck %s -check-prefixes=PS4,ELF
+; RUN: opt < %s -mtriple=x86_64-scei-ps4 -passes=instrprof -S | FileCheck %s -check-prefixes=PS4,ELF
+; RUN: opt < %s -mtriple=x86_64-pc-solaris -instrprof -S | FileCheck %s -check-prefixes=SOLARIS,ELF
+; RUN: opt < %s -mtriple=x86_64-pc-solaris -passes=instrprof -S | FileCheck %s -check-prefixes=SOLARIS,ELF
+; RUN: opt < %s -mtriple=x86_64-pc-windows -instrprof -S | FileCheck %s -check-prefix=WINDOWS
+; RUN: opt < %s -mtriple=x86_64-pc-windows -passes=instrprof -S | FileCheck %s -check-prefix=WINDOWS
@__profn_foo = hidden constant [3 x i8] c"foo"
; MACHO-NOT: __profn_foo
; ELF-NOT: __profn_foo
+; WINDOWS-NOT: __profn_foo
; MACHO: @__profc_foo = hidden global [1 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
; ELF: @__profc_foo = hidden global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", align 8
+; WINDOWS: @__profc_foo = hidden global [1 x i64] zeroinitializer, section ".lprfc", align 8
; MACHO: @__profd_foo = hidden {{.*}}, section "__DATA,__llvm_prf_data,regular,live_support", align 8
-; LINUX: @__profd_foo = hidden {{.*}}, section "__llvm_prf_data", align 8
-; FREEBSD: @__profd_foo = hidden {{.*}}, section "__llvm_prf_data", align 8
-; PS4: @__profd_foo = hidden {{.*}}, section "__llvm_prf_data", align 8
-; SOLARIS: @__profd_foo = hidden {{.*}}, section "__llvm_prf_data", align 8
+; ELF: @__profd_foo = hidden {{.*}}, section "__llvm_prf_data", align 8
+; WINDOWS: @__profd_foo = hidden {{.*}}, section ".lprfd", align 8
; ELF: @__llvm_prf_nm = private constant [{{.*}} x i8] c"{{.*}}", section "{{.*}}__llvm_prf_names"
@@ -40,10 +42,21 @@ declare void @llvm.instrprof.increment(i8*, i64, i32, i32)
; LINUX-NOT: define internal void @__llvm_profile_register_functions
; FREEBSD-NOT: define internal void @__llvm_profile_register_functions
; PS4-NOT: define internal void @__llvm_profile_register_functions
+
+;; PR38340: When dynamic registration is used, we had a bug where we'd register
+;; something that's not a __profd_* variable.
+
+; WINDOWS: define internal void @__llvm_profile_register_functions()
+; WINDOWS-NOT: __llvm_profile_runtime_user
+; WINDOWS: ret void
+
; SOLARIS: define internal void @__llvm_profile_register_functions
+; SOLARIS-NOT: __llvm_profile_runtime_user
+; SOLARIS: ret void
; MACHO-NOT: define internal void @__llvm_profile_init
; LINUX-NOT: define internal void @__llvm_profile_init
; FREEBSD-NOT: define internal void @__llvm_profile_init
; PS4-NOT: define internal void @__llvm_profile_init
; SOLARIS: define internal void @__llvm_profile_init
+; WINDOWS: define internal void @__llvm_profile_init
diff --git a/test/MC/AArch64/SVE/abs.s b/test/MC/AArch64/SVE/abs.s
index 6341c4f4885d..460f846cdc03 100644
--- a/test/MC/AArch64/SVE/abs.s
+++ b/test/MC/AArch64/SVE/abs.s
@@ -54,3 +54,31 @@ abs z31.d, p7/m, z31.d
// CHECK-ENCODING: [0xff,0xbf,0xd6,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff bf d6 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+abs z4.d, p7/m, z31.d
+// CHECK-INST: abs z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xd6,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf d6 04 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+abs z4.d, p7/m, z31.d
+// CHECK-INST: abs z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xd6,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf d6 04 <unknown>
diff --git a/test/MC/AArch64/SVE/add-diagnostics.s b/test/MC/AArch64/SVE/add-diagnostics.s
index 23042b6708c0..6d95a645f068 100644
--- a/test/MC/AArch64/SVE/add-diagnostics.s
+++ b/test/MC/AArch64/SVE/add-diagnostics.s
@@ -144,3 +144,25 @@ add z0.d, z0.d, #65536
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 255] or a multiple of 256 in range [256, 65280]
// CHECK-NEXT: add z0.d, z0.d, #65536
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+add z31.d, z31.d, #65280
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: add z31.d, z31.d, #65280
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z23.s, p0/z, z30.s
+add z23.s, z13.s, z8.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: add z23.s, z13.s, z8.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z23, z30
+add z23.s, z13.s, z8.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: add z23.s, z13.s, z8.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/add.s b/test/MC/AArch64/SVE/add.s
index f477eb956871..2064181d18d2 100644
--- a/test/MC/AArch64/SVE/add.s
+++ b/test/MC/AArch64/SVE/add.s
@@ -283,3 +283,43 @@ add z31.d, z31.d, #65280
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff ff e0 25 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.b, p7/z, z6.b
+// CHECK-INST: movprfx z4.b, p7/z, z6.b
+// CHECK-ENCODING: [0xc4,0x3c,0x10,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c 10 04 <unknown>
+
+add z4.b, p7/m, z4.b, z31.b
+// CHECK-INST: add z4.b, p7/m, z4.b, z31.b
+// CHECK-ENCODING: [0xe4,0x1f,0x00,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 1f 00 04 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+add z4.b, p7/m, z4.b, z31.b
+// CHECK-INST: add z4.b, p7/m, z4.b, z31.b
+// CHECK-ENCODING: [0xe4,0x1f,0x00,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 1f 00 04 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+add z31.d, z31.d, #65280
+// CHECK-INST: add z31.d, z31.d, #65280
+// CHECK-ENCODING: [0xff,0xff,0xe0,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ff ff e0 25 <unknown>
diff --git a/test/MC/AArch64/SVE/adr-diagnostics.s b/test/MC/AArch64/SVE/adr-diagnostics.s
index 99890ff79304..2bab1f7faeaa 100644
--- a/test/MC/AArch64/SVE/adr-diagnostics.s
+++ b/test/MC/AArch64/SVE/adr-diagnostics.s
@@ -57,3 +57,19 @@ adr z0.d, [z0.d, z0.d, sxtw #4]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid shift/extend specified, expected 'z[0..31].d, (lsl|uxtw|sxtw) #3'
// CHECK-NEXT: adr z0.d, [z0.d, z0.d, sxtw #4]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+adr z0.d, [z0.d, z0.d, sxtw #3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: adr z0.d, [z0.d, z0.d, sxtw #3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+adr z0.d, [z0.d, z0.d, sxtw #3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: adr z0.d, [z0.d, z0.d, sxtw #3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/and-diagnostics.s b/test/MC/AArch64/SVE/and-diagnostics.s
index 2ea6b3eb2721..ff7332e60e70 100644
--- a/test/MC/AArch64/SVE/and-diagnostics.s
+++ b/test/MC/AArch64/SVE/and-diagnostics.s
@@ -92,3 +92,25 @@ and p0.b, p0/m, p1.b, p2.b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: and p0.b, p0/m, p1.b, p2.b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+and z0.d, z0.d, #0x6
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: and z0.d, z0.d, #0x6
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z23.d, p0/z, z30.d
+and z23.d, z13.d, z8.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: and z23.d, z13.d, z8.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z23, z30
+and z23.d, z13.d, z8.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: and z23.d, z13.d, z8.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/and.s b/test/MC/AArch64/SVE/and.s
index 88e2439c44ed..0d0edc73e8f5 100644
--- a/test/MC/AArch64/SVE/and.s
+++ b/test/MC/AArch64/SVE/and.s
@@ -108,3 +108,43 @@ and p15.b, p15/z, p15.b, p15.b
// CHECK-ENCODING: [0xef,0x7d,0x0f,0x25]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ef 7d 0f 25 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+and z4.d, p7/m, z4.d, z31.d
+// CHECK-INST: and z4.d, p7/m, z4.d, z31.d
+// CHECK-ENCODING: [0xe4,0x1f,0xda,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 1f da 04 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+and z4.d, p7/m, z4.d, z31.d
+// CHECK-INST: and z4.d, p7/m, z4.d, z31.d
+// CHECK-ENCODING: [0xe4,0x1f,0xda,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 1f da 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+and z0.d, z0.d, #0x6
+// CHECK-INST: and z0.d, z0.d, #0x6
+// CHECK-ENCODING: [0x20,0xf8,0x83,0x05]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 f8 83 05 <unknown>
diff --git a/test/MC/AArch64/SVE/andv-diagnostics.s b/test/MC/AArch64/SVE/andv-diagnostics.s
index 60a42f826049..2048d77ff900 100644
--- a/test/MC/AArch64/SVE/andv-diagnostics.s
+++ b/test/MC/AArch64/SVE/andv-diagnostics.s
@@ -31,4 +31,19 @@ andv v0.2d, p7, z31.d
andv h0, p8, z31.h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
// CHECK-NEXT: andv h0, p8, z31.h
-// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: \ No newline at end of file
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p7/z, z6.d
+andv d0, p7, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: andv d0, p7, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+andv d0, p7, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: andv d0, p7, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/asr-diagnostics.s b/test/MC/AArch64/SVE/asr-diagnostics.s
index a4811324d264..9cec835dbe2f 100644
--- a/test/MC/AArch64/SVE/asr-diagnostics.s
+++ b/test/MC/AArch64/SVE/asr-diagnostics.s
@@ -122,3 +122,31 @@ asr z0.b, p8/m, z0.b, z1.b
// CHECK-NEXT: asr z0.b, p8/m, z0.b, z1.b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+asr z31.d, z31.d, #64
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: asr z31.d, z31.d, #64
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+asr z31.d, z31.d, #64
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: asr z31.d, z31.d, #64
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+asr z0.s, z1.s, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: asr z0.s, z1.s, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+asr z0.s, z1.s, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: asr z0.s, z1.s, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/asr.s b/test/MC/AArch64/SVE/asr.s
index 7204a27155d4..d514eda2af13 100644
--- a/test/MC/AArch64/SVE/asr.s
+++ b/test/MC/AArch64/SVE/asr.s
@@ -162,3 +162,55 @@ asr z0.s, z1.s, z2.d
// CHECK-ENCODING: [0x20,0x80,0xa2,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 20 80 a2 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p0/z, z6.d
+// CHECK-INST: movprfx z31.d, p0/z, z6.d
+// CHECK-ENCODING: [0xdf,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 20 d0 04 <unknown>
+
+asr z31.d, p0/m, z31.d, #64
+// CHECK-INST: asr z31.d, p0/m, z31.d, #64
+// CHECK-ENCODING: [0x1f,0x80,0x80,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 1f 80 80 04 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+asr z31.d, p0/m, z31.d, #64
+// CHECK-INST: asr z31.d, p0/m, z31.d, #64
+// CHECK-ENCODING: [0x1f,0x80,0x80,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 1f 80 80 04 <unknown>
+
+movprfx z0.s, p0/z, z7.s
+// CHECK-INST: movprfx z0.s, p0/z, z7.s
+// CHECK-ENCODING: [0xe0,0x20,0x90,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 20 90 04 <unknown>
+
+asr z0.s, p0/m, z0.s, z1.d
+// CHECK-INST: asr z0.s, p0/m, z0.s, z1.d
+// CHECK-ENCODING: [0x20,0x80,0x98,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 80 98 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+asr z0.s, p0/m, z0.s, z1.d
+// CHECK-INST: asr z0.s, p0/m, z0.s, z1.d
+// CHECK-ENCODING: [0x20,0x80,0x98,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 80 98 04 <unknown>
diff --git a/test/MC/AArch64/SVE/asrd.s b/test/MC/AArch64/SVE/asrd.s
index 69805517c1cb..78fbbba5b338 100644
--- a/test/MC/AArch64/SVE/asrd.s
+++ b/test/MC/AArch64/SVE/asrd.s
@@ -54,3 +54,31 @@ asrd z31.d, p0/m, z31.d, #64
// CHECK-ENCODING: [0x1f,0x80,0x84,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 1f 80 84 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p0/z, z6.d
+// CHECK-INST: movprfx z31.d, p0/z, z6.d
+// CHECK-ENCODING: [0xdf,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 20 d0 04 <unknown>
+
+asrd z31.d, p0/m, z31.d, #64
+// CHECK-INST: asrd z31.d, p0/m, z31.d, #64
+// CHECK-ENCODING: [0x1f,0x80,0x84,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 1f 80 84 04 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+asrd z31.d, p0/m, z31.d, #64
+// CHECK-INST: asrd z31.d, p0/m, z31.d, #64
+// CHECK-ENCODING: [0x1f,0x80,0x84,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 1f 80 84 04 <unknown>
diff --git a/test/MC/AArch64/SVE/asrr.s b/test/MC/AArch64/SVE/asrr.s
index e7f7cc5afa2c..f9cc7ea6f8ee 100644
--- a/test/MC/AArch64/SVE/asrr.s
+++ b/test/MC/AArch64/SVE/asrr.s
@@ -30,3 +30,31 @@ asrr z0.d, p0/m, z0.d, z0.d
// CHECK-ENCODING: [0x00,0x80,0xd4,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 00 80 d4 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z5.d, p0/z, z7.d
+// CHECK-INST: movprfx z5.d, p0/z, z7.d
+// CHECK-ENCODING: [0xe5,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e5 20 d0 04 <unknown>
+
+asrr z5.d, p0/m, z5.d, z0.d
+// CHECK-INST: asrr z5.d, p0/m, z5.d, z0.d
+// CHECK-ENCODING: [0x05,0x80,0xd4,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 05 80 d4 04 <unknown>
+
+movprfx z5, z7
+// CHECK-INST: movprfx z5, z7
+// CHECK-ENCODING: [0xe5,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e5 bc 20 04 <unknown>
+
+asrr z5.d, p0/m, z5.d, z0.d
+// CHECK-INST: asrr z5.d, p0/m, z5.d, z0.d
+// CHECK-ENCODING: [0x05,0x80,0xd4,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 05 80 d4 04 <unknown>
diff --git a/test/MC/AArch64/SVE/bic-diagnostics.s b/test/MC/AArch64/SVE/bic-diagnostics.s
index 61d0231e4cf4..abdd52028d01 100644
--- a/test/MC/AArch64/SVE/bic-diagnostics.s
+++ b/test/MC/AArch64/SVE/bic-diagnostics.s
@@ -92,3 +92,25 @@ bic p0.b, p0/m, p1.b, p2.b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: bic p0.b, p0/m, p1.b, p2.b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+bic z0.d, z0.d, #0x6
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: bic z0.d, z0.d, #0x6
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z23.d, p0/z, z30.d
+bic z23.d, z13.d, z8.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: bic z23.d, z13.d, z8.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z23, z30
+bic z23.d, z13.d, z8.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: bic z23.d, z13.d, z8.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/bic.s b/test/MC/AArch64/SVE/bic.s
index bd19fcd9fcfd..c9e6d9b82665 100644
--- a/test/MC/AArch64/SVE/bic.s
+++ b/test/MC/AArch64/SVE/bic.s
@@ -102,3 +102,43 @@ bic p0.b, p0/z, p0.b, p0.b
// CHECK-ENCODING: [0x10,0x40,0x00,0x25]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 10 40 00 25 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+bic z4.d, p7/m, z4.d, z31.d
+// CHECK-INST: bic z4.d, p7/m, z4.d, z31.d
+// CHECK-ENCODING: [0xe4,0x1f,0xdb,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 1f db 04 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+bic z4.d, p7/m, z4.d, z31.d
+// CHECK-INST: bic z4.d, p7/m, z4.d, z31.d
+// CHECK-ENCODING: [0xe4,0x1f,0xdb,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 1f db 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+bic z0.d, z0.d, #0x6
+// CHECK-INST: and z0.d, z0.d, #0xfffffffffffffff9
+// CHECK-ENCODING: [0xa0,0xef,0x83,0x05]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: a0 ef 83 05 <unknown>
diff --git a/test/MC/AArch64/SVE/brka-diagnostics.s b/test/MC/AArch64/SVE/brka-diagnostics.s
new file mode 100644
index 000000000000..140ac004fe07
--- /dev/null
+++ b/test/MC/AArch64/SVE/brka-diagnostics.s
@@ -0,0 +1,10 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve 2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Only .b is supported
+
+brka p0.s, p15/z, p15.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register
+// CHECK-NEXT: brka p0.s, p15/z, p15.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/brka.s b/test/MC/AArch64/SVE/brka.s
new file mode 100644
index 000000000000..d87e7632e3fd
--- /dev/null
+++ b/test/MC/AArch64/SVE/brka.s
@@ -0,0 +1,20 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d -mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+brka p0.b, p15/m, p15.b
+// CHECK-INST: brka p0.b, p15/m, p15.b
+// CHECK-ENCODING: [0xf0,0x7d,0x10,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: f0 7d 10 25 <unknown>
+
+brka p0.b, p15/z, p15.b
+// CHECK-INST: brka p0.b, p15/z, p15.b
+// CHECK-ENCODING: [0xe0,0x7d,0x10,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 7d 10 25 <unknown>
diff --git a/test/MC/AArch64/SVE/brkas-diagnostics.s b/test/MC/AArch64/SVE/brkas-diagnostics.s
new file mode 100644
index 000000000000..14fe7cf6ba6d
--- /dev/null
+++ b/test/MC/AArch64/SVE/brkas-diagnostics.s
@@ -0,0 +1,19 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve 2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Only .b is supported
+
+brkas p0.s, p15/z, p15.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register
+// CHECK-NEXT: brkas p0.s, p15/z, p15.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// ------------------------------------------------------------------------- //
+// flag-setting variant does not have merging predication
+
+brkas p0.b, p15/m, p15.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
+// CHECK-NEXT: brkas p0.b, p15/m, p15.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/brkas.s b/test/MC/AArch64/SVE/brkas.s
new file mode 100644
index 000000000000..f75c5bc8069d
--- /dev/null
+++ b/test/MC/AArch64/SVE/brkas.s
@@ -0,0 +1,14 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d -mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+brkas p0.b, p15/z, p15.b
+// CHECK-INST: brkas p0.b, p15/z, p15.b
+// CHECK-ENCODING: [0xe0,0x7d,0x50,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 7d 50 25 <unknown>
diff --git a/test/MC/AArch64/SVE/brkb-diagnostics.s b/test/MC/AArch64/SVE/brkb-diagnostics.s
new file mode 100644
index 000000000000..7f87f15d3885
--- /dev/null
+++ b/test/MC/AArch64/SVE/brkb-diagnostics.s
@@ -0,0 +1,10 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve 2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Only .b is supported
+
+brkb p0.s, p15/z, p15.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register
+// CHECK-NEXT: brkb p0.s, p15/z, p15.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/brkb.s b/test/MC/AArch64/SVE/brkb.s
new file mode 100644
index 000000000000..393d29449588
--- /dev/null
+++ b/test/MC/AArch64/SVE/brkb.s
@@ -0,0 +1,20 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d -mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+brkb p0.b, p15/m, p15.b
+// CHECK-INST: brkb p0.b, p15/m, p15.b
+// CHECK-ENCODING: [0xf0,0x7d,0x90,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: f0 7d 90 25 <unknown>
+
+brkb p0.b, p15/z, p15.b
+// CHECK-INST: brkb p0.b, p15/z, p15.b
+// CHECK-ENCODING: [0xe0,0x7d,0x90,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 7d 90 25 <unknown>
diff --git a/test/MC/AArch64/SVE/brkbs-diagnostics.s b/test/MC/AArch64/SVE/brkbs-diagnostics.s
new file mode 100644
index 000000000000..bcb9a6464b1e
--- /dev/null
+++ b/test/MC/AArch64/SVE/brkbs-diagnostics.s
@@ -0,0 +1,19 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve 2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Only .b is supported
+
+brkbs p0.s, p15/z, p15.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register
+// CHECK-NEXT: brkbs p0.s, p15/z, p15.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// ------------------------------------------------------------------------- //
+// flag-setting variant does not have merging predication
+
+brkbs p0.b, p15/m, p15.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
+// CHECK-NEXT: brkbs p0.b, p15/m, p15.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/brkbs.s b/test/MC/AArch64/SVE/brkbs.s
new file mode 100644
index 000000000000..708ec91345c3
--- /dev/null
+++ b/test/MC/AArch64/SVE/brkbs.s
@@ -0,0 +1,14 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d -mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+brkbs p0.b, p15/z, p15.b
+// CHECK-INST: brkbs p0.b, p15/z, p15.b
+// CHECK-ENCODING: [0xe0,0x7d,0xd0,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 7d d0 25 <unknown>
diff --git a/test/MC/AArch64/SVE/brkn-diagnostics.s b/test/MC/AArch64/SVE/brkn-diagnostics.s
new file mode 100644
index 000000000000..87587fbe373c
--- /dev/null
+++ b/test/MC/AArch64/SVE/brkn-diagnostics.s
@@ -0,0 +1,28 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve 2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// BRKN only supports merging predication
+
+brkn p0.b, p15/m, p1.b, p0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
+// CHECK-NEXT: brkn p0.b, p15/m, p1.b, p0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// ------------------------------------------------------------------------- //
+// Check tied operand constraints
+
+brkn p0.b, p15/z, p1.b, p1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: brkn p0.b, p15/z, p1.b, p1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// ------------------------------------------------------------------------- //
+// Only .b is supported
+
+brkn p15.s, p15/z, p15.s, p15.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register
+// CHECK-NEXT: brkn p15.s, p15/z, p15.s, p15.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/brkn.s b/test/MC/AArch64/SVE/brkn.s
new file mode 100644
index 000000000000..8494e547732f
--- /dev/null
+++ b/test/MC/AArch64/SVE/brkn.s
@@ -0,0 +1,20 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d -mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+brkn p0.b, p15/z, p1.b, p0.b
+// CHECK-INST: brkn p0.b, p15/z, p1.b, p0.b
+// CHECK-ENCODING: [0x20,0x7c,0x18,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 7c 18 25 <unknown>
+
+brkn p15.b, p15/z, p15.b, p15.b
+// CHECK-INST: brkn p15.b, p15/z, p15.b, p15.b
+// CHECK-ENCODING: [0xef,0x7d,0x18,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ef 7d 18 25 <unknown>
diff --git a/test/MC/AArch64/SVE/brkns-diagnostics.s b/test/MC/AArch64/SVE/brkns-diagnostics.s
new file mode 100644
index 000000000000..c22d4cdb0d8d
--- /dev/null
+++ b/test/MC/AArch64/SVE/brkns-diagnostics.s
@@ -0,0 +1,28 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve 2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// BRKN only supports merging predication
+
+brkns p0.b, p15/m, p1.b, p0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
+// CHECK-NEXT: brkns p0.b, p15/m, p1.b, p0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// ------------------------------------------------------------------------- //
+// Check tied operand constraints
+
+brkns p0.b, p15/z, p1.b, p1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: brkns p0.b, p15/z, p1.b, p1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// ------------------------------------------------------------------------- //
+// Only .b is supported
+
+brkns p15.s, p15/z, p15.s, p15.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register
+// CHECK-NEXT: brkns p15.s, p15/z, p15.s, p15.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/brkns.s b/test/MC/AArch64/SVE/brkns.s
new file mode 100644
index 000000000000..6fd47f69c91f
--- /dev/null
+++ b/test/MC/AArch64/SVE/brkns.s
@@ -0,0 +1,20 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d -mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+brkns p0.b, p15/z, p1.b, p0.b
+// CHECK-INST: brkns p0.b, p15/z, p1.b, p0.b
+// CHECK-ENCODING: [0x20,0x7c,0x58,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 7c 58 25 <unknown>
+
+brkns p15.b, p15/z, p15.b, p15.b
+// CHECK-INST: brkns p15.b, p15/z, p15.b, p15.b
+// CHECK-ENCODING: [0xef,0x7d,0x58,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ef 7d 58 25 <unknown>
diff --git a/test/MC/AArch64/SVE/brkpa-diagnostics.s b/test/MC/AArch64/SVE/brkpa-diagnostics.s
new file mode 100644
index 000000000000..d7693c62a279
--- /dev/null
+++ b/test/MC/AArch64/SVE/brkpa-diagnostics.s
@@ -0,0 +1,11 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve 2>&1 < %s| FileCheck %s
+
+brkpa p15.b, p15/m, p15.b, p15.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: brkpa p15.b, p15/m, p15.b, p15.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+brkpa p15.s, p15/z, p15.s, p15.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register
+// CHECK-NEXT: brkpa p15.s, p15/z, p15.s, p15.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/brkpa.s b/test/MC/AArch64/SVE/brkpa.s
new file mode 100644
index 000000000000..c283ee2fdea6
--- /dev/null
+++ b/test/MC/AArch64/SVE/brkpa.s
@@ -0,0 +1,20 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d -mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+brkpa p0.b, p15/z, p1.b, p2.b
+// CHECK-INST: brkpa p0.b, p15/z, p1.b, p2.b
+// CHECK-ENCODING: [0x20,0xfc,0x02,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 fc 02 25 <unknown>
+
+brkpa p15.b, p15/z, p15.b, p15.b
+// CHECK-INST: brkpa p15.b, p15/z, p15.b, p15.b
+// CHECK-ENCODING: [0xef,0xfd,0x0f,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ef fd 0f 25 <unknown>
diff --git a/test/MC/AArch64/SVE/brkpas-diagnostics.s b/test/MC/AArch64/SVE/brkpas-diagnostics.s
new file mode 100644
index 000000000000..a88e11075c74
--- /dev/null
+++ b/test/MC/AArch64/SVE/brkpas-diagnostics.s
@@ -0,0 +1,11 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve 2>&1 < %s| FileCheck %s
+
+brkpas p15.b, p15/m, p15.b, p15.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: brkpas p15.b, p15/m, p15.b, p15.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+brkpas p15.s, p15/z, p15.s, p15.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register
+// CHECK-NEXT: brkpas p15.s, p15/z, p15.s, p15.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/brkpas.s b/test/MC/AArch64/SVE/brkpas.s
new file mode 100644
index 000000000000..81d590096681
--- /dev/null
+++ b/test/MC/AArch64/SVE/brkpas.s
@@ -0,0 +1,20 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d -mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+brkpas p0.b, p15/z, p1.b, p2.b
+// CHECK-INST: brkpas p0.b, p15/z, p1.b, p2.b
+// CHECK-ENCODING: [0x20,0xfc,0x42,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 fc 42 25 <unknown>
+
+brkpas p15.b, p15/z, p15.b, p15.b
+// CHECK-INST: brkpas p15.b, p15/z, p15.b, p15.b
+// CHECK-ENCODING: [0xef,0xfd,0x4f,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ef fd 4f 25 <unknown>
diff --git a/test/MC/AArch64/SVE/brkpb-diagnostics.s b/test/MC/AArch64/SVE/brkpb-diagnostics.s
new file mode 100644
index 000000000000..e03f0bfbf93d
--- /dev/null
+++ b/test/MC/AArch64/SVE/brkpb-diagnostics.s
@@ -0,0 +1,11 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve 2>&1 < %s| FileCheck %s
+
+brkpb p15.b, p15/m, p15.b, p15.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: brkpb p15.b, p15/m, p15.b, p15.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+brkpb p15.s, p15/z, p15.s, p15.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register
+// CHECK-NEXT: brkpb p15.s, p15/z, p15.s, p15.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/brkpb.s b/test/MC/AArch64/SVE/brkpb.s
new file mode 100644
index 000000000000..039e71f5e322
--- /dev/null
+++ b/test/MC/AArch64/SVE/brkpb.s
@@ -0,0 +1,20 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d -mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+brkpb p0.b, p15/z, p1.b, p2.b
+// CHECK-INST: brkpb p0.b, p15/z, p1.b, p2.b
+// CHECK-ENCODING: [0x30,0xfc,0x02,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 30 fc 02 25 <unknown>
+
+brkpb p15.b, p15/z, p15.b, p15.b
+// CHECK-INST: brkpb p15.b, p15/z, p15.b, p15.b
+// CHECK-ENCODING: [0xff,0xfd,0x0f,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ff fd 0f 25 <unknown>
diff --git a/test/MC/AArch64/SVE/brkpbs-diagnostics.s b/test/MC/AArch64/SVE/brkpbs-diagnostics.s
new file mode 100644
index 000000000000..25e230e2e63c
--- /dev/null
+++ b/test/MC/AArch64/SVE/brkpbs-diagnostics.s
@@ -0,0 +1,11 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve 2>&1 < %s| FileCheck %s
+
+brkpbs p15.b, p15/m, p15.b, p15.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: brkpbs p15.b, p15/m, p15.b, p15.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+brkpbs p15.s, p15/z, p15.s, p15.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register
+// CHECK-NEXT: brkpbs p15.s, p15/z, p15.s, p15.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/brkpbs.s b/test/MC/AArch64/SVE/brkpbs.s
new file mode 100644
index 000000000000..1d44178659dd
--- /dev/null
+++ b/test/MC/AArch64/SVE/brkpbs.s
@@ -0,0 +1,20 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d -mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+brkpbs p0.b, p15/z, p1.b, p2.b
+// CHECK-INST: brkpbs p0.b, p15/z, p1.b, p2.b
+// CHECK-ENCODING: [0x30,0xfc,0x42,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 30 fc 42 25 <unknown>
+
+brkpbs p15.b, p15/z, p15.b, p15.b
+// CHECK-INST: brkpbs p15.b, p15/z, p15.b, p15.b
+// CHECK-ENCODING: [0xff,0xfd,0x4f,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ff fd 4f 25 <unknown>
diff --git a/test/MC/AArch64/SVE/clasta-diagnostics.s b/test/MC/AArch64/SVE/clasta-diagnostics.s
index cfc1db15b2cd..c0924bf689c7 100644
--- a/test/MC/AArch64/SVE/clasta-diagnostics.s
+++ b/test/MC/AArch64/SVE/clasta-diagnostics.s
@@ -77,3 +77,37 @@ clasta z0.d, p7, z0.d, z31.b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: clasta z0.d, p7, z0.d, z31.b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p7/z, z6.d
+clasta x0, p7, x0, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: clasta x0, p7, x0, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+clasta x0, p7, x0, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: clasta x0, p7, x0, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31.d, p7/z, z6.d
+clasta d0, p7, d0, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: clasta d0, p7, d0, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+clasta d0, p7, d0, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: clasta d0, p7, d0, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.d, p7/z, z7.d
+clasta z0.d, p7, z0.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: clasta z0.d, p7, z0.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/clasta.s b/test/MC/AArch64/SVE/clasta.s
index 3e120310298b..05f6f92afac9 100644
--- a/test/MC/AArch64/SVE/clasta.s
+++ b/test/MC/AArch64/SVE/clasta.s
@@ -78,3 +78,19 @@ clasta z0.d, p7, z0.d, z31.d
// CHECK-ENCODING: [0xe0,0x9f,0xe8,0x05]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: e0 9f e8 05 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+clasta z0.d, p7, z0.d, z31.d
+// CHECK-INST: clasta z0.d, p7, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x9f,0xe8,0x05]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 9f e8 05 <unknown>
diff --git a/test/MC/AArch64/SVE/clastb-diagnostics.s b/test/MC/AArch64/SVE/clastb-diagnostics.s
index 62f696458c3e..0f0f677ec225 100644
--- a/test/MC/AArch64/SVE/clastb-diagnostics.s
+++ b/test/MC/AArch64/SVE/clastb-diagnostics.s
@@ -77,3 +77,37 @@ clastb z0.d, p7, z0.d, z31.b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: clastb z0.d, p7, z0.d, z31.b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p7/z, z6.d
+clastb x0, p7, x0, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: clastb x0, p7, x0, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+clastb x0, p7, x0, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: clastb x0, p7, x0, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31.d, p7/z, z6.d
+clastb d0, p7, d0, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: clastb d0, p7, d0, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+clastb d0, p7, d0, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: clastb d0, p7, d0, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.d, p7/z, z7.d
+clastb z0.d, p7, z0.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: clastb z0.d, p7, z0.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/clastb.s b/test/MC/AArch64/SVE/clastb.s
index 654ab1133e78..6785edb87cee 100644
--- a/test/MC/AArch64/SVE/clastb.s
+++ b/test/MC/AArch64/SVE/clastb.s
@@ -78,3 +78,19 @@ clastb z0.d, p7, z0.d, z31.d
// CHECK-ENCODING: [0xe0,0x9f,0xe9,0x05]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: e0 9f e9 05 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+clastb z0.d, p7, z0.d, z31.d
+// CHECK-INST: clastb z0.d, p7, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x9f,0xe9,0x05]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 9f e9 05 <unknown>
diff --git a/test/MC/AArch64/SVE/cls.s b/test/MC/AArch64/SVE/cls.s
index db1c0b2a7cea..e860a7b40401 100644
--- a/test/MC/AArch64/SVE/cls.s
+++ b/test/MC/AArch64/SVE/cls.s
@@ -30,3 +30,31 @@ cls z31.d, p7/m, z31.d
// CHECK-ENCODING: [0xff,0xbf,0xd8,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff bf d8 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+cls z4.d, p7/m, z31.d
+// CHECK-INST: cls z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xd8,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf d8 04 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+cls z4.d, p7/m, z31.d
+// CHECK-INST: cls z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xd8,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf d8 04 <unknown>
diff --git a/test/MC/AArch64/SVE/clz.s b/test/MC/AArch64/SVE/clz.s
index 76e9d1bf83f4..ff69f383e70c 100644
--- a/test/MC/AArch64/SVE/clz.s
+++ b/test/MC/AArch64/SVE/clz.s
@@ -30,3 +30,31 @@ clz z31.d, p7/m, z31.d
// CHECK-ENCODING: [0xff,0xbf,0xd9,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff bf d9 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+clz z4.d, p7/m, z31.d
+// CHECK-INST: clz z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xd9,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf d9 04 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+clz z4.d, p7/m, z31.d
+// CHECK-INST: clz z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xd9,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf d9 04 <unknown>
diff --git a/test/MC/AArch64/SVE/cmpeq-diagnostics.s b/test/MC/AArch64/SVE/cmpeq-diagnostics.s
index bcda4ac02821..fbf292ec8872 100644
--- a/test/MC/AArch64/SVE/cmpeq-diagnostics.s
+++ b/test/MC/AArch64/SVE/cmpeq-diagnostics.s
@@ -74,3 +74,31 @@ cmpeq p0.s, p0/z, z0.s, #16
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be an integer in range [-16, 15].
// CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #16
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+cmpeq p0.d, p0/z, z0.d, #15
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, #15
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+cmpeq p0.d, p0/z, z0.d, #15
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, #15
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+cmpeq p0.s, p0/z, z0.s, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+cmpeq p0.s, p0/z, z0.s, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/cmpge-diagnostics.s b/test/MC/AArch64/SVE/cmpge-diagnostics.s
index 49520ed5ecd5..44ab473808e7 100644
--- a/test/MC/AArch64/SVE/cmpge-diagnostics.s
+++ b/test/MC/AArch64/SVE/cmpge-diagnostics.s
@@ -74,3 +74,31 @@ cmpge p0.s, p0/z, z0.s, #16
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be an integer in range [-16, 15].
// CHECK-NEXT: cmpge p0.s, p0/z, z0.s, #16
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+cmpge p0.d, p0/z, z0.d, #15
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmpge p0.d, p0/z, z0.d, #15
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+cmpge p0.d, p0/z, z0.d, #15
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmpge p0.d, p0/z, z0.d, #15
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+cmpge p0.s, p0/z, z0.s, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+cmpge p0.s, p0/z, z0.s, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/cmpgt-diagnostics.s b/test/MC/AArch64/SVE/cmpgt-diagnostics.s
index fcc972c41011..1745aab97ea7 100644
--- a/test/MC/AArch64/SVE/cmpgt-diagnostics.s
+++ b/test/MC/AArch64/SVE/cmpgt-diagnostics.s
@@ -74,3 +74,31 @@ cmpgt p0.s, p0/z, z0.s, #16
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be an integer in range [-16, 15].
// CHECK-NEXT: cmpgt p0.s, p0/z, z0.s, #16
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+cmpgt p0.d, p0/z, z0.d, #15
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmpgt p0.d, p0/z, z0.d, #15
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+cmpgt p0.d, p0/z, z0.d, #15
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmpgt p0.d, p0/z, z0.d, #15
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+cmpgt p0.s, p0/z, z0.s, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmpgt p0.s, p0/z, z0.s, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+cmpgt p0.s, p0/z, z0.s, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmpgt p0.s, p0/z, z0.s, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/cmphi-diagnostics.s b/test/MC/AArch64/SVE/cmphi-diagnostics.s
index b0b3010183f3..5d7c0d82f5bf 100644
--- a/test/MC/AArch64/SVE/cmphi-diagnostics.s
+++ b/test/MC/AArch64/SVE/cmphi-diagnostics.s
@@ -74,3 +74,31 @@ cmphi p0.s, p0/z, z0.s, #128
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 127].
// CHECK-NEXT: cmphi p0.s, p0/z, z0.s, #128
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+cmphi p0.d, p0/z, z0.d, #127
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmphi p0.d, p0/z, z0.d, #127
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+cmphi p0.d, p0/z, z0.d, #127
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmphi p0.d, p0/z, z0.d, #127
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+cmphi p0.s, p0/z, z0.s, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmphi p0.s, p0/z, z0.s, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+cmphi p0.s, p0/z, z0.s, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmphi p0.s, p0/z, z0.s, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/cmphs-diagnostics.s b/test/MC/AArch64/SVE/cmphs-diagnostics.s
index 955c1af1b8d9..5bfff46ee406 100644
--- a/test/MC/AArch64/SVE/cmphs-diagnostics.s
+++ b/test/MC/AArch64/SVE/cmphs-diagnostics.s
@@ -74,3 +74,31 @@ cmphs p0.s, p0/z, z0.s, #128
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 127].
// CHECK-NEXT: cmphs p0.s, p0/z, z0.s, #128
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+cmphs p0.d, p0/z, z0.d, #127
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmphs p0.d, p0/z, z0.d, #127
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+cmphs p0.d, p0/z, z0.d, #127
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmphs p0.d, p0/z, z0.d, #127
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+cmphs p0.s, p0/z, z0.s, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmphs p0.s, p0/z, z0.s, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+cmphs p0.s, p0/z, z0.s, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmphs p0.s, p0/z, z0.s, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/cmple-diagnostics.s b/test/MC/AArch64/SVE/cmple-diagnostics.s
index e40ab1419d5d..d03d2153fd69 100644
--- a/test/MC/AArch64/SVE/cmple-diagnostics.s
+++ b/test/MC/AArch64/SVE/cmple-diagnostics.s
@@ -74,3 +74,31 @@ cmple p0.s, p0/z, z0.s, #16
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be an integer in range [-16, 15].
// CHECK-NEXT: cmple p0.s, p0/z, z0.s, #16
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+cmple p0.d, p0/z, z0.d, #15
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmple p0.d, p0/z, z0.d, #15
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+cmple p0.d, p0/z, z0.d, #15
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmple p0.d, p0/z, z0.d, #15
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+cmple p0.s, p0/z, z0.s, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmple p0.s, p0/z, z0.s, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+cmple p0.s, p0/z, z0.s, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmple p0.s, p0/z, z0.s, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/cmplo-diagnostics.s b/test/MC/AArch64/SVE/cmplo-diagnostics.s
index 825ad13b4dd7..ecbed6081ac2 100644
--- a/test/MC/AArch64/SVE/cmplo-diagnostics.s
+++ b/test/MC/AArch64/SVE/cmplo-diagnostics.s
@@ -74,3 +74,31 @@ cmplo p0.s, p0/z, z0.s, #128
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 127].
// CHECK-NEXT: cmplo p0.s, p0/z, z0.s, #128
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+cmplo p0.d, p0/z, z0.d, #127
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmplo p0.d, p0/z, z0.d, #127
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+cmplo p0.d, p0/z, z0.d, #127
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmplo p0.d, p0/z, z0.d, #127
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+cmplo p0.s, p0/z, z0.s, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmplo p0.s, p0/z, z0.s, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+cmplo p0.s, p0/z, z0.s, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmplo p0.s, p0/z, z0.s, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/cmpls-diagnostics.s b/test/MC/AArch64/SVE/cmpls-diagnostics.s
index 349a1b3ccf68..c42ce36ae3c9 100644
--- a/test/MC/AArch64/SVE/cmpls-diagnostics.s
+++ b/test/MC/AArch64/SVE/cmpls-diagnostics.s
@@ -74,3 +74,31 @@ cmpls p0.s, p0/z, z0.s, #128
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 127].
// CHECK-NEXT: cmpls p0.s, p0/z, z0.s, #128
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+cmpls p0.d, p0/z, z0.d, #127
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmpls p0.d, p0/z, z0.d, #127
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+cmpls p0.d, p0/z, z0.d, #127
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmpls p0.d, p0/z, z0.d, #127
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+cmpls p0.s, p0/z, z0.s, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmpls p0.s, p0/z, z0.s, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+cmpls p0.s, p0/z, z0.s, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmpls p0.s, p0/z, z0.s, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/cmplt-diagnostics.s b/test/MC/AArch64/SVE/cmplt-diagnostics.s
index 7878ed56d409..0f023e0f866f 100644
--- a/test/MC/AArch64/SVE/cmplt-diagnostics.s
+++ b/test/MC/AArch64/SVE/cmplt-diagnostics.s
@@ -74,3 +74,31 @@ cmplt p0.s, p0/z, z0.s, #16
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be an integer in range [-16, 15].
// CHECK-NEXT: cmplt p0.s, p0/z, z0.s, #16
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+cmplt p0.d, p0/z, z0.d, #15
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmplt p0.d, p0/z, z0.d, #15
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+cmplt p0.d, p0/z, z0.d, #15
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmplt p0.d, p0/z, z0.d, #15
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+cmplt p0.s, p0/z, z0.s, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmplt p0.s, p0/z, z0.s, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+cmplt p0.s, p0/z, z0.s, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmplt p0.s, p0/z, z0.s, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/cmpne-diagnostics.s b/test/MC/AArch64/SVE/cmpne-diagnostics.s
index 98bd8b7857b7..74c74c7eaa7c 100644
--- a/test/MC/AArch64/SVE/cmpne-diagnostics.s
+++ b/test/MC/AArch64/SVE/cmpne-diagnostics.s
@@ -74,3 +74,31 @@ cmpne p0.s, p0/z, z0.s, #16
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be an integer in range [-16, 15].
// CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #16
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+cmpne p0.d, p0/z, z0.d, #15
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #15
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+cmpne p0.d, p0/z, z0.d, #15
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #15
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+cmpne p0.s, p0/z, z0.s, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmpne p0.s, p0/z, z0.s, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+cmpne p0.s, p0/z, z0.s, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cmpne p0.s, p0/z, z0.s, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/cnot.s b/test/MC/AArch64/SVE/cnot.s
index 06ac6c8660df..83f21816c859 100644
--- a/test/MC/AArch64/SVE/cnot.s
+++ b/test/MC/AArch64/SVE/cnot.s
@@ -30,3 +30,31 @@ cnot z31.d, p7/m, z31.d
// CHECK-ENCODING: [0xff,0xbf,0xdb,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff bf db 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+cnot z4.d, p7/m, z31.d
+// CHECK-INST: cnot z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xdb,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf db 04 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+cnot z4.d, p7/m, z31.d
+// CHECK-INST: cnot z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xdb,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf db 04 <unknown>
diff --git a/test/MC/AArch64/SVE/cnt.s b/test/MC/AArch64/SVE/cnt.s
index 4a81b1a9a9b9..86373cbe08e3 100644
--- a/test/MC/AArch64/SVE/cnt.s
+++ b/test/MC/AArch64/SVE/cnt.s
@@ -30,3 +30,31 @@ cnt z31.d, p7/m, z31.d
// CHECK-ENCODING: [0xff,0xbf,0xda,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff bf da 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+cnt z4.d, p7/m, z31.d
+// CHECK-INST: cnt z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xda,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf da 04 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+cnt z4.d, p7/m, z31.d
+// CHECK-INST: cnt z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xda,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf da 04 <unknown>
diff --git a/test/MC/AArch64/SVE/compact-diagnostics.s b/test/MC/AArch64/SVE/compact-diagnostics.s
index 817e97a83c5a..d252f60e55af 100644
--- a/test/MC/AArch64/SVE/compact-diagnostics.s
+++ b/test/MC/AArch64/SVE/compact-diagnostics.s
@@ -26,3 +26,19 @@ compact z31.h, p7, z31.h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: compact z31.h, p7, z31.h
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p7/z, z6.d
+compact z31.d, p7, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: compact z31.d, p7, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+compact z31.d, p7, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: compact z31.d, p7, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/cpy.s b/test/MC/AArch64/SVE/cpy.s
index 7d11d9288214..968bc5ddfe8d 100644
--- a/test/MC/AArch64/SVE/cpy.s
+++ b/test/MC/AArch64/SVE/cpy.s
@@ -275,3 +275,79 @@ cpy z21.d, p15/m, #-128, lsl #8
// CHECK-ENCODING: [0x15,0x70,0xdf,0x05]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 15 70 df 05 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p7/z, z6.d
+// CHECK-INST: movprfx z31.d, p7/z, z6.d
+// CHECK-ENCODING: [0xdf,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 3c d0 04 <unknown>
+
+cpy z31.d, p7/m, sp
+// CHECK-INST: mov z31.d, p7/m, sp
+// CHECK-ENCODING: [0xff,0xbf,0xe8,0x05]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ff bf e8 05 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+cpy z31.d, p7/m, sp
+// CHECK-INST: mov z31.d, p7/m, sp
+// CHECK-ENCODING: [0xff,0xbf,0xe8,0x05]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ff bf e8 05 <unknown>
+
+movprfx z21.d, p7/z, z28.d
+// CHECK-INST: movprfx z21.d, p7/z, z28.d
+// CHECK-ENCODING: [0x95,0x3f,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 95 3f d0 04 <unknown>
+
+cpy z21.d, p7/m, #-128, lsl #8
+// CHECK-INST: mov z21.d, p7/m, #-32768
+// CHECK-ENCODING: [0x15,0x70,0xd7,0x05]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 15 70 d7 05 <unknown>
+
+movprfx z21, z28
+// CHECK-INST: movprfx z21, z28
+// CHECK-ENCODING: [0x95,0xbf,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 95 bf 20 04 <unknown>
+
+cpy z21.d, p15/m, #-128, lsl #8
+// CHECK-INST: mov z21.d, p15/m, #-32768
+// CHECK-ENCODING: [0x15,0x70,0xdf,0x05]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 15 70 df 05 <unknown>
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+cpy z4.d, p7/m, d31
+// CHECK-INST: mov z4.d, p7/m, d31
+// CHECK-ENCODING: [0xe4,0x9f,0xe0,0x05]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 9f e0 05 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+cpy z4.d, p7/m, d31
+// CHECK-INST: mov z4.d, p7/m, d31
+// CHECK-ENCODING: [0xe4,0x9f,0xe0,0x05]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 9f e0 05 <unknown>
diff --git a/test/MC/AArch64/SVE/ctermeq-diagnostics.s b/test/MC/AArch64/SVE/ctermeq-diagnostics.s
new file mode 100644
index 000000000000..74afc10cb572
--- /dev/null
+++ b/test/MC/AArch64/SVE/ctermeq-diagnostics.s
@@ -0,0 +1,25 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve 2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid scalar registers
+
+ctermeq w30, wsp
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ctermeq w30, wsp
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ctermeq w30, x0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ctermeq w30, x0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ctermeq wsp, w30
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ctermeq wsp, w30
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ctermeq x0, w30
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ctermeq x0, w30
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ctermeq.s b/test/MC/AArch64/SVE/ctermeq.s
new file mode 100644
index 000000000000..3dfac002350a
--- /dev/null
+++ b/test/MC/AArch64/SVE/ctermeq.s
@@ -0,0 +1,32 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d -mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+ctermeq w30, wzr
+// CHECK-INST: ctermeq w30, wzr
+// CHECK-ENCODING: [0xc0,0x23,0xbf,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c0 23 bf 25 <unknown>
+
+ctermeq wzr, w30
+// CHECK-INST: ctermeq wzr, w30
+// CHECK-ENCODING: [0xe0,0x23,0xbe,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 23 be 25 <unknown>
+
+ctermeq x30, xzr
+// CHECK-INST: ctermeq x30, xzr
+// CHECK-ENCODING: [0xc0,0x23,0xff,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c0 23 ff 25 <unknown>
+
+ctermeq xzr, x30
+// CHECK-INST: ctermeq xzr, x30
+// CHECK-ENCODING: [0xe0,0x23,0xfe,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 23 fe 25 <unknown>
diff --git a/test/MC/AArch64/SVE/ctermne-diagnostics.s b/test/MC/AArch64/SVE/ctermne-diagnostics.s
new file mode 100644
index 000000000000..96346f44449f
--- /dev/null
+++ b/test/MC/AArch64/SVE/ctermne-diagnostics.s
@@ -0,0 +1,25 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve 2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid scalar registers
+
+ctermne w30, wsp
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ctermne w30, wsp
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ctermne w30, x0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ctermne w30, x0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ctermne wsp, w30
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ctermne wsp, w30
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ctermne x0, w30
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ctermne x0, w30
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ctermne.s b/test/MC/AArch64/SVE/ctermne.s
new file mode 100644
index 000000000000..54dc5e23a9c5
--- /dev/null
+++ b/test/MC/AArch64/SVE/ctermne.s
@@ -0,0 +1,32 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d -mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+ctermne w30, wzr
+// CHECK-INST: ctermne w30, wzr
+// CHECK-ENCODING: [0xd0,0x23,0xbf,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: d0 23 bf 25 <unknown>
+
+ctermne wzr, w30
+// CHECK-INST: ctermne wzr, w30
+// CHECK-ENCODING: [0xf0,0x23,0xbe,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: f0 23 be 25 <unknown>
+
+ctermne x30, xzr
+// CHECK-INST: ctermne x30, xzr
+// CHECK-ENCODING: [0xd0,0x23,0xff,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: d0 23 ff 25 <unknown>
+
+ctermne xzr, x30
+// CHECK-INST: ctermne xzr, x30
+// CHECK-ENCODING: [0xf0,0x23,0xfe,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: f0 23 fe 25 <unknown>
diff --git a/test/MC/AArch64/SVE/decp-diagnostics.s b/test/MC/AArch64/SVE/decp-diagnostics.s
index 2c8cc4293319..b1e4c9144cca 100644
--- a/test/MC/AArch64/SVE/decp-diagnostics.s
+++ b/test/MC/AArch64/SVE/decp-diagnostics.s
@@ -36,3 +36,13 @@ decp x0, p0.q
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register
// CHECK-NEXT: decp x0, p0.q
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p7/z, z6.d
+decp z31.d, p7
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: decp z31.d, p7
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/decp.s b/test/MC/AArch64/SVE/decp.s
index 4866c766aaff..8bbe726a422d 100644
--- a/test/MC/AArch64/SVE/decp.s
+++ b/test/MC/AArch64/SVE/decp.s
@@ -72,3 +72,19 @@ decp z31.d, p15
// CHECK-ENCODING: [0xff,0x81,0xed,0x25]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff 81 ed 25 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+decp z31.d, p15
+// CHECK-INST: decp z31.d, p15
+// CHECK-ENCODING: [0xff,0x81,0xed,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ff 81 ed 25 <unknown>
diff --git a/test/MC/AArch64/SVE/dup-diagnostics.s b/test/MC/AArch64/SVE/dup-diagnostics.s
index 6636708c457e..f97ac83aae09 100644
--- a/test/MC/AArch64/SVE/dup-diagnostics.s
+++ b/test/MC/AArch64/SVE/dup-diagnostics.s
@@ -215,3 +215,43 @@ dup z24.q, z21.q[4]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
// CHECK-NEXT: dup z24.q, z21.q[4]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.b, p0/z, z6.b
+dup z31.b, wsp
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: dup z31.b, wsp
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+dup z31.b, wsp
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: dup z31.b, wsp
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z21.d, p0/z, z28.d
+dup z21.d, #32512
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: dup z21.d, #32512
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z21, z28
+dup z21.d, #32512
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: dup z21.d, #32512
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31.d, p0/z, z6.d
+dup z31.d, z31.d[7]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: dup z31.d, z31.d[7]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+dup z31.d, z31.d[7]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: dup z31.d, z31.d[7]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/dupm-diagnostics.s b/test/MC/AArch64/SVE/dupm-diagnostics.s
index cb2277a1f871..f82c849437b0 100644
--- a/test/MC/AArch64/SVE/dupm-diagnostics.s
+++ b/test/MC/AArch64/SVE/dupm-diagnostics.s
@@ -37,3 +37,19 @@ dupm z15.d, #0xfffffffffffffffa
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: expected compatible register or logical immediate
// CHECK-NEXT: dupm z15.d, #0xfffffffffffffffa
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+dupm z0.d, #0xfffffffffffffff9
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: dupm z0.d, #0xfffffffffffffff9
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+dupm z0.d, #0xfffffffffffffff9
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: dupm z0.d, #0xfffffffffffffff9
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/eon-diagnostics.s b/test/MC/AArch64/SVE/eon-diagnostics.s
index 5b6f59ce23de..ffdd7222cb20 100644
--- a/test/MC/AArch64/SVE/eon-diagnostics.s
+++ b/test/MC/AArch64/SVE/eon-diagnostics.s
@@ -50,3 +50,13 @@ eon z7.d, z8.d, #254
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
// CHECK-NEXT: eon z7.d, z8.d, #254
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+eon z0.d, z0.d, #0x6
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: eon z0.d, z0.d, #0x6
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/eon.s b/test/MC/AArch64/SVE/eon.s
index 9f6dd2655061..d03755d5a94e 100644
--- a/test/MC/AArch64/SVE/eon.s
+++ b/test/MC/AArch64/SVE/eon.s
@@ -54,3 +54,19 @@ eon z0.d, z0.d, #0x6
// CHECK-ENCODING: [0xa0,0xef,0x43,0x05]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: a0 ef 43 05 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+eon z0.d, z0.d, #0x6
+// CHECK-INST: eor z0.d, z0.d, #0xfffffffffffffff9
+// CHECK-ENCODING: [0xa0,0xef,0x43,0x05]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: a0 ef 43 05 <unknown>
diff --git a/test/MC/AArch64/SVE/eor-diagnostics.s b/test/MC/AArch64/SVE/eor-diagnostics.s
index dbed470e7bd4..e8d28737db22 100644
--- a/test/MC/AArch64/SVE/eor-diagnostics.s
+++ b/test/MC/AArch64/SVE/eor-diagnostics.s
@@ -92,3 +92,25 @@ eor p0.b, p0/m, p1.b, p2.b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: eor p0.b, p0/m, p1.b, p2.b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+eor z0.d, z0.d, #0x6
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: eor z0.d, z0.d, #0x6
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.d, p0/z, z7.d
+eor z0.d, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: eor z0.d, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+eor z0.d, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: eor z0.d, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/eor.s b/test/MC/AArch64/SVE/eor.s
index 8c68ec9c7835..f7b4247ad80b 100644
--- a/test/MC/AArch64/SVE/eor.s
+++ b/test/MC/AArch64/SVE/eor.s
@@ -108,3 +108,43 @@ eor p15.b, p15/z, p15.b, p15.b
// CHECK-ENCODING: [0xef,0x7f,0x0f,0x25]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ef 7f 0f 25 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.b, p7/z, z6.b
+// CHECK-INST: movprfx z4.b, p7/z, z6.b
+// CHECK-ENCODING: [0xc4,0x3c,0x10,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c 10 04 <unknown>
+
+eor z4.b, p7/m, z4.b, z31.b
+// CHECK-INST: eor z4.b, p7/m, z4.b, z31.b
+// CHECK-ENCODING: [0xe4,0x1f,0x19,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 1f 19 04 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+eor z4.b, p7/m, z4.b, z31.b
+// CHECK-INST: eor z4.b, p7/m, z4.b, z31.b
+// CHECK-ENCODING: [0xe4,0x1f,0x19,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 1f 19 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+eor z0.d, z0.d, #0x6
+// CHECK-INST: eor z0.d, z0.d, #0x6
+// CHECK-ENCODING: [0x20,0xf8,0x43,0x05]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 f8 43 05 <unknown>
diff --git a/test/MC/AArch64/SVE/eorv-diagnostics.s b/test/MC/AArch64/SVE/eorv-diagnostics.s
index c182615be5d6..6f231a8c6269 100644
--- a/test/MC/AArch64/SVE/eorv-diagnostics.s
+++ b/test/MC/AArch64/SVE/eorv-diagnostics.s
@@ -31,4 +31,19 @@ eorv v0.2d, p7, z31.d
eorv h0, p8, z31.h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
// CHECK-NEXT: eorv h0, p8, z31.h
-// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: \ No newline at end of file
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p7/z, z6.d
+eorv d0, p7, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: eorv d0, p7, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+eorv d0, p7, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: eorv d0, p7, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ext-diagnostics.s b/test/MC/AArch64/SVE/ext-diagnostics.s
index 8f9bee79b85c..23ae5e2ddd5c 100644
--- a/test/MC/AArch64/SVE/ext-diagnostics.s
+++ b/test/MC/AArch64/SVE/ext-diagnostics.s
@@ -31,3 +31,13 @@ ext z0.b, z0.b, z1.b, #256
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 255].
// CHECK-NEXT: ext z0.b, z0.b, z1.b, #256
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.b, p0/z, z6.b
+ext z31.b, z31.b, z0.b, #255
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: ext z31.b, z31.b, z0.b, #255
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ext.s b/test/MC/AArch64/SVE/ext.s
index 2afc5f09771a..632f61eb107c 100644
--- a/test/MC/AArch64/SVE/ext.s
+++ b/test/MC/AArch64/SVE/ext.s
@@ -18,3 +18,19 @@ ext z31.b, z31.b, z0.b, #255
// CHECK-ENCODING: [0x1f,0x1c,0x3f,0x05]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 1f 1c 3f 05 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+ext z31.b, z31.b, z0.b, #255
+// CHECK-INST: ext z31.b, z31.b, z0.b, #255
+// CHECK-ENCODING: [0x1f,0x1c,0x3f,0x05]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 1f 1c 3f 05 <unknown>
diff --git a/test/MC/AArch64/SVE/fabd.s b/test/MC/AArch64/SVE/fabd.s
index 98bbc5050a37..7549f554a95f 100644
--- a/test/MC/AArch64/SVE/fabd.s
+++ b/test/MC/AArch64/SVE/fabd.s
@@ -24,3 +24,31 @@ fabd z0.d, p7/m, z0.d, z31.d
// CHECK-ENCODING: [0xe0,0x9f,0xc8,0x65]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: e0 9f c8 65 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0.d, p7/z, z7.d
+// CHECK-INST: movprfx z0.d, p7/z, z7.d
+// CHECK-ENCODING: [0xe0,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 3c d0 04 <unknown>
+
+fabd z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: fabd z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x9f,0xc8,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 9f c8 65 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+fabd z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: fabd z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x9f,0xc8,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 9f c8 65 <unknown>
diff --git a/test/MC/AArch64/SVE/fabs.s b/test/MC/AArch64/SVE/fabs.s
index c9d4d8405bef..1bd2c833d1cd 100644
--- a/test/MC/AArch64/SVE/fabs.s
+++ b/test/MC/AArch64/SVE/fabs.s
@@ -24,3 +24,31 @@ fabs z31.d, p7/m, z31.d
// CHECK-ENCODING: [0xff,0xbf,0xdc,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff bf dc 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+fabs z4.d, p7/m, z31.d
+// CHECK-INST: fabs z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xdc,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf dc 04 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+fabs z4.d, p7/m, z31.d
+// CHECK-INST: fabs z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xdc,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf dc 04 <unknown>
diff --git a/test/MC/AArch64/SVE/facge-diagnostics.s b/test/MC/AArch64/SVE/facge-diagnostics.s
index 08710681bf4c..a33627aa68ee 100644
--- a/test/MC/AArch64/SVE/facge-diagnostics.s
+++ b/test/MC/AArch64/SVE/facge-diagnostics.s
@@ -9,3 +9,19 @@ facge p0.b, p0/z, z0.b, #0.0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: unexpected floating point literal
// CHECK-NEXT: facge p0.b, p0/z, z0.b, #0.0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+facge p0.d, p0/z, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: facge p0.d, p0/z, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+facge p0.d, p0/z, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: facge p0.d, p0/z, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/facgt-diagnostics.s b/test/MC/AArch64/SVE/facgt-diagnostics.s
index 12c1ed53527a..c92690bc9de8 100644
--- a/test/MC/AArch64/SVE/facgt-diagnostics.s
+++ b/test/MC/AArch64/SVE/facgt-diagnostics.s
@@ -9,3 +9,19 @@ facgt p0.b, p0/z, z0.b, #0.0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: unexpected floating point literal
// CHECK-NEXT: facgt p0.b, p0/z, z0.b, #0.0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+facgt p0.d, p0/z, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: facgt p0.d, p0/z, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+facgt p0.d, p0/z, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: facgt p0.d, p0/z, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/facle-diagnostics.s b/test/MC/AArch64/SVE/facle-diagnostics.s
index 6ecfccffc263..0f3b777308d9 100644
--- a/test/MC/AArch64/SVE/facle-diagnostics.s
+++ b/test/MC/AArch64/SVE/facle-diagnostics.s
@@ -9,3 +9,19 @@ facle p0.b, p0/z, z0.b, #0.0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: unexpected floating point literal
// CHECK-NEXT: facle p0.b, p0/z, z0.b, #0.0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+facle p0.d, p0/z, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: facle p0.d, p0/z, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+facle p0.d, p0/z, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: facle p0.d, p0/z, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/faclt-diagnostics.s b/test/MC/AArch64/SVE/faclt-diagnostics.s
index b0ef736ffcaa..12baa18615d9 100644
--- a/test/MC/AArch64/SVE/faclt-diagnostics.s
+++ b/test/MC/AArch64/SVE/faclt-diagnostics.s
@@ -9,3 +9,19 @@ faclt p0.b, p0/z, z0.b, #0.0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: unexpected floating point literal
// CHECK-NEXT: faclt p0.b, p0/z, z0.b, #0.0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+faclt p0.d, p0/z, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: faclt p0.d, p0/z, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+faclt p0.d, p0/z, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: faclt p0.d, p0/z, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/fadd-diagnostics.s b/test/MC/AArch64/SVE/fadd-diagnostics.s
index be8a85fa49b5..b809e2e72b28 100644
--- a/test/MC/AArch64/SVE/fadd-diagnostics.s
+++ b/test/MC/AArch64/SVE/fadd-diagnostics.s
@@ -68,3 +68,19 @@ fadd z0.h, p8/m, z0.h, z31.h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
// CHECK-NEXT: fadd z0.h, p8/m, z0.h, z31.h
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+fadd z0.d, z1.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fadd z0.d, z1.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+fadd z0.d, z1.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fadd z0.d, z1.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/fadd.s b/test/MC/AArch64/SVE/fadd.s
index 36c3171bba89..10935c37fc05 100644
--- a/test/MC/AArch64/SVE/fadd.s
+++ b/test/MC/AArch64/SVE/fadd.s
@@ -90,3 +90,55 @@ fadd z0.d, z1.d, z31.d
// CHECK-ENCODING: [0x20,0x00,0xdf,0x65]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 20 00 df 65 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p7/z, z6.d
+// CHECK-INST: movprfx z31.d, p7/z, z6.d
+// CHECK-ENCODING: [0xdf,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 3c d0 04 <unknown>
+
+fadd z31.d, p7/m, z31.d, #1.0
+// CHECK-INST: fadd z31.d, p7/m, z31.d, #1.0
+// CHECK-ENCODING: [0x3f,0x9c,0xd8,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 3f 9c d8 65 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+fadd z31.d, p7/m, z31.d, #1.0
+// CHECK-INST: fadd z31.d, p7/m, z31.d, #1.0
+// CHECK-ENCODING: [0x3f,0x9c,0xd8,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 3f 9c d8 65 <unknown>
+
+movprfx z0.d, p7/z, z7.d
+// CHECK-INST: movprfx z0.d, p7/z, z7.d
+// CHECK-ENCODING: [0xe0,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 3c d0 04 <unknown>
+
+fadd z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: fadd z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x9f,0xc0,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 9f c0 65 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+fadd z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: fadd z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x9f,0xc0,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 9f c0 65 <unknown>
diff --git a/test/MC/AArch64/SVE/fadda-diagnostics.s b/test/MC/AArch64/SVE/fadda-diagnostics.s
index bff63367b159..6386707efa5d 100644
--- a/test/MC/AArch64/SVE/fadda-diagnostics.s
+++ b/test/MC/AArch64/SVE/fadda-diagnostics.s
@@ -18,4 +18,19 @@ fadda h0, p8, h0, z31.h
fadda v0.8h, p7, v0.8h, z31.h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: fadda v0.8h, p7, v0.8h, z31.h
-// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: \ No newline at end of file
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p7/z, z6.d
+fadda d0, p7, d0, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fadda d0, p7, d0, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+fadda d0, p7, d0, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fadda d0, p7, d0, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/faddv-diagnostics.s b/test/MC/AArch64/SVE/faddv-diagnostics.s
index 37cb19337d94..f8fa774d8bec 100644
--- a/test/MC/AArch64/SVE/faddv-diagnostics.s
+++ b/test/MC/AArch64/SVE/faddv-diagnostics.s
@@ -17,4 +17,19 @@ faddv h0, p8, z31.h
faddv v0, p7, z31.h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: faddv v0, p7, z31.h
-// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: \ No newline at end of file
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p7/z, z6.d
+faddv d0, p7, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: faddv d0, p7, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+faddv d0, p7, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: faddv d0, p7, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/fcadd.s b/test/MC/AArch64/SVE/fcadd.s
index 5e8c8324c773..9d2398ab25a8 100644
--- a/test/MC/AArch64/SVE/fcadd.s
+++ b/test/MC/AArch64/SVE/fcadd.s
@@ -42,3 +42,31 @@ fcadd z31.d, p7/m, z31.d, z31.d, #270
// CHECK-ENCODING: [0xff,0x9f,0xc1,0x64]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff 9f c1 64 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+fcadd z4.d, p7/m, z4.d, z31.d, #270
+// CHECK-INST: fcadd z4.d, p7/m, z4.d, z31.d, #270
+// CHECK-ENCODING: [0xe4,0x9f,0xc1,0x64]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 9f c1 64 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+fcadd z4.d, p7/m, z4.d, z31.d, #270
+// CHECK-INST: fcadd z4.d, p7/m, z4.d, z31.d, #270
+// CHECK-ENCODING: [0xe4,0x9f,0xc1,0x64]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 9f c1 64 <unknown>
diff --git a/test/MC/AArch64/SVE/fcmeq-diagnostics.s b/test/MC/AArch64/SVE/fcmeq-diagnostics.s
index d2e8dddda3ed..08dc7dc4010c 100644
--- a/test/MC/AArch64/SVE/fcmeq-diagnostics.s
+++ b/test/MC/AArch64/SVE/fcmeq-diagnostics.s
@@ -9,3 +9,31 @@ fcmeq p0.s, p0/z, z0.s, #1.0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: expected floating-point constant #0.0
// CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #1.0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+fcmeq p0.d, p0/z, z0.d, #0.0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcmeq p0.d, p0/z, z0.d, #0.0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+fcmeq p0.d, p0/z, z0.d, #0.0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcmeq p0.d, p0/z, z0.d, #0.0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.d, p0/z, z7.d
+fcmeq p0.d, p0/z, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcmeq p0.d, p0/z, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+fcmeq p0.d, p0/z, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcmeq p0.d, p0/z, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/fcmge-diagnostics.s b/test/MC/AArch64/SVE/fcmge-diagnostics.s
index d6d7b589c861..249d6fc5af12 100644
--- a/test/MC/AArch64/SVE/fcmge-diagnostics.s
+++ b/test/MC/AArch64/SVE/fcmge-diagnostics.s
@@ -9,3 +9,31 @@ fcmge p0.s, p0/z, z0.s, #1.0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: expected floating-point constant #0.0
// CHECK-NEXT: fcmge p0.s, p0/z, z0.s, #1.0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+fcmge p0.d, p0/z, z0.d, #0.0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcmge p0.d, p0/z, z0.d, #0.0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+fcmge p0.d, p0/z, z0.d, #0.0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcmge p0.d, p0/z, z0.d, #0.0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.d, p0/z, z7.d
+fcmge p0.d, p0/z, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcmge p0.d, p0/z, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+fcmge p0.d, p0/z, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcmge p0.d, p0/z, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/fcmgt-diagnostics.s b/test/MC/AArch64/SVE/fcmgt-diagnostics.s
index 4ec876922bb3..d9c42217ad2d 100644
--- a/test/MC/AArch64/SVE/fcmgt-diagnostics.s
+++ b/test/MC/AArch64/SVE/fcmgt-diagnostics.s
@@ -9,3 +9,31 @@ fcmgt p0.s, p0/z, z0.s, #1.0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: expected floating-point constant #0.0
// CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, #1.0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+fcmgt p0.d, p0/z, z0.d, #0.0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, #0.0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+fcmgt p0.d, p0/z, z0.d, #0.0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, #0.0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.d, p0/z, z7.d
+fcmgt p0.d, p0/z, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+fcmgt p0.d, p0/z, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/fcmla-diagnostics.s b/test/MC/AArch64/SVE/fcmla-diagnostics.s
index 560b15f3de90..7ad6db4f1985 100644
--- a/test/MC/AArch64/SVE/fcmla-diagnostics.s
+++ b/test/MC/AArch64/SVE/fcmla-diagnostics.s
@@ -50,3 +50,13 @@ fcmla z0.d, z1.d, z2.d[0], #0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: fcmla z0.d, z1.d, z2.d[0], #0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z21.s, p0/z, z28.s
+fcmla z21.s, z10.s, z5.s[1], #90
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: fcmla z21.s, z10.s, z5.s[1], #90
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/fcmla.s b/test/MC/AArch64/SVE/fcmla.s
index 3a0954e7c8ef..2e4dd7f55357 100644
--- a/test/MC/AArch64/SVE/fcmla.s
+++ b/test/MC/AArch64/SVE/fcmla.s
@@ -102,3 +102,43 @@ fcmla z21.s, z10.s, z5.s[1], #90
// CHECK-ENCODING: [0x55,0x15,0xf5,0x64]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 55 15 f5 64 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+fcmla z4.d, p7/m, z31.d, z31.d, #270
+// CHECK-INST: fcmla z4.d, p7/m, z31.d, z31.d, #270
+// CHECK-ENCODING: [0xe4,0x7f,0xdf,0x64]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 7f df 64 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+fcmla z4.d, p7/m, z31.d, z31.d, #270
+// CHECK-INST: fcmla z4.d, p7/m, z31.d, z31.d, #270
+// CHECK-ENCODING: [0xe4,0x7f,0xdf,0x64]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 7f df 64 <unknown>
+
+movprfx z21, z28
+// CHECK-INST: movprfx z21, z28
+// CHECK-ENCODING: [0x95,0xbf,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 95 bf 20 04 <unknown>
+
+fcmla z21.s, z10.s, z5.s[1], #90
+// CHECK-INST: fcmla z21.s, z10.s, z5.s[1], #90
+// CHECK-ENCODING: [0x55,0x15,0xf5,0x64]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 55 15 f5 64 <unknown>
diff --git a/test/MC/AArch64/SVE/fcmle-diagnostics.s b/test/MC/AArch64/SVE/fcmle-diagnostics.s
index 4ce8fb679e81..8c3dbe9c634d 100644
--- a/test/MC/AArch64/SVE/fcmle-diagnostics.s
+++ b/test/MC/AArch64/SVE/fcmle-diagnostics.s
@@ -9,3 +9,31 @@ fcmle p0.s, p0/z, z0.s, #1.0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: expected floating-point constant #0.0
// CHECK-NEXT: fcmle p0.s, p0/z, z0.s, #1.0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+fcmle p0.d, p0/z, z0.d, #0.0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcmle p0.d, p0/z, z0.d, #0.0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+fcmle p0.d, p0/z, z0.d, #0.0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcmle p0.d, p0/z, z0.d, #0.0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.d, p0/z, z7.d
+fcmle p0.d, p0/z, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcmle p0.d, p0/z, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+fcmle p0.d, p0/z, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcmle p0.d, p0/z, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/fcmlt-diagnostics.s b/test/MC/AArch64/SVE/fcmlt-diagnostics.s
index 1e13a3d2e03a..44a39bb41ac2 100644
--- a/test/MC/AArch64/SVE/fcmlt-diagnostics.s
+++ b/test/MC/AArch64/SVE/fcmlt-diagnostics.s
@@ -9,3 +9,31 @@ fcmlt p0.s, p0/z, z0.s, #1.0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: expected floating-point constant #0.0
// CHECK-NEXT: fcmlt p0.s, p0/z, z0.s, #1.0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+fcmlt p0.d, p0/z, z0.d, #0.0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcmlt p0.d, p0/z, z0.d, #0.0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+fcmlt p0.d, p0/z, z0.d, #0.0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcmlt p0.d, p0/z, z0.d, #0.0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.d, p0/z, z7.d
+fcmlt p0.d, p0/z, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcmlt p0.d, p0/z, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+fcmlt p0.d, p0/z, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcmlt p0.d, p0/z, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/fcmne-diagnostics.s b/test/MC/AArch64/SVE/fcmne-diagnostics.s
index d0e996b4b7bf..8e8173d4c46d 100644
--- a/test/MC/AArch64/SVE/fcmne-diagnostics.s
+++ b/test/MC/AArch64/SVE/fcmne-diagnostics.s
@@ -9,3 +9,31 @@ fcmne p0.s, p0/z, z0.s, #1.0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: expected floating-point constant #0.0
// CHECK-NEXT: fcmne p0.s, p0/z, z0.s, #1.0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+fcmne p0.d, p0/z, z0.d, #0.0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcmne p0.d, p0/z, z0.d, #0.0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+fcmne p0.d, p0/z, z0.d, #0.0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcmne p0.d, p0/z, z0.d, #0.0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.d, p0/z, z7.d
+fcmne p0.d, p0/z, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcmne p0.d, p0/z, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+fcmne p0.d, p0/z, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcmne p0.d, p0/z, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/fcmuo-diagnostics.s b/test/MC/AArch64/SVE/fcmuo-diagnostics.s
index c9a33346b5f0..0905d9b240fe 100644
--- a/test/MC/AArch64/SVE/fcmuo-diagnostics.s
+++ b/test/MC/AArch64/SVE/fcmuo-diagnostics.s
@@ -9,3 +9,19 @@ fcmuo p0.s, p0/z, z0.s, #0.0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: unexpected floating point literal
// CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, #0.0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+fcmuo p0.d, p0/z, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+fcmuo p0.d, p0/z, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/fcpy.s b/test/MC/AArch64/SVE/fcpy.s
index 712f16a6affa..5510e035dd1d 100644
--- a/test/MC/AArch64/SVE/fcpy.s
+++ b/test/MC/AArch64/SVE/fcpy.s
@@ -1554,3 +1554,31 @@ fcpy z0.d, p0/m, #31.00000000
// CHECK-ENCODING: [0xe0,0xc7,0xd0,0x05]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: e0 c7 d0 05 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0.d, p0/z, z7.d
+// CHECK-INST: movprfx z0.d, p0/z, z7.d
+// CHECK-ENCODING: [0xe0,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 20 d0 04 <unknown>
+
+fcpy z0.d, p0/m, #31.00000000
+// CHECK-INST: fmov z0.d, p0/m, #31.00000000
+// CHECK-ENCODING: [0xe0,0xc7,0xd0,0x05]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 c7 d0 05 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+fcpy z0.d, p0/m, #31.00000000
+// CHECK-INST: fmov z0.d, p0/m, #31.00000000
+// CHECK-ENCODING: [0xe0,0xc7,0xd0,0x05]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 c7 d0 05 <unknown>
diff --git a/test/MC/AArch64/SVE/fcvt.s b/test/MC/AArch64/SVE/fcvt.s
index e0e9e7679473..3a0d52471186 100644
--- a/test/MC/AArch64/SVE/fcvt.s
+++ b/test/MC/AArch64/SVE/fcvt.s
@@ -42,3 +42,31 @@ fcvt z0.d, p0/m, z0.s
// CHECK-ENCODING: [0x00,0xa0,0xcb,0x65]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 00 a0 cb 65 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z5.d, p0/z, z7.d
+// CHECK-INST: movprfx z5.d, p0/z, z7.d
+// CHECK-ENCODING: [0xe5,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e5 20 d0 04 <unknown>
+
+fcvt z5.d, p0/m, z0.s
+// CHECK-INST: fcvt z5.d, p0/m, z0.s
+// CHECK-ENCODING: [0x05,0xa0,0xcb,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 05 a0 cb 65 <unknown>
+
+movprfx z5, z7
+// CHECK-INST: movprfx z5, z7
+// CHECK-ENCODING: [0xe5,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e5 bc 20 04 <unknown>
+
+fcvt z5.d, p0/m, z0.s
+// CHECK-INST: fcvt z5.d, p0/m, z0.s
+// CHECK-ENCODING: [0x05,0xa0,0xcb,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 05 a0 cb 65 <unknown>
diff --git a/test/MC/AArch64/SVE/fcvtzs.s b/test/MC/AArch64/SVE/fcvtzs.s
index 40e4c44a98d2..ecb7adc46b84 100644
--- a/test/MC/AArch64/SVE/fcvtzs.s
+++ b/test/MC/AArch64/SVE/fcvtzs.s
@@ -48,3 +48,31 @@ fcvtzs z0.d, p0/m, z0.d
// CHECK-ENCODING: [0x00,0xa0,0xde,0x65]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 00 a0 de 65 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z5.d, p0/z, z7.d
+// CHECK-INST: movprfx z5.d, p0/z, z7.d
+// CHECK-ENCODING: [0xe5,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e5 20 d0 04 <unknown>
+
+fcvtzs z5.d, p0/m, z0.d
+// CHECK-INST: fcvtzs z5.d, p0/m, z0.d
+// CHECK-ENCODING: [0x05,0xa0,0xde,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 05 a0 de 65 <unknown>
+
+movprfx z5, z7
+// CHECK-INST: movprfx z5, z7
+// CHECK-ENCODING: [0xe5,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e5 bc 20 04 <unknown>
+
+fcvtzs z5.d, p0/m, z0.d
+// CHECK-INST: fcvtzs z5.d, p0/m, z0.d
+// CHECK-ENCODING: [0x05,0xa0,0xde,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 05 a0 de 65 <unknown>
diff --git a/test/MC/AArch64/SVE/fcvtzu.s b/test/MC/AArch64/SVE/fcvtzu.s
index 37505d998c5c..cd6993c0f589 100644
--- a/test/MC/AArch64/SVE/fcvtzu.s
+++ b/test/MC/AArch64/SVE/fcvtzu.s
@@ -48,3 +48,31 @@ fcvtzu z0.d, p0/m, z0.d
// CHECK-ENCODING: [0x00,0xa0,0xdf,0x65]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 00 a0 df 65 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z5.d, p0/z, z7.d
+// CHECK-INST: movprfx z5.d, p0/z, z7.d
+// CHECK-ENCODING: [0xe5,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e5 20 d0 04 <unknown>
+
+fcvtzu z5.d, p0/m, z0.d
+// CHECK-INST: fcvtzu z5.d, p0/m, z0.d
+// CHECK-ENCODING: [0x05,0xa0,0xdf,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 05 a0 df 65 <unknown>
+
+movprfx z5, z7
+// CHECK-INST: movprfx z5, z7
+// CHECK-ENCODING: [0xe5,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e5 bc 20 04 <unknown>
+
+fcvtzu z5.d, p0/m, z0.d
+// CHECK-INST: fcvtzu z5.d, p0/m, z0.d
+// CHECK-ENCODING: [0x05,0xa0,0xdf,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 05 a0 df 65 <unknown>
diff --git a/test/MC/AArch64/SVE/fdiv.s b/test/MC/AArch64/SVE/fdiv.s
index 112d202d5abb..9f1d4ddf1b77 100644
--- a/test/MC/AArch64/SVE/fdiv.s
+++ b/test/MC/AArch64/SVE/fdiv.s
@@ -24,3 +24,31 @@ fdiv z0.d, p7/m, z0.d, z31.d
// CHECK-ENCODING: [0xe0,0x9f,0xcd,0x65]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: e0 9f cd 65 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0.d, p7/z, z7.d
+// CHECK-INST: movprfx z0.d, p7/z, z7.d
+// CHECK-ENCODING: [0xe0,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 3c d0 04 <unknown>
+
+fdiv z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: fdiv z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x9f,0xcd,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 9f cd 65 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+fdiv z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: fdiv z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x9f,0xcd,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 9f cd 65 <unknown>
diff --git a/test/MC/AArch64/SVE/fdivr.s b/test/MC/AArch64/SVE/fdivr.s
index 3744dd95c694..37fd871461e3 100644
--- a/test/MC/AArch64/SVE/fdivr.s
+++ b/test/MC/AArch64/SVE/fdivr.s
@@ -24,3 +24,31 @@ fdivr z0.d, p7/m, z0.d, z31.d
// CHECK-ENCODING: [0xe0,0x9f,0xcc,0x65]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: e0 9f cc 65 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0.d, p7/z, z7.d
+// CHECK-INST: movprfx z0.d, p7/z, z7.d
+// CHECK-ENCODING: [0xe0,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 3c d0 04 <unknown>
+
+fdivr z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: fdivr z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x9f,0xcc,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 9f cc 65 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+fdivr z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: fdivr z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x9f,0xcc,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 9f cc 65 <unknown>
diff --git a/test/MC/AArch64/SVE/fdup-diagnostics.s b/test/MC/AArch64/SVE/fdup-diagnostics.s
index 6128bbe85bba..4432393bfa26 100644
--- a/test/MC/AArch64/SVE/fdup-diagnostics.s
+++ b/test/MC/AArch64/SVE/fdup-diagnostics.s
@@ -62,3 +62,19 @@ fdup z0.d, #64.00000000 // r = 5, n = 32
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: expected compatible register or floating-point constant
// CHECK-NEXT: fdup z0.d, #64.00000000 // r = 5, n = 32
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+fdup z0.d, #31.00000000
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fdup z0.d, #31.00000000
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+fdup z0.d, #31.00000000
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fdup z0.d, #31.00000000
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/fexpa-diagnostics.s b/test/MC/AArch64/SVE/fexpa-diagnostics.s
index 2269ae0fdfc2..33964f24d4a6 100644
--- a/test/MC/AArch64/SVE/fexpa-diagnostics.s
+++ b/test/MC/AArch64/SVE/fexpa-diagnostics.s
@@ -12,4 +12,19 @@ fexpa z0.b, z31.b
fexpa z0.s, z31.d
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: fexpa z0.s, z31.d
-// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: \ No newline at end of file
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+fexpa z0.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fexpa z0.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+fexpa z0.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fexpa z0.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/fmad.s b/test/MC/AArch64/SVE/fmad.s
index dbcec437c32b..2cfb6a74156c 100644
--- a/test/MC/AArch64/SVE/fmad.s
+++ b/test/MC/AArch64/SVE/fmad.s
@@ -24,3 +24,31 @@ fmad z0.d, p7/m, z1.d, z31.d
// CHECK-ENCODING: [0x20,0x9c,0xff,0x65]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 20 9c ff 65 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0.d, p7/z, z7.d
+// CHECK-INST: movprfx z0.d, p7/z, z7.d
+// CHECK-ENCODING: [0xe0,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 3c d0 04 <unknown>
+
+fmad z0.d, p7/m, z1.d, z31.d
+// CHECK-INST: fmad z0.d, p7/m, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0x9c,0xff,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 9c ff 65 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+fmad z0.d, p7/m, z1.d, z31.d
+// CHECK-INST: fmad z0.d, p7/m, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0x9c,0xff,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 9c ff 65 <unknown>
diff --git a/test/MC/AArch64/SVE/fmax.s b/test/MC/AArch64/SVE/fmax.s
index 0e4a6d4c196e..bcdc90403949 100644
--- a/test/MC/AArch64/SVE/fmax.s
+++ b/test/MC/AArch64/SVE/fmax.s
@@ -66,3 +66,55 @@ fmax z0.d, p7/m, z0.d, z31.d
// CHECK-ENCODING: [0xe0,0x9f,0xc6,0x65]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: e0 9f c6 65 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0.d, p0/z, z7.d
+// CHECK-INST: movprfx z0.d, p0/z, z7.d
+// CHECK-ENCODING: [0xe0,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 20 d0 04 <unknown>
+
+fmax z0.d, p0/m, z0.d, #0.0
+// CHECK-INST: fmax z0.d, p0/m, z0.d, #0.0
+// CHECK-ENCODING: [0x00,0x80,0xde,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 00 80 de 65 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+fmax z0.d, p0/m, z0.d, #0.0
+// CHECK-INST: fmax z0.d, p0/m, z0.d, #0.0
+// CHECK-ENCODING: [0x00,0x80,0xde,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 00 80 de 65 <unknown>
+
+movprfx z0.d, p7/z, z7.d
+// CHECK-INST: movprfx z0.d, p7/z, z7.d
+// CHECK-ENCODING: [0xe0,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 3c d0 04 <unknown>
+
+fmax z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: fmax z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x9f,0xc6,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 9f c6 65 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+fmax z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: fmax z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x9f,0xc6,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 9f c6 65 <unknown>
diff --git a/test/MC/AArch64/SVE/fmaxnm.s b/test/MC/AArch64/SVE/fmaxnm.s
index 3016f63154a2..5c44861ea954 100644
--- a/test/MC/AArch64/SVE/fmaxnm.s
+++ b/test/MC/AArch64/SVE/fmaxnm.s
@@ -72,3 +72,55 @@ fmaxnm z0.d, p7/m, z0.d, z31.d
// CHECK-ENCODING: [0xe0,0x9f,0xc4,0x65]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: e0 9f c4 65 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p7/z, z6.d
+// CHECK-INST: movprfx z31.d, p7/z, z6.d
+// CHECK-ENCODING: [0xdf,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 3c d0 04 <unknown>
+
+fmaxnm z31.d, p7/m, z31.d, #1.0
+// CHECK-INST: fmaxnm z31.d, p7/m, z31.d, #1.0
+// CHECK-ENCODING: [0x3f,0x9c,0xdc,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 3f 9c dc 65 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+fmaxnm z31.d, p7/m, z31.d, #1.0
+// CHECK-INST: fmaxnm z31.d, p7/m, z31.d, #1.0
+// CHECK-ENCODING: [0x3f,0x9c,0xdc,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 3f 9c dc 65 <unknown>
+
+movprfx z0.d, p7/z, z7.d
+// CHECK-INST: movprfx z0.d, p7/z, z7.d
+// CHECK-ENCODING: [0xe0,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 3c d0 04 <unknown>
+
+fmaxnm z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: fmaxnm z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x9f,0xc4,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 9f c4 65 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+fmaxnm z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: fmaxnm z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x9f,0xc4,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 9f c4 65 <unknown>
diff --git a/test/MC/AArch64/SVE/fmaxnmv-diagnostics.s b/test/MC/AArch64/SVE/fmaxnmv-diagnostics.s
index 2eb6191b3893..094831296559 100644
--- a/test/MC/AArch64/SVE/fmaxnmv-diagnostics.s
+++ b/test/MC/AArch64/SVE/fmaxnmv-diagnostics.s
@@ -17,4 +17,19 @@ fmaxnmv h0, p8, z31.h
fmaxnmv v0, p7, z31.h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: fmaxnmv v0, p7, z31.h
-// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: \ No newline at end of file
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p7/z, z6.d
+fmaxnmv d0, p7, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fmaxnmv d0, p7, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+fmaxnmv d0, p7, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fmaxnmv d0, p7, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/fmaxv-diagnostics.s b/test/MC/AArch64/SVE/fmaxv-diagnostics.s
index d679734e2d62..0923dc167f8e 100644
--- a/test/MC/AArch64/SVE/fmaxv-diagnostics.s
+++ b/test/MC/AArch64/SVE/fmaxv-diagnostics.s
@@ -17,4 +17,19 @@ fmaxv h0, p8, z31.h
fmaxv v0, p7, z31.h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: fmaxv v0, p7, z31.h
-// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: \ No newline at end of file
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p7/z, z6.d
+fmaxv d0, p7, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fmaxv d0, p7, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+fmaxv d0, p7, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fmaxv d0, p7, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/fmin.s b/test/MC/AArch64/SVE/fmin.s
index 2db4d5ca842a..1b0cb4e589c3 100644
--- a/test/MC/AArch64/SVE/fmin.s
+++ b/test/MC/AArch64/SVE/fmin.s
@@ -72,3 +72,55 @@ fmin z0.d, p7/m, z0.d, z31.d
// CHECK-ENCODING: [0xe0,0x9f,0xc7,0x65]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: e0 9f c7 65 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p7/z, z6.d
+// CHECK-INST: movprfx z31.d, p7/z, z6.d
+// CHECK-ENCODING: [0xdf,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 3c d0 04 <unknown>
+
+fmin z31.d, p7/m, z31.d, #1.0
+// CHECK-INST: fmin z31.d, p7/m, z31.d, #1.0
+// CHECK-ENCODING: [0x3f,0x9c,0xdf,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 3f 9c df 65 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+fmin z31.d, p7/m, z31.d, #1.0
+// CHECK-INST: fmin z31.d, p7/m, z31.d, #1.0
+// CHECK-ENCODING: [0x3f,0x9c,0xdf,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 3f 9c df 65 <unknown>
+
+movprfx z0.d, p7/z, z7.d
+// CHECK-INST: movprfx z0.d, p7/z, z7.d
+// CHECK-ENCODING: [0xe0,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 3c d0 04 <unknown>
+
+fmin z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: fmin z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x9f,0xc7,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 9f c7 65 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+fmin z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: fmin z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x9f,0xc7,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 9f c7 65 <unknown>
diff --git a/test/MC/AArch64/SVE/fminnm.s b/test/MC/AArch64/SVE/fminnm.s
index 492d5898518c..482b7f561ef2 100644
--- a/test/MC/AArch64/SVE/fminnm.s
+++ b/test/MC/AArch64/SVE/fminnm.s
@@ -72,3 +72,55 @@ fminnm z0.d, p7/m, z0.d, z31.d
// CHECK-ENCODING: [0xe0,0x9f,0xc5,0x65]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: e0 9f c5 65 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p7/z, z6.d
+// CHECK-INST: movprfx z31.d, p7/z, z6.d
+// CHECK-ENCODING: [0xdf,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 3c d0 04 <unknown>
+
+fminnm z31.d, p7/m, z31.d, #1.0
+// CHECK-INST: fminnm z31.d, p7/m, z31.d, #1.0
+// CHECK-ENCODING: [0x3f,0x9c,0xdd,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 3f 9c dd 65 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+fminnm z31.d, p7/m, z31.d, #1.0
+// CHECK-INST: fminnm z31.d, p7/m, z31.d, #1.0
+// CHECK-ENCODING: [0x3f,0x9c,0xdd,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 3f 9c dd 65 <unknown>
+
+movprfx z0.d, p7/z, z7.d
+// CHECK-INST: movprfx z0.d, p7/z, z7.d
+// CHECK-ENCODING: [0xe0,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 3c d0 04 <unknown>
+
+fminnm z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: fminnm z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x9f,0xc5,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 9f c5 65 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+fminnm z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: fminnm z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x9f,0xc5,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 9f c5 65 <unknown>
diff --git a/test/MC/AArch64/SVE/fminnmv-diagnostics.s b/test/MC/AArch64/SVE/fminnmv-diagnostics.s
index d613d053a5f3..bf9062b5b66d 100644
--- a/test/MC/AArch64/SVE/fminnmv-diagnostics.s
+++ b/test/MC/AArch64/SVE/fminnmv-diagnostics.s
@@ -17,4 +17,19 @@ fminnmv h0, p8, z31.h
fminnmv v0, p7, z31.h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: fminnmv v0, p7, z31.h
-// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: \ No newline at end of file
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p7/z, z6.d
+fminnmv d0, p7, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fminnmv d0, p7, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+fminnmv d0, p7, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fminnmv d0, p7, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/fminv-diagnostics.s b/test/MC/AArch64/SVE/fminv-diagnostics.s
index e1dc7715396c..793d7f7562b2 100644
--- a/test/MC/AArch64/SVE/fminv-diagnostics.s
+++ b/test/MC/AArch64/SVE/fminv-diagnostics.s
@@ -17,4 +17,19 @@ fminv h0, p8, z31.h
fminv v0, p7, z31.h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: fminv v0, p7, z31.h
-// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: \ No newline at end of file
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p7/z, z6.d
+fminv d0, p7, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fminv d0, p7, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+fminv d0, p7, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fminv d0, p7, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/fmla-diagnostics.s b/test/MC/AArch64/SVE/fmla-diagnostics.s
index 43b452f1e7be..e9892590c232 100644
--- a/test/MC/AArch64/SVE/fmla-diagnostics.s
+++ b/test/MC/AArch64/SVE/fmla-diagnostics.s
@@ -70,3 +70,13 @@ fmla z0.d, z1.d, z2.d[2]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 1].
// CHECK-NEXT: fmla z0.d, z1.d, z2.d[2]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+fmla z0.d, z1.d, z7.d[1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: fmla z0.d, z1.d, z7.d[1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/fmla.s b/test/MC/AArch64/SVE/fmla.s
index 3b1f54c37598..a28204da0a14 100644
--- a/test/MC/AArch64/SVE/fmla.s
+++ b/test/MC/AArch64/SVE/fmla.s
@@ -42,3 +42,43 @@ fmla z0.d, z1.d, z7.d[1]
// CHECK-ENCODING: [0x20,0x00,0xf7,0x64]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 20 00 f7 64 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0.d, p7/z, z7.d
+// CHECK-INST: movprfx z0.d, p7/z, z7.d
+// CHECK-ENCODING: [0xe0,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 3c d0 04 <unknown>
+
+fmla z0.d, p7/m, z1.d, z31.d
+// CHECK-INST: fmla z0.d, p7/m, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0x1c,0xff,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 1c ff 65 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+fmla z0.d, p7/m, z1.d, z31.d
+// CHECK-INST: fmla z0.d, p7/m, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0x1c,0xff,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 1c ff 65 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+fmla z0.d, z1.d, z7.d[1]
+// CHECK-INST: fmla z0.d, z1.d, z7.d[1]
+// CHECK-ENCODING: [0x20,0x00,0xf7,0x64]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 00 f7 64 <unknown>
diff --git a/test/MC/AArch64/SVE/fmls-diagnostics.s b/test/MC/AArch64/SVE/fmls-diagnostics.s
index f7734f8fe9a4..8c2d175642c8 100644
--- a/test/MC/AArch64/SVE/fmls-diagnostics.s
+++ b/test/MC/AArch64/SVE/fmls-diagnostics.s
@@ -70,3 +70,13 @@ fmls z0.d, z1.d, z2.d[2]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 1].
// CHECK-NEXT: fmls z0.d, z1.d, z2.d[2]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+fmls z0.d, z1.d, z7.d[1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: fmls z0.d, z1.d, z7.d[1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/fmls.s b/test/MC/AArch64/SVE/fmls.s
index c337d8c3aec2..390a3128fc44 100644
--- a/test/MC/AArch64/SVE/fmls.s
+++ b/test/MC/AArch64/SVE/fmls.s
@@ -42,3 +42,43 @@ fmls z0.d, z1.d, z7.d[1]
// CHECK-ENCODING: [0x20,0x04,0xf7,0x64]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 20 04 f7 64 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0.d, p7/z, z7.d
+// CHECK-INST: movprfx z0.d, p7/z, z7.d
+// CHECK-ENCODING: [0xe0,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 3c d0 04 <unknown>
+
+fmls z0.d, p7/m, z1.d, z31.d
+// CHECK-INST: fmls z0.d, p7/m, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0x3c,0xff,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 3c ff 65 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+fmls z0.d, p7/m, z1.d, z31.d
+// CHECK-INST: fmls z0.d, p7/m, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0x3c,0xff,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 3c ff 65 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+fmls z0.d, z1.d, z7.d[1]
+// CHECK-INST: fmls z0.d, z1.d, z7.d[1]
+// CHECK-ENCODING: [0x20,0x04,0xf7,0x64]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 04 f7 64 <unknown>
diff --git a/test/MC/AArch64/SVE/fmov-diagnostics.s b/test/MC/AArch64/SVE/fmov-diagnostics.s
index 3225ff1b1581..fcf53644b50c 100644
--- a/test/MC/AArch64/SVE/fmov-diagnostics.s
+++ b/test/MC/AArch64/SVE/fmov-diagnostics.s
@@ -140,3 +140,19 @@ fmov z0.d, p0/m, #64.00000000 // r = 5, n = 32
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: expected compatible register or floating-point constant
// CHECK-NEXT: fmov z0.d, p0/m, #64.00000000 // r = 5, n = 32
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+fmov z0.d, #31.00000000
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fmov z0.d, #31.00000000
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+fmov z0.d, #31.00000000
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fmov z0.d, #31.00000000
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/fmov.s b/test/MC/AArch64/SVE/fmov.s
index 72bac32ba49c..abd1044bc163 100644
--- a/test/MC/AArch64/SVE/fmov.s
+++ b/test/MC/AArch64/SVE/fmov.s
@@ -1596,3 +1596,31 @@ fmov z0.d, p0/m, #31.00000000
// CHECK-ENCODING: [0xe0,0xc7,0xd0,0x05]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: e0 c7 d0 05 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0.d, p0/z, z7.d
+// CHECK-INST: movprfx z0.d, p0/z, z7.d
+// CHECK-ENCODING: [0xe0,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 20 d0 04 <unknown>
+
+fmov z0.d, p0/m, #31.00000000
+// CHECK-INST: fmov z0.d, p0/m, #31.00000000
+// CHECK-ENCODING: [0xe0,0xc7,0xd0,0x05]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 c7 d0 05 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+fmov z0.d, p0/m, #31.00000000
+// CHECK-INST: fmov z0.d, p0/m, #31.00000000
+// CHECK-ENCODING: [0xe0,0xc7,0xd0,0x05]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 c7 d0 05 <unknown>
diff --git a/test/MC/AArch64/SVE/fmsb.s b/test/MC/AArch64/SVE/fmsb.s
index 4e34073ff107..c1203e2f1f38 100644
--- a/test/MC/AArch64/SVE/fmsb.s
+++ b/test/MC/AArch64/SVE/fmsb.s
@@ -24,3 +24,31 @@ fmsb z0.d, p7/m, z1.d, z31.d
// CHECK-ENCODING: [0x20,0xbc,0xff,0x65]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 20 bc ff 65 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0.d, p7/z, z7.d
+// CHECK-INST: movprfx z0.d, p7/z, z7.d
+// CHECK-ENCODING: [0xe0,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 3c d0 04 <unknown>
+
+fmsb z0.d, p7/m, z1.d, z31.d
+// CHECK-INST: fmsb z0.d, p7/m, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0xbc,0xff,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 bc ff 65 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+fmsb z0.d, p7/m, z1.d, z31.d
+// CHECK-INST: fmsb z0.d, p7/m, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0xbc,0xff,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 bc ff 65 <unknown>
diff --git a/test/MC/AArch64/SVE/fmul-diagnostics.s b/test/MC/AArch64/SVE/fmul-diagnostics.s
index 55bfbd7cef68..be91dc312f36 100644
--- a/test/MC/AArch64/SVE/fmul-diagnostics.s
+++ b/test/MC/AArch64/SVE/fmul-diagnostics.s
@@ -132,3 +132,31 @@ fmul z0.h, p8/m, z0.h, z31.h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
// CHECK-NEXT: fmul z0.h, p8/m, z0.h, z31.h
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+fmul z0.d, z1.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fmul z0.d, z1.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+fmul z0.d, z1.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fmul z0.d, z1.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31.d, p0/z, z6.d
+fmul z31.d, z31.d, z15.d[1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fmul z31.d, z31.d, z15.d[1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+fmul z31.d, z31.d, z15.d[1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fmul z31.d, z31.d, z15.d[1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/fmul.s b/test/MC/AArch64/SVE/fmul.s
index fd75166319bf..3cf35f33f8ca 100644
--- a/test/MC/AArch64/SVE/fmul.s
+++ b/test/MC/AArch64/SVE/fmul.s
@@ -120,3 +120,55 @@ fmul z0.d, z1.d, z31.d
// CHECK-ENCODING: [0x20,0x08,0xdf,0x65]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 20 08 df 65 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p7/z, z6.d
+// CHECK-INST: movprfx z31.d, p7/z, z6.d
+// CHECK-ENCODING: [0xdf,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 3c d0 04 <unknown>
+
+fmul z31.d, p7/m, z31.d, #2.0
+// CHECK-INST: fmul z31.d, p7/m, z31.d, #2.0
+// CHECK-ENCODING: [0x3f,0x9c,0xda,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 3f 9c da 65 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+fmul z31.d, p7/m, z31.d, #2.0
+// CHECK-INST: fmul z31.d, p7/m, z31.d, #2.0
+// CHECK-ENCODING: [0x3f,0x9c,0xda,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 3f 9c da 65 <unknown>
+
+movprfx z0.d, p7/z, z7.d
+// CHECK-INST: movprfx z0.d, p7/z, z7.d
+// CHECK-ENCODING: [0xe0,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 3c d0 04 <unknown>
+
+fmul z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: fmul z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x9f,0xc2,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 9f c2 65 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+fmul z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: fmul z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x9f,0xc2,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 9f c2 65 <unknown>
diff --git a/test/MC/AArch64/SVE/fmulx.s b/test/MC/AArch64/SVE/fmulx.s
index a1c8e6b93258..0c49421fddbc 100644
--- a/test/MC/AArch64/SVE/fmulx.s
+++ b/test/MC/AArch64/SVE/fmulx.s
@@ -24,3 +24,31 @@ fmulx z0.d, p7/m, z0.d, z31.d
// CHECK-ENCODING: [0xe0,0x9f,0xca,0x65]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: e0 9f ca 65 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0.d, p7/z, z7.d
+// CHECK-INST: movprfx z0.d, p7/z, z7.d
+// CHECK-ENCODING: [0xe0,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 3c d0 04 <unknown>
+
+fmulx z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: fmulx z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x9f,0xca,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 9f ca 65 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+fmulx z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: fmulx z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x9f,0xca,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 9f ca 65 <unknown>
diff --git a/test/MC/AArch64/SVE/fneg.s b/test/MC/AArch64/SVE/fneg.s
index 9b9ca9041fb8..47f833e9b81c 100644
--- a/test/MC/AArch64/SVE/fneg.s
+++ b/test/MC/AArch64/SVE/fneg.s
@@ -24,3 +24,31 @@ fneg z31.d, p7/m, z31.d
// CHECK-ENCODING: [0xff,0xbf,0xdd,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff bf dd 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+fneg z4.d, p7/m, z31.d
+// CHECK-INST: fneg z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xdd,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf dd 04 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+fneg z4.d, p7/m, z31.d
+// CHECK-INST: fneg z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xdd,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf dd 04 <unknown>
diff --git a/test/MC/AArch64/SVE/fnmad.s b/test/MC/AArch64/SVE/fnmad.s
index 6bb736c9eb42..3bec7b6be884 100644
--- a/test/MC/AArch64/SVE/fnmad.s
+++ b/test/MC/AArch64/SVE/fnmad.s
@@ -24,3 +24,31 @@ fnmad z0.d, p7/m, z1.d, z31.d
// CHECK-ENCODING: [0x20,0xdc,0xff,0x65]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 20 dc ff 65 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0.d, p7/z, z7.d
+// CHECK-INST: movprfx z0.d, p7/z, z7.d
+// CHECK-ENCODING: [0xe0,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 3c d0 04 <unknown>
+
+fnmad z0.d, p7/m, z1.d, z31.d
+// CHECK-INST: fnmad z0.d, p7/m, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0xdc,0xff,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 dc ff 65 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+fnmad z0.d, p7/m, z1.d, z31.d
+// CHECK-INST: fnmad z0.d, p7/m, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0xdc,0xff,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 dc ff 65 <unknown>
diff --git a/test/MC/AArch64/SVE/fnmla.s b/test/MC/AArch64/SVE/fnmla.s
index 49d443f26103..aa3eb3be9195 100644
--- a/test/MC/AArch64/SVE/fnmla.s
+++ b/test/MC/AArch64/SVE/fnmla.s
@@ -24,3 +24,31 @@ fnmla z0.d, p7/m, z1.d, z31.d
// CHECK-ENCODING: [0x20,0x5c,0xff,0x65]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 20 5c ff 65 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0.d, p7/z, z7.d
+// CHECK-INST: movprfx z0.d, p7/z, z7.d
+// CHECK-ENCODING: [0xe0,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 3c d0 04 <unknown>
+
+fnmla z0.d, p7/m, z1.d, z31.d
+// CHECK-INST: fnmla z0.d, p7/m, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0x5c,0xff,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 5c ff 65 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+fnmla z0.d, p7/m, z1.d, z31.d
+// CHECK-INST: fnmla z0.d, p7/m, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0x5c,0xff,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 5c ff 65 <unknown>
diff --git a/test/MC/AArch64/SVE/fnmls.s b/test/MC/AArch64/SVE/fnmls.s
index 438fbaeed6d0..90bde2edc4ce 100644
--- a/test/MC/AArch64/SVE/fnmls.s
+++ b/test/MC/AArch64/SVE/fnmls.s
@@ -24,3 +24,31 @@ fnmls z0.d, p7/m, z1.d, z31.d
// CHECK-ENCODING: [0x20,0x7c,0xff,0x65]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 20 7c ff 65 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0.d, p7/z, z7.d
+// CHECK-INST: movprfx z0.d, p7/z, z7.d
+// CHECK-ENCODING: [0xe0,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 3c d0 04 <unknown>
+
+fnmls z0.d, p7/m, z1.d, z31.d
+// CHECK-INST: fnmls z0.d, p7/m, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0x7c,0xff,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 7c ff 65 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+fnmls z0.d, p7/m, z1.d, z31.d
+// CHECK-INST: fnmls z0.d, p7/m, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0x7c,0xff,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 7c ff 65 <unknown>
diff --git a/test/MC/AArch64/SVE/fnmsb.s b/test/MC/AArch64/SVE/fnmsb.s
index f06de48afbf1..a1767c4091cf 100644
--- a/test/MC/AArch64/SVE/fnmsb.s
+++ b/test/MC/AArch64/SVE/fnmsb.s
@@ -24,3 +24,31 @@ fnmsb z0.d, p7/m, z1.d, z31.d
// CHECK-ENCODING: [0x20,0xfc,0xff,0x65]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 20 fc ff 65 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0.d, p7/z, z7.d
+// CHECK-INST: movprfx z0.d, p7/z, z7.d
+// CHECK-ENCODING: [0xe0,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 3c d0 04 <unknown>
+
+fnmsb z0.d, p7/m, z1.d, z31.d
+// CHECK-INST: fnmsb z0.d, p7/m, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0xfc,0xff,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 fc ff 65 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+fnmsb z0.d, p7/m, z1.d, z31.d
+// CHECK-INST: fnmsb z0.d, p7/m, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0xfc,0xff,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 fc ff 65 <unknown>
diff --git a/test/MC/AArch64/SVE/frecpe-diagnostics.s b/test/MC/AArch64/SVE/frecpe-diagnostics.s
index 80467f26f5a3..6457ad0837bc 100644
--- a/test/MC/AArch64/SVE/frecpe-diagnostics.s
+++ b/test/MC/AArch64/SVE/frecpe-diagnostics.s
@@ -3,4 +3,19 @@
frecpe z0.b, z31.b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: frecpe z0.b, z31.b
-// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: \ No newline at end of file
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+frecpe z0.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: frecpe z0.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+frecpe z0.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: frecpe z0.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/frecps-diagnostics.s b/test/MC/AArch64/SVE/frecps-diagnostics.s
index 7de58cff80fa..ec2d6c3954c4 100644
--- a/test/MC/AArch64/SVE/frecps-diagnostics.s
+++ b/test/MC/AArch64/SVE/frecps-diagnostics.s
@@ -13,3 +13,19 @@ frecps z0.h, z1.s, z2.s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: frecps z0.h, z1.s, z2.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+frecps z0.d, z1.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: frecps z0.d, z1.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+frecps z0.d, z1.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: frecps z0.d, z1.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/frecpx.s b/test/MC/AArch64/SVE/frecpx.s
index 49226abc33b8..a044703ab714 100644
--- a/test/MC/AArch64/SVE/frecpx.s
+++ b/test/MC/AArch64/SVE/frecpx.s
@@ -24,3 +24,31 @@ frecpx z31.d, p7/m, z31.d
// CHECK-ENCODING: [0xff,0xbf,0xcc,0x65]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff bf cc 65 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+frecpx z4.d, p7/m, z31.d
+// CHECK-INST: frecpx z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xcc,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf cc 65 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+frecpx z4.d, p7/m, z31.d
+// CHECK-INST: frecpx z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xcc,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf cc 65 <unknown>
diff --git a/test/MC/AArch64/SVE/frinta.s b/test/MC/AArch64/SVE/frinta.s
index ea7a48a29b9b..c89e1618ae7b 100644
--- a/test/MC/AArch64/SVE/frinta.s
+++ b/test/MC/AArch64/SVE/frinta.s
@@ -24,3 +24,31 @@ frinta z31.d, p7/m, z31.d
// CHECK-ENCODING: [0xff,0xbf,0xc4,0x65]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff bf c4 65 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+frinta z4.d, p7/m, z31.d
+// CHECK-INST: frinta z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xc4,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf c4 65 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+frinta z4.d, p7/m, z31.d
+// CHECK-INST: frinta z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xc4,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf c4 65 <unknown>
diff --git a/test/MC/AArch64/SVE/frinti.s b/test/MC/AArch64/SVE/frinti.s
index 7fde35c4a184..a05cc9fe7b46 100644
--- a/test/MC/AArch64/SVE/frinti.s
+++ b/test/MC/AArch64/SVE/frinti.s
@@ -24,3 +24,31 @@ frinti z31.d, p7/m, z31.d
// CHECK-ENCODING: [0xff,0xbf,0xc7,0x65]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff bf c7 65 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+frinti z4.d, p7/m, z31.d
+// CHECK-INST: frinti z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xc7,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf c7 65 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+frinti z4.d, p7/m, z31.d
+// CHECK-INST: frinti z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xc7,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf c7 65 <unknown>
diff --git a/test/MC/AArch64/SVE/frintm.s b/test/MC/AArch64/SVE/frintm.s
index b33e922bbd5f..e085683f9d17 100644
--- a/test/MC/AArch64/SVE/frintm.s
+++ b/test/MC/AArch64/SVE/frintm.s
@@ -24,3 +24,31 @@ frintm z31.d, p7/m, z31.d
// CHECK-ENCODING: [0xff,0xbf,0xc2,0x65]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff bf c2 65 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+frintm z4.d, p7/m, z31.d
+// CHECK-INST: frintm z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xc2,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf c2 65 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+frintm z4.d, p7/m, z31.d
+// CHECK-INST: frintm z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xc2,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf c2 65 <unknown>
diff --git a/test/MC/AArch64/SVE/frintn.s b/test/MC/AArch64/SVE/frintn.s
index d19f85520644..1318c8b88318 100644
--- a/test/MC/AArch64/SVE/frintn.s
+++ b/test/MC/AArch64/SVE/frintn.s
@@ -24,3 +24,31 @@ frintn z31.d, p7/m, z31.d
// CHECK-ENCODING: [0xff,0xbf,0xc0,0x65]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff bf c0 65 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+frintn z4.d, p7/m, z31.d
+// CHECK-INST: frintn z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xc0,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf c0 65 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+frintn z4.d, p7/m, z31.d
+// CHECK-INST: frintn z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xc0,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf c0 65 <unknown>
diff --git a/test/MC/AArch64/SVE/frintp.s b/test/MC/AArch64/SVE/frintp.s
index 12fce742c798..a36831ecc2dd 100644
--- a/test/MC/AArch64/SVE/frintp.s
+++ b/test/MC/AArch64/SVE/frintp.s
@@ -24,3 +24,31 @@ frintp z31.d, p7/m, z31.d
// CHECK-ENCODING: [0xff,0xbf,0xc1,0x65]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff bf c1 65 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+frintp z4.d, p7/m, z31.d
+// CHECK-INST: frintp z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xc1,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf c1 65 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+frintp z4.d, p7/m, z31.d
+// CHECK-INST: frintp z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xc1,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf c1 65 <unknown>
diff --git a/test/MC/AArch64/SVE/frintx.s b/test/MC/AArch64/SVE/frintx.s
index b33f4281d3d5..60244a9cb334 100644
--- a/test/MC/AArch64/SVE/frintx.s
+++ b/test/MC/AArch64/SVE/frintx.s
@@ -24,3 +24,31 @@ frintx z31.d, p7/m, z31.d
// CHECK-ENCODING: [0xff,0xbf,0xc6,0x65]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff bf c6 65 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+frintx z4.d, p7/m, z31.d
+// CHECK-INST: frintx z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xc6,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf c6 65 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+frintx z4.d, p7/m, z31.d
+// CHECK-INST: frintx z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xc6,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf c6 65 <unknown>
diff --git a/test/MC/AArch64/SVE/frintz.s b/test/MC/AArch64/SVE/frintz.s
index 22e2aef98b8f..af794b79a2f2 100644
--- a/test/MC/AArch64/SVE/frintz.s
+++ b/test/MC/AArch64/SVE/frintz.s
@@ -24,3 +24,31 @@ frintz z31.d, p7/m, z31.d
// CHECK-ENCODING: [0xff,0xbf,0xc3,0x65]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff bf c3 65 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+frintz z4.d, p7/m, z31.d
+// CHECK-INST: frintz z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xc3,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf c3 65 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+frintz z4.d, p7/m, z31.d
+// CHECK-INST: frintz z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xc3,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf c3 65 <unknown>
diff --git a/test/MC/AArch64/SVE/frsqrte-diagnostics.s b/test/MC/AArch64/SVE/frsqrte-diagnostics.s
index b7325e164aa1..b38c9370b85b 100644
--- a/test/MC/AArch64/SVE/frsqrte-diagnostics.s
+++ b/test/MC/AArch64/SVE/frsqrte-diagnostics.s
@@ -3,4 +3,19 @@
frsqrte z0.b, z31.b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: frsqrte z0.b, z31.b
-// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: \ No newline at end of file
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+frsqrte z0.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: frsqrte z0.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+frsqrte z0.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: frsqrte z0.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/frsqrts-diagnostics.s b/test/MC/AArch64/SVE/frsqrts-diagnostics.s
index 07dde46ad459..2032e74bfc28 100644
--- a/test/MC/AArch64/SVE/frsqrts-diagnostics.s
+++ b/test/MC/AArch64/SVE/frsqrts-diagnostics.s
@@ -13,3 +13,19 @@ frsqrts z0.h, z1.s, z2.s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: frsqrts z0.h, z1.s, z2.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+frsqrts z0.d, z1.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: frsqrts z0.d, z1.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+frsqrts z0.d, z1.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: frsqrts z0.d, z1.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/fscale.s b/test/MC/AArch64/SVE/fscale.s
index 0ce3d7ecb9a8..ab928b4c77f1 100644
--- a/test/MC/AArch64/SVE/fscale.s
+++ b/test/MC/AArch64/SVE/fscale.s
@@ -24,3 +24,31 @@ fscale z0.d, p7/m, z0.d, z31.d
// CHECK-ENCODING: [0xe0,0x9f,0xc9,0x65]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: e0 9f c9 65 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0.d, p7/z, z7.d
+// CHECK-INST: movprfx z0.d, p7/z, z7.d
+// CHECK-ENCODING: [0xe0,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 3c d0 04 <unknown>
+
+fscale z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: fscale z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x9f,0xc9,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 9f c9 65 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+fscale z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: fscale z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x9f,0xc9,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 9f c9 65 <unknown>
diff --git a/test/MC/AArch64/SVE/fsqrt.s b/test/MC/AArch64/SVE/fsqrt.s
index 949faba227b6..b72d2438b33f 100644
--- a/test/MC/AArch64/SVE/fsqrt.s
+++ b/test/MC/AArch64/SVE/fsqrt.s
@@ -24,3 +24,31 @@ fsqrt z31.d, p7/m, z31.d
// CHECK-ENCODING: [0xff,0xbf,0xcd,0x65]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff bf cd 65 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+fsqrt z4.d, p7/m, z31.d
+// CHECK-INST: fsqrt z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xcd,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf cd 65 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+fsqrt z4.d, p7/m, z31.d
+// CHECK-INST: fsqrt z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xcd,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf cd 65 <unknown>
diff --git a/test/MC/AArch64/SVE/fsub-diagnostics.s b/test/MC/AArch64/SVE/fsub-diagnostics.s
index 27d1b3f7b918..41b36bdbea4d 100644
--- a/test/MC/AArch64/SVE/fsub-diagnostics.s
+++ b/test/MC/AArch64/SVE/fsub-diagnostics.s
@@ -68,3 +68,19 @@ fsub z0.h, p8/m, z0.h, z31.h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
// CHECK-NEXT: fsub z0.h, p8/m, z0.h, z31.h
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+fsub z0.d, z1.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fsub z0.d, z1.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+fsub z0.d, z1.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fsub z0.d, z1.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/fsub.s b/test/MC/AArch64/SVE/fsub.s
index 8e96cbb65ad0..49d7448e8574 100644
--- a/test/MC/AArch64/SVE/fsub.s
+++ b/test/MC/AArch64/SVE/fsub.s
@@ -90,3 +90,55 @@ fsub z0.d, z1.d, z31.d
// CHECK-ENCODING: [0x20,0x04,0xdf,0x65]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 20 04 df 65 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p7/z, z6.d
+// CHECK-INST: movprfx z31.d, p7/z, z6.d
+// CHECK-ENCODING: [0xdf,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 3c d0 04 <unknown>
+
+fsub z31.d, p7/m, z31.d, #1.0
+// CHECK-INST: fsub z31.d, p7/m, z31.d, #1.0
+// CHECK-ENCODING: [0x3f,0x9c,0xd9,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 3f 9c d9 65 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+fsub z31.d, p7/m, z31.d, #1.0
+// CHECK-INST: fsub z31.d, p7/m, z31.d, #1.0
+// CHECK-ENCODING: [0x3f,0x9c,0xd9,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 3f 9c d9 65 <unknown>
+
+movprfx z0.d, p7/z, z7.d
+// CHECK-INST: movprfx z0.d, p7/z, z7.d
+// CHECK-ENCODING: [0xe0,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 3c d0 04 <unknown>
+
+fsub z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: fsub z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x9f,0xc1,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 9f c1 65 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+fsub z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: fsub z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x9f,0xc1,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 9f c1 65 <unknown>
diff --git a/test/MC/AArch64/SVE/fsubr.s b/test/MC/AArch64/SVE/fsubr.s
index 990a0b1ac520..349871702498 100644
--- a/test/MC/AArch64/SVE/fsubr.s
+++ b/test/MC/AArch64/SVE/fsubr.s
@@ -72,3 +72,55 @@ fsubr z0.d, p7/m, z0.d, z31.d
// CHECK-ENCODING: [0xe0,0x9f,0xc3,0x65]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: e0 9f c3 65 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p7/z, z6.d
+// CHECK-INST: movprfx z31.d, p7/z, z6.d
+// CHECK-ENCODING: [0xdf,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 3c d0 04 <unknown>
+
+fsubr z31.d, p7/m, z31.d, #1.0
+// CHECK-INST: fsubr z31.d, p7/m, z31.d, #1.0
+// CHECK-ENCODING: [0x3f,0x9c,0xdb,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 3f 9c db 65 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+fsubr z31.d, p7/m, z31.d, #1.0
+// CHECK-INST: fsubr z31.d, p7/m, z31.d, #1.0
+// CHECK-ENCODING: [0x3f,0x9c,0xdb,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 3f 9c db 65 <unknown>
+
+movprfx z0.d, p7/z, z7.d
+// CHECK-INST: movprfx z0.d, p7/z, z7.d
+// CHECK-ENCODING: [0xe0,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 3c d0 04 <unknown>
+
+fsubr z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: fsubr z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x9f,0xc3,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 9f c3 65 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+fsubr z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: fsubr z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x9f,0xc3,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 9f c3 65 <unknown>
diff --git a/test/MC/AArch64/SVE/ftmad-diagnostics.s b/test/MC/AArch64/SVE/ftmad-diagnostics.s
index 5b63106fb48b..ec7741810b3c 100644
--- a/test/MC/AArch64/SVE/ftmad-diagnostics.s
+++ b/test/MC/AArch64/SVE/ftmad-diagnostics.s
@@ -36,3 +36,13 @@ ftmad z0.h, z0.h, z1.h, #8
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7].
// CHECK-NEXT: ftmad z0.h, z0.h, z1.h, #8
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+ftmad z0.d, z0.d, z31.d, #7
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: ftmad z0.d, z0.d, z31.d, #7
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ftmad.s b/test/MC/AArch64/SVE/ftmad.s
index c18009ec0cb6..3a59a1fb4fd7 100644
--- a/test/MC/AArch64/SVE/ftmad.s
+++ b/test/MC/AArch64/SVE/ftmad.s
@@ -24,3 +24,19 @@ ftmad z0.d, z0.d, z31.d, #7
// CHECK-ENCODING: [0xe0,0x83,0xd7,0x65]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: e0 83 d7 65 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+ftmad z0.d, z0.d, z31.d, #7
+// CHECK-INST: ftmad z0.d, z0.d, z31.d, #7
+// CHECK-ENCODING: [0xe0,0x83,0xd7,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 83 d7 65 <unknown>
diff --git a/test/MC/AArch64/SVE/ftsmul-diagnostics.s b/test/MC/AArch64/SVE/ftsmul-diagnostics.s
index 5ad0a14d798c..08b2966893fe 100644
--- a/test/MC/AArch64/SVE/ftsmul-diagnostics.s
+++ b/test/MC/AArch64/SVE/ftsmul-diagnostics.s
@@ -13,3 +13,19 @@ ftsmul z0.h, z1.s, z2.s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: ftsmul z0.h, z1.s, z2.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+ftsmul z0.d, z1.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ftsmul z0.d, z1.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+ftsmul z0.d, z1.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ftsmul z0.d, z1.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ftssel-diagnostics.s b/test/MC/AArch64/SVE/ftssel-diagnostics.s
index 92991246163f..c0cfac061311 100644
--- a/test/MC/AArch64/SVE/ftssel-diagnostics.s
+++ b/test/MC/AArch64/SVE/ftssel-diagnostics.s
@@ -3,4 +3,19 @@
ftssel z0.b, z1.b, z31.b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: ftssel z0.b, z1.b, z31.b
-// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: \ No newline at end of file
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+ftssel z0.d, z1.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ftssel z0.d, z1.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+ftssel z0.d, z1.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ftssel z0.d, z1.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/incd-diagnostics.s b/test/MC/AArch64/SVE/incd-diagnostics.s
index ddd232062bf5..2c32eea91ef0 100644
--- a/test/MC/AArch64/SVE/incd-diagnostics.s
+++ b/test/MC/AArch64/SVE/incd-diagnostics.s
@@ -61,3 +61,25 @@ incd x0, #32
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate pattern
// CHECK-NEXT: incd x0, #32
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+incd z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: incd z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.d, p0/z, z7.d
+incd z0.d, all, mul #16
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: incd z0.d, all, mul #16
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.d, p0/z, z7.d
+incd z0.d, all
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: incd z0.d, all
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/incd.s b/test/MC/AArch64/SVE/incd.s
index 6d146a4612b3..9774c450ed12 100644
--- a/test/MC/AArch64/SVE/incd.s
+++ b/test/MC/AArch64/SVE/incd.s
@@ -164,3 +164,43 @@ incd x0, #28
// CHECK-ENCODING: [0x80,0xe3,0xf0,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 80 e3 f0 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+incd z0.d
+// CHECK-INST: incd z0.d
+// CHECK-ENCODING: [0xe0,0xc3,0xf0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 c3 f0 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+incd z0.d, all, mul #16
+// CHECK-INST: incd z0.d, all, mul #16
+// CHECK-ENCODING: [0xe0,0xc3,0xff,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 c3 ff 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+incd z0.d, all
+// CHECK-INST: incd z0.d
+// CHECK-ENCODING: [0xe0,0xc3,0xf0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 c3 f0 04 <unknown>
diff --git a/test/MC/AArch64/SVE/inch-diagnostics.s b/test/MC/AArch64/SVE/inch-diagnostics.s
index 31b71cfd49df..b25019bd9bd4 100644
--- a/test/MC/AArch64/SVE/inch-diagnostics.s
+++ b/test/MC/AArch64/SVE/inch-diagnostics.s
@@ -61,3 +61,25 @@ inch x0, #32
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate pattern
// CHECK-NEXT: inch x0, #32
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.h, p0/z, z7.h
+inch z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: inch z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.h, p0/z, z7.h
+inch z0.h, all, mul #16
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: inch z0.h, all, mul #16
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.h, p0/z, z7.h
+inch z0.h, all
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: inch z0.h, all
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/inch.s b/test/MC/AArch64/SVE/inch.s
index 02519067271d..fd952cd7d3db 100644
--- a/test/MC/AArch64/SVE/inch.s
+++ b/test/MC/AArch64/SVE/inch.s
@@ -164,3 +164,43 @@ inch x0, #28
// CHECK-ENCODING: [0x80,0xe3,0x70,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 80 e3 70 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+inch z0.h
+// CHECK-INST: inch z0.h
+// CHECK-ENCODING: [0xe0,0xc3,0x70,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 c3 70 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+inch z0.h, all, mul #16
+// CHECK-INST: inch z0.h, all, mul #16
+// CHECK-ENCODING: [0xe0,0xc3,0x7f,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 c3 7f 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+inch z0.h, all
+// CHECK-INST: inch z0.h
+// CHECK-ENCODING: [0xe0,0xc3,0x70,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 c3 70 04 <unknown>
diff --git a/test/MC/AArch64/SVE/incp-diagnostics.s b/test/MC/AArch64/SVE/incp-diagnostics.s
index 71cc91f6d02d..1cc766fcd8b2 100644
--- a/test/MC/AArch64/SVE/incp-diagnostics.s
+++ b/test/MC/AArch64/SVE/incp-diagnostics.s
@@ -36,3 +36,13 @@ incp x0, p0.q
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register
// CHECK-NEXT: incp x0, p0.q
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p7/z, z6.d
+incp z31.d, p7
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: incp z31.d, p7
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/incp.s b/test/MC/AArch64/SVE/incp.s
index c897e846fd05..6bc2c5160925 100644
--- a/test/MC/AArch64/SVE/incp.s
+++ b/test/MC/AArch64/SVE/incp.s
@@ -72,3 +72,19 @@ incp z31.d, p15
// CHECK-ENCODING: [0xff,0x81,0xec,0x25]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff 81 ec 25 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+incp z31.d, p15
+// CHECK-INST: incp z31.d, p15
+// CHECK-ENCODING: [0xff,0x81,0xec,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ff 81 ec 25 <unknown>
diff --git a/test/MC/AArch64/SVE/incw-diagnostics.s b/test/MC/AArch64/SVE/incw-diagnostics.s
index e1a85edc931c..88335e9c5d72 100644
--- a/test/MC/AArch64/SVE/incw-diagnostics.s
+++ b/test/MC/AArch64/SVE/incw-diagnostics.s
@@ -61,3 +61,25 @@ incw x0, #32
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate pattern
// CHECK-NEXT: incw x0, #32
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.s, p0/z, z7.s
+incw z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: incw z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+incw z0.s, all, mul #16
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: incw z0.s, all, mul #16
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+incw z0.s, all
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: incw z0.s, all
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/incw.s b/test/MC/AArch64/SVE/incw.s
index a9e34dad08c3..fb6a05ee6f57 100644
--- a/test/MC/AArch64/SVE/incw.s
+++ b/test/MC/AArch64/SVE/incw.s
@@ -165,3 +165,43 @@ incw x0, #28
// CHECK-ENCODING: [0x80,0xe3,0xb0,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 80 e3 b0 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+incw z0.s
+// CHECK-INST: incw z0.s
+// CHECK-ENCODING: [0xe0,0xc3,0xb0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 c3 b0 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+incw z0.s, all, mul #16
+// CHECK-INST: incw z0.s, all, mul #16
+// CHECK-ENCODING: [0xe0,0xc3,0xbf,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 c3 bf 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+incw z0.s, all
+// CHECK-INST: incw z0.s
+// CHECK-ENCODING: [0xe0,0xc3,0xb0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 c3 b0 04 <unknown>
diff --git a/test/MC/AArch64/SVE/index-diagnostics.s b/test/MC/AArch64/SVE/index-diagnostics.s
index be42c9107233..3b2a4aa656fd 100644
--- a/test/MC/AArch64/SVE/index-diagnostics.s
+++ b/test/MC/AArch64/SVE/index-diagnostics.s
@@ -56,3 +56,43 @@ index z17.d, w9, w7
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be an integer in range [-16, 15].
// CHECK-NEXT: index z17.d, w9, w7
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z21.d, p0/z, z28.d
+index z21.d, x10, x21
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: index z21.d, x10, x21
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z21, z28
+index z21.d, x10, x21
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: index z21.d, x10, x21
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z23.d, p0/z, z30.d
+index z23.d, x13, #8
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: index z23.d, x13, #8
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z23, z30
+index z23.d, x13, #8
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: index z23.d, x13, #8
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z23.d, p0/z, z30.d
+index z23.d, #13, x8
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: index z23.d, #13, x8
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z23, z30
+index z23.d, #13, x8
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: index z23.d, #13, x8
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/insr-diagnostics.s b/test/MC/AArch64/SVE/insr-diagnostics.s
index e0ec3e6414c8..a0afb22f8fdf 100644
--- a/test/MC/AArch64/SVE/insr-diagnostics.s
+++ b/test/MC/AArch64/SVE/insr-diagnostics.s
@@ -43,3 +43,19 @@ insr z31.d, b0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: insr z31.d, b0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+insr z31.d, xzr
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: insr z31.d, xzr
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z4.d, p0/z, z6.d
+insr z4.d, d31
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: insr z4.d, d31
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/insr.s b/test/MC/AArch64/SVE/insr.s
index 7e13a1b93fe5..262611300019 100644
--- a/test/MC/AArch64/SVE/insr.s
+++ b/test/MC/AArch64/SVE/insr.s
@@ -78,3 +78,31 @@ insr z31.d, d31
// CHECK-ENCODING: [0xff,0x3b,0xf4,0x05]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff 3b f4 05 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+insr z31.d, xzr
+// CHECK-INST: insr z31.d, xzr
+// CHECK-ENCODING: [0xff,0x3b,0xe4,0x05]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ff 3b e4 05 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+insr z4.d, d31
+// CHECK-INST: insr z4.d, d31
+// CHECK-ENCODING: [0xe4,0x3b,0xf4,0x05]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 3b f4 05 <unknown>
diff --git a/test/MC/AArch64/SVE/lasta-diagnostics.s b/test/MC/AArch64/SVE/lasta-diagnostics.s
index dd8155521f5e..b153a67f35f6 100644
--- a/test/MC/AArch64/SVE/lasta-diagnostics.s
+++ b/test/MC/AArch64/SVE/lasta-diagnostics.s
@@ -52,3 +52,31 @@ lasta d0, p7, z31.b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: lasta d0, p7, z31.b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p7/z, z6.d
+lasta x0, p7, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: lasta x0, p7, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+lasta x0, p7, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: lasta x0, p7, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31.d, p7/z, z6.d
+lasta d0, p7, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: lasta d0, p7, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+lasta d0, p7, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: lasta d0, p7, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/lastb-diagnostics.s b/test/MC/AArch64/SVE/lastb-diagnostics.s
index ed92c76cadf6..b86654ffc12d 100644
--- a/test/MC/AArch64/SVE/lastb-diagnostics.s
+++ b/test/MC/AArch64/SVE/lastb-diagnostics.s
@@ -52,3 +52,31 @@ lastb d0, p7, z31.b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: lastb d0, p7, z31.b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p7/z, z6.d
+lastb x0, p7, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: lastb x0, p7, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+lastb x0, p7, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: lastb x0, p7, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31.d, p7/z, z6.d
+lastb d0, p7, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: lastb d0, p7, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+lastb d0, p7, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: lastb d0, p7, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ld1b-diagnostics.s b/test/MC/AArch64/SVE/ld1b-diagnostics.s
index 4d4da4021e33..70fecbf5805d 100644
--- a/test/MC/AArch64/SVE/ld1b-diagnostics.s
+++ b/test/MC/AArch64/SVE/ld1b-diagnostics.s
@@ -177,3 +177,19 @@ ld1b z0.d, p0/z, [z0.d, #32]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 31].
// CHECK-NEXT: ld1b z0.d, p0/z, [z0.d, #32]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+ld1b { z0.d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld1b { z0.d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+ld1b { z0.d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld1b { z0.d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ld1d-diagnostics.s b/test/MC/AArch64/SVE/ld1d-diagnostics.s
index cedb48b41719..2ba540699567 100644
--- a/test/MC/AArch64/SVE/ld1d-diagnostics.s
+++ b/test/MC/AArch64/SVE/ld1d-diagnostics.s
@@ -132,3 +132,19 @@ ld1d z0.d, p0/z, [z0.d, #3]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 8 in range [0, 248].
// CHECK-NEXT: ld1d z0.d, p0/z, [z0.d, #3]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+ld1d { z0.d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld1d { z0.d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+ld1d { z0.d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld1d { z0.d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ld1h-diagnostics.s b/test/MC/AArch64/SVE/ld1h-diagnostics.s
index 7c8694131f86..8643a051fe9b 100644
--- a/test/MC/AArch64/SVE/ld1h-diagnostics.s
+++ b/test/MC/AArch64/SVE/ld1h-diagnostics.s
@@ -192,3 +192,19 @@ ld1h z0.d, p0/z, [z0.d, #3]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 2 in range [0, 62].
// CHECK-NEXT: ld1h z0.d, p0/z, [z0.d, #3]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+ld1h { z0.d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+ld1h { z0.d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ld1rb-diagnostics.s b/test/MC/AArch64/SVE/ld1rb-diagnostics.s
index 18532c78c87a..c862eb5cdb16 100644
--- a/test/MC/AArch64/SVE/ld1rb-diagnostics.s
+++ b/test/MC/AArch64/SVE/ld1rb-diagnostics.s
@@ -21,3 +21,19 @@ ld1rb z0.b, p8/z, [x0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
// CHECK-NEXT: ld1rb z0.b, p8/z, [x0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p7/z, z6.d
+ld1rb { z31.d }, p7/z, [sp, #63]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld1rb { z31.d }, p7/z, [sp, #63]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+ld1rb { z31.d }, p7/z, [sp, #63]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld1rb { z31.d }, p7/z, [sp, #63]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ld1rd-diagnostics.s b/test/MC/AArch64/SVE/ld1rd-diagnostics.s
index 66eaa8b43a00..e85ffa16ef52 100644
--- a/test/MC/AArch64/SVE/ld1rd-diagnostics.s
+++ b/test/MC/AArch64/SVE/ld1rd-diagnostics.s
@@ -55,3 +55,19 @@ ld1rd z0.d, p8/z, [x0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
// CHECK-NEXT: ld1rd z0.d, p8/z, [x0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p7/z, z6.d
+ld1rd { z31.d }, p7/z, [sp, #504]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld1rd { z31.d }, p7/z, [sp, #504]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+ld1rd { z31.d }, p7/z, [sp, #504]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld1rd { z31.d }, p7/z, [sp, #504]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ld1rh-diagnostics.s b/test/MC/AArch64/SVE/ld1rh-diagnostics.s
index bdd4b06c4f62..2665e859d81e 100644
--- a/test/MC/AArch64/SVE/ld1rh-diagnostics.s
+++ b/test/MC/AArch64/SVE/ld1rh-diagnostics.s
@@ -45,3 +45,19 @@ ld1rh z0.h, p8/z, [x0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
// CHECK-NEXT: ld1rh z0.h, p8/z, [x0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p7/z, z6.d
+ld1rh { z31.d }, p7/z, [sp, #126]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld1rh { z31.d }, p7/z, [sp, #126]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+ld1rh { z31.d }, p7/z, [sp, #126]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld1rh { z31.d }, p7/z, [sp, #126]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ld1rqb-diagnostics.s b/test/MC/AArch64/SVE/ld1rqb-diagnostics.s
index 0a8a766d6137..94a8d5c3f5c8 100644
--- a/test/MC/AArch64/SVE/ld1rqb-diagnostics.s
+++ b/test/MC/AArch64/SVE/ld1rqb-diagnostics.s
@@ -79,3 +79,19 @@ ld1rqb z0.b, p0/z, [x0, w1, uxtw]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: register must be x0..x30 without shift
// CHECK-NEXT: ld1rqb z0.b, p0/z, [x0, w1, uxtw]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z21.b, p5/z, z28.b
+ld1rqb { z21.b }, p5/z, [x10, #112]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld1rqb { z21.b }, p5/z, [x10, #112]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z21, z28
+ld1rqb { z21.b }, p5/z, [x10, #112]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld1rqb { z21.b }, p5/z, [x10, #112]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ld1rqd-diagnostics.s b/test/MC/AArch64/SVE/ld1rqd-diagnostics.s
index 1ea8188d6074..8c4ac3ecad6d 100644
--- a/test/MC/AArch64/SVE/ld1rqd-diagnostics.s
+++ b/test/MC/AArch64/SVE/ld1rqd-diagnostics.s
@@ -79,3 +79,19 @@ ld1rqd z0.d, p0/z, [x0, w1, uxtw #1]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: register must be x0..x30 with required shift 'lsl #3'
// CHECK-NEXT: ld1rqd z0.d, p0/z, [x0, w1, uxtw #1]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z23.d, p3/z, z30.d
+ld1rqd { z23.d }, p3/z, [x13, #112]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld1rqd { z23.d }, p3/z, [x13, #112]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z23, z30
+ld1rqd { z23.d }, p3/z, [x13, #112]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld1rqd { z23.d }, p3/z, [x13, #112]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ld1rqh-diagnostics.s b/test/MC/AArch64/SVE/ld1rqh-diagnostics.s
index aecc63e76a10..960dda35db6e 100644
--- a/test/MC/AArch64/SVE/ld1rqh-diagnostics.s
+++ b/test/MC/AArch64/SVE/ld1rqh-diagnostics.s
@@ -79,3 +79,19 @@ ld1rqh z0.h, p0/z, [x0, w1, uxtw #1]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: register must be x0..x30 with required shift 'lsl #1'
// CHECK-NEXT: ld1rqh z0.h, p0/z, [x0, w1, uxtw #1]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z23.h, p3/z, z30.h
+ld1rqh { z23.h }, p3/z, [x13, #112]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld1rqh { z23.h }, p3/z, [x13, #112]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z23, z30
+ld1rqh { z23.h }, p3/z, [x13, #112]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld1rqh { z23.h }, p3/z, [x13, #112]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ld1rqw-diagnostics.s b/test/MC/AArch64/SVE/ld1rqw-diagnostics.s
index 477af4242e9a..b4d45e5ceb6d 100644
--- a/test/MC/AArch64/SVE/ld1rqw-diagnostics.s
+++ b/test/MC/AArch64/SVE/ld1rqw-diagnostics.s
@@ -79,3 +79,19 @@ ld1rqw z0.s, p0/z, [x0, w1, uxtw #1]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: register must be x0..x30 with required shift 'lsl #2'
// CHECK-NEXT: ld1rqw z0.s, p0/z, [x0, w1, uxtw #1]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z23.s, p3/z, z30.s
+ld1rqw { z23.s }, p3/z, [x13, #112]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld1rqw { z23.s }, p3/z, [x13, #112]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z23, z30
+ld1rqw { z23.s }, p3/z, [x13, #112]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld1rqw { z23.s }, p3/z, [x13, #112]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ld1rsb-diagnostics.s b/test/MC/AArch64/SVE/ld1rsb-diagnostics.s
index 7950eca94adf..28c6ecd9013d 100644
--- a/test/MC/AArch64/SVE/ld1rsb-diagnostics.s
+++ b/test/MC/AArch64/SVE/ld1rsb-diagnostics.s
@@ -30,3 +30,19 @@ ld1rsb z0.h, p8/z, [x0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
// CHECK-NEXT: ld1rsb z0.h, p8/z, [x0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p7/z, z6.d
+ld1rsb { z31.d }, p7/z, [sp, #63]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld1rsb { z31.d }, p7/z, [sp, #63]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+ld1rsb { z31.d }, p7/z, [sp, #63]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld1rsb { z31.d }, p7/z, [sp, #63]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ld1rsh-diagnostics.s b/test/MC/AArch64/SVE/ld1rsh-diagnostics.s
index b33c7934ccb8..ae13b87b61ee 100644
--- a/test/MC/AArch64/SVE/ld1rsh-diagnostics.s
+++ b/test/MC/AArch64/SVE/ld1rsh-diagnostics.s
@@ -40,3 +40,19 @@ ld1rsh z0.s, p8/z, [x0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
// CHECK-NEXT: ld1rsh z0.s, p8/z, [x0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p7/z, z6.d
+ld1rsh { z31.d }, p7/z, [sp, #126]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld1rsh { z31.d }, p7/z, [sp, #126]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+ld1rsh { z31.d }, p7/z, [sp, #126]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld1rsh { z31.d }, p7/z, [sp, #126]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ld1rsw-diagnostics.s b/test/MC/AArch64/SVE/ld1rsw-diagnostics.s
index 4d7cf63980dc..02da02d5cf5b 100644
--- a/test/MC/AArch64/SVE/ld1rsw-diagnostics.s
+++ b/test/MC/AArch64/SVE/ld1rsw-diagnostics.s
@@ -45,3 +45,19 @@ ld1rsw z0.d, p8/z, [x0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
// CHECK-NEXT: ld1rsw z0.d, p8/z, [x0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p7/z, z6.d
+ld1rsw { z31.d }, p7/z, [sp, #252]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld1rsw { z31.d }, p7/z, [sp, #252]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+ld1rsw { z31.d }, p7/z, [sp, #252]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld1rsw { z31.d }, p7/z, [sp, #252]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ld1rw-diagnostics.s b/test/MC/AArch64/SVE/ld1rw-diagnostics.s
index e5e55c9da562..c4bc362d7d0b 100644
--- a/test/MC/AArch64/SVE/ld1rw-diagnostics.s
+++ b/test/MC/AArch64/SVE/ld1rw-diagnostics.s
@@ -50,3 +50,19 @@ ld1rw z0.s, p8/z, [x0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
// CHECK-NEXT: ld1rw z0.s, p8/z, [x0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p7/z, z6.d
+ld1rw { z31.d }, p7/z, [sp, #252]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld1rw { z31.d }, p7/z, [sp, #252]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+ld1rw { z31.d }, p7/z, [sp, #252]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld1rw { z31.d }, p7/z, [sp, #252]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ld1sb-diagnostics.s b/test/MC/AArch64/SVE/ld1sb-diagnostics.s
index e936f576fdef..0bbccfc0c956 100644
--- a/test/MC/AArch64/SVE/ld1sb-diagnostics.s
+++ b/test/MC/AArch64/SVE/ld1sb-diagnostics.s
@@ -175,3 +175,19 @@ ld1sb z0.d, p0/z, [z0.d, #32]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 31].
// CHECK-NEXT: ld1sb z0.d, p0/z, [z0.d, #32]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+ld1sb { z0.d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld1sb { z0.d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+ld1sb { z0.d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld1sb { z0.d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ld1sh-diagnostics.s b/test/MC/AArch64/SVE/ld1sh-diagnostics.s
index 91fc364d7dc3..d423480a2844 100644
--- a/test/MC/AArch64/SVE/ld1sh-diagnostics.s
+++ b/test/MC/AArch64/SVE/ld1sh-diagnostics.s
@@ -191,3 +191,19 @@ ld1sh z0.d, p0/z, [z0.d, #3]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 2 in range [0, 62].
// CHECK-NEXT: ld1sh z0.d, p0/z, [z0.d, #3]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+ld1sh { z0.d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld1sh { z0.d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+ld1sh { z0.d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld1sh { z0.d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ld1sw-diagnostics.s b/test/MC/AArch64/SVE/ld1sw-diagnostics.s
index 2ecf3b7fce32..1061e87301dc 100644
--- a/test/MC/AArch64/SVE/ld1sw-diagnostics.s
+++ b/test/MC/AArch64/SVE/ld1sw-diagnostics.s
@@ -161,3 +161,19 @@ ld1sw z0.d, p0/z, [z0.d, #3]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [0, 124].
// CHECK-NEXT: ld1sw z0.d, p0/z, [z0.d, #3]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+ld1sw { z0.d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld1sw { z0.d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+ld1sw { z0.d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld1sw { z0.d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ld1w-diagnostics.s b/test/MC/AArch64/SVE/ld1w-diagnostics.s
index d1031eac60bf..1aafc27f8585 100644
--- a/test/MC/AArch64/SVE/ld1w-diagnostics.s
+++ b/test/MC/AArch64/SVE/ld1w-diagnostics.s
@@ -177,3 +177,19 @@ ld1w z0.d, p0/z, [z0.d, #3]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [0, 124].
// CHECK-NEXT: ld1w z0.d, p0/z, [z0.d, #3]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+ld1w { z0.d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+ld1w { z0.d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ld2b-diagnostics.s b/test/MC/AArch64/SVE/ld2b-diagnostics.s
index 136bf026c6ae..3eae31f363d4 100644
--- a/test/MC/AArch64/SVE/ld2b-diagnostics.s
+++ b/test/MC/AArch64/SVE/ld2b-diagnostics.s
@@ -89,3 +89,19 @@ ld2b { v0.2d, v1.2d }, p0/z, [x0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: ld2b { v0.2d, v1.2d }, p0/z, [x0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z21.b, p5/z, z28.b
+ld2b { z21.b, z22.b }, p5/z, [x10, #10, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld2b { z21.b, z22.b }, p5/z, [x10, #10, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z21, z28
+ld2b { z21.b, z22.b }, p5/z, [x10, #10, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld2b { z21.b, z22.b }, p5/z, [x10, #10, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ld2d-diagnostics.s b/test/MC/AArch64/SVE/ld2d-diagnostics.s
index 7c58ef11530c..7b64621635d8 100644
--- a/test/MC/AArch64/SVE/ld2d-diagnostics.s
+++ b/test/MC/AArch64/SVE/ld2d-diagnostics.s
@@ -94,3 +94,19 @@ ld2d { v0.2d, v1.2d }, p0/z, [x0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: ld2d { v0.2d, v1.2d }, p0/z, [x0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z21.d, p5/z, z28.d
+ld2d { z21.d, z22.d }, p5/z, [x10, #10, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld2d { z21.d, z22.d }, p5/z, [x10, #10, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z21, z28
+ld2d { z21.d, z22.d }, p5/z, [x10, #10, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld2d { z21.d, z22.d }, p5/z, [x10, #10, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ld2h-diagnostics.s b/test/MC/AArch64/SVE/ld2h-diagnostics.s
index e3d29dc2c34d..4568fe396ee1 100644
--- a/test/MC/AArch64/SVE/ld2h-diagnostics.s
+++ b/test/MC/AArch64/SVE/ld2h-diagnostics.s
@@ -94,3 +94,19 @@ ld2h { v0.2d, v1.2d }, p0/z, [x0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: ld2h { v0.2d, v1.2d }, p0/z, [x0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z21.h, p5/z, z28.h
+ld2h { z21.h, z22.h }, p5/z, [x10, #10, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld2h { z21.h, z22.h }, p5/z, [x10, #10, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z21, z28
+ld2h { z21.h, z22.h }, p5/z, [x10, #10, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld2h { z21.h, z22.h }, p5/z, [x10, #10, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ld2w-diagnostics.s b/test/MC/AArch64/SVE/ld2w-diagnostics.s
index cf09aa26d1f7..66acedab1e9f 100644
--- a/test/MC/AArch64/SVE/ld2w-diagnostics.s
+++ b/test/MC/AArch64/SVE/ld2w-diagnostics.s
@@ -94,3 +94,19 @@ ld2w { v0.2d, v1.2d }, p0/z, [x0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: ld2w { v0.2d, v1.2d }, p0/z, [x0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z21.s, p5/z, z28.s
+ld2w { z21.s, z22.s }, p5/z, [x10, #10, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld2w { z21.s, z22.s }, p5/z, [x10, #10, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z21, z28
+ld2w { z21.s, z22.s }, p5/z, [x10, #10, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld2w { z21.s, z22.s }, p5/z, [x10, #10, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ld3b-diagnostics.s b/test/MC/AArch64/SVE/ld3b-diagnostics.s
index c93ec32ae867..388545258bc6 100644
--- a/test/MC/AArch64/SVE/ld3b-diagnostics.s
+++ b/test/MC/AArch64/SVE/ld3b-diagnostics.s
@@ -89,3 +89,19 @@ ld3b { v0.16b, v1.16b, v2.16b }, p0/z, [x0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: ld3b { v0.16b, v1.16b, v2.16b }, p0/z, [x0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z21.b, p5/z, z28.b
+ld3b { z21.b, z22.b, z23.b }, p5/z, [x10, #15, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld3b { z21.b, z22.b, z23.b }, p5/z, [x10, #15, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z21, z28
+ld3b { z21.b, z22.b, z23.b }, p5/z, [x10, #15, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld3b { z21.b, z22.b, z23.b }, p5/z, [x10, #15, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ld3d-diagnostics.s b/test/MC/AArch64/SVE/ld3d-diagnostics.s
index 33b063733e1b..f672fb832004 100644
--- a/test/MC/AArch64/SVE/ld3d-diagnostics.s
+++ b/test/MC/AArch64/SVE/ld3d-diagnostics.s
@@ -94,3 +94,19 @@ ld3d { v0.2d, v1.2d, v2.2d }, p0/z, [x0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: ld3d { v0.2d, v1.2d, v2.2d }, p0/z, [x0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z21.d, p5/z, z28.d
+ld3d { z21.d, z22.d, z23.d }, p5/z, [x10, #15, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld3d { z21.d, z22.d, z23.d }, p5/z, [x10, #15, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z21, z28
+ld3d { z21.d, z22.d, z23.d }, p5/z, [x10, #15, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld3d { z21.d, z22.d, z23.d }, p5/z, [x10, #15, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ld3h-diagnostics.s b/test/MC/AArch64/SVE/ld3h-diagnostics.s
index cf0423a128ab..c5ae5dfcda53 100644
--- a/test/MC/AArch64/SVE/ld3h-diagnostics.s
+++ b/test/MC/AArch64/SVE/ld3h-diagnostics.s
@@ -94,3 +94,19 @@ ld3h { v0.8h, v1.8h, v2.8h }, p0/z, [x0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: ld3h { v0.8h, v1.8h, v2.8h }, p0/z, [x0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z21.h, p5/z, z28.h
+ld3h { z21.h, z22.h, z23.h }, p5/z, [x10, #15, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld3h { z21.h, z22.h, z23.h }, p5/z, [x10, #15, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z21, z28
+ld3h { z21.h, z22.h, z23.h }, p5/z, [x10, #15, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld3h { z21.h, z22.h, z23.h }, p5/z, [x10, #15, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ld3w-diagnostics.s b/test/MC/AArch64/SVE/ld3w-diagnostics.s
index 758fc04667df..d3082c14b8fe 100644
--- a/test/MC/AArch64/SVE/ld3w-diagnostics.s
+++ b/test/MC/AArch64/SVE/ld3w-diagnostics.s
@@ -94,3 +94,19 @@ ld3w { v0.4s, v1.4s, v2.4s }, p0/z, [x0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: ld3w { v0.4s, v1.4s, v2.4s }, p0/z, [x0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z21.s, p5/z, z28.s
+ld3w { z21.s, z22.s, z23.s }, p5/z, [x10, #15, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld3w { z21.s, z22.s, z23.s }, p5/z, [x10, #15, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z21, z28
+ld3w { z21.s, z22.s, z23.s }, p5/z, [x10, #15, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld3w { z21.s, z22.s, z23.s }, p5/z, [x10, #15, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ld4b-diagnostics.s b/test/MC/AArch64/SVE/ld4b-diagnostics.s
index 3120c49fb1ba..65c5bcd26fc6 100644
--- a/test/MC/AArch64/SVE/ld4b-diagnostics.s
+++ b/test/MC/AArch64/SVE/ld4b-diagnostics.s
@@ -89,3 +89,19 @@ ld4b { v0.16b, v1.16b, v2.16b }, p0/z, [x0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: ld4b { v0.16b, v1.16b, v2.16b }, p0/z, [x0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z21.b, p5/z, z28.b
+ld4b { z21.b, z22.b, z23.b, z24.b }, p5/z, [x10, #20, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld4b { z21.b, z22.b, z23.b, z24.b }, p5/z, [x10, #20, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z21, z28
+ld4b { z21.b, z22.b, z23.b, z24.b }, p5/z, [x10, #20, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld4b { z21.b, z22.b, z23.b, z24.b }, p5/z, [x10, #20, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ld4d-diagnostics.s b/test/MC/AArch64/SVE/ld4d-diagnostics.s
index 8b5715d2249c..ea70ea037012 100644
--- a/test/MC/AArch64/SVE/ld4d-diagnostics.s
+++ b/test/MC/AArch64/SVE/ld4d-diagnostics.s
@@ -94,3 +94,19 @@ ld4d { v0.2d, v1.2d, v2.2d }, p0/z, [x0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: ld4d { v0.2d, v1.2d, v2.2d }, p0/z, [x0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z21.d, p5/z, z28.d
+ld4d { z21.d, z22.d, z23.d, z24.d }, p5/z, [x10, #20, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld4d { z21.d, z22.d, z23.d, z24.d }, p5/z, [x10, #20, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z21, z28
+ld4d { z21.d, z22.d, z23.d, z24.d }, p5/z, [x10, #20, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld4d { z21.d, z22.d, z23.d, z24.d }, p5/z, [x10, #20, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ld4h-diagnostics.s b/test/MC/AArch64/SVE/ld4h-diagnostics.s
index 45f5bd5108cd..dc5485484f7b 100644
--- a/test/MC/AArch64/SVE/ld4h-diagnostics.s
+++ b/test/MC/AArch64/SVE/ld4h-diagnostics.s
@@ -94,3 +94,19 @@ ld4h { v0.8h, v1.8h, v2.8h }, p0/z, [x0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: ld4h { v0.8h, v1.8h, v2.8h }, p0/z, [x0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z21.h, p5/z, z28.h
+ld4h { z21.h, z22.h, z23.h, z24.h }, p5/z, [x10, #20, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld4h { z21.h, z22.h, z23.h, z24.h }, p5/z, [x10, #20, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z21, z28
+ld4h { z21.h, z22.h, z23.h, z24.h }, p5/z, [x10, #20, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld4h { z21.h, z22.h, z23.h, z24.h }, p5/z, [x10, #20, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ld4w-diagnostics.s b/test/MC/AArch64/SVE/ld4w-diagnostics.s
index 86547c22b1a5..d4633dd4d55d 100644
--- a/test/MC/AArch64/SVE/ld4w-diagnostics.s
+++ b/test/MC/AArch64/SVE/ld4w-diagnostics.s
@@ -94,3 +94,19 @@ ld4w { v0.4s, v1.4s, v2.4s }, p0/z, [x0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: ld4w { v0.4s, v1.4s, v2.4s }, p0/z, [x0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z21.s, p5/z, z28.s
+ld4w { z21.s, z22.s, z23.s, z24.s }, p5/z, [x10, #20, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld4w { z21.s, z22.s, z23.s, z24.s }, p5/z, [x10, #20, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z21, z28
+ld4w { z21.s, z22.s, z23.s, z24.s }, p5/z, [x10, #20, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ld4w { z21.s, z22.s, z23.s, z24.s }, p5/z, [x10, #20, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ldff1b-diagnostics.s b/test/MC/AArch64/SVE/ldff1b-diagnostics.s
index 2ce58d894fb5..8de73ef3e6f8 100644
--- a/test/MC/AArch64/SVE/ldff1b-diagnostics.s
+++ b/test/MC/AArch64/SVE/ldff1b-diagnostics.s
@@ -112,3 +112,19 @@ ldff1b z0.d, p0/z, [z0.d, #32]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 31].
// CHECK-NEXT: ldff1b z0.d, p0/z, [z0.d, #32]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+ldff1b { z0.d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldff1b { z0.d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+ldff1b { z0.d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldff1b { z0.d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ldff1d-diagnostics.s b/test/MC/AArch64/SVE/ldff1d-diagnostics.s
index e5aec40163f2..29978c7303bc 100644
--- a/test/MC/AArch64/SVE/ldff1d-diagnostics.s
+++ b/test/MC/AArch64/SVE/ldff1d-diagnostics.s
@@ -111,3 +111,19 @@ ldff1d z0.d, p0/z, [z0.d, #3]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 8 in range [0, 248].
// CHECK-NEXT: ldff1d z0.d, p0/z, [z0.d, #3]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+ldff1d { z0.d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldff1d { z0.d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+ldff1d { z0.d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldff1d { z0.d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ldff1h-diagnostics.s b/test/MC/AArch64/SVE/ldff1h-diagnostics.s
index e568031c4791..8674d095351b 100644
--- a/test/MC/AArch64/SVE/ldff1h-diagnostics.s
+++ b/test/MC/AArch64/SVE/ldff1h-diagnostics.s
@@ -141,3 +141,19 @@ ldff1h z0.d, p0/z, [z0.d, #3]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 2 in range [0, 62].
// CHECK-NEXT: ldff1h z0.d, p0/z, [z0.d, #3]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+ldff1h { z0.d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldff1h { z0.d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+ldff1h { z0.d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldff1h { z0.d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ldff1sb-diagnostics.s b/test/MC/AArch64/SVE/ldff1sb-diagnostics.s
index b9bd9e470c34..23cca8c14545 100644
--- a/test/MC/AArch64/SVE/ldff1sb-diagnostics.s
+++ b/test/MC/AArch64/SVE/ldff1sb-diagnostics.s
@@ -115,3 +115,19 @@ ldff1sb z0.d, p0/z, [z0.d, #32]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 31].
// CHECK-NEXT: ldff1sb z0.d, p0/z, [z0.d, #32]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+ldff1sb { z0.d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldff1sb { z0.d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+ldff1sb { z0.d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldff1sb { z0.d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ldff1sh-diagnostics.s b/test/MC/AArch64/SVE/ldff1sh-diagnostics.s
index e58d9151b85d..4a8d329f973c 100644
--- a/test/MC/AArch64/SVE/ldff1sh-diagnostics.s
+++ b/test/MC/AArch64/SVE/ldff1sh-diagnostics.s
@@ -135,3 +135,19 @@ ldff1sh z0.d, p0/z, [z0.d, #3]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 2 in range [0, 62].
// CHECK-NEXT: ldff1sh z0.d, p0/z, [z0.d, #3]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+ldff1sh { z0.d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldff1sh { z0.d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+ldff1sh { z0.d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldff1sh { z0.d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ldff1sw-diagnostics.s b/test/MC/AArch64/SVE/ldff1sw-diagnostics.s
index 4571ee664618..ab096f36b8c1 100644
--- a/test/MC/AArch64/SVE/ldff1sw-diagnostics.s
+++ b/test/MC/AArch64/SVE/ldff1sw-diagnostics.s
@@ -116,3 +116,19 @@ ldff1sw z0.d, p0/z, [z0.d, #3]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [0, 124].
// CHECK-NEXT: ldff1sw z0.d, p0/z, [z0.d, #3]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+ldff1sw { z0.d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldff1sw { z0.d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+ldff1sw { z0.d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldff1sw { z0.d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ldff1w-diagnostics.s b/test/MC/AArch64/SVE/ldff1w-diagnostics.s
index 36a7eec8af56..e2cec7c77101 100644
--- a/test/MC/AArch64/SVE/ldff1w-diagnostics.s
+++ b/test/MC/AArch64/SVE/ldff1w-diagnostics.s
@@ -141,3 +141,19 @@ ldff1w z0.d, p0/z, [z0.d, #3]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [0, 124].
// CHECK-NEXT: ldff1w z0.d, p0/z, [z0.d, #3]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+ldff1w { z0.d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldff1w { z0.d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+ldff1w { z0.d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldff1w { z0.d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ldnf1b-diagnostics.s b/test/MC/AArch64/SVE/ldnf1b-diagnostics.s
index d680595801dc..b24c15308f4c 100644
--- a/test/MC/AArch64/SVE/ldnf1b-diagnostics.s
+++ b/test/MC/AArch64/SVE/ldnf1b-diagnostics.s
@@ -85,3 +85,19 @@ ldnf1b { v0.2d }, p0/z, [x1, #1, MUL VL]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: ldnf1b { v0.2d }, p0/z, [x1, #1, MUL VL]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z21.d, p5/z, z28.d
+ldnf1b { z21.d }, p5/z, [x10, #5, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldnf1b { z21.d }, p5/z, [x10, #5, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z21, z28
+ldnf1b { z21.d }, p5/z, [x10, #5, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldnf1b { z21.d }, p5/z, [x10, #5, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ldnf1d-diagnostics.s b/test/MC/AArch64/SVE/ldnf1d-diagnostics.s
index c1a47ba02119..8c29bddea0c7 100644
--- a/test/MC/AArch64/SVE/ldnf1d-diagnostics.s
+++ b/test/MC/AArch64/SVE/ldnf1d-diagnostics.s
@@ -40,3 +40,19 @@ ldnf1d { v0.2d }, p0/z, [x1, #1, MUL VL]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: ldnf1d { v0.2d }, p0/z, [x1, #1, MUL VL]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z21.d, p5/z, z28.d
+ldnf1d { z21.d }, p5/z, [x10, #5, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldnf1d { z21.d }, p5/z, [x10, #5, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z21, z28
+ldnf1d { z21.d }, p5/z, [x10, #5, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldnf1d { z21.d }, p5/z, [x10, #5, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ldnf1h-diagnostics.s b/test/MC/AArch64/SVE/ldnf1h-diagnostics.s
index ef4d80a60a7d..6d61423f9997 100644
--- a/test/MC/AArch64/SVE/ldnf1h-diagnostics.s
+++ b/test/MC/AArch64/SVE/ldnf1h-diagnostics.s
@@ -70,3 +70,19 @@ ldnf1h { v0.2d }, p0/z, [x1, #1, MUL VL]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: ldnf1h { v0.2d }, p0/z, [x1, #1, MUL VL]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z21.d, p5/z, z28.d
+ldnf1h { z21.d }, p5/z, [x10, #5, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldnf1h { z21.d }, p5/z, [x10, #5, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z21, z28
+ldnf1h { z21.d }, p5/z, [x10, #5, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldnf1h { z21.d }, p5/z, [x10, #5, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ldnf1sb-diagnostics.s b/test/MC/AArch64/SVE/ldnf1sb-diagnostics.s
index a4d42559d01a..23e816425c5d 100644
--- a/test/MC/AArch64/SVE/ldnf1sb-diagnostics.s
+++ b/test/MC/AArch64/SVE/ldnf1sb-diagnostics.s
@@ -84,3 +84,19 @@ ldnf1sb { v0.2d }, p0/z, [x1, #1, MUL VL]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: ldnf1sb { v0.2d }, p0/z, [x1, #1, MUL VL]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z21.d, p5/z, z28.d
+ldnf1sb { z21.d }, p5/z, [x10, #5, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldnf1sb { z21.d }, p5/z, [x10, #5, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z21, z28
+ldnf1sb { z21.d }, p5/z, [x10, #5, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldnf1sb { z21.d }, p5/z, [x10, #5, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ldnf1sh-diagnostics.s b/test/MC/AArch64/SVE/ldnf1sh-diagnostics.s
index 7e62d7959576..3071cc6e4694 100644
--- a/test/MC/AArch64/SVE/ldnf1sh-diagnostics.s
+++ b/test/MC/AArch64/SVE/ldnf1sh-diagnostics.s
@@ -69,3 +69,19 @@ ldnf1sh { v0.2d }, p0/z, [x1, #1, MUL VL]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: ldnf1sh { v0.2d }, p0/z, [x1, #1, MUL VL]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z21.d, p5/z, z28.d
+ldnf1sh { z21.d }, p5/z, [x10, #5, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldnf1sh { z21.d }, p5/z, [x10, #5, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z21, z28
+ldnf1sh { z21.d }, p5/z, [x10, #5, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldnf1sh { z21.d }, p5/z, [x10, #5, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ldnf1sw-diagnostics.s b/test/MC/AArch64/SVE/ldnf1sw-diagnostics.s
index 0312815614a6..5c133f7a0095 100644
--- a/test/MC/AArch64/SVE/ldnf1sw-diagnostics.s
+++ b/test/MC/AArch64/SVE/ldnf1sw-diagnostics.s
@@ -54,3 +54,19 @@ ldnf1sw { v0.2d }, p0/z, [x1, #1, MUL VL]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: ldnf1sw { v0.2d }, p0/z, [x1, #1, MUL VL]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z21.d, p5/z, z28.d
+ldnf1sw { z21.d }, p5/z, [x10, #5, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldnf1sw { z21.d }, p5/z, [x10, #5, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z21, z28
+ldnf1sw { z21.d }, p5/z, [x10, #5, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldnf1sw { z21.d }, p5/z, [x10, #5, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ldnf1w-diagnostics.s b/test/MC/AArch64/SVE/ldnf1w-diagnostics.s
index 688816e56da8..2fd7123b4925 100644
--- a/test/MC/AArch64/SVE/ldnf1w-diagnostics.s
+++ b/test/MC/AArch64/SVE/ldnf1w-diagnostics.s
@@ -55,3 +55,19 @@ ldnf1w { v0.2d }, p0/z, [x1, #1, MUL VL]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: ldnf1w { v0.2d }, p0/z, [x1, #1, MUL VL]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z21.d, p5/z, z28.d
+ldnf1w { z21.d }, p5/z, [x10, #5, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldnf1w { z21.d }, p5/z, [x10, #5, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z21, z28
+ldnf1w { z21.d }, p5/z, [x10, #5, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldnf1w { z21.d }, p5/z, [x10, #5, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ldnt1b-diagnostics.s b/test/MC/AArch64/SVE/ldnt1b-diagnostics.s
index 8fa065b13b52..b02f633f5ca1 100644
--- a/test/MC/AArch64/SVE/ldnt1b-diagnostics.s
+++ b/test/MC/AArch64/SVE/ldnt1b-diagnostics.s
@@ -59,3 +59,19 @@ ldnt1b { v0.2d }, p0/z, [x1, #1, MUL VL]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: ldnt1b { v0.2d }, p0/z, [x1, #1, MUL VL]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.b, p0/z, z7.b
+ldnt1b { z0.b }, p0/z, [x0, x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldnt1b { z0.b }, p0/z, [x0, x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+ldnt1b { z0.b }, p0/z, [x0, x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldnt1b { z0.b }, p0/z, [x0, x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ldnt1d-diagnostics.s b/test/MC/AArch64/SVE/ldnt1d-diagnostics.s
index aff76a998623..2aba4dc2e946 100644
--- a/test/MC/AArch64/SVE/ldnt1d-diagnostics.s
+++ b/test/MC/AArch64/SVE/ldnt1d-diagnostics.s
@@ -59,3 +59,19 @@ ldnt1d { v0.2d }, p0/z, [x1, #1, MUL VL]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: ldnt1d { v0.2d }, p0/z, [x1, #1, MUL VL]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+ldnt1d { z0.d }, p0/z, [x0, x0, lsl #3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldnt1d { z0.d }, p0/z, [x0, x0, lsl #3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+ldnt1d { z0.d }, p0/z, [x0, x0, lsl #3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldnt1d { z0.d }, p0/z, [x0, x0, lsl #3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ldnt1h-diagnostics.s b/test/MC/AArch64/SVE/ldnt1h-diagnostics.s
index 2af29bab9fe1..fce1fb5ae5b5 100644
--- a/test/MC/AArch64/SVE/ldnt1h-diagnostics.s
+++ b/test/MC/AArch64/SVE/ldnt1h-diagnostics.s
@@ -59,3 +59,19 @@ ldnt1h { v0.2d }, p0/z, [x1, #1, MUL VL]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: ldnt1h { v0.2d }, p0/z, [x1, #1, MUL VL]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.h, p0/z, z7.h
+ldnt1h { z0.h }, p0/z, [x0, x0, lsl #1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldnt1h { z0.h }, p0/z, [x0, x0, lsl #1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+ldnt1h { z0.h }, p0/z, [x0, x0, lsl #1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldnt1h { z0.h }, p0/z, [x0, x0, lsl #1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ldnt1w-diagnostics.s b/test/MC/AArch64/SVE/ldnt1w-diagnostics.s
index 67a9644078e6..23ba16a0315c 100644
--- a/test/MC/AArch64/SVE/ldnt1w-diagnostics.s
+++ b/test/MC/AArch64/SVE/ldnt1w-diagnostics.s
@@ -59,3 +59,19 @@ ldnt1w { v0.2d }, p0/z, [x1, #1, MUL VL]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: ldnt1w { v0.2d }, p0/z, [x1, #1, MUL VL]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.s, p0/z, z7.s
+ldnt1w { z0.s }, p0/z, [x0, x0, lsl #2]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldnt1w { z0.s }, p0/z, [x0, x0, lsl #2]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+ldnt1w { z0.s }, p0/z, [x0, x0, lsl #2]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldnt1w { z0.s }, p0/z, [x0, x0, lsl #2]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/lsl-diagnostics.s b/test/MC/AArch64/SVE/lsl-diagnostics.s
index 067acf9ace6e..517384896c1c 100644
--- a/test/MC/AArch64/SVE/lsl-diagnostics.s
+++ b/test/MC/AArch64/SVE/lsl-diagnostics.s
@@ -120,3 +120,31 @@ lsl z0.b, p8/m, z0.b, z1.b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
// CHECK-NEXT: lsl z0.b, p8/m, z0.b, z1.b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+lsl z31.d, z31.d, #63
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: lsl z31.d, z31.d, #63
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+lsl z31.d, z31.d, #63
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: lsl z31.d, z31.d, #63
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+lsl z0.s, z1.s, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: lsl z0.s, z1.s, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+lsl z0.s, z1.s, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: lsl z0.s, z1.s, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/lsl.s b/test/MC/AArch64/SVE/lsl.s
index 0e8820e19865..38b895b745a6 100644
--- a/test/MC/AArch64/SVE/lsl.s
+++ b/test/MC/AArch64/SVE/lsl.s
@@ -162,3 +162,55 @@ lsl z0.s, z1.s, z2.d
// CHECK-ENCODING: [0x20,0x8c,0xa2,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 20 8c a2 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p0/z, z6.d
+// CHECK-INST: movprfx z31.d, p0/z, z6.d
+// CHECK-ENCODING: [0xdf,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 20 d0 04 <unknown>
+
+lsl z31.d, p0/m, z31.d, #63
+// CHECK-INST: lsl z31.d, p0/m, z31.d, #63
+// CHECK-ENCODING: [0xff,0x83,0xc3,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ff 83 c3 04 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+lsl z31.d, p0/m, z31.d, #63
+// CHECK-INST: lsl z31.d, p0/m, z31.d, #63
+// CHECK-ENCODING: [0xff,0x83,0xc3,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ff 83 c3 04 <unknown>
+
+movprfx z0.s, p0/z, z7.s
+// CHECK-INST: movprfx z0.s, p0/z, z7.s
+// CHECK-ENCODING: [0xe0,0x20,0x90,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 20 90 04 <unknown>
+
+lsl z0.s, p0/m, z0.s, z1.d
+// CHECK-INST: lsl z0.s, p0/m, z0.s, z1.d
+// CHECK-ENCODING: [0x20,0x80,0x9b,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 80 9b 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+lsl z0.s, p0/m, z0.s, z1.d
+// CHECK-INST: lsl z0.s, p0/m, z0.s, z1.d
+// CHECK-ENCODING: [0x20,0x80,0x9b,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 80 9b 04 <unknown>
diff --git a/test/MC/AArch64/SVE/lslr.s b/test/MC/AArch64/SVE/lslr.s
index ad8941f201fe..16e19dec9562 100644
--- a/test/MC/AArch64/SVE/lslr.s
+++ b/test/MC/AArch64/SVE/lslr.s
@@ -30,3 +30,31 @@ lslr z0.d, p0/m, z0.d, z0.d
// CHECK-ENCODING: [0x00,0x80,0xd7,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 00 80 d7 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z5.d, p0/z, z7.d
+// CHECK-INST: movprfx z5.d, p0/z, z7.d
+// CHECK-ENCODING: [0xe5,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e5 20 d0 04 <unknown>
+
+lslr z5.d, p0/m, z5.d, z0.d
+// CHECK-INST: lslr z5.d, p0/m, z5.d, z0.d
+// CHECK-ENCODING: [0x05,0x80,0xd7,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 05 80 d7 04 <unknown>
+
+movprfx z5, z7
+// CHECK-INST: movprfx z5, z7
+// CHECK-ENCODING: [0xe5,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e5 bc 20 04 <unknown>
+
+lslr z5.d, p0/m, z5.d, z0.d
+// CHECK-INST: lslr z5.d, p0/m, z5.d, z0.d
+// CHECK-ENCODING: [0x05,0x80,0xd7,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 05 80 d7 04 <unknown>
diff --git a/test/MC/AArch64/SVE/lsr-diagnostics.s b/test/MC/AArch64/SVE/lsr-diagnostics.s
index 77ad88d6676f..0de5bcd522c9 100644
--- a/test/MC/AArch64/SVE/lsr-diagnostics.s
+++ b/test/MC/AArch64/SVE/lsr-diagnostics.s
@@ -121,3 +121,31 @@ lsr z0.b, p8/m, z0.b, z1.b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
// CHECK-NEXT: lsr z0.b, p8/m, z0.b, z1.b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+lsr z31.d, z31.d, #64
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: lsr z31.d, z31.d, #64
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+lsr z31.d, z31.d, #64
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: lsr z31.d, z31.d, #64
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+lsr z0.s, z1.s, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: lsr z0.s, z1.s, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+lsr z0.s, z1.s, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: lsr z0.s, z1.s, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/lsr.s b/test/MC/AArch64/SVE/lsr.s
index 9ef662302a94..481326124930 100644
--- a/test/MC/AArch64/SVE/lsr.s
+++ b/test/MC/AArch64/SVE/lsr.s
@@ -162,3 +162,55 @@ lsr z0.s, z1.s, z2.d
// CHECK-ENCODING: [0x20,0x84,0xa2,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 20 84 a2 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p0/z, z6.d
+// CHECK-INST: movprfx z31.d, p0/z, z6.d
+// CHECK-ENCODING: [0xdf,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 20 d0 04 <unknown>
+
+lsr z31.d, p0/m, z31.d, #64
+// CHECK-INST: lsr z31.d, p0/m, z31.d, #64
+// CHECK-ENCODING: [0x1f,0x80,0x81,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 1f 80 81 04 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+lsr z31.d, p0/m, z31.d, #64
+// CHECK-INST: lsr z31.d, p0/m, z31.d, #64
+// CHECK-ENCODING: [0x1f,0x80,0x81,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 1f 80 81 04 <unknown>
+
+movprfx z0.s, p0/z, z7.s
+// CHECK-INST: movprfx z0.s, p0/z, z7.s
+// CHECK-ENCODING: [0xe0,0x20,0x90,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 20 90 04 <unknown>
+
+lsr z0.s, p0/m, z0.s, z1.d
+// CHECK-INST: lsr z0.s, p0/m, z0.s, z1.d
+// CHECK-ENCODING: [0x20,0x80,0x99,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 80 99 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+lsr z0.s, p0/m, z0.s, z1.d
+// CHECK-INST: lsr z0.s, p0/m, z0.s, z1.d
+// CHECK-ENCODING: [0x20,0x80,0x99,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 80 99 04 <unknown>
diff --git a/test/MC/AArch64/SVE/lsrr.s b/test/MC/AArch64/SVE/lsrr.s
index a6bca70103a5..ec87a711dbfa 100644
--- a/test/MC/AArch64/SVE/lsrr.s
+++ b/test/MC/AArch64/SVE/lsrr.s
@@ -30,3 +30,31 @@ lsrr z0.d, p0/m, z0.d, z0.d
// CHECK-ENCODING: [0x00,0x80,0xd5,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 00 80 d5 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z5.d, p0/z, z7.d
+// CHECK-INST: movprfx z5.d, p0/z, z7.d
+// CHECK-ENCODING: [0xe5,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e5 20 d0 04 <unknown>
+
+lsrr z5.d, p0/m, z5.d, z0.d
+// CHECK-INST: lsrr z5.d, p0/m, z5.d, z0.d
+// CHECK-ENCODING: [0x05,0x80,0xd5,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 05 80 d5 04 <unknown>
+
+movprfx z5, z7
+// CHECK-INST: movprfx z5, z7
+// CHECK-ENCODING: [0xe5,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e5 bc 20 04 <unknown>
+
+lsrr z5.d, p0/m, z5.d, z0.d
+// CHECK-INST: lsrr z5.d, p0/m, z5.d, z0.d
+// CHECK-ENCODING: [0x05,0x80,0xd5,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 05 80 d5 04 <unknown>
diff --git a/test/MC/AArch64/SVE/mad.s b/test/MC/AArch64/SVE/mad.s
index 3a5d81e32611..b9712d6ef3bb 100644
--- a/test/MC/AArch64/SVE/mad.s
+++ b/test/MC/AArch64/SVE/mad.s
@@ -30,3 +30,31 @@ mad z0.d, p7/m, z1.d, z31.d
// CHECK-ENCODING: [0xe0,0xdf,0xc1,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: e0 df c1 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0.d, p7/z, z7.d
+// CHECK-INST: movprfx z0.d, p7/z, z7.d
+// CHECK-ENCODING: [0xe0,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 3c d0 04 <unknown>
+
+mad z0.d, p7/m, z1.d, z31.d
+// CHECK-INST: mad z0.d, p7/m, z1.d, z31.d
+// CHECK-ENCODING: [0xe0,0xdf,0xc1,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 df c1 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+mad z0.d, p7/m, z1.d, z31.d
+// CHECK-INST: mad z0.d, p7/m, z1.d, z31.d
+// CHECK-ENCODING: [0xe0,0xdf,0xc1,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 df c1 04 <unknown>
diff --git a/test/MC/AArch64/SVE/mla.s b/test/MC/AArch64/SVE/mla.s
index 4911e6afd925..d76ee0996a8e 100644
--- a/test/MC/AArch64/SVE/mla.s
+++ b/test/MC/AArch64/SVE/mla.s
@@ -30,3 +30,31 @@ mla z0.d, p7/m, z1.d, z31.d
// CHECK-ENCODING: [0x20,0x5c,0xdf,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 20 5c df 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0.d, p7/z, z7.d
+// CHECK-INST: movprfx z0.d, p7/z, z7.d
+// CHECK-ENCODING: [0xe0,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 3c d0 04 <unknown>
+
+mla z0.d, p7/m, z1.d, z31.d
+// CHECK-INST: mla z0.d, p7/m, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0x5c,0xdf,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 5c df 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+mla z0.d, p7/m, z1.d, z31.d
+// CHECK-INST: mla z0.d, p7/m, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0x5c,0xdf,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 5c df 04 <unknown>
diff --git a/test/MC/AArch64/SVE/mls.s b/test/MC/AArch64/SVE/mls.s
index 8c088fdd98b0..32d3d0e2dd8b 100644
--- a/test/MC/AArch64/SVE/mls.s
+++ b/test/MC/AArch64/SVE/mls.s
@@ -30,3 +30,31 @@ mls z0.d, p7/m, z1.d, z31.d
// CHECK-ENCODING: [0x20,0x7c,0xdf,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 20 7c df 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0.d, p7/z, z7.d
+// CHECK-INST: movprfx z0.d, p7/z, z7.d
+// CHECK-ENCODING: [0xe0,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 3c d0 04 <unknown>
+
+mls z0.d, p7/m, z1.d, z31.d
+// CHECK-INST: mls z0.d, p7/m, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0x7c,0xdf,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 7c df 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+mls z0.d, p7/m, z1.d, z31.d
+// CHECK-INST: mls z0.d, p7/m, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0x7c,0xdf,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 7c df 04 <unknown>
diff --git a/test/MC/AArch64/SVE/mov-diagnostics.s b/test/MC/AArch64/SVE/mov-diagnostics.s
index 8f0eef0fa4d0..23b8b55be96e 100644
--- a/test/MC/AArch64/SVE/mov-diagnostics.s
+++ b/test/MC/AArch64/SVE/mov-diagnostics.s
@@ -412,3 +412,79 @@ mov z24.q, z21.q[4]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
// CHECK-NEXT: mov z24.q, z21.q[4]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.b, p0/z, z6.b
+mov z31.b, wsp
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: mov z31.b, wsp
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+mov z31.b, wsp
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: mov z31.b, wsp
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.d, p0/z, z7.d
+mov z0.d, #0xe0000000000003ff
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: mov z0.d, #0xe0000000000003ff
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+mov z0.d, #0xe0000000000003ff
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: mov z0.d, #0xe0000000000003ff
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z4.d, p7/z, z6.d
+mov z4.d, p7/m, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: mov z4.d, p7/m, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+mov z31.d, p15/m, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: mov z31.d, p15/m, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.d, p0/z, z7.d
+mov z0.d, d0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: mov z0.d, d0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+mov z0.d, d0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: mov z0.d, d0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31.d, p0/z, z6.d
+mov z31.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: mov z31.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+mov z31.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: mov z31.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31.d, p0/z, z6.d
+mov z31.d, z31.d[7]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: mov z31.d, z31.d[7]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+mov z31.d, z31.d[7]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: mov z31.d, z31.d[7]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/mov.s b/test/MC/AArch64/SVE/mov.s
index c0a50e39c706..5704e50c941a 100644
--- a/test/MC/AArch64/SVE/mov.s
+++ b/test/MC/AArch64/SVE/mov.s
@@ -660,3 +660,79 @@ mov p15.b, p15/z, p15.b
// CHECK-ENCODING: [0xef,0x7d,0x0f,0x25]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ef 7d 0f 25 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p7/z, z6.d
+// CHECK-INST: movprfx z31.d, p7/z, z6.d
+// CHECK-ENCODING: [0xdf,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 3c d0 04 <unknown>
+
+mov z31.d, p7/m, sp
+// CHECK-INST: mov z31.d, p7/m, sp
+// CHECK-ENCODING: [0xff,0xbf,0xe8,0x05]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ff bf e8 05 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+mov z31.d, p7/m, sp
+// CHECK-INST: mov z31.d, p7/m, sp
+// CHECK-ENCODING: [0xff,0xbf,0xe8,0x05]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ff bf e8 05 <unknown>
+
+movprfx z21.d, p7/z, z28.d
+// CHECK-INST: movprfx z21.d, p7/z, z28.d
+// CHECK-ENCODING: [0x95,0x3f,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 95 3f d0 04 <unknown>
+
+mov z21.d, p7/m, #-128, lsl #8
+// CHECK-INST: mov z21.d, p7/m, #-32768
+// CHECK-ENCODING: [0x15,0x70,0xd7,0x05]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 15 70 d7 05 <unknown>
+
+movprfx z21, z28
+// CHECK-INST: movprfx z21, z28
+// CHECK-ENCODING: [0x95,0xbf,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 95 bf 20 04 <unknown>
+
+mov z21.d, p15/m, #-128, lsl #8
+// CHECK-INST: mov z21.d, p15/m, #-32768
+// CHECK-ENCODING: [0x15,0x70,0xdf,0x05]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 15 70 df 05 <unknown>
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+mov z4.d, p7/m, d31
+// CHECK-INST: mov z4.d, p7/m, d31
+// CHECK-ENCODING: [0xe4,0x9f,0xe0,0x05]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 9f e0 05 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+mov z4.d, p7/m, d31
+// CHECK-INST: mov z4.d, p7/m, d31
+// CHECK-ENCODING: [0xe4,0x9f,0xe0,0x05]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 9f e0 05 <unknown>
diff --git a/test/MC/AArch64/SVE/movprfx-diagnostics.s b/test/MC/AArch64/SVE/movprfx-diagnostics.s
new file mode 100644
index 000000000000..56b1f5cfc725
--- /dev/null
+++ b/test/MC/AArch64/SVE/movprfx-diagnostics.s
@@ -0,0 +1,193 @@
+// RUN: not llvm-mc -triple=aarch64-none-linux-gnu -show-encoding -mattr=+sve 2>&1 < %s | FileCheck %s
+
+// ------------------------------------------------------------------------- //
+// Different destination register (unary)
+
+movprfx z0, z1
+abs z2.d, p0/m, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx writing to a different destination
+// CHECK-NEXT: abs z2.d, p0/m, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// ------------------------------------------------------------------------- //
+// Different destination register (binary)
+
+movprfx z0, z1
+add z2.d, p0/m, z2.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx writing to a different destination
+// CHECK-NEXT: add z2.d, p0/m, z2.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// ------------------------------------------------------------------------- //
+// Different destination register (wide element)
+
+movprfx z0, z1
+asr z2.s, p0/m, z2.s, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx writing to a different destination
+// CHECK-NEXT: asr z2.s, p0/m, z2.s, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// ------------------------------------------------------------------------- //
+// Different destination register (ternary)
+
+movprfx z0, z1
+mla z3.d, p0/m, z1.d, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx writing to a different destination
+// CHECK-NEXT: mla z3.d, p0/m, z1.d, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// ------------------------------------------------------------------------- //
+// Destination used in other operand (unary)
+
+movprfx z0, z1
+abs z0.d, p0/m, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx and destination also used as non-destructive source
+// CHECK-NEXT: abs z0.d, p0/m, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.d, p0/z, z1.d
+cpy z0.d, p0/m, d0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx and destination also used as non-destructive source
+// CHECK-NEXT: cpy z0.d, p0/m, d0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.d, p0/z, z1.d
+mov z0.d, p0/m, d0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx and destination also used as non-destructive source
+// CHECK-NEXT: mov z0.d, p0/m, d0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// Destination used in other operand (binary)
+
+movprfx z0, z1
+add z0.d, p0/m, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx and destination also used as non-destructive source
+// CHECK-NEXT: add z0.d, p0/m, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// ------------------------------------------------------------------------- //
+// Destination used in other operand (wide element)
+
+movprfx z0, z1
+asr z0.s, p0/m, z0.s, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx and destination also used as non-destructive source
+// CHECK-NEXT: asr z0.s, p0/m, z0.s, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// ------------------------------------------------------------------------- //
+// Destination used in other operand (ternary)
+
+movprfx z0, z1
+mla z0.d, p0/m, z0.d, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx and destination also used as non-destructive source
+// CHECK-NEXT: mla z0.d, p0/m, z0.d, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// ------------------------------------------------------------------------- //
+// Different general predicate (unary)
+
+movprfx z0.d, p0/m, z1.d
+abs z0.d, p1/m, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx using a different general predicate
+// CHECK-NEXT: abs z0.d, p1/m, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// ------------------------------------------------------------------------- //
+// Different general predicate (binary)
+
+movprfx z0.d, p0/m, z1.d
+add z0.d, p1/m, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx using a different general predicate
+// CHECK-NEXT: add z0.d, p1/m, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// ------------------------------------------------------------------------- //
+// Different general predicate (wide element)
+
+movprfx z0.d, p0/m, z1.d
+asr z0.s, p1/m, z0.s, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx using a different general predicate
+// CHECK-NEXT: asr z0.s, p1/m, z0.s, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// ------------------------------------------------------------------------- //
+// Different general predicate (ternary)
+
+movprfx z0.d, p0/m, z1.d
+mla z0.d, p1/m, z1.d, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx using a different general predicate
+// CHECK-NEXT: mla z0.d, p1/m, z1.d, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// ------------------------------------------------------------------------- //
+// Different element size (unary)
+
+movprfx z0.s, p0/m, z1.s
+abs z0.d, p0/m, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx with a different element size
+// CHECK-NEXT: abs z0.d, p0/m, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// ------------------------------------------------------------------------- //
+// Different element size (binary)
+
+movprfx z0.s, p0/m, z1.s
+add z0.d, p0/m, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx with a different element size
+// CHECK-NEXT: add z0.d, p0/m, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// ------------------------------------------------------------------------- //
+// Different element size (wide element)
+
+movprfx z0.d, p0/m, z1.d
+asr z0.s, p0/m, z0.s, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx with a different element size
+// CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// ------------------------------------------------------------------------- //
+// Different element size (ternary)
+
+movprfx z0.s, p0/m, z1.s
+mla z0.d, p0/m, z1.d, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx with a different element size
+// CHECK-NEXT: mla z0.d, p0/m, z1.d, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// ------------------------------------------------------------------------- //
+// Predicated movprfx with non-predicated instruction.
+
+movprfx z0.d, p0/m, z1.d
+add z0.d, z0.d, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: add z0.d, z0.d, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// ------------------------------------------------------------------------- //
+// Ensure we don't try to apply a prefix to subsequent instructions (upon failure)
+
+movprfx z0, z1
+add z0.d, z1.d, z2.d
+add z0.d, z1.d, z2.d
+// CHECK: [[@LINE-2]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: add z0.d, z1.d, z2.d
+// CHECK-NOT: [[@LINE-3]]:{{[0-9]+}}:
+// CHECK: add z0.d, z1.d, z2.d
diff --git a/test/MC/AArch64/SVE/movprfx.s b/test/MC/AArch64/SVE/movprfx.s
new file mode 100644
index 000000000000..8065967ebc3a
--- /dev/null
+++ b/test/MC/AArch64/SVE/movprfx.s
@@ -0,0 +1,97 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d -mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+// This test file is mostly empty because most 'movprfx' tests are embedded
+// with other instructions that are destructive and can be prefixed
+// by the movprfx instruction. A list of destructive instructions
+// is given below by their mnemonic, which have tests in corresponding
+// <mnemonic>.s test files:
+//
+// abs decp fdivr fnmla fsubr mov sdivr sqincw umulh
+// add eon fmad fnmls ftmad msb sdot sqsub uqadd
+// and eor fmax fnmsb incd mul smax sub uqdecd
+// asr ext fmaxnm frecpx inch neg smin subr uqdech
+// asrd fabd fmin frinta incp not smulh sxtb uqdecp
+// asrr fabs fminnm frinti incw orn splice sxth uqdecw
+// bic fadd fmla frintm insr orr sqadd sxtw uqincd
+// clasta fcadd fmls frintn lsl rbit sqdecd uabd uqinch
+// clastb fcmla fmov frintp lslr revb sqdech ucvtf uqincp
+// cls fcpy fmsb frintx lsr revh sqdecp udiv uqincw
+// clz fcvt fmul frintz lsrr revw sqdecw udivr uqsub
+// cnot fcvtzs fmulx fscale mad sabd sqincd udot uxtb
+// cnt fcvtzu fneg fsqrt mla scvtf sqinch umax uxth
+// cpy fdiv fnmad fsub mls sdiv sqincp umin uxtw
+
+
+// ------------------------------------------------------------------------- //
+// Test compatibility with MOVPRFX instruction with BRK and HLT.
+//
+// Section 7.1.2 of the SVE Architecture Reference Manual Supplement:
+// "it is permitted to use MOVPRFX to prefix an A64 BRK or HLT instruction"
+
+movprfx z0, z1
+// CHECK-INST: movprfx z0, z1
+// CHECK-ENCODING: [0x20,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 bc 20 04 <unknown>
+
+hlt #1
+// CHECK-INST: hlt #0x1
+// CHECK-ENCODING: [0x20,0x00,0x40,0xd4]
+
+movprfx z0.d, p0/z, z1.d
+// CHECK-INST: movprfx z0.d, p0/z, z1.d
+// CHECK-ENCODING: [0x20,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 20 d0 04 <unknown>
+
+hlt #1
+// CHECK-INST: hlt #0x1
+// CHECK-ENCODING: [0x20,0x00,0x40,0xd4]
+
+movprfx z0, z1
+// CHECK-INST: movprfx z0, z1
+// CHECK-ENCODING: [0x20,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 bc 20 04 <unknown>
+
+brk #1
+// CHECK-INST: brk #0x1
+// CHECK-ENCODING: [0x20,0x00,0x20,0xd4]
+
+movprfx z0.d, p0/z, z1.d
+// CHECK-INST: movprfx z0.d, p0/z, z1.d
+// CHECK-ENCODING: [0x20,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 20 d0 04 <unknown>
+
+brk #1
+// CHECK-INST: brk #0x1
+// CHECK-ENCODING: [0x20,0x00,0x20,0xd4]
+
+// ------------------------------------------------------------------------- //
+// Ensure we don't try to apply a prefix to subsequent instructions (upon success)
+
+movprfx z0, z1
+// CHECK-INST: movprfx z0, z1
+// CHECK-ENCODING: [0x20,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 bc 20 04 <unknown>
+
+add z0.d, p0/m, z0.d, z1.d
+// CHECK-INST: add z0.d, p0/m, z0.d, z1.d
+// CHECK-ENCODING: [0x20,0x00,0xc0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 00 c0 04 <unknown>
+
+add z0.d, p0/m, z0.d, z1.d
+// CHECK-INST: add z0.d, p0/m, z0.d, z1.d
+// CHECK-ENCODING: [0x20,0x00,0xc0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 00 c0 04 <unknown>
diff --git a/test/MC/AArch64/SVE/msb.s b/test/MC/AArch64/SVE/msb.s
index 048f32160e12..94715616db51 100644
--- a/test/MC/AArch64/SVE/msb.s
+++ b/test/MC/AArch64/SVE/msb.s
@@ -30,3 +30,31 @@ msb z0.d, p7/m, z1.d, z31.d
// CHECK-ENCODING: [0xe0,0xff,0xc1,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: e0 ff c1 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0.d, p7/z, z7.d
+// CHECK-INST: movprfx z0.d, p7/z, z7.d
+// CHECK-ENCODING: [0xe0,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 3c d0 04 <unknown>
+
+msb z0.d, p7/m, z1.d, z31.d
+// CHECK-INST: msb z0.d, p7/m, z1.d, z31.d
+// CHECK-ENCODING: [0xe0,0xff,0xc1,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 ff c1 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+msb z0.d, p7/m, z1.d, z31.d
+// CHECK-INST: msb z0.d, p7/m, z1.d, z31.d
+// CHECK-ENCODING: [0xe0,0xff,0xc1,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 ff c1 04 <unknown>
diff --git a/test/MC/AArch64/SVE/mul-diagnostics.s b/test/MC/AArch64/SVE/mul-diagnostics.s
index 745c35d613f8..de08aff0fd5a 100644
--- a/test/MC/AArch64/SVE/mul-diagnostics.s
+++ b/test/MC/AArch64/SVE/mul-diagnostics.s
@@ -36,3 +36,13 @@ mul z0.b, p8/m, z0.b, z1.b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
// CHECK-NEXT: mul z0.b, p8/m, z0.b, z1.b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+mul z31.d, z31.d, #127
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: mul z31.d, z31.d, #127
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/mul.s b/test/MC/AArch64/SVE/mul.s
index f83b88ee7125..d1a1f867847f 100644
--- a/test/MC/AArch64/SVE/mul.s
+++ b/test/MC/AArch64/SVE/mul.s
@@ -78,3 +78,43 @@ mul z31.d, z31.d, #127
// CHECK-ENCODING: [0xff,0xcf,0xf0,0x25]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff cf f0 25 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0.d, p7/z, z7.d
+// CHECK-INST: movprfx z0.d, p7/z, z7.d
+// CHECK-ENCODING: [0xe0,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 3c d0 04 <unknown>
+
+mul z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: mul z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x1f,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 1f d0 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+mul z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: mul z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x1f,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 1f d0 04 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+mul z31.d, z31.d, #127
+// CHECK-INST: mul z31.d, z31.d, #127
+// CHECK-ENCODING: [0xff,0xcf,0xf0,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ff cf f0 25 <unknown>
diff --git a/test/MC/AArch64/SVE/neg.s b/test/MC/AArch64/SVE/neg.s
index 3ad4abf463cc..437f0e959b2d 100644
--- a/test/MC/AArch64/SVE/neg.s
+++ b/test/MC/AArch64/SVE/neg.s
@@ -54,3 +54,31 @@ neg z31.d, p7/m, z31.d
// CHECK-ENCODING: [0xff,0xbf,0xd7,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff bf d7 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+neg z4.d, p7/m, z31.d
+// CHECK-INST: neg z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xd7,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf d7 04 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+neg z4.d, p7/m, z31.d
+// CHECK-INST: neg z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xd7,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf d7 04 <unknown>
diff --git a/test/MC/AArch64/SVE/not.s b/test/MC/AArch64/SVE/not.s
index 67ebce236c26..35e72bb06ac1 100644
--- a/test/MC/AArch64/SVE/not.s
+++ b/test/MC/AArch64/SVE/not.s
@@ -42,3 +42,31 @@ not p15.b, p15/z, p15.b
// CHECK-ENCODING: [0xef,0x7f,0x0f,0x25]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ef 7f 0f 25 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+not z4.d, p7/m, z31.d
+// CHECK-INST: not z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xde,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf de 04 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+not z4.d, p7/m, z31.d
+// CHECK-INST: not z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xde,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf de 04 <unknown>
diff --git a/test/MC/AArch64/SVE/orn-diagnostics.s b/test/MC/AArch64/SVE/orn-diagnostics.s
index d33cf8009068..a9db36ba258c 100644
--- a/test/MC/AArch64/SVE/orn-diagnostics.s
+++ b/test/MC/AArch64/SVE/orn-diagnostics.s
@@ -77,3 +77,13 @@ orn p0.b, p0/m, p1.b, p2.b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: orn p0.b, p0/m, p1.b, p2.b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+orn z0.d, z0.d, #0x6
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: orn z0.d, z0.d, #0x6
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/orn.s b/test/MC/AArch64/SVE/orn.s
index 89b8ea5795e8..1028414070f0 100644
--- a/test/MC/AArch64/SVE/orn.s
+++ b/test/MC/AArch64/SVE/orn.s
@@ -66,3 +66,19 @@ orn p15.b, p15/z, p15.b, p15.b
// CHECK-ENCODING: [0xff,0x7d,0x8f,0x25]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff 7d 8f 25 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+orn z0.d, z0.d, #0x6
+// CHECK-INST: orr z0.d, z0.d, #0xfffffffffffffff9
+// CHECK-ENCODING: [0xa0,0xef,0x03,0x05]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: a0 ef 03 05 <unknown>
diff --git a/test/MC/AArch64/SVE/orr-diagnostics.s b/test/MC/AArch64/SVE/orr-diagnostics.s
index bcea515cc131..7038ea41f470 100644
--- a/test/MC/AArch64/SVE/orr-diagnostics.s
+++ b/test/MC/AArch64/SVE/orr-diagnostics.s
@@ -92,3 +92,37 @@ orr p0.b, p0/m, p1.b, p2.b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: orr p0.b, p0/m, p1.b, p2.b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+orr z0.d, z0.d, #0x6
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: orr z0.d, z0.d, #0x6
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z23.d, p0/z, z30.d
+orr z23.d, z13.d, z8.d // should not use mov-alias
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: orr z23.d, z13.d, z8.d // should not use mov-alias
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z23, z30
+orr z23.d, z13.d, z8.d // should not use mov-alias
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: orr z23.d, z13.d, z8.d // should not use mov-alias
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.d, p0/z, z7.d
+orr z0.d, z0.d, z0.d // should use mov-alias
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: orr z0.d, z0.d, z0.d // should use mov-alias
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+orr z0.d, z0.d, z0.d // should use mov-alias
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: orr z0.d, z0.d, z0.d // should use mov-alias
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/orr.s b/test/MC/AArch64/SVE/orr.s
index 1cf48fdd7a03..7d00cd8c6cf1 100644
--- a/test/MC/AArch64/SVE/orr.s
+++ b/test/MC/AArch64/SVE/orr.s
@@ -110,3 +110,43 @@ orr p15.b, p15/z, p15.b, p15.b
// CHECK-ENCODING: [0xef,0x7d,0x8f,0x25]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ef 7d 8f 25 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+orr z4.d, p7/m, z4.d, z31.d
+// CHECK-INST: orr z4.d, p7/m, z4.d, z31.d
+// CHECK-ENCODING: [0xe4,0x1f,0xd8,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 1f d8 04 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+orr z4.d, p7/m, z4.d, z31.d
+// CHECK-INST: orr z4.d, p7/m, z4.d, z31.d
+// CHECK-ENCODING: [0xe4,0x1f,0xd8,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 1f d8 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+orr z0.d, z0.d, #0x6
+// CHECK-INST: orr z0.d, z0.d, #0x6
+// CHECK-ENCODING: [0x20,0xf8,0x03,0x05]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 f8 03 05 <unknown>
diff --git a/test/MC/AArch64/SVE/orv-diagnostics.s b/test/MC/AArch64/SVE/orv-diagnostics.s
index 8a64ad89c005..1892321be28b 100644
--- a/test/MC/AArch64/SVE/orv-diagnostics.s
+++ b/test/MC/AArch64/SVE/orv-diagnostics.s
@@ -31,4 +31,19 @@ orv v0.2d, p7, z31.d
orv h0, p8, z31.h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
// CHECK-NEXT: orv h0, p8, z31.h
-// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: \ No newline at end of file
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p7/z, z6.d
+orv d0, p7, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: orv d0, p7, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+orv d0, p7, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: orv d0, p7, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/pfalse-diagnostics.s b/test/MC/AArch64/SVE/pfalse-diagnostics.s
new file mode 100644
index 000000000000..1a4047d8d80b
--- /dev/null
+++ b/test/MC/AArch64/SVE/pfalse-diagnostics.s
@@ -0,0 +1,10 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve 2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Only .b is supported
+
+pfalse p15.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register
+// CHECK-NEXT: pfalse p15.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/pfalse.s b/test/MC/AArch64/SVE/pfalse.s
new file mode 100644
index 000000000000..b1385d8b863f
--- /dev/null
+++ b/test/MC/AArch64/SVE/pfalse.s
@@ -0,0 +1,14 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d -mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+pfalse p15.b
+// CHECK-INST: pfalse p15.b
+// CHECK-ENCODING: [0x0f,0xe4,0x18,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 0f e4 18 25 <unknown>
diff --git a/test/MC/AArch64/SVE/pfirst-diagnostics.s b/test/MC/AArch64/SVE/pfirst-diagnostics.s
new file mode 100644
index 000000000000..6ed891a59f24
--- /dev/null
+++ b/test/MC/AArch64/SVE/pfirst-diagnostics.s
@@ -0,0 +1,19 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve 2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Only .b is supported
+
+pfirst p0.h, p15, p0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register
+// CHECK-NEXT: pfirst p0.h, p15, p0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// ------------------------------------------------------------------------- //
+// Tied operands must match
+
+pfirst p0.b, p15, p1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: pfirst p0.b, p15, p1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/pfirst.s b/test/MC/AArch64/SVE/pfirst.s
new file mode 100644
index 000000000000..8090bf72a0ef
--- /dev/null
+++ b/test/MC/AArch64/SVE/pfirst.s
@@ -0,0 +1,20 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d -mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+pfirst p0.b, p15, p0.b
+// CHECK-INST: pfirst p0.b, p15, p0.b
+// CHECK-ENCODING: [0xe0,0xc1,0x58,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 c1 58 25 <unknown>
+
+pfirst p15.b, p15, p15.b
+// CHECK-INST: pfirst p15.b, p15, p15.b
+// CHECK-ENCODING: [0xef,0xc1,0x58,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ef c1 58 25 <unknown>
diff --git a/test/MC/AArch64/SVE/pnext-diagnostics.s b/test/MC/AArch64/SVE/pnext-diagnostics.s
new file mode 100644
index 000000000000..e8ee5669dfea
--- /dev/null
+++ b/test/MC/AArch64/SVE/pnext-diagnostics.s
@@ -0,0 +1,10 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve 2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Tied operands must match
+
+pnext p0.b, p15, p1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: pnext p0.b, p15, p1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/pnext.s b/test/MC/AArch64/SVE/pnext.s
new file mode 100644
index 000000000000..3d788deb05c4
--- /dev/null
+++ b/test/MC/AArch64/SVE/pnext.s
@@ -0,0 +1,38 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d -mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+pnext p15.b, p15, p15.b
+// CHECK-INST: pnext p15.b, p15, p15.b
+// CHECK-ENCODING: [0xef,0xc5,0x19,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ef c5 19 25 <unknown>
+
+pnext p0.b, p15, p0.b
+// CHECK-INST: pnext p0.b, p15, p0.b
+// CHECK-ENCODING: [0xe0,0xc5,0x19,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 c5 19 25 <unknown>
+
+pnext p0.h, p15, p0.h
+// CHECK-INST: pnext p0.h, p15, p0.h
+// CHECK-ENCODING: [0xe0,0xc5,0x59,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 c5 59 25 <unknown>
+
+pnext p0.s, p15, p0.s
+// CHECK-INST: pnext p0.s, p15, p0.s
+// CHECK-ENCODING: [0xe0,0xc5,0x99,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 c5 99 25 <unknown>
+
+pnext p0.d, p15, p0.d
+// CHECK-INST: pnext p0.d, p15, p0.d
+// CHECK-ENCODING: [0xe0,0xc5,0xd9,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 c5 d9 25 <unknown>
diff --git a/test/MC/AArch64/SVE/prfb-diagnostics.s b/test/MC/AArch64/SVE/prfb-diagnostics.s
index 2ffdc6ff6a00..24466ed79480 100644
--- a/test/MC/AArch64/SVE/prfb-diagnostics.s
+++ b/test/MC/AArch64/SVE/prfb-diagnostics.s
@@ -128,3 +128,19 @@ prfb #0, p8, [x0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
// CHECK-NEXT: prfb #0, p8, [x0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+prfb pldl1keep, p0, [x0, z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: prfb pldl1keep, p0, [x0, z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+prfb pldl1keep, p0, [x0, z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: prfb pldl1keep, p0, [x0, z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/prfd-diagnostics.s b/test/MC/AArch64/SVE/prfd-diagnostics.s
index bca2f5b71218..2fb6a296576f 100644
--- a/test/MC/AArch64/SVE/prfd-diagnostics.s
+++ b/test/MC/AArch64/SVE/prfd-diagnostics.s
@@ -114,3 +114,19 @@ prfd #0, p8, [x0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
// CHECK-NEXT: prfd #0, p8, [x0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+prfd pldl1keep, p0, [x0, z0.d, lsl #3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: prfd pldl1keep, p0, [x0, z0.d, lsl #3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+prfd pldl1keep, p0, [x0, z0.d, lsl #3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: prfd pldl1keep, p0, [x0, z0.d, lsl #3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/prfh-diagnostics.s b/test/MC/AArch64/SVE/prfh-diagnostics.s
index 0265c9f86a31..a9f8ad84c65c 100644
--- a/test/MC/AArch64/SVE/prfh-diagnostics.s
+++ b/test/MC/AArch64/SVE/prfh-diagnostics.s
@@ -153,3 +153,19 @@ prfh #0, p8, [x0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
// CHECK-NEXT: prfh #0, p8, [x0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+prfh pldl1keep, p0, [x0, z0.d, lsl #1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: prfh pldl1keep, p0, [x0, z0.d, lsl #1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+prfh pldl1keep, p0, [x0, z0.d, lsl #1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: prfh pldl1keep, p0, [x0, z0.d, lsl #1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/prfw-diagnostics.s b/test/MC/AArch64/SVE/prfw-diagnostics.s
index 06bc54d2ed0f..510c383f535f 100644
--- a/test/MC/AArch64/SVE/prfw-diagnostics.s
+++ b/test/MC/AArch64/SVE/prfw-diagnostics.s
@@ -154,3 +154,31 @@ prfw #0, p8, [x0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
// CHECK-NEXT: prfw #0, p8, [x0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z8.d, p3/z, z15.d
+prfw #7, p3, [x13, z8.d, uxtw #2]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: prfw #7, p3, [x13, z8.d, uxtw #2]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z8, z15
+prfw #7, p3, [x13, z8.d, uxtw #2]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: prfw #7, p3, [x13, z8.d, uxtw #2]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z21.d, p5/z, z28.d
+prfw pldl3strm, p5, [x10, z21.d, lsl #2]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: prfw pldl3strm, p5, [x10, z21.d, lsl #2]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z21, z28
+prfw pldl3strm, p5, [x10, z21.d, lsl #2]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: prfw pldl3strm, p5, [x10, z21.d, lsl #2]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ptest-diagnostics.s b/test/MC/AArch64/SVE/ptest-diagnostics.s
new file mode 100644
index 000000000000..f03bf4c60f88
--- /dev/null
+++ b/test/MC/AArch64/SVE/ptest-diagnostics.s
@@ -0,0 +1,10 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve 2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Only .b is supported
+
+ptest p15, p15.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register
+// CHECK-NEXT: ptest p15, p15.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ptest.s b/test/MC/AArch64/SVE/ptest.s
new file mode 100644
index 000000000000..29622c08c92f
--- /dev/null
+++ b/test/MC/AArch64/SVE/ptest.s
@@ -0,0 +1,20 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d -mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+ptest p15, p0.b
+// CHECK-INST: ptest p15, p0.b
+// CHECK-ENCODING: [0x00,0xfc,0x50,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 00 fc 50 25 <unknown>
+
+ptest p15, p15.b
+// CHECK-INST: ptest p15, p15.b
+// CHECK-ENCODING: [0xe0,0xfd,0x50,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 fd 50 25 <unknown>
diff --git a/test/MC/AArch64/SVE/rbit.s b/test/MC/AArch64/SVE/rbit.s
index 9d017d9ce45b..2acba0ebc4e6 100644
--- a/test/MC/AArch64/SVE/rbit.s
+++ b/test/MC/AArch64/SVE/rbit.s
@@ -30,3 +30,31 @@ rbit z0.d, p7/m, z31.d
// CHECK-ENCODING: [0xe0,0x9f,0xe7,0x05]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: e0 9f e7 05 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0.d, p7/z, z7.d
+// CHECK-INST: movprfx z0.d, p7/z, z7.d
+// CHECK-ENCODING: [0xe0,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 3c d0 04 <unknown>
+
+rbit z0.d, p7/m, z31.d
+// CHECK-INST: rbit z0.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe0,0x9f,0xe7,0x05]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 9f e7 05 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+rbit z0.d, p7/m, z31.d
+// CHECK-INST: rbit z0.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe0,0x9f,0xe7,0x05]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 9f e7 05 <unknown>
diff --git a/test/MC/AArch64/SVE/rev-diagnostics.s b/test/MC/AArch64/SVE/rev-diagnostics.s
new file mode 100644
index 000000000000..bad4b1655eee
--- /dev/null
+++ b/test/MC/AArch64/SVE/rev-diagnostics.s
@@ -0,0 +1,17 @@
+// RUN: not llvm-mc -triple=aarch64-none-linux-gnu -show-encoding -mattr=+sve 2>&1 < %s | FileCheck %s
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+rev z0.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: rev z0.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+rev z0.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: rev z0.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/revb.s b/test/MC/AArch64/SVE/revb.s
index b80f9a5d0492..1393160b80d0 100644
--- a/test/MC/AArch64/SVE/revb.s
+++ b/test/MC/AArch64/SVE/revb.s
@@ -24,3 +24,31 @@ revb z0.d, p7/m, z31.d
// CHECK-ENCODING: [0xe0,0x9f,0xe4,0x05]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: e0 9f e4 05 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0.d, p7/z, z7.d
+// CHECK-INST: movprfx z0.d, p7/z, z7.d
+// CHECK-ENCODING: [0xe0,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 3c d0 04 <unknown>
+
+revb z0.d, p7/m, z31.d
+// CHECK-INST: revb z0.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe0,0x9f,0xe4,0x05]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 9f e4 05 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+revb z0.d, p7/m, z31.d
+// CHECK-INST: revb z0.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe0,0x9f,0xe4,0x05]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 9f e4 05 <unknown>
diff --git a/test/MC/AArch64/SVE/revh.s b/test/MC/AArch64/SVE/revh.s
index 8332461a1e69..d37b1bdec530 100644
--- a/test/MC/AArch64/SVE/revh.s
+++ b/test/MC/AArch64/SVE/revh.s
@@ -18,3 +18,31 @@ revh z0.d, p7/m, z31.d
// CHECK-ENCODING: [0xe0,0x9f,0xe5,0x05]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: e0 9f e5 05 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0.d, p7/z, z7.d
+// CHECK-INST: movprfx z0.d, p7/z, z7.d
+// CHECK-ENCODING: [0xe0,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 3c d0 04 <unknown>
+
+revh z0.d, p7/m, z31.d
+// CHECK-INST: revh z0.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe0,0x9f,0xe5,0x05]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 9f e5 05 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+revh z0.d, p7/m, z31.d
+// CHECK-INST: revh z0.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe0,0x9f,0xe5,0x05]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 9f e5 05 <unknown>
diff --git a/test/MC/AArch64/SVE/revw.s b/test/MC/AArch64/SVE/revw.s
index 095e2aacd875..c2e419fe2542 100644
--- a/test/MC/AArch64/SVE/revw.s
+++ b/test/MC/AArch64/SVE/revw.s
@@ -12,3 +12,31 @@ revw z0.d, p7/m, z31.d
// CHECK-ENCODING: [0xe0,0x9f,0xe6,0x05]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: e0 9f e6 05 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0.d, p7/z, z7.d
+// CHECK-INST: movprfx z0.d, p7/z, z7.d
+// CHECK-ENCODING: [0xe0,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 3c d0 04 <unknown>
+
+revw z0.d, p7/m, z31.d
+// CHECK-INST: revw z0.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe0,0x9f,0xe6,0x05]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 9f e6 05 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+revw z0.d, p7/m, z31.d
+// CHECK-INST: revw z0.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe0,0x9f,0xe6,0x05]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 9f e6 05 <unknown>
diff --git a/test/MC/AArch64/SVE/sabd.s b/test/MC/AArch64/SVE/sabd.s
index 0636d3f76284..4186ff048299 100644
--- a/test/MC/AArch64/SVE/sabd.s
+++ b/test/MC/AArch64/SVE/sabd.s
@@ -30,3 +30,31 @@ sabd z31.d, p7/m, z31.d, z31.d
// CHECK-ENCODING: [0xff,0x1f,0xcc,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff 1f cc 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+sabd z4.d, p7/m, z4.d, z31.d
+// CHECK-INST: sabd z4.d, p7/m, z4.d, z31.d
+// CHECK-ENCODING: [0xe4,0x1f,0xcc,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 1f cc 04 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+sabd z4.d, p7/m, z4.d, z31.d
+// CHECK-INST: sabd z4.d, p7/m, z4.d, z31.d
+// CHECK-ENCODING: [0xe4,0x1f,0xcc,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 1f cc 04 <unknown>
diff --git a/test/MC/AArch64/SVE/saddv-diagnostics.s b/test/MC/AArch64/SVE/saddv-diagnostics.s
index e387e07735ad..d99433222e3e 100644
--- a/test/MC/AArch64/SVE/saddv-diagnostics.s
+++ b/test/MC/AArch64/SVE/saddv-diagnostics.s
@@ -31,4 +31,19 @@ saddv d0, p7, z31.d
saddv d0, p8, z31.b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
// CHECK-NEXT: saddv d0, p8, z31.b
-// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: \ No newline at end of file
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.s, p7/z, z6.s
+saddv d0, p7, z31.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: saddv d0, p7, z31.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+saddv d0, p7, z31.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: saddv d0, p7, z31.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/scvtf.s b/test/MC/AArch64/SVE/scvtf.s
index 31dec7e144a4..8089e4a0c21f 100644
--- a/test/MC/AArch64/SVE/scvtf.s
+++ b/test/MC/AArch64/SVE/scvtf.s
@@ -48,3 +48,31 @@ scvtf z0.d, p0/m, z0.d
// CHECK-ENCODING: [0x00,0xa0,0xd6,0x65]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 00 a0 d6 65 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z5.d, p0/z, z7.d
+// CHECK-INST: movprfx z5.d, p0/z, z7.d
+// CHECK-ENCODING: [0xe5,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e5 20 d0 04 <unknown>
+
+scvtf z5.d, p0/m, z0.d
+// CHECK-INST: scvtf z5.d, p0/m, z0.d
+// CHECK-ENCODING: [0x05,0xa0,0xd6,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 05 a0 d6 65 <unknown>
+
+movprfx z5, z7
+// CHECK-INST: movprfx z5, z7
+// CHECK-ENCODING: [0xe5,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e5 bc 20 04 <unknown>
+
+scvtf z5.d, p0/m, z0.d
+// CHECK-INST: scvtf z5.d, p0/m, z0.d
+// CHECK-ENCODING: [0x05,0xa0,0xd6,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 05 a0 d6 65 <unknown>
diff --git a/test/MC/AArch64/SVE/sdiv.s b/test/MC/AArch64/SVE/sdiv.s
index 565e9f63cfa8..5b617eb763e5 100644
--- a/test/MC/AArch64/SVE/sdiv.s
+++ b/test/MC/AArch64/SVE/sdiv.s
@@ -18,3 +18,31 @@ sdiv z0.d, p7/m, z0.d, z31.d
// CHECK-ENCODING: [0xe0,0x1f,0xd4,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: e0 1f d4 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0.d, p7/z, z7.d
+// CHECK-INST: movprfx z0.d, p7/z, z7.d
+// CHECK-ENCODING: [0xe0,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 3c d0 04 <unknown>
+
+sdiv z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: sdiv z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x1f,0xd4,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 1f d4 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+sdiv z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: sdiv z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x1f,0xd4,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 1f d4 04 <unknown>
diff --git a/test/MC/AArch64/SVE/sdivr.s b/test/MC/AArch64/SVE/sdivr.s
index 02c8a3052c05..b85cecedf30d 100644
--- a/test/MC/AArch64/SVE/sdivr.s
+++ b/test/MC/AArch64/SVE/sdivr.s
@@ -18,3 +18,31 @@ sdivr z0.d, p7/m, z0.d, z31.d
// CHECK-ENCODING: [0xe0,0x1f,0xd6,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: e0 1f d6 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0.d, p7/z, z7.d
+// CHECK-INST: movprfx z0.d, p7/z, z7.d
+// CHECK-ENCODING: [0xe0,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 3c d0 04 <unknown>
+
+sdivr z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: sdivr z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x1f,0xd6,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 1f d6 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+sdivr z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: sdivr z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x1f,0xd6,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 1f d6 04 <unknown>
diff --git a/test/MC/AArch64/SVE/sdot-diagnostics.s b/test/MC/AArch64/SVE/sdot-diagnostics.s
index cc22f21aa24d..622e27422991 100644
--- a/test/MC/AArch64/SVE/sdot-diagnostics.s
+++ b/test/MC/AArch64/SVE/sdot-diagnostics.s
@@ -56,3 +56,19 @@ sdot z0.d, z1.h, z15.h[2]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 1].
// CHECK-NEXT: sdot z0.d, z1.h, z15.h[2]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+sdot z0.d, z1.h, z31.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: sdot z0.d, z1.h, z31.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.d, p0/z, z7.d
+sdot z0.d, z1.h, z15.h[1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: sdot z0.d, z1.h, z15.h[1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/sdot.s b/test/MC/AArch64/SVE/sdot.s
index dbc6b3eabc05..a7d7e7ecb6da 100644
--- a/test/MC/AArch64/SVE/sdot.s
+++ b/test/MC/AArch64/SVE/sdot.s
@@ -30,3 +30,31 @@ sdot z0.d, z1.h, z15.h[1]
// CHECK-ENCODING: [0x20,0x00,0xff,0x44]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 20 00 ff 44 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+sdot z0.d, z1.h, z31.h
+// CHECK-INST: sdot z0.d, z1.h, z31.h
+// CHECK-ENCODING: [0x20,0x00,0xdf,0x44]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 00 df 44 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+sdot z0.d, z1.h, z15.h[1]
+// CHECK-INST: sdot z0.d, z1.h, z15.h[1]
+// CHECK-ENCODING: [0x20,0x00,0xff,0x44]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 00 ff 44 <unknown>
diff --git a/test/MC/AArch64/SVE/sel-diagnostics.s b/test/MC/AArch64/SVE/sel-diagnostics.s
new file mode 100644
index 000000000000..760f2327f85a
--- /dev/null
+++ b/test/MC/AArch64/SVE/sel-diagnostics.s
@@ -0,0 +1,17 @@
+// RUN: not llvm-mc -triple=aarch64-none-linux-gnu -show-encoding -mattr=+sve 2>&1 < %s | FileCheck %s
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z28.b, p7/z, z30.b
+sel z28.b, p7, z13.b, z8.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sel z28.b, p7, z13.b, z8.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z23, z30
+sel z23.b, p11, z13.b, z8.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sel z23.b, p11, z13.b, z8.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/smax-diagnostics.s b/test/MC/AArch64/SVE/smax-diagnostics.s
index ac1c286f26ef..77825e013c35 100644
--- a/test/MC/AArch64/SVE/smax-diagnostics.s
+++ b/test/MC/AArch64/SVE/smax-diagnostics.s
@@ -14,3 +14,13 @@ smax z0.b, p8/m, z0.b, z0.b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
// CHECK-NEXT: smax z0.b, p8/m, z0.b, z0.b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+smax z31.d, z31.d, #127
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: smax z31.d, z31.d, #127
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/smax.s b/test/MC/AArch64/SVE/smax.s
index de0e9e2f5247..1fbcc84110e6 100644
--- a/test/MC/AArch64/SVE/smax.s
+++ b/test/MC/AArch64/SVE/smax.s
@@ -78,3 +78,43 @@ smax z31.d, p7/m, z31.d, z31.d
// CHECK-ENCODING: [0xff,0x1f,0xc8,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff 1f c8 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+smax z4.d, p7/m, z4.d, z31.d
+// CHECK-INST: smax z4.d, p7/m, z4.d, z31.d
+// CHECK-ENCODING: [0xe4,0x1f,0xc8,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 1f c8 04 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+smax z4.d, p7/m, z4.d, z31.d
+// CHECK-INST: smax z4.d, p7/m, z4.d, z31.d
+// CHECK-ENCODING: [0xe4,0x1f,0xc8,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 1f c8 04 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+smax z31.d, z31.d, #127
+// CHECK-INST: smax z31.d, z31.d, #127
+// CHECK-ENCODING: [0xff,0xcf,0xe8,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ff cf e8 25 <unknown>
diff --git a/test/MC/AArch64/SVE/smaxv-diagnostics.s b/test/MC/AArch64/SVE/smaxv-diagnostics.s
index 62936022f44d..7486eb67d13c 100644
--- a/test/MC/AArch64/SVE/smaxv-diagnostics.s
+++ b/test/MC/AArch64/SVE/smaxv-diagnostics.s
@@ -31,4 +31,19 @@ smaxv v0.2d, p7, z31.d
smaxv h0, p8, z31.h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
// CHECK-NEXT: smaxv h0, p8, z31.h
-// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: \ No newline at end of file
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p7/z, z6.d
+smaxv d0, p7, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: smaxv d0, p7, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+smaxv d0, p7, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: smaxv d0, p7, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/smin-diagnostics.s b/test/MC/AArch64/SVE/smin-diagnostics.s
index efe7855c8315..987650901ea9 100644
--- a/test/MC/AArch64/SVE/smin-diagnostics.s
+++ b/test/MC/AArch64/SVE/smin-diagnostics.s
@@ -14,3 +14,13 @@ smin z0.b, p8/m, z0.b, z0.b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
// CHECK-NEXT: smin z0.b, p8/m, z0.b, z0.b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+smin z31.d, z31.d, #127
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: smin z31.d, z31.d, #127
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/smin.s b/test/MC/AArch64/SVE/smin.s
index dd1676bf582e..da26ac04462f 100644
--- a/test/MC/AArch64/SVE/smin.s
+++ b/test/MC/AArch64/SVE/smin.s
@@ -78,3 +78,43 @@ smin z31.d, p7/m, z31.d, z31.d
// CHECK-ENCODING: [0xff,0x1f,0xca,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff 1f ca 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+smin z4.d, p7/m, z4.d, z31.d
+// CHECK-INST: smin z4.d, p7/m, z4.d, z31.d
+// CHECK-ENCODING: [0xe4,0x1f,0xca,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 1f ca 04 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+smin z4.d, p7/m, z4.d, z31.d
+// CHECK-INST: smin z4.d, p7/m, z4.d, z31.d
+// CHECK-ENCODING: [0xe4,0x1f,0xca,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 1f ca 04 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+smin z31.d, z31.d, #127
+// CHECK-INST: smin z31.d, z31.d, #127
+// CHECK-ENCODING: [0xff,0xcf,0xea,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ff cf ea 25 <unknown>
diff --git a/test/MC/AArch64/SVE/sminv-diagnostics.s b/test/MC/AArch64/SVE/sminv-diagnostics.s
index 85f55772f89e..5f8a4e011e07 100644
--- a/test/MC/AArch64/SVE/sminv-diagnostics.s
+++ b/test/MC/AArch64/SVE/sminv-diagnostics.s
@@ -31,4 +31,19 @@ sminv v0.2d, p7, z31.d
sminv h0, p8, z31.h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
// CHECK-NEXT: sminv h0, p8, z31.h
-// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: \ No newline at end of file
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p7/z, z6.d
+sminv d0, p7, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sminv d0, p7, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+sminv d0, p7, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sminv d0, p7, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/smulh.s b/test/MC/AArch64/SVE/smulh.s
index 354dfd8c8417..7ef8d80dc786 100644
--- a/test/MC/AArch64/SVE/smulh.s
+++ b/test/MC/AArch64/SVE/smulh.s
@@ -30,3 +30,31 @@ smulh z0.d, p7/m, z0.d, z31.d
// CHECK-ENCODING: [0xe0,0x1f,0xd2,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: e0 1f d2 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0.d, p7/z, z7.d
+// CHECK-INST: movprfx z0.d, p7/z, z7.d
+// CHECK-ENCODING: [0xe0,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 3c d0 04 <unknown>
+
+smulh z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: smulh z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x1f,0xd2,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 1f d2 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+smulh z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: smulh z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x1f,0xd2,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 1f d2 04 <unknown>
diff --git a/test/MC/AArch64/SVE/splice-diagnostics.s b/test/MC/AArch64/SVE/splice-diagnostics.s
index dbac7403c4cb..3cd659caad93 100644
--- a/test/MC/AArch64/SVE/splice-diagnostics.s
+++ b/test/MC/AArch64/SVE/splice-diagnostics.s
@@ -25,3 +25,13 @@ splice z0.b, p8, z0.b, z1.b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
// CHECK-NEXT: splice z0.b, p8, z0.b, z1.b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z4.d, p7/z, z6.d
+splice z4.d, p7, z4.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: splice z4.d, p7, z4.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/splice.s b/test/MC/AArch64/SVE/splice.s
index a213b76d5091..719a761f3022 100644
--- a/test/MC/AArch64/SVE/splice.s
+++ b/test/MC/AArch64/SVE/splice.s
@@ -30,3 +30,19 @@ splice z31.d, p7, z31.d, z31.d
// CHECK-ENCODING: [0xff,0x9f,0xec,0x05]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff 9f ec 05 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+splice z4.d, p7, z4.d, z31.d
+// CHECK-INST: splice z4.d, p7, z4.d, z31.d
+// CHECK-ENCODING: [0xe4,0x9f,0xec,0x05]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 9f ec 05 <unknown>
diff --git a/test/MC/AArch64/SVE/sqadd-diagnostics.s b/test/MC/AArch64/SVE/sqadd-diagnostics.s
index 92672b648a6f..ae07bb69f9be 100644
--- a/test/MC/AArch64/SVE/sqadd-diagnostics.s
+++ b/test/MC/AArch64/SVE/sqadd-diagnostics.s
@@ -86,3 +86,25 @@ sqadd z0.d, z0.d, #65536
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 255] or a multiple of 256 in range [256, 65280]
// CHECK-NEXT: sqadd z0.d, z0.d, #65536
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+sqadd z31.d, z31.d, #65280
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: sqadd z31.d, z31.d, #65280
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.d, p0/z, z7.d
+sqadd z0.d, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sqadd z0.d, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+sqadd z0.d, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sqadd z0.d, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/sqadd.s b/test/MC/AArch64/SVE/sqadd.s
index 49abd41a6ade..8f722ea1da48 100644
--- a/test/MC/AArch64/SVE/sqadd.s
+++ b/test/MC/AArch64/SVE/sqadd.s
@@ -115,3 +115,19 @@ sqadd z31.d, z31.d, #65280
// CHECK-ENCODING: [0xff,0xff,0xe4,0x25]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff ff e4 25 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+sqadd z31.d, z31.d, #65280
+// CHECK-INST: sqadd z31.d, z31.d, #65280
+// CHECK-ENCODING: [0xff,0xff,0xe4,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ff ff e4 25 <unknown>
diff --git a/test/MC/AArch64/SVE/sqdecd-diagnostics.s b/test/MC/AArch64/SVE/sqdecd-diagnostics.s
index de2270c07005..658af848c363 100644
--- a/test/MC/AArch64/SVE/sqdecd-diagnostics.s
+++ b/test/MC/AArch64/SVE/sqdecd-diagnostics.s
@@ -79,3 +79,25 @@ sqdecd x0, #32
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate pattern
// CHECK-NEXT: sqdecd x0, #32
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+sqdecd z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: sqdecd z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.d, p0/z, z7.d
+sqdecd z0.d, pow2, mul #16
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: sqdecd z0.d, pow2, mul #16
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.d, p0/z, z7.d
+sqdecd z0.d, pow2
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: sqdecd z0.d, pow2
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/sqdecd.s b/test/MC/AArch64/SVE/sqdecd.s
index c240cc9eab88..107d630cb3e4 100644
--- a/test/MC/AArch64/SVE/sqdecd.s
+++ b/test/MC/AArch64/SVE/sqdecd.s
@@ -294,3 +294,43 @@ sqdecd x0, #28
// CHECK-ENCODING: [0x80,0xfb,0xf0,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 80 fb f0 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+sqdecd z0.d
+// CHECK-INST: sqdecd z0.d
+// CHECK-ENCODING: [0xe0,0xcb,0xe0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 cb e0 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+sqdecd z0.d, pow2, mul #16
+// CHECK-INST: sqdecd z0.d, pow2, mul #16
+// CHECK-ENCODING: [0x00,0xc8,0xef,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 00 c8 ef 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+sqdecd z0.d, pow2
+// CHECK-INST: sqdecd z0.d, pow2
+// CHECK-ENCODING: [0x00,0xc8,0xe0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 00 c8 e0 04 <unknown>
diff --git a/test/MC/AArch64/SVE/sqdech-diagnostics.s b/test/MC/AArch64/SVE/sqdech-diagnostics.s
index dd68b9d08f09..af9268cb6ee2 100644
--- a/test/MC/AArch64/SVE/sqdech-diagnostics.s
+++ b/test/MC/AArch64/SVE/sqdech-diagnostics.s
@@ -79,3 +79,25 @@ sqdech x0, #32
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate pattern
// CHECK-NEXT: sqdech x0, #32
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.h, p0/z, z7.h
+sqdech z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: sqdech z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.h, p0/z, z7.h
+sqdech z0.h, pow2, mul #16
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: sqdech z0.h, pow2, mul #16
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.h, p0/z, z7.h
+sqdech z0.h, pow2
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: sqdech z0.h, pow2
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/sqdech.s b/test/MC/AArch64/SVE/sqdech.s
index b0e8c8d39386..574316825b18 100644
--- a/test/MC/AArch64/SVE/sqdech.s
+++ b/test/MC/AArch64/SVE/sqdech.s
@@ -294,3 +294,43 @@ sqdech x0, #28
// CHECK-ENCODING: [0x80,0xfb,0x70,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 80 fb 70 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+sqdech z0.h
+// CHECK-INST: sqdech z0.h
+// CHECK-ENCODING: [0xe0,0xcb,0x60,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 cb 60 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+sqdech z0.h, pow2, mul #16
+// CHECK-INST: sqdech z0.h, pow2, mul #16
+// CHECK-ENCODING: [0x00,0xc8,0x6f,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 00 c8 6f 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+sqdech z0.h, pow2
+// CHECK-INST: sqdech z0.h, pow2
+// CHECK-ENCODING: [0x00,0xc8,0x60,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 00 c8 60 04 <unknown>
diff --git a/test/MC/AArch64/SVE/sqdecp-diagnostics.s b/test/MC/AArch64/SVE/sqdecp-diagnostics.s
index f3fca0f17c9f..80579b824210 100644
--- a/test/MC/AArch64/SVE/sqdecp-diagnostics.s
+++ b/test/MC/AArch64/SVE/sqdecp-diagnostics.s
@@ -51,3 +51,13 @@ sqdecp x0, p0.q
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register
// CHECK-NEXT: sqdecp x0, p0.q
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+sqdecp z0.d, p0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: sqdecp z0.d, p0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/sqdecp.s b/test/MC/AArch64/SVE/sqdecp.s
index 6b6abac8621f..2a56b182f6bb 100644
--- a/test/MC/AArch64/SVE/sqdecp.s
+++ b/test/MC/AArch64/SVE/sqdecp.s
@@ -72,3 +72,19 @@ sqdecp z0.d, p0
// CHECK-ENCODING: [0x00,0x80,0xea,0x25]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 00 80 ea 25 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+sqdecp z0.d, p0
+// CHECK-INST: sqdecp z0.d, p0
+// CHECK-ENCODING: [0x00,0x80,0xea,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 00 80 ea 25 <unknown>
diff --git a/test/MC/AArch64/SVE/sqdecw-diagnostics.s b/test/MC/AArch64/SVE/sqdecw-diagnostics.s
index f3b11c2732c4..97adc35cf16a 100644
--- a/test/MC/AArch64/SVE/sqdecw-diagnostics.s
+++ b/test/MC/AArch64/SVE/sqdecw-diagnostics.s
@@ -79,3 +79,25 @@ sqdecw x0, #32
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate pattern
// CHECK-NEXT: sqdecw x0, #32
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.s, p0/z, z7.s
+sqdecw z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: sqdecw z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+sqdecw z0.s, pow2, mul #16
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: sqdecw z0.s, pow2, mul #16
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+sqdecw z0.s, pow2
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: sqdecw z0.s, pow2
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/sqdecw.s b/test/MC/AArch64/SVE/sqdecw.s
index ac456f3f8cc0..e48d5999d409 100644
--- a/test/MC/AArch64/SVE/sqdecw.s
+++ b/test/MC/AArch64/SVE/sqdecw.s
@@ -294,3 +294,43 @@ sqdecw x0, #28
// CHECK-ENCODING: [0x80,0xfb,0xb0,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 80 fb b0 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+sqdecw z0.s
+// CHECK-INST: sqdecw z0.s
+// CHECK-ENCODING: [0xe0,0xcb,0xa0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 cb a0 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+sqdecw z0.s, pow2, mul #16
+// CHECK-INST: sqdecw z0.s, pow2, mul #16
+// CHECK-ENCODING: [0x00,0xc8,0xaf,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 00 c8 af 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+sqdecw z0.s, pow2
+// CHECK-INST: sqdecw z0.s, pow2
+// CHECK-ENCODING: [0x00,0xc8,0xa0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 00 c8 a0 04 <unknown>
diff --git a/test/MC/AArch64/SVE/sqincd-diagnostics.s b/test/MC/AArch64/SVE/sqincd-diagnostics.s
index d2bad3bc6b0c..2e462e6f6dc1 100644
--- a/test/MC/AArch64/SVE/sqincd-diagnostics.s
+++ b/test/MC/AArch64/SVE/sqincd-diagnostics.s
@@ -79,3 +79,25 @@ sqincd x0, #32
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate pattern
// CHECK-NEXT: sqincd x0, #32
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+sqincd z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: sqincd z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.d, p0/z, z7.d
+sqincd z0.d, pow2, mul #16
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: sqincd z0.d, pow2, mul #16
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.d, p0/z, z7.d
+sqincd z0.d, pow2
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: sqincd z0.d, pow2
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/sqincd.s b/test/MC/AArch64/SVE/sqincd.s
index f6919ffdc9dd..440e4b708ffc 100644
--- a/test/MC/AArch64/SVE/sqincd.s
+++ b/test/MC/AArch64/SVE/sqincd.s
@@ -294,3 +294,43 @@ sqincd x0, #28
// CHECK-ENCODING: [0x80,0xf3,0xf0,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 80 f3 f0 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+sqincd z0.d
+// CHECK-INST: sqincd z0.d
+// CHECK-ENCODING: [0xe0,0xc3,0xe0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 c3 e0 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+sqincd z0.d, pow2, mul #16
+// CHECK-INST: sqincd z0.d, pow2, mul #16
+// CHECK-ENCODING: [0x00,0xc0,0xef,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 00 c0 ef 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+sqincd z0.d, pow2
+// CHECK-INST: sqincd z0.d, pow2
+// CHECK-ENCODING: [0x00,0xc0,0xe0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 00 c0 e0 04 <unknown>
diff --git a/test/MC/AArch64/SVE/sqinch-diagnostics.s b/test/MC/AArch64/SVE/sqinch-diagnostics.s
index 3394a814a95a..1f9cb8f8b6b2 100644
--- a/test/MC/AArch64/SVE/sqinch-diagnostics.s
+++ b/test/MC/AArch64/SVE/sqinch-diagnostics.s
@@ -79,3 +79,25 @@ sqinch x0, #32
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate pattern
// CHECK-NEXT: sqinch x0, #32
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.h, p0/z, z7.h
+sqinch z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: sqinch z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.h, p0/z, z7.h
+sqinch z0.h, pow2, mul #16
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: sqinch z0.h, pow2, mul #16
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.h, p0/z, z7.h
+sqinch z0.h, pow2
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: sqinch z0.h, pow2
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/sqinch.s b/test/MC/AArch64/SVE/sqinch.s
index 41bdcb9f15be..6ed6dc4758e0 100644
--- a/test/MC/AArch64/SVE/sqinch.s
+++ b/test/MC/AArch64/SVE/sqinch.s
@@ -294,3 +294,43 @@ sqinch x0, #28
// CHECK-ENCODING: [0x80,0xf3,0x70,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 80 f3 70 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+sqinch z0.h
+// CHECK-INST: sqinch z0.h
+// CHECK-ENCODING: [0xe0,0xc3,0x60,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 c3 60 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+sqinch z0.h, pow2, mul #16
+// CHECK-INST: sqinch z0.h, pow2, mul #16
+// CHECK-ENCODING: [0x00,0xc0,0x6f,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 00 c0 6f 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+sqinch z0.h, pow2
+// CHECK-INST: sqinch z0.h, pow2
+// CHECK-ENCODING: [0x00,0xc0,0x60,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 00 c0 60 04 <unknown>
diff --git a/test/MC/AArch64/SVE/sqincp-diagnostics.s b/test/MC/AArch64/SVE/sqincp-diagnostics.s
index 8b98e62e220d..9bd8587edb04 100644
--- a/test/MC/AArch64/SVE/sqincp-diagnostics.s
+++ b/test/MC/AArch64/SVE/sqincp-diagnostics.s
@@ -46,3 +46,13 @@ uqdecp x0, p0.q
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register
// CHECK-NEXT: uqdecp x0, p0.q
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+sqincp z0.d, p0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: sqincp z0.d, p0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/sqincp.s b/test/MC/AArch64/SVE/sqincp.s
index 6f812947df29..f7d427b0ca6e 100644
--- a/test/MC/AArch64/SVE/sqincp.s
+++ b/test/MC/AArch64/SVE/sqincp.s
@@ -72,3 +72,19 @@ sqincp z0.d, p0
// CHECK-ENCODING: [0x00,0x80,0xe8,0x25]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 00 80 e8 25 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+sqincp z0.d, p0
+// CHECK-INST: sqincp z0.d, p0
+// CHECK-ENCODING: [0x00,0x80,0xe8,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 00 80 e8 25 <unknown>
diff --git a/test/MC/AArch64/SVE/sqincw-diagnostics.s b/test/MC/AArch64/SVE/sqincw-diagnostics.s
index 53b726fdacde..b0a380e16016 100644
--- a/test/MC/AArch64/SVE/sqincw-diagnostics.s
+++ b/test/MC/AArch64/SVE/sqincw-diagnostics.s
@@ -79,3 +79,25 @@ sqincw x0, #32
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate pattern
// CHECK-NEXT: sqincw x0, #32
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.s, p0/z, z7.s
+sqincw z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: sqincw z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+sqincw z0.s, pow2, mul #16
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: sqincw z0.s, pow2, mul #16
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+sqincw z0.s, pow2
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: sqincw z0.s, pow2
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/sqincw.s b/test/MC/AArch64/SVE/sqincw.s
index 1528c30a3788..7ff2bc6540a7 100644
--- a/test/MC/AArch64/SVE/sqincw.s
+++ b/test/MC/AArch64/SVE/sqincw.s
@@ -294,3 +294,43 @@ sqincw x0, #28
// CHECK-ENCODING: [0x80,0xf3,0xb0,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 80 f3 b0 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+sqincw z0.s
+// CHECK-INST: sqincw z0.s
+// CHECK-ENCODING: [0xe0,0xc3,0xa0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 c3 a0 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+sqincw z0.s, pow2, mul #16
+// CHECK-INST: sqincw z0.s, pow2, mul #16
+// CHECK-ENCODING: [0x00,0xc0,0xaf,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 00 c0 af 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+sqincw z0.s, pow2
+// CHECK-INST: sqincw z0.s, pow2
+// CHECK-ENCODING: [0x00,0xc0,0xa0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 00 c0 a0 04 <unknown>
diff --git a/test/MC/AArch64/SVE/sqsub-diagnostics.s b/test/MC/AArch64/SVE/sqsub-diagnostics.s
index 8155c366c0ab..15633050ebfd 100644
--- a/test/MC/AArch64/SVE/sqsub-diagnostics.s
+++ b/test/MC/AArch64/SVE/sqsub-diagnostics.s
@@ -86,3 +86,25 @@ sqsub z0.d, z0.d, #65536
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 255] or a multiple of 256 in range [256, 65280]
// CHECK-NEXT: sqsub z0.d, z0.d, #65536
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+sqsub z31.d, z31.d, #65280
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: sqsub z31.d, z31.d, #65280
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.d, p0/z, z7.d
+sqsub z0.d, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sqsub z0.d, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+sqsub z0.d, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sqsub z0.d, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/sqsub.s b/test/MC/AArch64/SVE/sqsub.s
index ad41b5ae41b6..c91d5fe3d124 100644
--- a/test/MC/AArch64/SVE/sqsub.s
+++ b/test/MC/AArch64/SVE/sqsub.s
@@ -115,3 +115,19 @@ sqsub z31.d, z31.d, #65280
// CHECK-ENCODING: [0xff,0xff,0xe6,0x25]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff ff e6 25 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+sqsub z31.d, z31.d, #65280
+// CHECK-INST: sqsub z31.d, z31.d, #65280
+// CHECK-ENCODING: [0xff,0xff,0xe6,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ff ff e6 25 <unknown>
diff --git a/test/MC/AArch64/SVE/st1b-diagnostics.s b/test/MC/AArch64/SVE/st1b-diagnostics.s
index 99bbc55b072c..25f153c78ce9 100644
--- a/test/MC/AArch64/SVE/st1b-diagnostics.s
+++ b/test/MC/AArch64/SVE/st1b-diagnostics.s
@@ -175,3 +175,19 @@ st1b z0.d, p0, [z0.d, #32]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 31].
// CHECK-NEXT: st1b z0.d, p0, [z0.d, #32]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p7/z, z6.d
+st1b { z31.d }, p7, [z31.d, #31]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: st1b { z31.d }, p7, [z31.d, #31]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+st1b { z31.d }, p7, [z31.d, #31]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: st1b { z31.d }, p7, [z31.d, #31]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/st1d-diagnostics.s b/test/MC/AArch64/SVE/st1d-diagnostics.s
index f510633983f8..82f52ce56784 100644
--- a/test/MC/AArch64/SVE/st1d-diagnostics.s
+++ b/test/MC/AArch64/SVE/st1d-diagnostics.s
@@ -131,3 +131,19 @@ st1d z0.d, p0, [z0.d, #3]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 8 in range [0, 248].
// CHECK-NEXT: st1d z0.d, p0, [z0.d, #3]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p7/z, z6.d
+st1d { z31.d }, p7, [z31.d, #248]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: st1d { z31.d }, p7, [z31.d, #248]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+st1d { z31.d }, p7, [z31.d, #248]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: st1d { z31.d }, p7, [z31.d, #248]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/st1h-diagnostics.s b/test/MC/AArch64/SVE/st1h-diagnostics.s
index b8b8ecdebd8b..e58cd52c7858 100644
--- a/test/MC/AArch64/SVE/st1h-diagnostics.s
+++ b/test/MC/AArch64/SVE/st1h-diagnostics.s
@@ -189,3 +189,19 @@ st1h z0.d, p0, [z0.d, #3]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 2 in range [0, 62].
// CHECK-NEXT: st1h z0.d, p0, [z0.d, #3]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p7/z, z6.d
+st1h { z31.d }, p7, [z31.d, #62]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: st1h { z31.d }, p7, [z31.d, #62]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+st1h { z31.d }, p7, [z31.d, #62]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: st1h { z31.d }, p7, [z31.d, #62]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/st1w-diagnostics.s b/test/MC/AArch64/SVE/st1w-diagnostics.s
index 2222b0677566..a5c0f0517ecf 100644
--- a/test/MC/AArch64/SVE/st1w-diagnostics.s
+++ b/test/MC/AArch64/SVE/st1w-diagnostics.s
@@ -178,3 +178,19 @@ st1w z0.d, p0, [z0.d, #3]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [0, 124].
// CHECK-NEXT: st1w z0.d, p0, [z0.d, #3]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p7/z, z6.d
+st1w { z31.d }, p7, [z31.d, #124]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: st1w { z31.d }, p7, [z31.d, #124]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+st1w { z31.d }, p7, [z31.d, #124]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: st1w { z31.d }, p7, [z31.d, #124]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/st2b-diagnostics.s b/test/MC/AArch64/SVE/st2b-diagnostics.s
index 07fdd64fe15b..ac30bb47192e 100644
--- a/test/MC/AArch64/SVE/st2b-diagnostics.s
+++ b/test/MC/AArch64/SVE/st2b-diagnostics.s
@@ -89,3 +89,19 @@ st2b { v0.2d, v1.2d }, p0, [x0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: st2b { v0.2d, v1.2d }, p0, [x0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z21.b, p5/z, z28.b
+st2b { z21.b, z22.b }, p5, [x10, #10, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: st2b { z21.b, z22.b }, p5, [x10, #10, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z21, z28
+st2b { z21.b, z22.b }, p5, [x10, #10, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: st2b { z21.b, z22.b }, p5, [x10, #10, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/st2d-diagnostics.s b/test/MC/AArch64/SVE/st2d-diagnostics.s
index e5e09424484f..301849f2ee04 100644
--- a/test/MC/AArch64/SVE/st2d-diagnostics.s
+++ b/test/MC/AArch64/SVE/st2d-diagnostics.s
@@ -94,3 +94,19 @@ st2d { v0.2d, v1.2d }, p0, [x0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: st2d { v0.2d, v1.2d }, p0, [x0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z21.d, p5/z, z28.d
+st2d { z21.d, z22.d }, p5, [x10, #10, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: st2d { z21.d, z22.d }, p5, [x10, #10, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z21, z28
+st2d { z21.d, z22.d }, p5, [x10, #10, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: st2d { z21.d, z22.d }, p5, [x10, #10, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/st2h-diagnostics.s b/test/MC/AArch64/SVE/st2h-diagnostics.s
index 0854ada487e7..b22ae30c09ce 100644
--- a/test/MC/AArch64/SVE/st2h-diagnostics.s
+++ b/test/MC/AArch64/SVE/st2h-diagnostics.s
@@ -94,3 +94,19 @@ st2h { v0.2d, v1.2d }, p0, [x0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: st2h { v0.2d, v1.2d }, p0, [x0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z21.h, p5/z, z28.h
+st2h { z21.h, z22.h }, p5, [x10, #10, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: st2h { z21.h, z22.h }, p5, [x10, #10, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z21, z28
+st2h { z21.h, z22.h }, p5, [x10, #10, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: st2h { z21.h, z22.h }, p5, [x10, #10, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/st2w-diagnostics.s b/test/MC/AArch64/SVE/st2w-diagnostics.s
index 61ca5ec964fc..1e40d0a054cb 100644
--- a/test/MC/AArch64/SVE/st2w-diagnostics.s
+++ b/test/MC/AArch64/SVE/st2w-diagnostics.s
@@ -94,3 +94,19 @@ st2w { v0.2d, v1.2d }, p0, [x0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: st2w { v0.2d, v1.2d }, p0, [x0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z21.s, p5/z, z28.s
+st2w { z21.s, z22.s }, p5, [x10, #10, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: st2w { z21.s, z22.s }, p5, [x10, #10, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z21, z28
+st2w { z21.s, z22.s }, p5, [x10, #10, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: st2w { z21.s, z22.s }, p5, [x10, #10, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/st3b-diagnostics.s b/test/MC/AArch64/SVE/st3b-diagnostics.s
index 65201355482d..43016c0c314a 100644
--- a/test/MC/AArch64/SVE/st3b-diagnostics.s
+++ b/test/MC/AArch64/SVE/st3b-diagnostics.s
@@ -89,3 +89,19 @@ st3b { v0.16b, v1.16b, v2.16b }, p0, [x0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: st3b { v0.16b, v1.16b, v2.16b }, p0, [x0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z21.b, p5/z, z28.b
+st3b { z21.b, z22.b, z23.b }, p5, [x10, #15, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: st3b { z21.b, z22.b, z23.b }, p5, [x10, #15, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z21, z28
+st3b { z21.b, z22.b, z23.b }, p5, [x10, #15, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: st3b { z21.b, z22.b, z23.b }, p5, [x10, #15, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/st3d-diagnostics.s b/test/MC/AArch64/SVE/st3d-diagnostics.s
index c9e8591e446c..ad089b2ceffd 100644
--- a/test/MC/AArch64/SVE/st3d-diagnostics.s
+++ b/test/MC/AArch64/SVE/st3d-diagnostics.s
@@ -94,3 +94,19 @@ st3d { v0.2d, v1.2d, v2.2d }, p0, [x0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: st3d { v0.2d, v1.2d, v2.2d }, p0, [x0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z21.d, p5/z, z28.d
+st3d { z21.d, z22.d, z23.d }, p5, [x10, #15, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: st3d { z21.d, z22.d, z23.d }, p5, [x10, #15, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z21, z28
+st3d { z21.d, z22.d, z23.d }, p5, [x10, #15, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: st3d { z21.d, z22.d, z23.d }, p5, [x10, #15, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/st3h-diagnostics.s b/test/MC/AArch64/SVE/st3h-diagnostics.s
index f6874294cfcf..aba5215defeb 100644
--- a/test/MC/AArch64/SVE/st3h-diagnostics.s
+++ b/test/MC/AArch64/SVE/st3h-diagnostics.s
@@ -94,3 +94,19 @@ st3h { v0.8h, v1.8h, v2.8h }, p0, [x0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: st3h { v0.8h, v1.8h, v2.8h }, p0, [x0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z21.h, p5/z, z28.h
+st3h { z21.h, z22.h, z23.h }, p5, [x10, #15, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: st3h { z21.h, z22.h, z23.h }, p5, [x10, #15, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z21, z28
+st3h { z21.h, z22.h, z23.h }, p5, [x10, #15, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: st3h { z21.h, z22.h, z23.h }, p5, [x10, #15, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/st3w-diagnostics.s b/test/MC/AArch64/SVE/st3w-diagnostics.s
index 9c2bb001049f..2e89901b6e1e 100644
--- a/test/MC/AArch64/SVE/st3w-diagnostics.s
+++ b/test/MC/AArch64/SVE/st3w-diagnostics.s
@@ -94,3 +94,19 @@ st3w { v0.4s, v1.4s, v2.4s }, p0, [x0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: st3w { v0.4s, v1.4s, v2.4s }, p0, [x0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z21.s, p5/z, z28.s
+st3w { z21.s, z22.s, z23.s }, p5, [x10, #15, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: st3w { z21.s, z22.s, z23.s }, p5, [x10, #15, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z21, z28
+st3w { z21.s, z22.s, z23.s }, p5, [x10, #15, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: st3w { z21.s, z22.s, z23.s }, p5, [x10, #15, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/st4b-diagnostics.s b/test/MC/AArch64/SVE/st4b-diagnostics.s
index c316c81f03cc..dcd31b78a286 100644
--- a/test/MC/AArch64/SVE/st4b-diagnostics.s
+++ b/test/MC/AArch64/SVE/st4b-diagnostics.s
@@ -89,3 +89,19 @@ st4b { v0.16b, v1.16b, v2.16b }, p0, [x0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: st4b { v0.16b, v1.16b, v2.16b }, p0, [x0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z21.b, p5/z, z28.b
+st4b { z21.b, z22.b, z23.b, z24.b }, p5, [x10, #20, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: st4b { z21.b, z22.b, z23.b, z24.b }, p5, [x10, #20, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z21, z28
+st4b { z21.b, z22.b, z23.b, z24.b }, p5, [x10, #20, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: st4b { z21.b, z22.b, z23.b, z24.b }, p5, [x10, #20, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/st4d-diagnostics.s b/test/MC/AArch64/SVE/st4d-diagnostics.s
index 87f4225a87c4..e63c0c553b54 100644
--- a/test/MC/AArch64/SVE/st4d-diagnostics.s
+++ b/test/MC/AArch64/SVE/st4d-diagnostics.s
@@ -95,3 +95,19 @@ st4d { v0.2d, v1.2d, v2.2d }, p0, [x0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: st4d { v0.2d, v1.2d, v2.2d }, p0, [x0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z21.d, p5/z, z28.d
+st4d { z21.d, z22.d, z23.d, z24.d }, p5, [x10, #20, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: st4d { z21.d, z22.d, z23.d, z24.d }, p5, [x10, #20, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z21, z28
+st4d { z21.d, z22.d, z23.d, z24.d }, p5, [x10, #20, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: st4d { z21.d, z22.d, z23.d, z24.d }, p5, [x10, #20, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/st4h-diagnostics.s b/test/MC/AArch64/SVE/st4h-diagnostics.s
index e3cecfef0212..8d80eb2248c8 100644
--- a/test/MC/AArch64/SVE/st4h-diagnostics.s
+++ b/test/MC/AArch64/SVE/st4h-diagnostics.s
@@ -94,3 +94,19 @@ st4h { v0.8h, v1.8h, v2.8h }, p0, [x0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: st4h { v0.8h, v1.8h, v2.8h }, p0, [x0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z21.h, p5/z, z28.h
+st4h { z21.h, z22.h, z23.h, z24.h }, p5, [x10, #20, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: st4h { z21.h, z22.h, z23.h, z24.h }, p5, [x10, #20, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z21, z28
+st4h { z21.h, z22.h, z23.h, z24.h }, p5, [x10, #20, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: st4h { z21.h, z22.h, z23.h, z24.h }, p5, [x10, #20, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/st4w-diagnostics.s b/test/MC/AArch64/SVE/st4w-diagnostics.s
index 46bb2d7d7856..8c3816538b79 100644
--- a/test/MC/AArch64/SVE/st4w-diagnostics.s
+++ b/test/MC/AArch64/SVE/st4w-diagnostics.s
@@ -94,3 +94,19 @@ st4w { v0.4s, v1.4s, v2.4s }, p0, [x0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: st4w { v0.4s, v1.4s, v2.4s }, p0, [x0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z21.s, p5/z, z28.s
+st4w { z21.s, z22.s, z23.s, z24.s }, p5, [x10, #20, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: st4w { z21.s, z22.s, z23.s, z24.s }, p5, [x10, #20, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z21, z28
+st4w { z21.s, z22.s, z23.s, z24.s }, p5, [x10, #20, mul vl]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: st4w { z21.s, z22.s, z23.s, z24.s }, p5, [x10, #20, mul vl]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/stnt1b-diagnostics.s b/test/MC/AArch64/SVE/stnt1b-diagnostics.s
index 8f5a0b222dab..4b7806e5b0e8 100644
--- a/test/MC/AArch64/SVE/stnt1b-diagnostics.s
+++ b/test/MC/AArch64/SVE/stnt1b-diagnostics.s
@@ -64,3 +64,19 @@ stnt1b { v0.2d }, p0, [x1, #1, MUL VL]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: stnt1b { v0.2d }, p0, [x1, #1, MUL VL]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.b, p0/z, z7.b
+stnt1b { z0.b }, p0, [x0, x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: stnt1b { z0.b }, p0, [x0, x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+stnt1b { z0.b }, p0, [x0, x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: stnt1b { z0.b }, p0, [x0, x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/stnt1d-diagnostics.s b/test/MC/AArch64/SVE/stnt1d-diagnostics.s
index 9761ad98cc27..a5cbfe1a9343 100644
--- a/test/MC/AArch64/SVE/stnt1d-diagnostics.s
+++ b/test/MC/AArch64/SVE/stnt1d-diagnostics.s
@@ -64,3 +64,19 @@ stnt1d { v0.2d }, p0, [x1, #1, MUL VL]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: stnt1d { v0.2d }, p0, [x1, #1, MUL VL]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+stnt1d { z0.d }, p0, [x0, x0, lsl #3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: stnt1d { z0.d }, p0, [x0, x0, lsl #3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+stnt1d { z0.d }, p0, [x0, x0, lsl #3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: stnt1d { z0.d }, p0, [x0, x0, lsl #3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/stnt1h-diagnostics.s b/test/MC/AArch64/SVE/stnt1h-diagnostics.s
index cf009b74e493..22fe5cb7dee3 100644
--- a/test/MC/AArch64/SVE/stnt1h-diagnostics.s
+++ b/test/MC/AArch64/SVE/stnt1h-diagnostics.s
@@ -64,3 +64,19 @@ stnt1h { v0.2d }, p0, [x1, #1, MUL VL]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: stnt1h { v0.2d }, p0, [x1, #1, MUL VL]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.h, p0/z, z7.h
+stnt1h { z0.h }, p0, [x0, x0, lsl #1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: stnt1h { z0.h }, p0, [x0, x0, lsl #1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+stnt1h { z0.h }, p0, [x0, x0, lsl #1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: stnt1h { z0.h }, p0, [x0, x0, lsl #1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/stnt1w-diagnostics.s b/test/MC/AArch64/SVE/stnt1w-diagnostics.s
index eb38fcac1b4b..339ea25c4488 100644
--- a/test/MC/AArch64/SVE/stnt1w-diagnostics.s
+++ b/test/MC/AArch64/SVE/stnt1w-diagnostics.s
@@ -64,3 +64,19 @@ stnt1w { v0.2d }, p0, [x1, #1, MUL VL]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
// CHECK-NEXT: stnt1w { v0.2d }, p0, [x1, #1, MUL VL]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.s, p0/z, z7.s
+stnt1w { z0.s }, p0, [x0, x0, lsl #2]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: stnt1w { z0.s }, p0, [x0, x0, lsl #2]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+stnt1w { z0.s }, p0, [x0, x0, lsl #2]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: stnt1w { z0.s }, p0, [x0, x0, lsl #2]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/sub-diagnostics.s b/test/MC/AArch64/SVE/sub-diagnostics.s
index 4ace1ce7dabb..2bd933735bbd 100644
--- a/test/MC/AArch64/SVE/sub-diagnostics.s
+++ b/test/MC/AArch64/SVE/sub-diagnostics.s
@@ -144,3 +144,25 @@ sub z0.d, z0.d, #65536
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 255] or a multiple of 256 in range [256, 65280]
// CHECK-NEXT: sub z0.d, z0.d, #65536
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+sub z31.d, z31.d, #65280
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: sub z31.d, z31.d, #65280
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31.s, p0/z, z6.s
+sub z31.s, z31.s, z31.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sub z31.s, z31.s, z31.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+sub z31.s, z31.s, z31.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sub z31.s, z31.s, z31.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/sub.s b/test/MC/AArch64/SVE/sub.s
index dcbef0bf3e5e..8e74cf8cf3f2 100644
--- a/test/MC/AArch64/SVE/sub.s
+++ b/test/MC/AArch64/SVE/sub.s
@@ -286,3 +286,43 @@ sub z31.d, z31.d, #65280
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff ff e1 25 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z23.b, p3/z, z30.b
+// CHECK-INST: movprfx z23.b, p3/z, z30.b
+// CHECK-ENCODING: [0xd7,0x2f,0x10,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: d7 2f 10 04 <unknown>
+
+sub z23.b, p3/m, z23.b, z13.b
+// CHECK-INST: sub z23.b, p3/m, z23.b, z13.b
+// CHECK-ENCODING: [0xb7,0x0d,0x01,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: b7 0d 01 04 <unknown>
+
+movprfx z23, z30
+// CHECK-INST: movprfx z23, z30
+// CHECK-ENCODING: [0xd7,0xbf,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: d7 bf 20 04 <unknown>
+
+sub z23.b, p3/m, z23.b, z13.b
+// CHECK-INST: sub z23.b, p3/m, z23.b, z13.b
+// CHECK-ENCODING: [0xb7,0x0d,0x01,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: b7 0d 01 04 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+sub z31.d, z31.d, #65280
+// CHECK-INST: sub z31.d, z31.d, #65280
+// CHECK-ENCODING: [0xff,0xff,0xe1,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ff ff e1 25 <unknown>
diff --git a/test/MC/AArch64/SVE/subr-diagnostics.s b/test/MC/AArch64/SVE/subr-diagnostics.s
index 847bef6dfa7d..cf259ce17ddf 100644
--- a/test/MC/AArch64/SVE/subr-diagnostics.s
+++ b/test/MC/AArch64/SVE/subr-diagnostics.s
@@ -138,3 +138,13 @@ subr z0.d, z0.d, #65536
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 255] or a multiple of 256 in range [256, 65280]
// CHECK-NEXT: subr z0.d, z0.d, #65536
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+subr z31.d, z31.d, #65280
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: subr z31.d, z31.d, #65280
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/subr.s b/test/MC/AArch64/SVE/subr.s
index 595d275a732b..ec6fa65c9621 100644
--- a/test/MC/AArch64/SVE/subr.s
+++ b/test/MC/AArch64/SVE/subr.s
@@ -115,3 +115,43 @@ subr z31.d, z31.d, #65280
// CHECK-ENCODING: [0xff,0xff,0xe3,0x25]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff ff e3 25 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z5.d, p0/z, z7.d
+// CHECK-INST: movprfx z5.d, p0/z, z7.d
+// CHECK-ENCODING: [0xe5,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e5 20 d0 04 <unknown>
+
+subr z5.d, p0/m, z5.d, z0.d
+// CHECK-INST: subr z5.d, p0/m, z5.d, z0.d
+// CHECK-ENCODING: [0x05,0x00,0xc3,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 05 00 c3 04 <unknown>
+
+movprfx z5, z7
+// CHECK-INST: movprfx z5, z7
+// CHECK-ENCODING: [0xe5,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e5 bc 20 04 <unknown>
+
+subr z5.d, p0/m, z5.d, z0.d
+// CHECK-INST: subr z5.d, p0/m, z5.d, z0.d
+// CHECK-ENCODING: [0x05,0x00,0xc3,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 05 00 c3 04 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+subr z31.d, z31.d, #65280
+// CHECK-INST: subr z31.d, z31.d, #65280
+// CHECK-ENCODING: [0xff,0xff,0xe3,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ff ff e3 25 <unknown>
diff --git a/test/MC/AArch64/SVE/sunpkhi-diagnostics.s b/test/MC/AArch64/SVE/sunpkhi-diagnostics.s
index 768ef1236bf4..cbbaee25b9ca 100644
--- a/test/MC/AArch64/SVE/sunpkhi-diagnostics.s
+++ b/test/MC/AArch64/SVE/sunpkhi-diagnostics.s
@@ -18,3 +18,19 @@ sunpkhi z0.d, z0.h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: sunpkhi z0.d, z0.h
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+sunpkhi z31.d, z31.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sunpkhi z31.d, z31.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+sunpkhi z31.d, z31.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sunpkhi z31.d, z31.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/sunpklo-diagnostics.s b/test/MC/AArch64/SVE/sunpklo-diagnostics.s
index 92de4278e23c..14a5a42e5121 100644
--- a/test/MC/AArch64/SVE/sunpklo-diagnostics.s
+++ b/test/MC/AArch64/SVE/sunpklo-diagnostics.s
@@ -18,3 +18,19 @@ sunpklo z0.d, z0.h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: sunpklo z0.d, z0.h
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+sunpklo z31.d, z31.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sunpklo z31.d, z31.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+sunpklo z31.d, z31.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sunpklo z31.d, z31.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/sxtb.s b/test/MC/AArch64/SVE/sxtb.s
index fe8a699e60e1..b9fa8b62c71c 100644
--- a/test/MC/AArch64/SVE/sxtb.s
+++ b/test/MC/AArch64/SVE/sxtb.s
@@ -42,3 +42,31 @@ sxtb z31.d, p7/m, z31.d
// CHECK-ENCODING: [0xff,0xbf,0xd0,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff bf d0 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+sxtb z4.d, p7/m, z31.d
+// CHECK-INST: sxtb z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf d0 04 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+sxtb z4.d, p7/m, z31.d
+// CHECK-INST: sxtb z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf d0 04 <unknown>
diff --git a/test/MC/AArch64/SVE/sxth.s b/test/MC/AArch64/SVE/sxth.s
index 138bfa43c6c9..e422de5dd6af 100644
--- a/test/MC/AArch64/SVE/sxth.s
+++ b/test/MC/AArch64/SVE/sxth.s
@@ -30,3 +30,31 @@ sxth z31.d, p7/m, z31.d
// CHECK-ENCODING: [0xff,0xbf,0xd2,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff bf d2 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+sxth z4.d, p7/m, z31.d
+// CHECK-INST: sxth z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xd2,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf d2 04 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+sxth z4.d, p7/m, z31.d
+// CHECK-INST: sxth z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xd2,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf d2 04 <unknown>
diff --git a/test/MC/AArch64/SVE/sxtw.s b/test/MC/AArch64/SVE/sxtw.s
index ef7b4e9bafba..96c66cb30472 100644
--- a/test/MC/AArch64/SVE/sxtw.s
+++ b/test/MC/AArch64/SVE/sxtw.s
@@ -18,3 +18,31 @@ sxtw z31.d, p7/m, z31.d
// CHECK-ENCODING: [0xff,0xbf,0xd4,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff bf d4 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+sxtw z4.d, p7/m, z31.d
+// CHECK-INST: sxtw z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xd4,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf d4 04 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+sxtw z4.d, p7/m, z31.d
+// CHECK-INST: sxtw z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xd4,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf d4 04 <unknown>
diff --git a/test/MC/AArch64/SVE/tbl-diagnostics.s b/test/MC/AArch64/SVE/tbl-diagnostics.s
index 8cb8575bbe81..7314fb49d1b2 100644
--- a/test/MC/AArch64/SVE/tbl-diagnostics.s
+++ b/test/MC/AArch64/SVE/tbl-diagnostics.s
@@ -9,3 +9,19 @@ tbl { z0.h }, z0.h, z0.h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector register expected
// CHECK-NEXT: tbl { z0.h }, z0.h, z0.h
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+tbl z31.d, { z31.d }, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: tbl z31.d, { z31.d }, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+tbl z31.d, { z31.d }, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: tbl z31.d, { z31.d }, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/trn1-diagnostics.s b/test/MC/AArch64/SVE/trn1-diagnostics.s
index 9e318c1eacd6..7bd0f19fa5c6 100644
--- a/test/MC/AArch64/SVE/trn1-diagnostics.s
+++ b/test/MC/AArch64/SVE/trn1-diagnostics.s
@@ -41,3 +41,19 @@ trn1 p1.s, p2.s, z3.s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: trn1 p1.s, p2.s, z3.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+trn1 z31.d, z31.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: trn1 z31.d, z31.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+trn1 z31.d, z31.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: trn1 z31.d, z31.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/trn2-diagnostics.s b/test/MC/AArch64/SVE/trn2-diagnostics.s
index 48b37bf3a181..4c1e29ed2dae 100644
--- a/test/MC/AArch64/SVE/trn2-diagnostics.s
+++ b/test/MC/AArch64/SVE/trn2-diagnostics.s
@@ -41,3 +41,19 @@ trn2 p1.s, p2.s, z3.s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: trn2 p1.s, p2.s, z3.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+trn2 z31.d, z31.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: trn2 z31.d, z31.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+trn2 z31.d, z31.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: trn2 z31.d, z31.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/uabd.s b/test/MC/AArch64/SVE/uabd.s
index ae06eb8782f8..0f1a4b382518 100644
--- a/test/MC/AArch64/SVE/uabd.s
+++ b/test/MC/AArch64/SVE/uabd.s
@@ -30,3 +30,31 @@ uabd z31.d, p7/m, z31.d, z31.d
// CHECK-ENCODING: [0xff,0x1f,0xcd,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff 1f cd 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+uabd z4.d, p7/m, z4.d, z31.d
+// CHECK-INST: uabd z4.d, p7/m, z4.d, z31.d
+// CHECK-ENCODING: [0xe4,0x1f,0xcd,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 1f cd 04 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+uabd z4.d, p7/m, z4.d, z31.d
+// CHECK-INST: uabd z4.d, p7/m, z4.d, z31.d
+// CHECK-ENCODING: [0xe4,0x1f,0xcd,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 1f cd 04 <unknown>
diff --git a/test/MC/AArch64/SVE/uaddv-diagnostics.s b/test/MC/AArch64/SVE/uaddv-diagnostics.s
index 11ec959913b5..55ffc6075822 100644
--- a/test/MC/AArch64/SVE/uaddv-diagnostics.s
+++ b/test/MC/AArch64/SVE/uaddv-diagnostics.s
@@ -26,4 +26,19 @@ uaddv s0, p7, z31.s
uaddv d0, p8, z31.b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
// CHECK-NEXT: uaddv d0, p8, z31.b
-// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: \ No newline at end of file
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p7/z, z6.d
+uaddv d0, p7, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uaddv d0, p7, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+uaddv d0, p7, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uaddv d0, p7, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/ucvtf.s b/test/MC/AArch64/SVE/ucvtf.s
index 6848448a6821..9e5f4f792b14 100644
--- a/test/MC/AArch64/SVE/ucvtf.s
+++ b/test/MC/AArch64/SVE/ucvtf.s
@@ -48,3 +48,31 @@ ucvtf z0.d, p0/m, z0.d
// CHECK-ENCODING: [0x00,0xa0,0xd7,0x65]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 00 a0 d7 65 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z5.d, p0/z, z7.d
+// CHECK-INST: movprfx z5.d, p0/z, z7.d
+// CHECK-ENCODING: [0xe5,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e5 20 d0 04 <unknown>
+
+ucvtf z5.d, p0/m, z0.d
+// CHECK-INST: ucvtf z5.d, p0/m, z0.d
+// CHECK-ENCODING: [0x05,0xa0,0xd7,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 05 a0 d7 65 <unknown>
+
+movprfx z5, z7
+// CHECK-INST: movprfx z5, z7
+// CHECK-ENCODING: [0xe5,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e5 bc 20 04 <unknown>
+
+ucvtf z5.d, p0/m, z0.d
+// CHECK-INST: ucvtf z5.d, p0/m, z0.d
+// CHECK-ENCODING: [0x05,0xa0,0xd7,0x65]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 05 a0 d7 65 <unknown>
diff --git a/test/MC/AArch64/SVE/udiv.s b/test/MC/AArch64/SVE/udiv.s
index 7ad8a73cb097..ca528ee6d074 100644
--- a/test/MC/AArch64/SVE/udiv.s
+++ b/test/MC/AArch64/SVE/udiv.s
@@ -18,3 +18,31 @@ udiv z0.d, p7/m, z0.d, z31.d
// CHECK-ENCODING: [0xe0,0x1f,0xd5,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: e0 1f d5 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0.d, p7/z, z7.d
+// CHECK-INST: movprfx z0.d, p7/z, z7.d
+// CHECK-ENCODING: [0xe0,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 3c d0 04 <unknown>
+
+udiv z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: udiv z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x1f,0xd5,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 1f d5 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+udiv z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: udiv z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x1f,0xd5,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 1f d5 04 <unknown>
diff --git a/test/MC/AArch64/SVE/udivr.s b/test/MC/AArch64/SVE/udivr.s
index f09b7f49b8c1..45bc391d0ef5 100644
--- a/test/MC/AArch64/SVE/udivr.s
+++ b/test/MC/AArch64/SVE/udivr.s
@@ -18,3 +18,31 @@ udivr z0.d, p7/m, z0.d, z31.d
// CHECK-ENCODING: [0xe0,0x1f,0xd7,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: e0 1f d7 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0.d, p7/z, z7.d
+// CHECK-INST: movprfx z0.d, p7/z, z7.d
+// CHECK-ENCODING: [0xe0,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 3c d0 04 <unknown>
+
+udivr z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: udivr z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x1f,0xd7,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 1f d7 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+udivr z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: udivr z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x1f,0xd7,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 1f d7 04 <unknown>
diff --git a/test/MC/AArch64/SVE/udot-diagnostics.s b/test/MC/AArch64/SVE/udot-diagnostics.s
index 08abd77592ec..ecdb036c2706 100644
--- a/test/MC/AArch64/SVE/udot-diagnostics.s
+++ b/test/MC/AArch64/SVE/udot-diagnostics.s
@@ -56,3 +56,19 @@ udot z0.d, z1.h, z15.h[2]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 1].
// CHECK-NEXT: udot z0.d, z1.h, z15.h[2]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+udot z0.d, z1.h, z31.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: udot z0.d, z1.h, z31.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.d, p0/z, z7.d
+udot z0.d, z1.h, z15.h[1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: udot z0.d, z1.h, z15.h[1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/udot.s b/test/MC/AArch64/SVE/udot.s
index 0c3a392093c8..c9b441172de2 100644
--- a/test/MC/AArch64/SVE/udot.s
+++ b/test/MC/AArch64/SVE/udot.s
@@ -30,3 +30,31 @@ udot z0.d, z1.h, z15.h[1]
// CHECK-ENCODING: [0x20,0x04,0xff,0x44]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 20 04 ff 44 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+udot z0.d, z1.h, z31.h
+// CHECK-INST: udot z0.d, z1.h, z31.h
+// CHECK-ENCODING: [0x20,0x04,0xdf,0x44]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 04 df 44 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+udot z0.d, z1.h, z15.h[1]
+// CHECK-INST: udot z0.d, z1.h, z15.h[1]
+// CHECK-ENCODING: [0x20,0x04,0xff,0x44]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 20 04 ff 44 <unknown>
diff --git a/test/MC/AArch64/SVE/umax-diagnostics.s b/test/MC/AArch64/SVE/umax-diagnostics.s
index f1e9e3916fba..628b8b2a7e5d 100644
--- a/test/MC/AArch64/SVE/umax-diagnostics.s
+++ b/test/MC/AArch64/SVE/umax-diagnostics.s
@@ -14,3 +14,13 @@ umax z0.b, p8/m, z0.b, z0.b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
// CHECK-NEXT: umax z0.b, p8/m, z0.b, z0.b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.b, p0/z, z6.b
+umax z31.b, z31.b, #255
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: umax z31.b, z31.b, #255
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/umax.s b/test/MC/AArch64/SVE/umax.s
index ac774ec96b1f..bd89da13241b 100644
--- a/test/MC/AArch64/SVE/umax.s
+++ b/test/MC/AArch64/SVE/umax.s
@@ -78,3 +78,43 @@ umax z31.d, p7/m, z31.d, z31.d
// CHECK-ENCODING: [0xff,0x1f,0xc9,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff 1f c9 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+umax z4.d, p7/m, z4.d, z31.d
+// CHECK-INST: umax z4.d, p7/m, z4.d, z31.d
+// CHECK-ENCODING: [0xe4,0x1f,0xc9,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 1f c9 04 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+umax z4.d, p7/m, z4.d, z31.d
+// CHECK-INST: umax z4.d, p7/m, z4.d, z31.d
+// CHECK-ENCODING: [0xe4,0x1f,0xc9,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 1f c9 04 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+umax z31.b, z31.b, #255
+// CHECK-INST: umax z31.b, z31.b, #255
+// CHECK-ENCODING: [0xff,0xdf,0x29,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ff df 29 25 <unknown>
diff --git a/test/MC/AArch64/SVE/umaxv-diagnostics.s b/test/MC/AArch64/SVE/umaxv-diagnostics.s
index cfcabb9a8b2c..36512f7568db 100644
--- a/test/MC/AArch64/SVE/umaxv-diagnostics.s
+++ b/test/MC/AArch64/SVE/umaxv-diagnostics.s
@@ -31,4 +31,19 @@ umaxv v0.2d, p7, z31.d
umaxv h0, p8, z31.h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
// CHECK-NEXT: umaxv h0, p8, z31.h
-// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: \ No newline at end of file
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p7/z, z6.d
+umaxv d0, p7, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: umaxv d0, p7, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+umaxv d0, p7, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: umaxv d0, p7, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/umin-diagnostics.s b/test/MC/AArch64/SVE/umin-diagnostics.s
index 03c708b14620..ce60edf4557e 100644
--- a/test/MC/AArch64/SVE/umin-diagnostics.s
+++ b/test/MC/AArch64/SVE/umin-diagnostics.s
@@ -14,3 +14,13 @@ umin z0.b, p8/m, z0.b, z0.b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
// CHECK-NEXT: umin z0.b, p8/m, z0.b, z0.b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.b, p0/z, z6.b
+umin z31.b, z31.b, #255
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: umin z31.b, z31.b, #255
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/umin.s b/test/MC/AArch64/SVE/umin.s
index bd142c4e2c17..9aac3092df21 100644
--- a/test/MC/AArch64/SVE/umin.s
+++ b/test/MC/AArch64/SVE/umin.s
@@ -78,3 +78,43 @@ umin z31.d, p7/m, z31.d, z31.d
// CHECK-ENCODING: [0xff,0x1f,0xcb,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff 1f cb 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+umin z4.d, p7/m, z4.d, z31.d
+// CHECK-INST: umin z4.d, p7/m, z4.d, z31.d
+// CHECK-ENCODING: [0xe4,0x1f,0xcb,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 1f cb 04 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+umin z4.d, p7/m, z4.d, z31.d
+// CHECK-INST: umin z4.d, p7/m, z4.d, z31.d
+// CHECK-ENCODING: [0xe4,0x1f,0xcb,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 1f cb 04 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+umin z31.b, z31.b, #255
+// CHECK-INST: umin z31.b, z31.b, #255
+// CHECK-ENCODING: [0xff,0xdf,0x2b,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ff df 2b 25 <unknown>
diff --git a/test/MC/AArch64/SVE/uminv-diagnostics.s b/test/MC/AArch64/SVE/uminv-diagnostics.s
index 9208dc9a11f4..f6898352a93a 100644
--- a/test/MC/AArch64/SVE/uminv-diagnostics.s
+++ b/test/MC/AArch64/SVE/uminv-diagnostics.s
@@ -31,4 +31,19 @@ uminv v0.2d, p7, z31.d
uminv h0, p8, z31.h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
// CHECK-NEXT: uminv h0, p8, z31.h
-// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: \ No newline at end of file
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p7/z, z6.d
+uminv d0, p7, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uminv d0, p7, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+uminv d0, p7, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uminv d0, p7, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/umulh.s b/test/MC/AArch64/SVE/umulh.s
index 523428335edd..a5d6eb58603a 100644
--- a/test/MC/AArch64/SVE/umulh.s
+++ b/test/MC/AArch64/SVE/umulh.s
@@ -30,3 +30,31 @@ umulh z0.d, p7/m, z0.d, z31.d
// CHECK-ENCODING: [0xe0,0x1f,0xd3,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: e0 1f d3 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0.d, p7/z, z7.d
+// CHECK-INST: movprfx z0.d, p7/z, z7.d
+// CHECK-ENCODING: [0xe0,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 3c d0 04 <unknown>
+
+umulh z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: umulh z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x1f,0xd3,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 1f d3 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+umulh z0.d, p7/m, z0.d, z31.d
+// CHECK-INST: umulh z0.d, p7/m, z0.d, z31.d
+// CHECK-ENCODING: [0xe0,0x1f,0xd3,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 1f d3 04 <unknown>
diff --git a/test/MC/AArch64/SVE/uqadd-diagnostics.s b/test/MC/AArch64/SVE/uqadd-diagnostics.s
index 1a6179a14ca5..7dcdba98ccc4 100644
--- a/test/MC/AArch64/SVE/uqadd-diagnostics.s
+++ b/test/MC/AArch64/SVE/uqadd-diagnostics.s
@@ -86,3 +86,25 @@ uqadd z0.d, z0.d, #65536
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 255] or a multiple of 256 in range [256, 65280]
// CHECK-NEXT: uqadd z0.d, z0.d, #65536
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+uqadd z31.d, z31.d, #65280
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: uqadd z31.d, z31.d, #65280
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.d, p0/z, z7.d
+uqadd z0.d, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uqadd z0.d, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+uqadd z0.d, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uqadd z0.d, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/uqadd.s b/test/MC/AArch64/SVE/uqadd.s
index fc152297c39c..23e213ac85f9 100644
--- a/test/MC/AArch64/SVE/uqadd.s
+++ b/test/MC/AArch64/SVE/uqadd.s
@@ -115,3 +115,19 @@ uqadd z31.d, z31.d, #65280
// CHECK-ENCODING: [0xff,0xff,0xe5,0x25]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff ff e5 25 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+uqadd z31.d, z31.d, #65280
+// CHECK-INST: uqadd z31.d, z31.d, #65280
+// CHECK-ENCODING: [0xff,0xff,0xe5,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ff ff e5 25 <unknown>
diff --git a/test/MC/AArch64/SVE/uqdecd-diagnostics.s b/test/MC/AArch64/SVE/uqdecd-diagnostics.s
index 8f3ce3e6e4fd..fcf40770cb36 100644
--- a/test/MC/AArch64/SVE/uqdecd-diagnostics.s
+++ b/test/MC/AArch64/SVE/uqdecd-diagnostics.s
@@ -79,3 +79,25 @@ uqdecd x0, #32
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate pattern
// CHECK-NEXT: uqdecd x0, #32
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+uqdecd z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: uqdecd z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.d, p0/z, z7.d
+uqdecd z0.d, pow2, mul #16
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: uqdecd z0.d, pow2, mul #16
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.d, p0/z, z7.d
+uqdecd z0.d, pow2
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: uqdecd z0.d, pow2
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/uqdecd.s b/test/MC/AArch64/SVE/uqdecd.s
index 65183428ed5c..f47d0d412990 100644
--- a/test/MC/AArch64/SVE/uqdecd.s
+++ b/test/MC/AArch64/SVE/uqdecd.s
@@ -294,3 +294,43 @@ uqdecd x0, #28
// CHECK-ENCODING: [0x80,0xff,0xf0,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 80 ff f0 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+uqdecd z0.d
+// CHECK-INST: uqdecd z0.d
+// CHECK-ENCODING: [0xe0,0xcf,0xe0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 cf e0 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+uqdecd z0.d, pow2, mul #16
+// CHECK-INST: uqdecd z0.d, pow2, mul #16
+// CHECK-ENCODING: [0x00,0xcc,0xef,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 00 cc ef 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+uqdecd z0.d, pow2
+// CHECK-INST: uqdecd z0.d, pow2
+// CHECK-ENCODING: [0x00,0xcc,0xe0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 00 cc e0 04 <unknown>
diff --git a/test/MC/AArch64/SVE/uqdech-diagnostics.s b/test/MC/AArch64/SVE/uqdech-diagnostics.s
index 936c486599c4..a4e0a24c9d76 100644
--- a/test/MC/AArch64/SVE/uqdech-diagnostics.s
+++ b/test/MC/AArch64/SVE/uqdech-diagnostics.s
@@ -79,3 +79,25 @@ uqdech x0, #32
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate pattern
// CHECK-NEXT: uqdech x0, #32
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.h, p0/z, z7.h
+uqdech z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: uqdech z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.h, p0/z, z7.h
+uqdech z0.h, pow2, mul #16
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: uqdech z0.h, pow2, mul #16
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.h, p0/z, z7.h
+uqdech z0.h, pow2
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: uqdech z0.h, pow2
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/uqdech.s b/test/MC/AArch64/SVE/uqdech.s
index 950409133e0d..9a1ff5256040 100644
--- a/test/MC/AArch64/SVE/uqdech.s
+++ b/test/MC/AArch64/SVE/uqdech.s
@@ -294,3 +294,43 @@ uqdech x0, #28
// CHECK-ENCODING: [0x80,0xff,0x70,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 80 ff 70 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+uqdech z0.h
+// CHECK-INST: uqdech z0.h
+// CHECK-ENCODING: [0xe0,0xcf,0x60,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 cf 60 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+uqdech z0.h, pow2, mul #16
+// CHECK-INST: uqdech z0.h, pow2, mul #16
+// CHECK-ENCODING: [0x00,0xcc,0x6f,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 00 cc 6f 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+uqdech z0.h, pow2
+// CHECK-INST: uqdech z0.h, pow2
+// CHECK-ENCODING: [0x00,0xcc,0x60,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 00 cc 60 04 <unknown>
diff --git a/test/MC/AArch64/SVE/uqdecp-diagnostics.s b/test/MC/AArch64/SVE/uqdecp-diagnostics.s
new file mode 100644
index 000000000000..8fd345b46ee5
--- /dev/null
+++ b/test/MC/AArch64/SVE/uqdecp-diagnostics.s
@@ -0,0 +1,11 @@
+// RUN: not llvm-mc -triple=aarch64-none-linux-gnu -show-encoding -mattr=+sve 2>&1 < %s | FileCheck %s
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+uqdecp z0.d, p0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: uqdecp z0.d, p0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/uqdecp.s b/test/MC/AArch64/SVE/uqdecp.s
index 4f9a7af45d55..3c07a7da2462 100644
--- a/test/MC/AArch64/SVE/uqdecp.s
+++ b/test/MC/AArch64/SVE/uqdecp.s
@@ -72,3 +72,19 @@ uqdecp z0.d, p0
// CHECK-ENCODING: [0x00,0x80,0xeb,0x25]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 00 80 eb 25 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+uqdecp z0.d, p0
+// CHECK-INST: uqdecp z0.d, p0
+// CHECK-ENCODING: [0x00,0x80,0xeb,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 00 80 eb 25 <unknown>
diff --git a/test/MC/AArch64/SVE/uqdecw-diagnostics.s b/test/MC/AArch64/SVE/uqdecw-diagnostics.s
index b46373915c31..a7a955e64108 100644
--- a/test/MC/AArch64/SVE/uqdecw-diagnostics.s
+++ b/test/MC/AArch64/SVE/uqdecw-diagnostics.s
@@ -79,3 +79,25 @@ uqdecw x0, #32
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate pattern
// CHECK-NEXT: uqdecw x0, #32
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.s, p0/z, z7.s
+uqdecw z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: uqdecw z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+uqdecw z0.s, pow2, mul #16
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: uqdecw z0.s, pow2, mul #16
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+uqdecw z0.s, pow2
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: uqdecw z0.s, pow2
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/uqdecw.s b/test/MC/AArch64/SVE/uqdecw.s
index 44e96f31b98a..6ed544d5733f 100644
--- a/test/MC/AArch64/SVE/uqdecw.s
+++ b/test/MC/AArch64/SVE/uqdecw.s
@@ -294,3 +294,43 @@ uqdecw x0, #28
// CHECK-ENCODING: [0x80,0xff,0xb0,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 80 ff b0 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+uqdecw z0.s
+// CHECK-INST: uqdecw z0.s
+// CHECK-ENCODING: [0xe0,0xcf,0xa0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 cf a0 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+uqdecw z0.s, pow2, mul #16
+// CHECK-INST: uqdecw z0.s, pow2, mul #16
+// CHECK-ENCODING: [0x00,0xcc,0xaf,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 00 cc af 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+uqdecw z0.s, pow2
+// CHECK-INST: uqdecw z0.s, pow2
+// CHECK-ENCODING: [0x00,0xcc,0xa0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 00 cc a0 04 <unknown>
diff --git a/test/MC/AArch64/SVE/uqincd-diagnostics.s b/test/MC/AArch64/SVE/uqincd-diagnostics.s
index cb662cfa8f1f..1c6dfa11965f 100644
--- a/test/MC/AArch64/SVE/uqincd-diagnostics.s
+++ b/test/MC/AArch64/SVE/uqincd-diagnostics.s
@@ -79,3 +79,25 @@ uqincd x0, #32
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate pattern
// CHECK-NEXT: uqincd x0, #32
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+uqincd z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: uqincd z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.d, p0/z, z7.d
+uqincd z0.d, pow2, mul #16
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: uqincd z0.d, pow2, mul #16
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.d, p0/z, z7.d
+uqincd z0.d, pow2
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: uqincd z0.d, pow2
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/uqincd.s b/test/MC/AArch64/SVE/uqincd.s
index 48890f965db9..d72672ced424 100644
--- a/test/MC/AArch64/SVE/uqincd.s
+++ b/test/MC/AArch64/SVE/uqincd.s
@@ -294,3 +294,43 @@ uqincd x0, #28
// CHECK-ENCODING: [0x80,0xf7,0xf0,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 80 f7 f0 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+uqincd z0.d
+// CHECK-INST: uqincd z0.d
+// CHECK-ENCODING: [0xe0,0xc7,0xe0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 c7 e0 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+uqincd z0.d, pow2, mul #16
+// CHECK-INST: uqincd z0.d, pow2, mul #16
+// CHECK-ENCODING: [0x00,0xc4,0xef,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 00 c4 ef 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+uqincd z0.d, pow2
+// CHECK-INST: uqincd z0.d, pow2
+// CHECK-ENCODING: [0x00,0xc4,0xe0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 00 c4 e0 04 <unknown>
diff --git a/test/MC/AArch64/SVE/uqinch-diagnostics.s b/test/MC/AArch64/SVE/uqinch-diagnostics.s
index 200d79630607..fc694c4e93de 100644
--- a/test/MC/AArch64/SVE/uqinch-diagnostics.s
+++ b/test/MC/AArch64/SVE/uqinch-diagnostics.s
@@ -79,3 +79,25 @@ uqinch x0, #32
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate pattern
// CHECK-NEXT: uqinch x0, #32
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.h, p0/z, z7.h
+uqinch z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: uqinch z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.h, p0/z, z7.h
+uqinch z0.h, pow2, mul #16
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: uqinch z0.h, pow2, mul #16
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.h, p0/z, z7.h
+uqinch z0.h, pow2
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: uqinch z0.h, pow2
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/uqinch.s b/test/MC/AArch64/SVE/uqinch.s
index 7a471d41d49a..03c7fb7fe9a8 100644
--- a/test/MC/AArch64/SVE/uqinch.s
+++ b/test/MC/AArch64/SVE/uqinch.s
@@ -296,3 +296,43 @@ uqinch x0, #28
// CHECK-ENCODING: [0x80,0xf7,0x70,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 80 f7 70 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+uqinch z0.h
+// CHECK-INST: uqinch z0.h
+// CHECK-ENCODING: [0xe0,0xc7,0x60,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 c7 60 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+uqinch z0.h, pow2, mul #16
+// CHECK-INST: uqinch z0.h, pow2, mul #16
+// CHECK-ENCODING: [0x00,0xc4,0x6f,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 00 c4 6f 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+uqinch z0.h, pow2
+// CHECK-INST: uqinch z0.h, pow2
+// CHECK-ENCODING: [0x00,0xc4,0x60,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 00 c4 60 04 <unknown>
diff --git a/test/MC/AArch64/SVE/uqincp-diagnostics.s b/test/MC/AArch64/SVE/uqincp-diagnostics.s
index 8a4aa25ac413..d5cf76014df5 100644
--- a/test/MC/AArch64/SVE/uqincp-diagnostics.s
+++ b/test/MC/AArch64/SVE/uqincp-diagnostics.s
@@ -46,3 +46,13 @@ uqincp x0, p0.q
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register
// CHECK-NEXT: uqincp x0, p0.q
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+uqincp z0.d, p0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: uqincp z0.d, p0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/uqincp.s b/test/MC/AArch64/SVE/uqincp.s
index e7c87efa4e06..a4fb8199d994 100644
--- a/test/MC/AArch64/SVE/uqincp.s
+++ b/test/MC/AArch64/SVE/uqincp.s
@@ -72,3 +72,19 @@ uqincp z0.d, p0
// CHECK-ENCODING: [0x00,0x80,0xe9,0x25]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 00 80 e9 25 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+uqincp z0.d, p0
+// CHECK-INST: uqincp z0.d, p0
+// CHECK-ENCODING: [0x00,0x80,0xe9,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 00 80 e9 25 <unknown>
diff --git a/test/MC/AArch64/SVE/uqincw-diagnostics.s b/test/MC/AArch64/SVE/uqincw-diagnostics.s
index c5a632bc88cf..0ad3624230b7 100644
--- a/test/MC/AArch64/SVE/uqincw-diagnostics.s
+++ b/test/MC/AArch64/SVE/uqincw-diagnostics.s
@@ -79,3 +79,25 @@ uqincw x0, #32
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate pattern
// CHECK-NEXT: uqincw x0, #32
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.s, p0/z, z7.s
+uqincw z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: uqincw z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+uqincw z0.s, pow2, mul #16
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: uqincw z0.s, pow2, mul #16
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+uqincw z0.s, pow2
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: uqincw z0.s, pow2
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/uqincw.s b/test/MC/AArch64/SVE/uqincw.s
index 097b26f40afe..1944d69625e5 100644
--- a/test/MC/AArch64/SVE/uqincw.s
+++ b/test/MC/AArch64/SVE/uqincw.s
@@ -294,3 +294,43 @@ uqincw x0, #28
// CHECK-ENCODING: [0x80,0xf7,0xb0,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 80 f7 b0 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+uqincw z0.s
+// CHECK-INST: uqincw z0.s
+// CHECK-ENCODING: [0xe0,0xc7,0xa0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 c7 a0 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+uqincw z0.s, pow2, mul #16
+// CHECK-INST: uqincw z0.s, pow2, mul #16
+// CHECK-ENCODING: [0x00,0xc4,0xaf,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 00 c4 af 04 <unknown>
+
+movprfx z0, z7
+// CHECK-INST: movprfx z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+uqincw z0.s, pow2
+// CHECK-INST: uqincw z0.s, pow2
+// CHECK-ENCODING: [0x00,0xc4,0xa0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 00 c4 a0 04 <unknown>
diff --git a/test/MC/AArch64/SVE/uqsub-diagnostics.s b/test/MC/AArch64/SVE/uqsub-diagnostics.s
index 566334ceea8d..5f8129aa773e 100644
--- a/test/MC/AArch64/SVE/uqsub-diagnostics.s
+++ b/test/MC/AArch64/SVE/uqsub-diagnostics.s
@@ -86,3 +86,25 @@ uqsub z0.d, z0.d, #65536
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 255] or a multiple of 256 in range [256, 65280]
// CHECK-NEXT: uqsub z0.d, z0.d, #65536
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+uqsub z31.d, z31.d, #65280
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: uqsub z31.d, z31.d, #65280
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.d, p0/z, z7.d
+uqsub z0.d, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uqsub z0.d, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+uqsub z0.d, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uqsub z0.d, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/uqsub.s b/test/MC/AArch64/SVE/uqsub.s
index 4fb90cae9ee7..5bcc10f8b2ea 100644
--- a/test/MC/AArch64/SVE/uqsub.s
+++ b/test/MC/AArch64/SVE/uqsub.s
@@ -115,3 +115,19 @@ uqsub z31.d, z31.d, #65280
// CHECK-ENCODING: [0xff,0xff,0xe7,0x25]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff ff e7 25 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+uqsub z31.d, z31.d, #65280
+// CHECK-INST: uqsub z31.d, z31.d, #65280
+// CHECK-ENCODING: [0xff,0xff,0xe7,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ff ff e7 25 <unknown>
diff --git a/test/MC/AArch64/SVE/uunpkhi-diagnostics.s b/test/MC/AArch64/SVE/uunpkhi-diagnostics.s
index bdae37a32ae3..00adfb86fe42 100644
--- a/test/MC/AArch64/SVE/uunpkhi-diagnostics.s
+++ b/test/MC/AArch64/SVE/uunpkhi-diagnostics.s
@@ -18,3 +18,19 @@ uunpkhi z0.d, z0.h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: uunpkhi z0.d, z0.h
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+uunpkhi z31.d, z31.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uunpkhi z31.d, z31.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+uunpkhi z31.d, z31.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uunpkhi z31.d, z31.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/uunpklo-diagnostics.s b/test/MC/AArch64/SVE/uunpklo-diagnostics.s
index 00f8a72fbc24..ae4a810fbea7 100644
--- a/test/MC/AArch64/SVE/uunpklo-diagnostics.s
+++ b/test/MC/AArch64/SVE/uunpklo-diagnostics.s
@@ -18,3 +18,19 @@ uunpklo z0.d, z0.h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: uunpklo z0.d, z0.h
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+uunpklo z31.d, z31.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uunpklo z31.d, z31.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+uunpklo z31.d, z31.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uunpklo z31.d, z31.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/uxtb.s b/test/MC/AArch64/SVE/uxtb.s
index 5c1e01620e98..81d7dc01e02a 100644
--- a/test/MC/AArch64/SVE/uxtb.s
+++ b/test/MC/AArch64/SVE/uxtb.s
@@ -42,3 +42,31 @@ uxtb z31.d, p7/m, z31.d
// CHECK-ENCODING: [0xff,0xbf,0xd1,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff bf d1 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+uxtb z4.d, p7/m, z31.d
+// CHECK-INST: uxtb z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xd1,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf d1 04 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+uxtb z4.d, p7/m, z31.d
+// CHECK-INST: uxtb z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xd1,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf d1 04 <unknown>
diff --git a/test/MC/AArch64/SVE/uxth.s b/test/MC/AArch64/SVE/uxth.s
index 9244fa8c960b..10074b409429 100644
--- a/test/MC/AArch64/SVE/uxth.s
+++ b/test/MC/AArch64/SVE/uxth.s
@@ -30,3 +30,31 @@ uxth z31.d, p7/m, z31.d
// CHECK-ENCODING: [0xff,0xbf,0xd3,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff bf d3 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+uxth z4.d, p7/m, z31.d
+// CHECK-INST: uxth z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xd3,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf d3 04 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+uxth z4.d, p7/m, z31.d
+// CHECK-INST: uxth z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xd3,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf d3 04 <unknown>
diff --git a/test/MC/AArch64/SVE/uxtw.s b/test/MC/AArch64/SVE/uxtw.s
index e2dbdbcb0b3f..c72fedf31de3 100644
--- a/test/MC/AArch64/SVE/uxtw.s
+++ b/test/MC/AArch64/SVE/uxtw.s
@@ -18,3 +18,31 @@ uxtw z31.d, p7/m, z31.d
// CHECK-ENCODING: [0xff,0xbf,0xd5,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: ff bf d5 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+uxtw z4.d, p7/m, z31.d
+// CHECK-INST: uxtw z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xd5,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf d5 04 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+uxtw z4.d, p7/m, z31.d
+// CHECK-INST: uxtw z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0xd5,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e4 bf d5 04 <unknown>
diff --git a/test/MC/AArch64/SVE/uzp1-diagnostics.s b/test/MC/AArch64/SVE/uzp1-diagnostics.s
index 3842d60a2916..98f0997527c1 100644
--- a/test/MC/AArch64/SVE/uzp1-diagnostics.s
+++ b/test/MC/AArch64/SVE/uzp1-diagnostics.s
@@ -41,3 +41,19 @@ uzp1 p1.s, p2.s, z3.s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: uzp1 p1.s, p2.s, z3.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+uzp1 z31.d, z31.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uzp1 z31.d, z31.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+uzp1 z31.d, z31.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uzp1 z31.d, z31.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/uzp2-diagnostics.s b/test/MC/AArch64/SVE/uzp2-diagnostics.s
index ecb8b33ccba2..a53cfe4aa87c 100644
--- a/test/MC/AArch64/SVE/uzp2-diagnostics.s
+++ b/test/MC/AArch64/SVE/uzp2-diagnostics.s
@@ -41,3 +41,19 @@ uzp2 p1.s, p2.s, z3.s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: uzp2 p1.s, p2.s, z3.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+uzp2 z31.d, z31.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uzp2 z31.d, z31.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+uzp2 z31.d, z31.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uzp2 z31.d, z31.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/whilele-diagnostics.s b/test/MC/AArch64/SVE/whilele-diagnostics.s
new file mode 100644
index 000000000000..94e3993645df
--- /dev/null
+++ b/test/MC/AArch64/SVE/whilele-diagnostics.s
@@ -0,0 +1,20 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve 2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid scalar registers
+
+whilele p15.b, xzr, sp
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: whilele p15.b, xzr, sp
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+whilele p15.b, xzr, w0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: whilele p15.b, xzr, w0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+whilele p15.b, w0, x0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: whilele p15.b, w0, x0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/whilele.s b/test/MC/AArch64/SVE/whilele.s
new file mode 100644
index 000000000000..e8c48a6e926f
--- /dev/null
+++ b/test/MC/AArch64/SVE/whilele.s
@@ -0,0 +1,68 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d -mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+whilele p15.b, xzr, x0
+// CHECK-INST: whilele p15.b, xzr, x0
+// CHECK-ENCODING: [0xff,0x17,0x20,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ff 17 20 25 <unknown>
+
+whilele p15.b, x0, xzr
+// CHECK-INST: whilele p15.b, x0, xzr
+// CHECK-ENCODING: [0x1f,0x14,0x3f,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 1f 14 3f 25 <unknown>
+
+whilele p15.b, wzr, w0
+// CHECK-INST: whilele p15.b, wzr, w0
+// CHECK-ENCODING: [0xff,0x07,0x20,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ff 07 20 25 <unknown>
+
+whilele p15.b, w0, wzr
+// CHECK-INST: whilele p15.b, w0, wzr
+// CHECK-ENCODING: [0x1f,0x04,0x3f,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 1f 04 3f 25 <unknown>
+
+whilele p15.h, x0, xzr
+// CHECK-INST: whilele p15.h, x0, xzr
+// CHECK-ENCODING: [0x1f,0x14,0x7f,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 1f 14 7f 25 <unknown>
+
+whilele p15.h, w0, wzr
+// CHECK-INST: whilele p15.h, w0, wzr
+// CHECK-ENCODING: [0x1f,0x04,0x7f,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 1f 04 7f 25 <unknown>
+
+whilele p15.s, x0, xzr
+// CHECK-INST: whilele p15.s, x0, xzr
+// CHECK-ENCODING: [0x1f,0x14,0xbf,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 1f 14 bf 25 <unknown>
+
+whilele p15.s, w0, wzr
+// CHECK-INST: whilele p15.s, w0, wzr
+// CHECK-ENCODING: [0x1f,0x04,0xbf,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 1f 04 bf 25 <unknown>
+
+whilele p15.d, w0, wzr
+// CHECK-INST: whilele p15.d, w0, wzr
+// CHECK-ENCODING: [0x1f,0x04,0xff,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 1f 04 ff 25 <unknown>
+
+whilele p15.d, x0, xzr
+// CHECK-INST: whilele p15.d, x0, xzr
+// CHECK-ENCODING: [0x1f,0x14,0xff,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 1f 14 ff 25 <unknown>
diff --git a/test/MC/AArch64/SVE/whilelo-diagnostics.s b/test/MC/AArch64/SVE/whilelo-diagnostics.s
new file mode 100644
index 000000000000..6237a1182d01
--- /dev/null
+++ b/test/MC/AArch64/SVE/whilelo-diagnostics.s
@@ -0,0 +1,20 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve 2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid scalar registers
+
+whilelo p15.b, xzr, sp
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: whilelo p15.b, xzr, sp
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+whilelo p15.b, xzr, w0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: whilelo p15.b, xzr, w0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+whilelo p15.b, w0, x0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: whilelo p15.b, w0, x0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/whilelo.s b/test/MC/AArch64/SVE/whilelo.s
new file mode 100644
index 000000000000..30fa35dcd2cf
--- /dev/null
+++ b/test/MC/AArch64/SVE/whilelo.s
@@ -0,0 +1,68 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d -mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+whilelo p15.b, xzr, x0
+// CHECK-INST: whilelo p15.b, xzr, x0
+// CHECK-ENCODING: [0xef,0x1f,0x20,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ef 1f 20 25 <unknown>
+
+whilelo p15.b, x0, xzr
+// CHECK-INST: whilelo p15.b, x0, xzr
+// CHECK-ENCODING: [0x0f,0x1c,0x3f,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 0f 1c 3f 25 <unknown>
+
+whilelo p15.b, wzr, w0
+// CHECK-INST: whilelo p15.b, wzr, w0
+// CHECK-ENCODING: [0xef,0x0f,0x20,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ef 0f 20 25 <unknown>
+
+whilelo p15.b, w0, wzr
+// CHECK-INST: whilelo p15.b, w0, wzr
+// CHECK-ENCODING: [0x0f,0x0c,0x3f,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 0f 0c 3f 25 <unknown>
+
+whilelo p15.h, x0, xzr
+// CHECK-INST: whilelo p15.h, x0, xzr
+// CHECK-ENCODING: [0x0f,0x1c,0x7f,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 0f 1c 7f 25 <unknown>
+
+whilelo p15.h, w0, wzr
+// CHECK-INST: whilelo p15.h, w0, wzr
+// CHECK-ENCODING: [0x0f,0x0c,0x7f,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 0f 0c 7f 25 <unknown>
+
+whilelo p15.s, x0, xzr
+// CHECK-INST: whilelo p15.s, x0, xzr
+// CHECK-ENCODING: [0x0f,0x1c,0xbf,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 0f 1c bf 25 <unknown>
+
+whilelo p15.s, w0, wzr
+// CHECK-INST: whilelo p15.s, w0, wzr
+// CHECK-ENCODING: [0x0f,0x0c,0xbf,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 0f 0c bf 25 <unknown>
+
+whilelo p15.d, w0, wzr
+// CHECK-INST: whilelo p15.d, w0, wzr
+// CHECK-ENCODING: [0x0f,0x0c,0xff,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 0f 0c ff 25 <unknown>
+
+whilelo p15.d, x0, xzr
+// CHECK-INST: whilelo p15.d, x0, xzr
+// CHECK-ENCODING: [0x0f,0x1c,0xff,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 0f 1c ff 25 <unknown>
diff --git a/test/MC/AArch64/SVE/whilels-diagnostics.s b/test/MC/AArch64/SVE/whilels-diagnostics.s
new file mode 100644
index 000000000000..6a422900a071
--- /dev/null
+++ b/test/MC/AArch64/SVE/whilels-diagnostics.s
@@ -0,0 +1,20 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve 2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid scalar registers
+
+whilels p15.b, xzr, sp
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: whilels p15.b, xzr, sp
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+whilels p15.b, xzr, w0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: whilels p15.b, xzr, w0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+whilels p15.b, w0, x0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: whilels p15.b, w0, x0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/whilels.s b/test/MC/AArch64/SVE/whilels.s
new file mode 100644
index 000000000000..3e127d3e1720
--- /dev/null
+++ b/test/MC/AArch64/SVE/whilels.s
@@ -0,0 +1,68 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d -mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+whilels p15.b, xzr, x0
+// CHECK-INST: whilels p15.b, xzr, x0
+// CHECK-ENCODING: [0xff,0x1f,0x20,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ff 1f 20 25 <unknown>
+
+whilels p15.b, x0, xzr
+// CHECK-INST: whilels p15.b, x0, xzr
+// CHECK-ENCODING: [0x1f,0x1c,0x3f,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 1f 1c 3f 25 <unknown>
+
+whilels p15.b, wzr, w0
+// CHECK-INST: whilels p15.b, wzr, w0
+// CHECK-ENCODING: [0xff,0x0f,0x20,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ff 0f 20 25 <unknown>
+
+whilels p15.b, w0, wzr
+// CHECK-INST: whilels p15.b, w0, wzr
+// CHECK-ENCODING: [0x1f,0x0c,0x3f,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 1f 0c 3f 25 <unknown>
+
+whilels p15.h, x0, xzr
+// CHECK-INST: whilels p15.h, x0, xzr
+// CHECK-ENCODING: [0x1f,0x1c,0x7f,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 1f 1c 7f 25 <unknown>
+
+whilels p15.h, w0, wzr
+// CHECK-INST: whilels p15.h, w0, wzr
+// CHECK-ENCODING: [0x1f,0x0c,0x7f,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 1f 0c 7f 25 <unknown>
+
+whilels p15.s, x0, xzr
+// CHECK-INST: whilels p15.s, x0, xzr
+// CHECK-ENCODING: [0x1f,0x1c,0xbf,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 1f 1c bf 25 <unknown>
+
+whilels p15.s, w0, wzr
+// CHECK-INST: whilels p15.s, w0, wzr
+// CHECK-ENCODING: [0x1f,0x0c,0xbf,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 1f 0c bf 25 <unknown>
+
+whilels p15.d, w0, wzr
+// CHECK-INST: whilels p15.d, w0, wzr
+// CHECK-ENCODING: [0x1f,0x0c,0xff,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 1f 0c ff 25 <unknown>
+
+whilels p15.d, x0, xzr
+// CHECK-INST: whilels p15.d, x0, xzr
+// CHECK-ENCODING: [0x1f,0x1c,0xff,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 1f 1c ff 25 <unknown>
diff --git a/test/MC/AArch64/SVE/whilelt-diagnostics.s b/test/MC/AArch64/SVE/whilelt-diagnostics.s
new file mode 100644
index 000000000000..1c3a8f8f46ec
--- /dev/null
+++ b/test/MC/AArch64/SVE/whilelt-diagnostics.s
@@ -0,0 +1,20 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve 2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid scalar registers
+
+whilelt p15.b, xzr, sp
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: whilelt p15.b, xzr, sp
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+whilelt p15.b, xzr, w0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: whilelt p15.b, xzr, w0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+whilelt p15.b, w0, x0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: whilelt p15.b, w0, x0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/whilelt.s b/test/MC/AArch64/SVE/whilelt.s
new file mode 100644
index 000000000000..9a0723cdc7c9
--- /dev/null
+++ b/test/MC/AArch64/SVE/whilelt.s
@@ -0,0 +1,68 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d -mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+whilelt p15.b, xzr, x0
+// CHECK-INST: whilelt p15.b, xzr, x0
+// CHECK-ENCODING: [0xef,0x17,0x20,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ef 17 20 25 <unknown>
+
+whilelt p15.b, x0, xzr
+// CHECK-INST: whilelt p15.b, x0, xzr
+// CHECK-ENCODING: [0x0f,0x14,0x3f,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 0f 14 3f 25 <unknown>
+
+whilelt p15.b, wzr, w0
+// CHECK-INST: whilelt p15.b, wzr, w0
+// CHECK-ENCODING: [0xef,0x07,0x20,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ef 07 20 25 <unknown>
+
+whilelt p15.b, w0, wzr
+// CHECK-INST: whilelt p15.b, w0, wzr
+// CHECK-ENCODING: [0x0f,0x04,0x3f,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 0f 04 3f 25 <unknown>
+
+whilelt p15.h, x0, xzr
+// CHECK-INST: whilelt p15.h, x0, xzr
+// CHECK-ENCODING: [0x0f,0x14,0x7f,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 0f 14 7f 25 <unknown>
+
+whilelt p15.h, w0, wzr
+// CHECK-INST: whilelt p15.h, w0, wzr
+// CHECK-ENCODING: [0x0f,0x04,0x7f,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 0f 04 7f 25 <unknown>
+
+whilelt p15.s, x0, xzr
+// CHECK-INST: whilelt p15.s, x0, xzr
+// CHECK-ENCODING: [0x0f,0x14,0xbf,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 0f 14 bf 25 <unknown>
+
+whilelt p15.s, w0, wzr
+// CHECK-INST: whilelt p15.s, w0, wzr
+// CHECK-ENCODING: [0x0f,0x04,0xbf,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 0f 04 bf 25 <unknown>
+
+whilelt p15.d, w0, wzr
+// CHECK-INST: whilelt p15.d, w0, wzr
+// CHECK-ENCODING: [0x0f,0x04,0xff,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 0f 04 ff 25 <unknown>
+
+whilelt p15.d, x0, xzr
+// CHECK-INST: whilelt p15.d, x0, xzr
+// CHECK-ENCODING: [0x0f,0x14,0xff,0x25]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 0f 14 ff 25 <unknown>
diff --git a/test/MC/AArch64/SVE/zip1-diagnostics.s b/test/MC/AArch64/SVE/zip1-diagnostics.s
index 17670be5de22..a0652e3880fd 100644
--- a/test/MC/AArch64/SVE/zip1-diagnostics.s
+++ b/test/MC/AArch64/SVE/zip1-diagnostics.s
@@ -41,3 +41,19 @@ zip1 p1.s, p2.s, z3.s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: zip1 p1.s, p2.s, z3.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+zip1 z31.d, z31.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: zip1 z31.d, z31.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+zip1 z31.d, z31.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: zip1 z31.d, z31.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SVE/zip2-diagnostics.s b/test/MC/AArch64/SVE/zip2-diagnostics.s
index 3a78b9432fd7..7691bafb538d 100644
--- a/test/MC/AArch64/SVE/zip2-diagnostics.s
+++ b/test/MC/AArch64/SVE/zip2-diagnostics.s
@@ -41,3 +41,19 @@ zip2 p1.s, p2.s, z3.s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: zip2 p1.s, p2.s, z3.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+zip2 z31.d, z31.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: zip2 z31.d, z31.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+zip2 z31.d, z31.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: zip2 z31.d, z31.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/arm64-directive_loh.s b/test/MC/AArch64/arm64-directive_loh.s
index 76d2d7f21861..654dc9e70067 100644
--- a/test/MC/AArch64/arm64-directive_loh.s
+++ b/test/MC/AArch64/arm64-directive_loh.s
@@ -1,5 +1,7 @@
# RUN: not llvm-mc -triple arm64-apple-darwin < %s 2> %t | FileCheck %s
# RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
+# RUN: not llvm-mc -triple aarch64-linux-gnu < %s 2>&1 | FileCheck --check-prefix=UNKNOWN %s
+# RUN: not llvm-mc -triple aarch64-win32-gnu < %s 2>&1 | FileCheck --check-prefix=UNKNOWN %s
.globl _fct1
_fct1:
@@ -15,6 +17,8 @@ _fct1:
# CHECK: .loh AdrpAdrp L1, L2
# CHECK: .loh AdrpAdrp L1, L2
+# UNKNOWN: error: unknown directive
+# UNKNOWN-NEXT: .loh AdrpAdrp L1, L2
.loh AdrpAdrp L1, L2
.loh 1 L1, L2
diff --git a/test/MC/AArch64/inst-directive-other.s b/test/MC/AArch64/inst-directive-other.s
new file mode 100644
index 000000000000..02f21c13d295
--- /dev/null
+++ b/test/MC/AArch64/inst-directive-other.s
@@ -0,0 +1,42 @@
+// RUN: llvm-mc %s -triple=arm64-apple-darwin -filetype=asm -o - \
+// RUN: | FileCheck %s --check-prefix=CHECK-ASM
+// RUN: llvm-mc %s -triple=arm64-apple-darwin -filetype=obj -o - \
+// RUN: | llvm-objdump -d - | FileCheck %s --check-prefixes=CHECK-OBJ,CHECK-OBJ-CODE
+// RUN: llvm-mc %s -triple=aarch64-win32-gnu -filetype=asm -o - \
+// RUN: | FileCheck %s --check-prefix=CHECK-ASM
+// RUN: llvm-mc %s -triple=aarch64-win32-gnu -filetype=obj -o - \
+// RUN: | llvm-objdump -d - | FileCheck %s --check-prefixes=CHECK-OBJ,CHECK-OBJ-CODE
+// RUN: llvm-mc %s -triple=aarch64-linux-gnu -filetype=asm -o - \
+// RUN: | FileCheck %s --check-prefix=CHECK-ASM
+// RUN: llvm-mc %s -triple=aarch64-linux-gnu -filetype=obj -o - \
+// RUN: | llvm-objdump -d - | FileCheck %s --check-prefixes=CHECK-OBJ,CHECK-OBJ-DATA
+// RUN: llvm-mc %s -triple=aarch64_be-linux-gnu -filetype=asm -o - \
+// RUN: | FileCheck %s --check-prefix=CHECK-ASM
+// RUN: llvm-mc %s -triple=aarch64_be-linux-gnu -filetype=obj -o - \
+// RUN: | llvm-objdump -d - | FileCheck %s --check-prefixes=CHECK-OBJ,CHECK-OBJ-BE
+
+ .text
+
+ .p2align 2
+ .globl _func
+_func:
+ nop
+ // A .long is stored differently for big endian aarch64 targets, while
+ // instructions always are stored in little endian.
+ // ELF distinguishes between data and code when emitted this way, but
+ // MachO and COFF don't.
+ .long 0xd503201f
+ .inst 0xd503201f
+
+// CHECK-ASM: .p2align 2
+// CHECK-ASM: .globl _func
+// CHECK-ASM: _func:
+// CHECK-ASM: nop
+// CHECK-ASM: .{{long|word}} 3573751839
+// CHECK-ASM: .inst 0xd503201f
+
+// CHECK-OBJ: 0: 1f 20 03 d5 nop
+// CHECK-OBJ-CODE: 4: 1f 20 03 d5 nop
+// CHECK-OBJ-DATA: 4: 1f 20 03 d5 .word 0xd503201f
+// CHECK-OBJ-BE: 4: d5 03 20 1f .word 0xd503201f
+// CHECK-OBJ: 8: 1f 20 03 d5 nop
diff --git a/test/MC/ARM/directive-unsupported.s b/test/MC/ARM/directive-unsupported.s
index 0b1f9bac61a1..d90db7adee61 100644
--- a/test/MC/ARM/directive-unsupported.s
+++ b/test/MC/ARM/directive-unsupported.s
@@ -30,24 +30,6 @@
// CHECK: .eabi_attribute 0, 0
// CHECK: ^
- .inst 0xdefe
-
-// CHECK: error: unknown directive
-// CHECK: .inst 0xdefe
-// CHECK: ^
-
- .inst.n 0xdefe
-
-// CHECK: error: unknown directive
-// CHECK: .inst.n 0xdefe
-// CHECK: ^
-
- .inst.w 0xdefe
-
-// CHECK: error: unknown directive
-// CHECK: .inst.w 0xdefe
-// CHECK: ^
-
.object_arch armv7
// CHECK: error: unknown directive
diff --git a/test/MC/ARM/inst-directive-other.s b/test/MC/ARM/inst-directive-other.s
new file mode 100644
index 000000000000..f5f0cafe66eb
--- /dev/null
+++ b/test/MC/ARM/inst-directive-other.s
@@ -0,0 +1,47 @@
+// RUN: llvm-mc %s -triple=armv7-apple-darwin -filetype=asm -o - \
+// RUN: | FileCheck %s --check-prefix=CHECK-ASM
+// RUN: llvm-mc %s -triple=armv7-apple-darwin -filetype=obj -o - \
+// RUN: | llvm-objdump -triple=thumbv7 -d - | FileCheck %s --check-prefixes=CHECK-OBJ-CODE
+// RUN: llvm-mc %s -triple=thumbv7-win32-gnu -filetype=asm -o - \
+// RUN: | FileCheck %s --check-prefix=CHECK-ASM
+// RUN: llvm-mc %s -triple=thumbv7-win32-gnu -filetype=obj -o - \
+// RUN: | llvm-objdump -d - | FileCheck %s --check-prefixes=CHECK-OBJ,CHECK-OBJ-CODE
+// RUN: llvm-mc %s -triple=armv7-linux-gnueabi -filetype=asm -o - \
+// RUN: | FileCheck %s --check-prefix=CHECK-ASM
+// RUN: llvm-mc %s -triple=armv7-linux-gnueabi -filetype=obj -o - \
+// RUN: | llvm-objdump -d -triple=thumbv7 - | FileCheck %s --check-prefixes=CHECK-OBJ,CHECK-OBJ-DATA
+
+ .text
+
+ .p2align 2
+ .globl _func
+ .thumb
+_func:
+ // ELF distinguishes between data and code when emitted this way, but
+ // MachO and COFF don't.
+ bx lr
+ .short 0x4770
+ .inst.n 0x4770
+ mov.w r0, #42
+ .short 0xf04f, 0x002a
+ .inst.w 0xf04f002a
+
+// CHECK-ASM: .p2align 2
+// CHECK-ASM: .globl _func
+// CHECK-ASM: _func:
+// CHECK-ASM: bx lr
+// CHECK-ASM: .short 18288
+// CHECK-ASM: .inst.n 0x4770
+// CHECK-ASM: mov.w r0, #42
+// CHECK-ASM: .short 61519
+// CHECK-ASM: .short 42
+// CHECK-ASM: .inst.w 0xf04f002a
+
+// CHECK-OBJ: 0: 70 47 bx lr
+// CHECK-OBJ-CODE: 2: 70 47 bx lr
+// CHECK-OBJ-DATA: 2: 70 47 .short 0x4770
+// CHECK-OBJ: 4: 70 47 bx lr
+// CHECK-OBJ: 6: 4f f0 2a 00 mov.w r0, #42
+// CHECK-OBJ-CODE: a: 4f f0 2a 00 mov.w r0, #42
+// CHECK-OBJ-DATA: a: 4f f0 2a 00 .word 0x002af04f
+// CHECK-OBJ: e: 4f f0 2a 00 mov.w r0, #42
diff --git a/test/MC/ARM/inst-thumb-suffixes-auto.s b/test/MC/ARM/inst-thumb-suffixes-auto.s
new file mode 100644
index 000000000000..53e3b0594505
--- /dev/null
+++ b/test/MC/ARM/inst-thumb-suffixes-auto.s
@@ -0,0 +1,16 @@
+@ RUN: llvm-mc %s -triple armv7-linux-gnueabi -filetype asm -o - \
+@ RUN: | FileCheck %s
+@ RUN: llvm-mc %s -triple armebv7-linux-gnueabi -filetype asm -o - \
+@ RUN: | FileCheck %s
+
+ .syntax unified
+ .thumb
+
+ .align 2
+ .global inst_n
+ .type inst_n,%function
+inst_n:
+ @ bx lr, mov.w r0, #42
+ .inst 0x4770, 0xf04f002a
+@ CHECK: .inst.n 0x4770
+@ CHECK: .inst.w 0xf04f002a
diff --git a/test/MC/ARM/inst-thumb-suffixes.s b/test/MC/ARM/inst-thumb-suffixes.s
index 40def3c3b89e..13161472d22b 100644
--- a/test/MC/ARM/inst-thumb-suffixes.s
+++ b/test/MC/ARM/inst-thumb-suffixes.s
@@ -8,6 +8,6 @@
.global suffixes_required_in_thumb
.type suffixes_required_in_thumb,%function
suffixes_required_in_thumb:
- .inst 0x0000
+ .inst 0xff00
@ CHECK-ERROR: cannot determine Thumb instruction size, use inst.n/inst.w instead
diff --git a/test/MC/WebAssembly/debug-info.ll b/test/MC/WebAssembly/debug-info.ll
index 5173029a256b..05e339279171 100644
--- a/test/MC/WebAssembly/debug-info.ll
+++ b/test/MC/WebAssembly/debug-info.ll
@@ -66,68 +66,62 @@
; CHECK-NEXT: }
; CHECK-NEXT: Section {
; CHECK-NEXT: Type: CUSTOM (0x0)
-; CHECK-NEXT: Size: 0
-; CHECK-NEXT: Offset: 504
-; CHECK-NEXT: Name: .debug_ranges
-; CHECK-NEXT: }
-; CHECK-NEXT: Section {
-; CHECK-NEXT: Type: CUSTOM (0x0)
; CHECK-NEXT: Size: 1
-; CHECK-NEXT: Offset: 524
+; CHECK-NEXT: Offset: 504
; CHECK-NEXT: Name: .debug_macinfo
; CHECK-NEXT: }
; CHECK-NEXT: Section {
; CHECK-NEXT: Type: CUSTOM (0x0)
; CHECK-NEXT: Size: 42
-; CHECK-NEXT: Offset: 546
+; CHECK-NEXT: Offset: 526
; CHECK-NEXT: Name: .debug_pubnames
; CHECK-NEXT: }
; CHECK-NEXT: Section {
; CHECK-NEXT: Type: CUSTOM (0x0)
; CHECK-NEXT: Size: 26
-; CHECK-NEXT: Offset: 610
+; CHECK-NEXT: Offset: 590
; CHECK-NEXT: Name: .debug_pubtypes
; CHECK-NEXT: }
; CHECK-NEXT: Section {
; CHECK-NEXT: Type: CUSTOM (0x0)
; CHECK-NEXT: Size: 57
-; CHECK-NEXT: Offset: 658
+; CHECK-NEXT: Offset: 638
; CHECK-NEXT: Name: .debug_line
; CHECK-NEXT: }
; CHECK-NEXT: Section {
; CHECK-NEXT: Type: CUSTOM (0x0)
; CHECK-NEXT: Size: 88
-; CHECK-NEXT: Offset: 733
+; CHECK-NEXT: Offset: 713
; CHECK-NEXT: Name: linking
; CHECK-NEXT: }
; CHECK-NEXT: Section {
; CHECK-NEXT: Type: CUSTOM (0x0)
; CHECK-NEXT: Size: 9
-; CHECK-NEXT: Offset: 835
+; CHECK-NEXT: Offset: 815
; CHECK-NEXT: Name: reloc.DATA
; CHECK-NEXT: }
; CHECK-NEXT: Section {
; CHECK-NEXT: Type: CUSTOM (0x0)
; CHECK-NEXT: Size: 58
-; CHECK-NEXT: Offset: 861
+; CHECK-NEXT: Offset: 841
; CHECK-NEXT: Name: reloc..debug_info
; CHECK-NEXT: }
; CHECK-NEXT: Section {
; CHECK-NEXT: Type: CUSTOM (0x0)
; CHECK-NEXT: Size: 6
-; CHECK-NEXT: Offset: 943
+; CHECK-NEXT: Offset: 923
; CHECK-NEXT: Name: reloc..debug_pubnames
; CHECK-NEXT: }
; CHECK-NEXT: Section {
; CHECK-NEXT: Type: CUSTOM (0x0)
; CHECK-NEXT: Size: 6
-; CHECK-NEXT: Offset: 977
+; CHECK-NEXT: Offset: 957
; CHECK-NEXT: Name: reloc..debug_pubtypes
; CHECK-NEXT: }
; CHECK-NEXT: Section {
; CHECK-NEXT: Type: CUSTOM (0x0)
; CHECK-NEXT: Size: 6
-; CHECK-NEXT: Offset: 1011
+; CHECK-NEXT: Offset: 991
; CHECK-NEXT: Name: reloc..debug_line
; CHECK-NEXT: }
; CHECK-NEXT:]
@@ -151,13 +145,13 @@
; CHECK-NEXT: 0x5B R_WEBASSEMBLY_FUNCTION_OFFSET_I32 f2 0
; CHECK-NEXT: 0x63 R_WEBASSEMBLY_SECTION_OFFSET_I32 .debug_str 118
; CHECK-NEXT: }
-; CHECK-NEXT: Section (12) .debug_pubnames {
+; CHECK-NEXT: Section (11) .debug_pubnames {
; CHECK-NEXT: 0x6 R_WEBASSEMBLY_SECTION_OFFSET_I32 .debug_info 0
; CHECK-NEXT: }
-; CHECK-NEXT: Section (13) .debug_pubtypes {
+; CHECK-NEXT: Section (12) .debug_pubtypes {
; CHECK-NEXT: 0x6 R_WEBASSEMBLY_SECTION_OFFSET_I32 .debug_info 0
; CHECK-NEXT: }
-; CHECK-NEXT: Section (14) .debug_line {
+; CHECK-NEXT: Section (13) .debug_line {
; CHECK-NEXT: 0x2B R_WEBASSEMBLY_FUNCTION_OFFSET_I32 f2 0
; CHECK-NEXT: }
; CHECK-NEXT:]
diff --git a/test/Other/new-pm-defaults.ll b/test/Other/new-pm-defaults.ll
index fcbfc354b920..30ee219d2ca8 100644
--- a/test/Other/new-pm-defaults.ll
+++ b/test/Other/new-pm-defaults.ll
@@ -170,17 +170,22 @@
; CHECK-Os-NEXT: Running pass: MergedLoadStoreMotionPass
; CHECK-Os-NEXT: Running pass: GVN
; CHECK-Os-NEXT: Running analysis: MemoryDependenceAnalysis
+; CHECK-Os-NEXT: Running analysis: PhiValuesAnalysis
; CHECK-Oz-NEXT: Running pass: MergedLoadStoreMotionPass
; CHECK-Oz-NEXT: Running pass: GVN
; CHECK-Oz-NEXT: Running analysis: MemoryDependenceAnalysis
+; CHECK-Oz-NEXT: Running analysis: PhiValuesAnalysis
; CHECK-O2-NEXT: Running pass: MergedLoadStoreMotionPass
; CHECK-O2-NEXT: Running pass: GVN
; CHECK-O2-NEXT: Running analysis: MemoryDependenceAnalysis
+; CHECK-O2-NEXT: Running analysis: PhiValuesAnalysis
; CHECK-O3-NEXT: Running pass: MergedLoadStoreMotionPass
; CHECK-O3-NEXT: Running pass: GVN
; CHECK-O3-NEXT: Running analysis: MemoryDependenceAnalysis
+; CHECK-O3-NEXT: Running analysis: PhiValuesAnalysis
; CHECK-O-NEXT: Running pass: MemCpyOptPass
; CHECK-O1-NEXT: Running analysis: MemoryDependenceAnalysis
+; CHECK-O1-NEXT: Running analysis: PhiValuesAnalysis
; CHECK-O-NEXT: Running pass: SCCPPass
; CHECK-O-NEXT: Running pass: BDCEPass
; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis
diff --git a/test/Other/new-pm-lto-defaults.ll b/test/Other/new-pm-lto-defaults.ll
index 5bb4d9a4eac8..26680f5edc40 100644
--- a/test/Other/new-pm-lto-defaults.ll
+++ b/test/Other/new-pm-lto-defaults.ll
@@ -81,6 +81,7 @@
; CHECK-O2-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}PostOrderFunctionAttrsPass>
; CHECK-O2-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}>
; CHECK-O2-NEXT: Running analysis: MemoryDependenceAnalysis
+; CHECK-O2-NEXT: Running analysis: PhiValuesAnalysis
; CHECK-O2-NEXT: Running analysis: DemandedBitsAnalysis
; CHECK-O2-NEXT: Running pass: CrossDSOCFIPass
; CHECK-O2-NEXT: Running pass: LowerTypeTestsPass
diff --git a/test/Other/new-pm-thinlto-defaults.ll b/test/Other/new-pm-thinlto-defaults.ll
index 3ddae0224232..e1d3d1c0b06a 100644
--- a/test/Other/new-pm-thinlto-defaults.ll
+++ b/test/Other/new-pm-thinlto-defaults.ll
@@ -152,17 +152,22 @@
; CHECK-Os-NEXT: Running pass: MergedLoadStoreMotionPass
; CHECK-Os-NEXT: Running pass: GVN
; CHECK-Os-NEXT: Running analysis: MemoryDependenceAnalysis
+; CHECK-Os-NEXT: Running analysis: PhiValuesAnalysis
; CHECK-Oz-NEXT: Running pass: MergedLoadStoreMotionPass
; CHECK-Oz-NEXT: Running pass: GVN
; CHECK-Oz-NEXT: Running analysis: MemoryDependenceAnalysis
+; CHECK-Oz-NEXT: Running analysis: PhiValuesAnalysis
; CHECK-O2-NEXT: Running pass: MergedLoadStoreMotionPass
; CHECK-O2-NEXT: Running pass: GVN
; CHECK-O2-NEXT: Running analysis: MemoryDependenceAnalysis
+; CHECK-O2-NEXT: Running analysis: PhiValuesAnalysis
; CHECK-O3-NEXT: Running pass: MergedLoadStoreMotionPass
; CHECK-O3-NEXT: Running pass: GVN
; CHECK-O3-NEXT: Running analysis: MemoryDependenceAnalysis
+; CHECK-O3-NEXT: Running analysis: PhiValuesAnalysis
; CHECK-O-NEXT: Running pass: MemCpyOptPass
; CHECK-O1-NEXT: Running analysis: MemoryDependenceAnalysis
+; CHECK-O1-NEXT: Running analysis: PhiValuesAnalysis
; CHECK-O-NEXT: Running pass: SCCPPass
; CHECK-O-NEXT: Running pass: BDCEPass
; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis
diff --git a/test/Other/opt-O2-pipeline.ll b/test/Other/opt-O2-pipeline.ll
index 2ebb6ed909f9..b36bd2449fd0 100644
--- a/test/Other/opt-O2-pipeline.ll
+++ b/test/Other/opt-O2-pipeline.ll
@@ -59,6 +59,7 @@
; CHECK-NEXT: Memory SSA
; CHECK-NEXT: Early CSE w/ MemorySSA
; CHECK-NEXT: Speculatively execute instructions if target has divergent branches
+; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Lazy Value Information Analysis
; CHECK-NEXT: Jump Threading
@@ -120,12 +121,15 @@
; CHECK-NEXT: Delete dead loops
; CHECK-NEXT: Unroll loops
; CHECK-NEXT: MergedLoadStoreMotion
+; CHECK-NEXT: Phi Values Analysis
+; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Memory Dependence Analysis
; CHECK-NEXT: Lazy Branch Probability Analysis
; CHECK-NEXT: Lazy Block Frequency Analysis
; CHECK-NEXT: Optimization Remark Emitter
; CHECK-NEXT: Global Value Numbering
+; CHECK-NEXT: Phi Values Analysis
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Memory Dependence Analysis
@@ -133,6 +137,7 @@
; CHECK-NEXT: Sparse Conditional Constant Propagation
; CHECK-NEXT: Demanded bits analysis
; CHECK-NEXT: Bit-Tracking Dead Code Elimination
+; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Lazy Branch Probability Analysis
@@ -144,12 +149,14 @@
; CHECK-NEXT: Value Propagation
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
+; CHECK-NEXT: Phi Values Analysis
; CHECK-NEXT: Memory Dependence Analysis
; CHECK-NEXT: Dead Store Elimination
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Canonicalize natural loops
; CHECK-NEXT: LCSSA Verifier
; CHECK-NEXT: Loop-Closed SSA Form Pass
+; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Scalar Evolution Analysis
; CHECK-NEXT: Loop Pass Manager
diff --git a/test/Other/opt-O3-pipeline.ll b/test/Other/opt-O3-pipeline.ll
index d9ffc96d4349..096982a9a8d8 100644
--- a/test/Other/opt-O3-pipeline.ll
+++ b/test/Other/opt-O3-pipeline.ll
@@ -62,6 +62,7 @@
; CHECK-NEXT: Memory SSA
; CHECK-NEXT: Early CSE w/ MemorySSA
; CHECK-NEXT: Speculatively execute instructions if target has divergent branches
+; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Lazy Value Information Analysis
; CHECK-NEXT: Jump Threading
@@ -124,12 +125,15 @@
; CHECK-NEXT: Delete dead loops
; CHECK-NEXT: Unroll loops
; CHECK-NEXT: MergedLoadStoreMotion
+; CHECK-NEXT: Phi Values Analysis
+; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Memory Dependence Analysis
; CHECK-NEXT: Lazy Branch Probability Analysis
; CHECK-NEXT: Lazy Block Frequency Analysis
; CHECK-NEXT: Optimization Remark Emitter
; CHECK-NEXT: Global Value Numbering
+; CHECK-NEXT: Phi Values Analysis
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Memory Dependence Analysis
@@ -137,6 +141,7 @@
; CHECK-NEXT: Sparse Conditional Constant Propagation
; CHECK-NEXT: Demanded bits analysis
; CHECK-NEXT: Bit-Tracking Dead Code Elimination
+; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Lazy Branch Probability Analysis
@@ -148,12 +153,14 @@
; CHECK-NEXT: Value Propagation
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
+; CHECK-NEXT: Phi Values Analysis
; CHECK-NEXT: Memory Dependence Analysis
; CHECK-NEXT: Dead Store Elimination
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Canonicalize natural loops
; CHECK-NEXT: LCSSA Verifier
; CHECK-NEXT: Loop-Closed SSA Form Pass
+; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Scalar Evolution Analysis
; CHECK-NEXT: Loop Pass Manager
diff --git a/test/Other/opt-Os-pipeline.ll b/test/Other/opt-Os-pipeline.ll
index 58bf62ffc9c0..34b8fa86537a 100644
--- a/test/Other/opt-Os-pipeline.ll
+++ b/test/Other/opt-Os-pipeline.ll
@@ -59,6 +59,7 @@
; CHECK-NEXT: Memory SSA
; CHECK-NEXT: Early CSE w/ MemorySSA
; CHECK-NEXT: Speculatively execute instructions if target has divergent branches
+; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Lazy Value Information Analysis
; CHECK-NEXT: Jump Threading
@@ -107,12 +108,15 @@
; CHECK-NEXT: Delete dead loops
; CHECK-NEXT: Unroll loops
; CHECK-NEXT: MergedLoadStoreMotion
+; CHECK-NEXT: Phi Values Analysis
+; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Memory Dependence Analysis
; CHECK-NEXT: Lazy Branch Probability Analysis
; CHECK-NEXT: Lazy Block Frequency Analysis
; CHECK-NEXT: Optimization Remark Emitter
; CHECK-NEXT: Global Value Numbering
+; CHECK-NEXT: Phi Values Analysis
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Memory Dependence Analysis
@@ -120,6 +124,7 @@
; CHECK-NEXT: Sparse Conditional Constant Propagation
; CHECK-NEXT: Demanded bits analysis
; CHECK-NEXT: Bit-Tracking Dead Code Elimination
+; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Lazy Branch Probability Analysis
@@ -131,12 +136,14 @@
; CHECK-NEXT: Value Propagation
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
+; CHECK-NEXT: Phi Values Analysis
; CHECK-NEXT: Memory Dependence Analysis
; CHECK-NEXT: Dead Store Elimination
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Canonicalize natural loops
; CHECK-NEXT: LCSSA Verifier
; CHECK-NEXT: Loop-Closed SSA Form Pass
+; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Scalar Evolution Analysis
; CHECK-NEXT: Loop Pass Manager
diff --git a/test/Transforms/GVN/PRE/pre-after-rle.ll b/test/Transforms/GVN/PRE/pre-after-rle.ll
index ff35e56a6761..879d20e891be 100644
--- a/test/Transforms/GVN/PRE/pre-after-rle.ll
+++ b/test/Transforms/GVN/PRE/pre-after-rle.ll
@@ -63,10 +63,12 @@ preheader:
%cmp = icmp slt i32 1, %h
br i1 %cmp, label %body, label %exit
-; Alias analysis currently can't figure out %width doesn't alias %s, so just
-; check that the redundant load has been removed.
+; CHECK-LABEL: preheader.body_crit_edge:
+; CHECK: load i32, i32* %width, align 8
+
; CHECK-LABEL: body:
; CHECK-NOT: load i32*, i32** %start, align 8
+; CHECK-NOT: load i32, i32* %width, align 8
body:
%j = phi i32 [ 0, %preheader ], [ %j.next, %body ]
%s = load i32*, i32** %start, align 8
diff --git a/test/Transforms/GlobalOpt/globalsra-multigep.ll b/test/Transforms/GlobalOpt/globalsra-multigep.ll
new file mode 100644
index 000000000000..87a8486d8818
--- /dev/null
+++ b/test/Transforms/GlobalOpt/globalsra-multigep.ll
@@ -0,0 +1,16 @@
+; RUN: opt < %s -globalopt -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@g_data = internal unnamed_addr global <{ [8 x i16], [8 x i16] }> <{ [8 x i16] [i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16], [8 x i16] zeroinitializer }>, align 16
+; We cannot SRA here due to the second gep meaning the access to g_data may be to either element
+; CHECK: @g_data = internal unnamed_addr constant <{ [8 x i16], [8 x i16] }>
+
+define i16 @test(i64 %a1) {
+entry:
+ %g1 = getelementptr inbounds <{ [8 x i16], [8 x i16] }>, <{ [8 x i16], [8 x i16] }>* @g_data, i64 0, i32 0
+ %arrayidx.i = getelementptr inbounds [8 x i16], [8 x i16]* %g1, i64 0, i64 %a1
+ %r = load i16, i16* %arrayidx.i, align 2
+ ret i16 %r
+}
diff --git a/test/Transforms/GlobalOpt/globalsra-partial.ll b/test/Transforms/GlobalOpt/globalsra-partial.ll
index 6f24128c42b5..141ee1bb5a8c 100644
--- a/test/Transforms/GlobalOpt/globalsra-partial.ll
+++ b/test/Transforms/GlobalOpt/globalsra-partial.ll
@@ -1,11 +1,12 @@
-; In this case, the global can only be broken up by one level.
+; In this case, the global cannot be merged as i may be out of range
; RUN: opt < %s -globalopt -S | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
@G = internal global { i32, [4 x float] } zeroinitializer ; <{ i32, [4 x float] }*> [#uses=3]
-; CHECK-NOT: 12345
+; CHECK: @G = internal unnamed_addr global { i32, [4 x float] }
+; CHECK: 12345
define void @onlystore() {
store i32 12345, i32* getelementptr ({ i32, [4 x float] }, { i32, [4 x float] }* @G, i32 0, i32 0)
ret void
diff --git a/test/Transforms/Inline/attributes.ll b/test/Transforms/Inline/attributes.ll
index c2808ba8c037..66a831bf8179 100644
--- a/test/Transforms/Inline/attributes.ll
+++ b/test/Transforms/Inline/attributes.ll
@@ -333,9 +333,10 @@ define i32 @test_no-use-jump-tables3(i32 %i) "no-jump-tables"="true" {
; CHECK-NEXT: ret i32
}
-; Calle with "null-pointer-is-valid"="true" attribute should not be inlined
-; into a caller without this attribute. Exception: alwaysinline callee
-; can still be inlined.
+; Callee with "null-pointer-is-valid"="true" attribute should not be inlined
+; into a caller without this attribute.
+; Exception: alwaysinline callee can still be inlined but
+; "null-pointer-is-valid"="true" should get copied to caller.
define i32 @null-pointer-is-valid_callee0(i32 %i) "null-pointer-is-valid"="true" {
ret i32 %i
@@ -355,6 +356,7 @@ define i32 @null-pointer-is-valid_callee2(i32 %i) {
; CHECK-NEXT: ret i32
}
+; No inlining since caller does not have "null-pointer-is-valid"="true" attribute.
define i32 @test_null-pointer-is-valid0(i32 %i) {
%1 = call i32 @null-pointer-is-valid_callee0(i32 %i)
ret i32 %1
@@ -363,17 +365,22 @@ define i32 @test_null-pointer-is-valid0(i32 %i) {
; CHECK-NEXT: ret i32
}
-define i32 @test_null-pointer-is-valid1(i32 %i) {
+; alwaysinline should force inlining even when caller does not have
+; "null-pointer-is-valid"="true" attribute. However, the attribute should be
+; copied to caller.
+define i32 @test_null-pointer-is-valid1(i32 %i) "null-pointer-is-valid"="false" {
%1 = call i32 @null-pointer-is-valid_callee1(i32 %i)
ret i32 %1
-; CHECK: @test_null-pointer-is-valid1(
+; CHECK: @test_null-pointer-is-valid1(i32 %i) [[NULLPOINTERISVALID:#[0-9]+]] {
; CHECK-NEXT: ret i32
}
+; Can inline since both caller and callee have "null-pointer-is-valid"="true"
+; attribute.
define i32 @test_null-pointer-is-valid2(i32 %i) "null-pointer-is-valid"="true" {
%1 = call i32 @null-pointer-is-valid_callee2(i32 %i)
ret i32 %1
-; CHECK: @test_null-pointer-is-valid2(
+; CHECK: @test_null-pointer-is-valid2(i32 %i) [[NULLPOINTERISVALID]] {
; CHECK-NEXT: ret i32
}
@@ -381,3 +388,4 @@ define i32 @test_null-pointer-is-valid2(i32 %i) "null-pointer-is-valid"="true" {
; CHECK: attributes [[FPMAD_TRUE]] = { "less-precise-fpmad"="true" }
; CHECK: attributes [[NOIMPLICITFLOAT]] = { noimplicitfloat }
; CHECK: attributes [[NOUSEJUMPTABLES]] = { "no-jump-tables"="true" }
+; CHECK: attributes [[NULLPOINTERISVALID]] = { "null-pointer-is-valid"="true" }
diff --git a/test/Transforms/InstCombine/and-xor-or.ll b/test/Transforms/InstCombine/and-xor-or.ll
index e4495fa5b0ae..1eb871e594cc 100644
--- a/test/Transforms/InstCombine/and-xor-or.ll
+++ b/test/Transforms/InstCombine/and-xor-or.ll
@@ -1,6 +1,101 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
+; a & (a ^ b) --> a & ~b
+
+define i32 @and_xor_common_op(i32 %pa, i32 %pb) {
+; CHECK-LABEL: @and_xor_common_op(
+; CHECK-NEXT: [[A:%.*]] = udiv i32 42, [[PA:%.*]]
+; CHECK-NEXT: [[B:%.*]] = udiv i32 43, [[PB:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], -1
+; CHECK-NEXT: [[R:%.*]] = and i32 [[A]], [[TMP1]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a = udiv i32 42, %pa ; thwart complexity-based canonicalization
+ %b = udiv i32 43, %pb ; thwart complexity-based canonicalization
+ %xor = xor i32 %a, %b
+ %r = and i32 %a, %xor
+ ret i32 %r
+}
+
+; a & (b ^ a) --> a & ~b
+
+define i32 @and_xor_common_op_commute1(i32 %pa, i32 %pb) {
+; CHECK-LABEL: @and_xor_common_op_commute1(
+; CHECK-NEXT: [[A:%.*]] = udiv i32 42, [[PA:%.*]]
+; CHECK-NEXT: [[B:%.*]] = udiv i32 43, [[PB:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], -1
+; CHECK-NEXT: [[R:%.*]] = and i32 [[A]], [[TMP1]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a = udiv i32 42, %pa ; thwart complexity-based canonicalization
+ %b = udiv i32 43, %pb ; thwart complexity-based canonicalization
+ %xor = xor i32 %b, %a
+ %r = and i32 %a, %xor
+ ret i32 %r
+}
+
+; (b ^ a) & a --> a & ~b
+
+define i32 @and_xor_common_op_commute2(i32 %pa, i32 %pb) {
+; CHECK-LABEL: @and_xor_common_op_commute2(
+; CHECK-NEXT: [[A:%.*]] = udiv i32 42, [[PA:%.*]]
+; CHECK-NEXT: [[B:%.*]] = udiv i32 43, [[PB:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], -1
+; CHECK-NEXT: [[R:%.*]] = and i32 [[A]], [[TMP1]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a = udiv i32 42, %pa ; thwart complexity-based canonicalization
+ %b = udiv i32 43, %pb ; thwart complexity-based canonicalization
+ %xor = xor i32 %b, %a
+ %r = and i32 %xor, %a
+ ret i32 %r
+}
+
+; (a ^ b) & a --> a & ~b
+
+define <2 x i32> @and_xor_common_op_commute3(<2 x i32> %pa, <2 x i32> %pb) {
+; CHECK-LABEL: @and_xor_common_op_commute3(
+; CHECK-NEXT: [[A:%.*]] = udiv <2 x i32> <i32 42, i32 43>, [[PA:%.*]]
+; CHECK-NEXT: [[B:%.*]] = udiv <2 x i32> <i32 43, i32 42>, [[PB:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[B]], <i32 -1, i32 -1>
+; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[A]], [[TMP1]]
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %a = udiv <2 x i32> <i32 42, i32 43>, %pa ; thwart complexity-based canonicalization
+ %b = udiv <2 x i32> <i32 43, i32 42>, %pb ; thwart complexity-based canonicalization
+ %xor = xor <2 x i32> %a, %b
+ %r = and <2 x i32> %xor, %a
+ ret <2 x i32> %r
+}
+
+; It's ok to match a common constant.
+; TODO: The xor should be a 'not' op (-1 constant), but demanded bits shrinks it.
+
+define <4 x i32> @and_xor_common_op_constant(<4 x i32> %A) {
+; CHECK-LABEL: @and_xor_common_op_constant(
+; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[A:%.*]], <i32 7, i32 7, i32 7, i32 7>
+; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[TMP1]], <i32 1, i32 2, i32 3, i32 4>
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
+ %1 = xor <4 x i32> %A, <i32 1, i32 2, i32 3, i32 4>
+ %2 = and <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %1
+ ret <4 x i32> %2
+}
+
+; a & (a ^ ~b) --> a & b
+
+define i32 @and_xor_not_common_op(i32 %a, i32 %b) {
+; CHECK-LABEL: @and_xor_not_common_op(
+; CHECK-NEXT: [[T4:%.*]] = and i32 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT: ret i32 [[T4]]
+;
+ %b2 = xor i32 %b, -1
+ %t2 = xor i32 %a, %b2
+ %t4 = and i32 %t2, %a
+ ret i32 %t4
+}
+
; rdar://10770603
; (x & y) | (x ^ y) -> x | y
diff --git a/test/Transforms/InstCombine/and2.ll b/test/Transforms/InstCombine/and2.ll
index dde786c9b009..ec23f61cb673 100644
--- a/test/Transforms/InstCombine/and2.ll
+++ b/test/Transforms/InstCombine/and2.ll
@@ -21,18 +21,6 @@ define i32 @test3(i32 %X, i32 %Y) {
ret i32 %b
}
-; Make sure we don't go into an infinite loop with this test
-define <4 x i32> @test5(<4 x i32> %A) {
-; CHECK-LABEL: @test5(
-; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> %A, <i32 1, i32 2, i32 3, i32 4>
-; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[TMP1]], <i32 1, i32 2, i32 3, i32 4>
-; CHECK-NEXT: ret <4 x i32> [[TMP2]]
-;
- %1 = xor <4 x i32> %A, <i32 1, i32 2, i32 3, i32 4>
- %2 = and <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %1
- ret <4 x i32> %2
-}
-
define i1 @test7(i32 %i, i1 %b) {
; CHECK-LABEL: @test7(
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 %i, 0
diff --git a/test/Transforms/InstCombine/double-float-shrink-1.ll b/test/Transforms/InstCombine/double-float-shrink-1.ll
index 5d015bc99ae9..01c1087dec69 100644
--- a/test/Transforms/InstCombine/double-float-shrink-1.ll
+++ b/test/Transforms/InstCombine/double-float-shrink-1.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -10,376 +11,507 @@ target triple = "x86_64-unknown-linux-gnu"
; PR17850: http://llvm.org/bugs/show_bug.cgi?id=17850
define float @acos_test1(float %f) {
- %conv = fpext float %f to double
- %call = call fast double @acos(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK-LABEL: acos_test1
-; CHECK: call fast float @acosf(float %f)
+; CHECK-LABEL: @acos_test1(
+; CHECK-NEXT: [[ACOSF:%.*]] = call fast float @acosf(float [[F:%.*]])
+; CHECK-NEXT: ret float [[ACOSF]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @acos(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
}
define double @acos_test2(float %f) {
- %conv = fpext float %f to double
- %call = call fast double @acos(double %conv)
- ret double %call
-; CHECK-LABEL: acos_test2
-; CHECK: call fast double @acos(double %conv)
+; CHECK-LABEL: @acos_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @acos(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @acos(double %conv)
+ ret double %call
}
define float @acosh_test1(float %f) {
- %conv = fpext float %f to double
- %call = call fast double @acosh(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK-LABEL: acosh_test1
-; CHECK: call fast float @acoshf(float %f)
+; CHECK-LABEL: @acosh_test1(
+; CHECK-NEXT: [[ACOSHF:%.*]] = call fast float @acoshf(float [[F:%.*]])
+; CHECK-NEXT: ret float [[ACOSHF]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @acosh(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
}
define double @acosh_test2(float %f) {
- %conv = fpext float %f to double
- %call = call fast double @acosh(double %conv)
- ret double %call
-; CHECK-LABEL: acosh_test2
-; CHECK: call fast double @acosh(double %conv)
+; CHECK-LABEL: @acosh_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @acosh(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @acosh(double %conv)
+ ret double %call
}
define float @asin_test1(float %f) {
- %conv = fpext float %f to double
- %call = call fast double @asin(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK-LABEL: asin_test1
-; CHECK: call fast float @asinf(float %f)
+; CHECK-LABEL: @asin_test1(
+; CHECK-NEXT: [[ASINF:%.*]] = call fast float @asinf(float [[F:%.*]])
+; CHECK-NEXT: ret float [[ASINF]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @asin(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
}
define double @asin_test2(float %f) {
- %conv = fpext float %f to double
- %call = call fast double @asin(double %conv)
- ret double %call
-; CHECK-LABEL: asin_test2
-; CHECK: call fast double @asin(double %conv)
+; CHECK-LABEL: @asin_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @asin(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @asin(double %conv)
+ ret double %call
}
define float @asinh_test1(float %f) {
- %conv = fpext float %f to double
- %call = call fast double @asinh(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK-LABEL: asinh_test1
-; CHECK: call fast float @asinhf(float %f)
+; CHECK-LABEL: @asinh_test1(
+; CHECK-NEXT: [[ASINHF:%.*]] = call fast float @asinhf(float [[F:%.*]])
+; CHECK-NEXT: ret float [[ASINHF]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @asinh(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
}
define double @asinh_test2(float %f) {
- %conv = fpext float %f to double
- %call = call fast double @asinh(double %conv)
- ret double %call
-; CHECK-LABEL: asinh_test2
-; CHECK: call fast double @asinh(double %conv)
+; CHECK-LABEL: @asinh_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @asinh(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @asinh(double %conv)
+ ret double %call
}
define float @atan_test1(float %f) {
- %conv = fpext float %f to double
- %call = call fast double @atan(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK-LABEL: atan_test1
-; CHECK: call fast float @atanf(float %f)
+; CHECK-LABEL: @atan_test1(
+; CHECK-NEXT: [[ATANF:%.*]] = call fast float @atanf(float [[F:%.*]])
+; CHECK-NEXT: ret float [[ATANF]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @atan(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
}
define double @atan_test2(float %f) {
- %conv = fpext float %f to double
- %call = call fast double @atan(double %conv)
- ret double %call
-; CHECK-LABEL: atan_test2
-; CHECK: call fast double @atan(double %conv)
+; CHECK-LABEL: @atan_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @atan(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @atan(double %conv)
+ ret double %call
}
define float @atanh_test1(float %f) {
- %conv = fpext float %f to double
- %call = call fast double @atanh(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK-LABEL: atanh_test1
-; CHECK: call fast float @atanhf(float %f)
+; CHECK-LABEL: @atanh_test1(
+; CHECK-NEXT: [[ATANHF:%.*]] = call fast float @atanhf(float [[F:%.*]])
+; CHECK-NEXT: ret float [[ATANHF]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @atanh(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
}
define double @atanh_test2(float %f) {
- %conv = fpext float %f to double
- %call = call fast double @atanh(double %conv)
- ret double %call
-; CHECK-LABEL: atanh_test2
-; CHECK: call fast double @atanh(double %conv)
+; CHECK-LABEL: @atanh_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @atanh(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @atanh(double %conv)
+ ret double %call
}
define float @cbrt_test1(float %f) {
- %conv = fpext float %f to double
- %call = call fast double @cbrt(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK-LABEL: cbrt_test1
-; CHECK: call fast float @cbrtf(float %f)
+; CHECK-LABEL: @cbrt_test1(
+; CHECK-NEXT: [[CBRTF:%.*]] = call fast float @cbrtf(float [[F:%.*]])
+; CHECK-NEXT: ret float [[CBRTF]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @cbrt(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
}
define double @cbrt_test2(float %f) {
- %conv = fpext float %f to double
- %call = call fast double @cbrt(double %conv)
- ret double %call
-; CHECK-LABEL: cbrt_test2
-; CHECK: call fast double @cbrt(double %conv)
+; CHECK-LABEL: @cbrt_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @cbrt(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @cbrt(double %conv)
+ ret double %call
}
define float @exp_test1(float %f) {
- %conv = fpext float %f to double
- %call = call fast double @exp(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK-LABEL: exp_test1
-; CHECK: call fast float @expf(float %f)
+; CHECK-LABEL: @exp_test1(
+; CHECK-NEXT: [[EXPF:%.*]] = call fast float @expf(float [[F:%.*]])
+; CHECK-NEXT: ret float [[EXPF]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @exp(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
}
define double @exp_test2(float %f) {
- %conv = fpext float %f to double
- %call = call fast double @exp(double %conv)
- ret double %call
-; CHECK-LABEL: exp_test2
-; CHECK: call fast double @exp(double %conv)
+; CHECK-LABEL: @exp_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @exp(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @exp(double %conv)
+ ret double %call
}
define float @expm1_test1(float %f) {
- %conv = fpext float %f to double
- %call = call fast double @expm1(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK-LABEL: expm1_test1
-; CHECK: call fast float @expm1f(float %f)
+; CHECK-LABEL: @expm1_test1(
+; CHECK-NEXT: [[EXPM1F:%.*]] = call fast float @expm1f(float [[F:%.*]])
+; CHECK-NEXT: ret float [[EXPM1F]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @expm1(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
}
define double @expm1_test2(float %f) {
- %conv = fpext float %f to double
- %call = call fast double @expm1(double %conv)
- ret double %call
-; CHECK-LABEL: expm1_test2
-; CHECK: call fast double @expm1(double %conv)
+; CHECK-LABEL: @expm1_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @expm1(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @expm1(double %conv)
+ ret double %call
}
; exp10f() doesn't exist for this triple, so it doesn't shrink.
define float @exp10_test1(float %f) {
- %conv = fpext float %f to double
- %call = call fast double @exp10(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK-LABEL: exp10_test1
-; CHECK: call fast double @exp10(double %conv)
+; CHECK-LABEL: @exp10_test1(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @exp10(double [[CONV]])
+; CHECK-NEXT: [[CONV1:%.*]] = fptrunc double [[CALL]] to float
+; CHECK-NEXT: ret float [[CONV1]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @exp10(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
}
define double @exp10_test2(float %f) {
- %conv = fpext float %f to double
- %call = call fast double @exp10(double %conv)
- ret double %call
-; CHECK-LABEL: exp10_test2
-; CHECK: call fast double @exp10(double %conv)
+; CHECK-LABEL: @exp10_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @exp10(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @exp10(double %conv)
+ ret double %call
}
define float @log_test1(float %f) {
- %conv = fpext float %f to double
- %call = call fast double @log(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK-LABEL: log_test1
-; CHECK: call fast float @logf(float %f)
+; CHECK-LABEL: @log_test1(
+; CHECK-NEXT: [[LOGF:%.*]] = call fast float @logf(float [[F:%.*]])
+; CHECK-NEXT: ret float [[LOGF]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @log(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
}
define double @log_test2(float %f) {
- %conv = fpext float %f to double
- %call = call fast double @log(double %conv)
- ret double %call
-; CHECK-LABEL: log_test2
-; CHECK: call fast double @log(double %conv)
+; CHECK-LABEL: @log_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @log(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @log(double %conv)
+ ret double %call
}
define float @log10_test1(float %f) {
- %conv = fpext float %f to double
- %call = call fast double @log10(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK-LABEL: log10_test1
-; CHECK: call fast float @log10f(float %f)
+; CHECK-LABEL: @log10_test1(
+; CHECK-NEXT: [[LOG10F:%.*]] = call fast float @log10f(float [[F:%.*]])
+; CHECK-NEXT: ret float [[LOG10F]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @log10(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
}
define double @log10_test2(float %f) {
- %conv = fpext float %f to double
- %call = call fast double @log10(double %conv)
- ret double %call
-; CHECK-LABEL: log10_test2
-; CHECK: call fast double @log10(double %conv)
+; CHECK-LABEL: @log10_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @log10(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @log10(double %conv)
+ ret double %call
}
define float @log1p_test1(float %f) {
- %conv = fpext float %f to double
- %call = call fast double @log1p(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK-LABEL: log1p_test1
-; CHECK: call fast float @log1pf(float %f)
+; CHECK-LABEL: @log1p_test1(
+; CHECK-NEXT: [[LOG1PF:%.*]] = call fast float @log1pf(float [[F:%.*]])
+; CHECK-NEXT: ret float [[LOG1PF]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @log1p(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
}
define double @log1p_test2(float %f) {
- %conv = fpext float %f to double
- %call = call fast double @log1p(double %conv)
- ret double %call
-; CHECK-LABEL: log1p_test2
-; CHECK: call fast double @log1p(double %conv)
+; CHECK-LABEL: @log1p_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @log1p(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @log1p(double %conv)
+ ret double %call
}
define float @log2_test1(float %f) {
- %conv = fpext float %f to double
- %call = call fast double @log2(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK-LABEL: log2_test1
-; CHECK: call fast float @log2f(float %f)
+; CHECK-LABEL: @log2_test1(
+; CHECK-NEXT: [[LOG2F:%.*]] = call fast float @log2f(float [[F:%.*]])
+; CHECK-NEXT: ret float [[LOG2F]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @log2(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
}
define double @log2_test2(float %f) {
- %conv = fpext float %f to double
- %call = call fast double @log2(double %conv)
- ret double %call
-; CHECK-LABEL: log2_test2
-; CHECK: call fast double @log2(double %conv)
+; CHECK-LABEL: @log2_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @log2(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @log2(double %conv)
+ ret double %call
}
define float @logb_test1(float %f) {
- %conv = fpext float %f to double
- %call = call fast double @logb(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK-LABEL: logb_test1
-; CHECK: call fast float @logbf(float %f)
+; CHECK-LABEL: @logb_test1(
+; CHECK-NEXT: [[LOGBF:%.*]] = call fast float @logbf(float [[F:%.*]])
+; CHECK-NEXT: ret float [[LOGBF]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @logb(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
}
define double @logb_test2(float %f) {
- %conv = fpext float %f to double
- %call = call fast double @logb(double %conv)
- ret double %call
-; CHECK-LABEL: logb_test2
-; CHECK: call fast double @logb(double %conv)
+; CHECK-LABEL: @logb_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @logb(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @logb(double %conv)
+ ret double %call
+}
+
+; FIXME: Miscompile - we dropped the 2nd argument!
+
+define float @pow_test1(float %f, float %g) {
+; CHECK-LABEL: @pow_test1(
+; CHECK-NEXT: [[POWF:%.*]] = call fast float @powf(float [[F:%.*]])
+; CHECK-NEXT: ret float [[POWF]]
+;
+ %df = fpext float %f to double
+ %dg = fpext float %g to double
+ %call = call fast double @pow(double %df, double %dg)
+ %fr = fptrunc double %call to float
+ ret float %fr
+}
+
+; TODO: This should shrink?
+
+define double @pow_test2(float %f, float %g) {
+; CHECK-LABEL: @pow_test2(
+; CHECK-NEXT: [[DF:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[DG:%.*]] = fpext float [[G:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @pow(double [[DF]], double [[DG]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %df = fpext float %f to double
+ %dg = fpext float %g to double
+ %call = call fast double @pow(double %df, double %dg)
+ ret double %call
}
define float @sin_test1(float %f) {
- %conv = fpext float %f to double
- %call = call fast double @sin(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK-LABEL: sin_test1
-; CHECK: call fast float @sinf(float %f)
+; CHECK-LABEL: @sin_test1(
+; CHECK-NEXT: [[SINF:%.*]] = call fast float @sinf(float [[F:%.*]])
+; CHECK-NEXT: ret float [[SINF]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @sin(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
}
define double @sin_test2(float %f) {
- %conv = fpext float %f to double
- %call = call fast double @sin(double %conv)
- ret double %call
-; CHECK-LABEL: sin_test2
-; CHECK: call fast double @sin(double %conv)
+; CHECK-LABEL: @sin_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @sin(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @sin(double %conv)
+ ret double %call
}
define float @sqrt_test1(float %f) {
- %conv = fpext float %f to double
- %call = call double @sqrt(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK-LABEL: sqrt_test1
-; CHECK: call float @sqrtf(float %f)
+; CHECK-LABEL: @sqrt_test1(
+; CHECK-NEXT: [[SQRTF:%.*]] = call float @sqrtf(float [[F:%.*]])
+; CHECK-NEXT: ret float [[SQRTF]]
+;
+ %conv = fpext float %f to double
+ %call = call double @sqrt(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
}
define double @sqrt_test2(float %f) {
- %conv = fpext float %f to double
- %call = call double @sqrt(double %conv)
- ret double %call
-; CHECK-LABEL: sqrt_test2
-; CHECK: call double @sqrt(double %conv)
+; CHECK-LABEL: @sqrt_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call double @sqrt(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call double @sqrt(double %conv)
+ ret double %call
}
define float @sqrt_int_test1(float %f) {
- %conv = fpext float %f to double
- %call = call double @llvm.sqrt.f64(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK-LABEL: sqrt_int_test1
-; CHECK: call float @llvm.sqrt.f32(float %f)
+; CHECK-LABEL: @sqrt_int_test1(
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.sqrt.f32(float [[F:%.*]])
+; CHECK-NEXT: ret float [[TMP1]]
+;
+ %conv = fpext float %f to double
+ %call = call double @llvm.sqrt.f64(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
}
define double @sqrt_int_test2(float %f) {
- %conv = fpext float %f to double
- %call = call double @llvm.sqrt.f64(double %conv)
- ret double %call
-; CHECK-LABEL: sqrt_int_test2
-; CHECK: call double @llvm.sqrt.f64(double %conv)
+; CHECK-LABEL: @sqrt_int_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call double @llvm.sqrt.f64(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call double @llvm.sqrt.f64(double %conv)
+ ret double %call
}
define float @tan_test1(float %f) {
- %conv = fpext float %f to double
- %call = call fast double @tan(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK-LABEL: tan_test1
-; CHECK: call fast float @tanf(float %f)
+; CHECK-LABEL: @tan_test1(
+; CHECK-NEXT: [[TANF:%.*]] = call fast float @tanf(float [[F:%.*]])
+; CHECK-NEXT: ret float [[TANF]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @tan(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
}
define double @tan_test2(float %f) {
- %conv = fpext float %f to double
- %call = call fast double @tan(double %conv)
- ret double %call
-; CHECK-LABEL: tan_test2
-; CHECK: call fast double @tan(double %conv)
+; CHECK-LABEL: @tan_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @tan(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @tan(double %conv)
+ ret double %call
}
define float @tanh_test1(float %f) {
- %conv = fpext float %f to double
- %call = call fast double @tanh(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK-LABEL: tanh_test1
-; CHECK: call fast float @tanhf(float %f)
+; CHECK-LABEL: @tanh_test1(
+; CHECK-NEXT: [[TANHF:%.*]] = call fast float @tanhf(float [[F:%.*]])
+; CHECK-NEXT: ret float [[TANHF]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @tanh(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
}
define double @tanh_test2(float %f) {
- %conv = fpext float %f to double
- %call = call fast double @tanh(double %conv)
- ret double %call
-; CHECK-LABEL: tanh_test2
-; CHECK: call fast double @tanh(double %conv)
+; CHECK-LABEL: @tanh_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @tanh(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @tanh(double %conv)
+ ret double %call
}
; 'arcp' on an fmax() is meaningless. This test just proves that
; flags are propagated for shrunken *binary* double FP calls.
define float @max1(float %a, float %b) {
+; CHECK-LABEL: @max1(
+; CHECK-NEXT: [[FMAXF:%.*]] = call arcp float @fmaxf(float [[A:%.*]], float [[B:%.*]])
+; CHECK-NEXT: ret float [[FMAXF]]
+;
%c = fpext float %a to double
%d = fpext float %b to double
%e = call arcp double @fmax(double %c, double %d)
%f = fptrunc double %e to float
ret float %f
-
-; CHECK-LABEL: max1(
-; CHECK-NEXT: call arcp float @fmaxf(float %a, float %b)
-; CHECK-NEXT: ret
}
; A function can have a name that matches a common libcall,
; but with the wrong type(s). Let it be.
define float @fake_fmin(float %a, float %b) {
+; CHECK-LABEL: @fake_fmin(
+; CHECK-NEXT: [[C:%.*]] = fpext float [[A:%.*]] to fp128
+; CHECK-NEXT: [[D:%.*]] = fpext float [[B:%.*]] to fp128
+; CHECK-NEXT: [[E:%.*]] = call fp128 @fmin(fp128 [[C]], fp128 [[D]])
+; CHECK-NEXT: [[F:%.*]] = fptrunc fp128 [[E]] to float
+; CHECK-NEXT: ret float [[F]]
+;
%c = fpext float %a to fp128
%d = fpext float %b to fp128
%e = call fp128 @fmin(fp128 %c, fp128 %d)
%f = fptrunc fp128 %e to float
ret float %f
-
-; CHECK-LABEL: fake_fmin(
-; CHECK-NEXT: %c = fpext float %a to fp128
-; CHECK-NEXT: %d = fpext float %b to fp128
-; CHECK-NEXT: %e = call fp128 @fmin(fp128 %c, fp128 %d)
-; CHECK-NEXT: %f = fptrunc fp128 %e to float
-; CHECK-NEXT: ret float %f
}
declare fp128 @fmin(fp128, fp128) ; This is not the 'fmin' you're looking for.
@@ -389,12 +521,13 @@ declare double @fmax(double, double)
declare double @tanh(double)
declare double @tan(double)
-; sqrt is a special case: the shrinking optimization
+; sqrt is a special case: the shrinking optimization
; is valid even without unsafe-fp-math.
-declare double @sqrt(double)
-declare double @llvm.sqrt.f64(double)
+declare double @sqrt(double)
+declare double @llvm.sqrt.f64(double)
declare double @sin(double)
+declare double @pow(double, double)
declare double @log2(double)
declare double @log1p(double)
declare double @log10(double)
diff --git a/test/Transforms/InstCombine/gep-addrspace.ll b/test/Transforms/InstCombine/gep-addrspace.ll
index 4a4951dee7fd..fadf2ae6bf68 100644
--- a/test/Transforms/InstCombine/gep-addrspace.ll
+++ b/test/Transforms/InstCombine/gep-addrspace.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
@@ -7,7 +8,9 @@ target triple = "x86_64-pc-win32"
; make sure that we are not crashing when creating an illegal type
define void @func(%myStruct addrspace(1)* nocapture %p) nounwind {
-ST:
+; CHECK-LABEL: @func(
+; CHECK-NEXT: ret void
+;
%A = getelementptr inbounds %myStruct, %myStruct addrspace(1)* %p, i64 0
%B = addrspacecast %myStruct addrspace(1)* %A to %myStruct*
%C = getelementptr inbounds %myStruct, %myStruct* %B, i32 0, i32 1
@@ -21,14 +24,19 @@ ST:
@scalar = internal addrspace(3) global float 0.000000e+00, align 4
define void @keep_necessary_addrspacecast(i64 %i, float** %out0, float** %out1) {
-entry:
-; CHECK-LABEL: @keep_necessary_addrspacecast
- %0 = getelementptr [256 x float], [256 x float]* addrspacecast ([256 x float] addrspace(3)* @array to [256 x float]*), i64 0, i64 %i
-; CHECK: addrspacecast float addrspace(3)* %{{[0-9]+}} to float*
- %1 = getelementptr [0 x float], [0 x float]* addrspacecast (float addrspace(3)* @scalar to [0 x float]*), i64 0, i64 %i
-; CHECK: addrspacecast float addrspace(3)* %{{[0-9]+}} to float*
- store float* %0, float** %out0, align 4
- store float* %1, float** %out1, align 4
+; CHECK-LABEL: @keep_necessary_addrspacecast(
+; CHECK-NEXT: [[T01:%.*]] = getelementptr [256 x float], [256 x float] addrspace(3)* @array, i64 0, i64 [[I:%.*]]
+; CHECK-NEXT: [[T0:%.*]] = addrspacecast float addrspace(3)* [[T01]] to float*
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, float addrspace(3)* @scalar, i64 [[I]]
+; CHECK-NEXT: [[T1:%.*]] = addrspacecast float addrspace(3)* [[TMP1]] to float*
+; CHECK-NEXT: store float* [[T0]], float** [[OUT0:%.*]], align 4
+; CHECK-NEXT: store float* [[T1]], float** [[OUT1:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+ %t0 = getelementptr [256 x float], [256 x float]* addrspacecast ([256 x float] addrspace(3)* @array to [256 x float]*), i64 0, i64 %i
+ %t1 = getelementptr [0 x float], [0 x float]* addrspacecast (float addrspace(3)* @scalar to [0 x float]*), i64 0, i64 %i
+ store float* %t0, float** %out0, align 4
+ store float* %t1, float** %out1, align 4
ret void
}
@@ -37,17 +45,42 @@ declare void @escape_alloca(i16*)
; check that addrspacecast is not ignored (leading to an assertion failure)
; when trying to mark a GEP as inbounds
define { i8, i8 } @inbounds_after_addrspacecast() {
-top:
-; CHECK-LABEL: @inbounds_after_addrspacecast
- %0 = alloca i16, align 2
- call void @escape_alloca(i16* %0)
- %tmpcast = bitcast i16* %0 to [2 x i8]*
-; CHECK: addrspacecast [2 x i8]* %tmpcast to [2 x i8] addrspace(11)*
- %1 = addrspacecast [2 x i8]* %tmpcast to [2 x i8] addrspace(11)*
-; CHECK: getelementptr [2 x i8], [2 x i8] addrspace(11)* %1, i64 0, i64 1
- %2 = getelementptr [2 x i8], [2 x i8] addrspace(11)* %1, i64 0, i64 1
-; CHECK: addrspace(11)
- %3 = load i8, i8 addrspace(11)* %2, align 1
- %.fca.1.insert = insertvalue { i8, i8 } zeroinitializer, i8 %3, 1
- ret { i8, i8 } %.fca.1.insert
+; CHECK-LABEL: @inbounds_after_addrspacecast(
+; CHECK-NEXT: [[T0:%.*]] = alloca i16, align 2
+; CHECK-NEXT: call void @escape_alloca(i16* nonnull [[T0]])
+; CHECK-NEXT: [[TMPCAST:%.*]] = bitcast i16* [[T0]] to [2 x i8]*
+; CHECK-NEXT: [[T1:%.*]] = addrspacecast [2 x i8]* [[TMPCAST]] to [2 x i8] addrspace(11)*
+; CHECK-NEXT: [[T2:%.*]] = getelementptr [2 x i8], [2 x i8] addrspace(11)* [[T1]], i64 0, i64 1
+; CHECK-NEXT: [[T3:%.*]] = load i8, i8 addrspace(11)* [[T2]], align 1
+; CHECK-NEXT: [[INSERT:%.*]] = insertvalue { i8, i8 } zeroinitializer, i8 [[T3]], 1
+; CHECK-NEXT: ret { i8, i8 } [[INSERT]]
+;
+ %t0 = alloca i16, align 2
+ call void @escape_alloca(i16* %t0)
+ %tmpcast = bitcast i16* %t0 to [2 x i8]*
+ %t1 = addrspacecast [2 x i8]* %tmpcast to [2 x i8] addrspace(11)*
+ %t2 = getelementptr [2 x i8], [2 x i8] addrspace(11)* %t1, i64 0, i64 1
+ %t3 = load i8, i8 addrspace(11)* %t2, align 1
+ %insert = insertvalue { i8, i8 } zeroinitializer, i8 %t3, 1
+ ret { i8, i8 } %insert
+}
+
+
+declare spir_func <16 x i32> @my_extern_func()
+
+; check that a bitcast is not generated when we need an addrspace cast
+define void @bitcast_after_gep(<16 x i32>* %t0) {
+; CHECK-LABEL: @bitcast_after_gep(
+; CHECK-NEXT: [[T4:%.*]] = addrspacecast <16 x i32>* [[T0:%.*]] to <16 x i32> addrspace(3)*
+; CHECK-NEXT: [[CALL:%.*]] = call spir_func <16 x i32> @my_extern_func()
+; CHECK-NEXT: store <16 x i32> [[CALL]], <16 x i32> addrspace(3)* [[T4]], align 64
+; CHECK-NEXT: ret void
+;
+ %t1 = bitcast <16 x i32>* %t0 to [16 x i32]*
+ %t2 = addrspacecast [16 x i32]* %t1 to [16 x i32] addrspace(3)*
+ %t3 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* %t2, i64 0, i64 0
+ %t4 = bitcast i32 addrspace(3)* %t3 to <16 x i32> addrspace(3)*
+ %call = call spir_func <16 x i32> @my_extern_func()
+ store <16 x i32> %call, <16 x i32> addrspace(3)* %t4
+ ret void
}
diff --git a/test/Transforms/InstCombine/pow-1.ll b/test/Transforms/InstCombine/pow-1.ll
index eef4f76fb84f..cf24548db3e7 100644
--- a/test/Transforms/InstCombine/pow-1.ll
+++ b/test/Transforms/InstCombine/pow-1.ll
@@ -15,6 +15,8 @@
declare float @powf(float, float) nounwind readonly
declare double @pow(double, double) nounwind readonly
+declare <2 x float> @llvm.pow.v2f32(<2 x float>, <2 x float>) nounwind readonly
+declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>) nounwind readonly
; Check pow(1.0, x) -> 1.0.
@@ -25,6 +27,13 @@ define float @test_simplify1(float %x) {
; CHECK-NEXT: ret float 1.000000e+00
}
+define <2 x float> @test_simplify1v(<2 x float> %x) {
+; CHECK-LABEL: @test_simplify1v(
+ %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> <float 1.0, float 1.0>, <2 x float> %x)
+ ret <2 x float> %retval
+; CHECK-NEXT: ret <2 x float> <float 1.000000e+00, float 1.000000e+00>
+}
+
define double @test_simplify2(double %x) {
; CHECK-LABEL: @test_simplify2(
%retval = call double @pow(double 1.0, double %x)
@@ -32,6 +41,13 @@ define double @test_simplify2(double %x) {
; CHECK-NEXT: ret double 1.000000e+00
}
+define <2 x double> @test_simplify2v(<2 x double> %x) {
+; CHECK-LABEL: @test_simplify2v(
+ %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> <double 1.0, double 1.0>, <2 x double> %x)
+ ret <2 x double> %retval
+; CHECK-NEXT: ret <2 x double> <double 1.000000e+00, double 1.000000e+00>
+}
+
; Check pow(2.0, x) -> exp2(x).
define float @test_simplify3(float %x) {
@@ -42,6 +58,14 @@ define float @test_simplify3(float %x) {
; CHECK-NEXT: ret float [[EXP2F]]
}
+define <2 x float> @test_simplify3v(<2 x float> %x) {
+; CHECK-LABEL: @test_simplify3v(
+ %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> <float 2.0, float 2.0>, <2 x float> %x)
+; CHECK-NEXT: [[EXP2F:%[a-z0-9]+]] = call <2 x float> @llvm.exp2.v2f32(<2 x float> %x)
+ ret <2 x float> %retval
+; CHECK-NEXT: ret <2 x float> [[EXP2F]]
+}
+
define double @test_simplify4(double %x) {
; CHECK-LABEL: @test_simplify4(
%retval = call double @pow(double 2.0, double %x)
@@ -50,6 +74,14 @@ define double @test_simplify4(double %x) {
; CHECK-NEXT: ret double [[EXP2]]
}
+define <2 x double> @test_simplify4v(<2 x double> %x) {
+; CHECK-LABEL: @test_simplify4v(
+ %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> <double 2.0, double 2.0>, <2 x double> %x)
+; CHECK-NEXT: [[EXP2:%[a-z0-9]+]] = call <2 x double> @llvm.exp2.v2f64(<2 x double> %x)
+ ret <2 x double> %retval
+; CHECK-NEXT: ret <2 x double> [[EXP2]]
+}
+
; Check pow(x, 0.0) -> 1.0.
define float @test_simplify5(float %x) {
@@ -59,6 +91,13 @@ define float @test_simplify5(float %x) {
; CHECK-NEXT: ret float 1.000000e+00
}
+define <2 x float> @test_simplify5v(<2 x float> %x) {
+; CHECK-LABEL: @test_simplify5v(
+ %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> <float 0.0, float 0.0>)
+ ret <2 x float> %retval
+; CHECK-NEXT: %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> zeroinitializer)
+}
+
define double @test_simplify6(double %x) {
; CHECK-LABEL: @test_simplify6(
%retval = call double @pow(double %x, double 0.0)
@@ -66,6 +105,13 @@ define double @test_simplify6(double %x) {
; CHECK-NEXT: ret double 1.000000e+00
}
+define <2 x double> @test_simplify6v(<2 x double> %x) {
+; CHECK-LABEL: @test_simplify6v(
+ %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 0.0, double 0.0>)
+ ret <2 x double> %retval
+; CHECK-NEXT: %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> zeroinitializer)
+}
+
; Check pow(x, 0.5) -> fabs(sqrt(x)), where x != -infinity.
define float @test_simplify7(float %x) {
@@ -115,6 +161,13 @@ define float @test_simplify11(float %x) {
; CHECK-NEXT: ret float %x
}
+define <2 x float> @test_simplify11v(<2 x float> %x) {
+; CHECK-LABEL: @test_simplify11v(
+ %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> <float 1.0, float 1.0>)
+ ret <2 x float> %retval
+; CHECK-NEXT: %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> <float 1.000000e+00, float 1.000000e+00>)
+}
+
define double @test_simplify12(double %x) {
; CHECK-LABEL: @test_simplify12(
%retval = call double @pow(double %x, double 1.0)
@@ -122,6 +175,13 @@ define double @test_simplify12(double %x) {
; CHECK-NEXT: ret double %x
}
+define <2 x double> @test_simplify12v(<2 x double> %x) {
+; CHECK-LABEL: @test_simplify12v(
+ %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 1.0, double 1.0>)
+ ret <2 x double> %retval
+; CHECK-NEXT: %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 1.000000e+00, double 1.000000e+00>)
+}
+
; Check pow(x, 2.0) -> x*x.
define float @pow2_strict(float %x) {
@@ -133,6 +193,15 @@ define float @pow2_strict(float %x) {
ret float %r
}
+define <2 x float> @pow2_strictv(<2 x float> %x) {
+; CHECK-LABEL: @pow2_strictv(
+; CHECK-NEXT: [[POW2:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> <float 2.000000e+00, float 2.000000e+00>)
+; CHECK-NEXT: ret <2 x float> [[POW2]]
+;
+ %r = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> <float 2.0, float 2.0>)
+ ret <2 x float> %r
+}
+
define double @pow2_double_strict(double %x) {
; CHECK-LABEL: @pow2_double_strict(
; CHECK-NEXT: [[POW2:%.*]] = fmul double %x, %x
@@ -141,6 +210,14 @@ define double @pow2_double_strict(double %x) {
%r = call double @pow(double %x, double 2.0)
ret double %r
}
+define <2 x double> @pow2_double_strictv(<2 x double> %x) {
+; CHECK-LABEL: @pow2_double_strictv(
+; CHECK-NEXT: [[POW2:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 2.000000e+00, double 2.000000e+00>)
+; CHECK-NEXT: ret <2 x double> [[POW2]]
+;
+ %r = call <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 2.0, double 2.0>)
+ ret <2 x double> %r
+}
; Don't drop the FMF - PR35601 ( https://bugs.llvm.org/show_bug.cgi?id=35601 )
@@ -164,6 +241,15 @@ define float @pow_neg1_strict(float %x) {
ret float %r
}
+define <2 x float> @pow_neg1_strictv(<2 x float> %x) {
+; CHECK-LABEL: @pow_neg1_strictv(
+; CHECK-NEXT: [[POWRECIP:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> <float -1.000000e+00, float -1.000000e+00>)
+; CHECK-NEXT: ret <2 x float> [[POWRECIP]]
+;
+ %r = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> <float -1.0, float -1.0>)
+ ret <2 x float> %r
+}
+
define double @pow_neg1_double_fast(double %x) {
; CHECK-LABEL: @pow_neg1_double_fast(
; CHECK-NEXT: [[POWRECIP:%.*]] = fdiv fast double 1.000000e+00, %x
@@ -173,6 +259,15 @@ define double @pow_neg1_double_fast(double %x) {
ret double %r
}
+define <2 x double> @pow_neg1_double_fastv(<2 x double> %x) {
+; CHECK-LABEL: @pow_neg1_double_fastv(
+; CHECK-NEXT: [[POWRECIP:%.*]] = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double -1.000000e+00, double -1.000000e+00>)
+; CHECK-NEXT: ret <2 x double> [[POWRECIP]]
+;
+ %r = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double -1.0, double -1.0>)
+ ret <2 x double> %r
+}
+
declare double @llvm.pow.f64(double %Val, double %Power)
define double @test_simplify17(double %x) {
; CHECK-LABEL: @test_simplify17(
diff --git a/test/Transforms/InstCombine/pow-cbrt.ll b/test/Transforms/InstCombine/pow-cbrt.ll
new file mode 100644
index 000000000000..00fa510b04e6
--- /dev/null
+++ b/test/Transforms/InstCombine/pow-cbrt.ll
@@ -0,0 +1,117 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define double @pow_intrinsic_third_fast(double %x) {
+; CHECK-LABEL: @pow_intrinsic_third_fast(
+; CHECK-NEXT: [[POW:%.*]] = call fast double @llvm.pow.f64(double [[X:%.*]], double 0x3FD5555555555555)
+; CHECK-NEXT: ret double [[POW]]
+;
+ %pow = call fast double @llvm.pow.f64(double %x, double 0x3fd5555555555555)
+ ret double %pow
+}
+
+define float @powf_intrinsic_third_fast(float %x) {
+; CHECK-LABEL: @powf_intrinsic_third_fast(
+; CHECK-NEXT: [[POW:%.*]] = call fast float @llvm.pow.f32(float [[X:%.*]], float 0x3FD5555560000000)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = call fast float @llvm.pow.f32(float %x, float 0x3fd5555560000000)
+ ret float %pow
+}
+
+define double @pow_intrinsic_third_approx(double %x) {
+; CHECK-LABEL: @pow_intrinsic_third_approx(
+; CHECK-NEXT: [[POW:%.*]] = call afn double @llvm.pow.f64(double [[X:%.*]], double 0x3FD5555555555555)
+; CHECK-NEXT: ret double [[POW]]
+;
+ %pow = call afn double @llvm.pow.f64(double %x, double 0x3fd5555555555555)
+ ret double %pow
+}
+
+define float @powf_intrinsic_third_approx(float %x) {
+; CHECK-LABEL: @powf_intrinsic_third_approx(
+; CHECK-NEXT: [[POW:%.*]] = call afn float @llvm.pow.f32(float [[X:%.*]], float 0x3FD5555560000000)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = call afn float @llvm.pow.f32(float %x, float 0x3fd5555560000000)
+ ret float %pow
+}
+
+define double @pow_libcall_third_fast(double %x) {
+; CHECK-LABEL: @pow_libcall_third_fast(
+; CHECK-NEXT: [[POW:%.*]] = call fast double @pow(double [[X:%.*]], double 0x3FD5555555555555)
+; CHECK-NEXT: ret double [[POW]]
+;
+ %pow = call fast double @pow(double %x, double 0x3fd5555555555555)
+ ret double %pow
+}
+
+define float @powf_libcall_third_fast(float %x) {
+; CHECK-LABEL: @powf_libcall_third_fast(
+; CHECK-NEXT: [[POW:%.*]] = call fast float @powf(float [[X:%.*]], float 0x3FD5555560000000)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = call fast float @powf(float %x, float 0x3fd5555560000000)
+ ret float %pow
+}
+
+define double @pow_intrinsic_negthird_fast(double %x) {
+; CHECK-LABEL: @pow_intrinsic_negthird_fast(
+; CHECK-NEXT: [[POW:%.*]] = call fast double @llvm.pow.f64(double [[X:%.*]], double 0xBFD5555555555555)
+; CHECK-NEXT: ret double [[POW]]
+;
+ %pow = call fast double @llvm.pow.f64(double %x, double 0xbfd5555555555555)
+ ret double %pow
+}
+
+define float @powf_intrinsic_negthird_fast(float %x) {
+; CHECK-LABEL: @powf_intrinsic_negthird_fast(
+; CHECK-NEXT: [[POW:%.*]] = call fast float @llvm.pow.f32(float [[X:%.*]], float 0xBFD5555560000000)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = call fast float @llvm.pow.f32(float %x, float 0xbfd5555560000000)
+ ret float %pow
+}
+
+define double @pow_intrinsic_negthird_approx(double %x) {
+; CHECK-LABEL: @pow_intrinsic_negthird_approx(
+; CHECK-NEXT: [[POW:%.*]] = call afn double @llvm.pow.f64(double [[X:%.*]], double 0xBFD5555555555555)
+; CHECK-NEXT: ret double [[POW]]
+;
+ %pow = call afn double @llvm.pow.f64(double %x, double 0xbfd5555555555555)
+ ret double %pow
+}
+
+define float @powf_intrinsic_negthird_approx(float %x) {
+; CHECK-LABEL: @powf_intrinsic_negthird_approx(
+; CHECK-NEXT: [[POW:%.*]] = call afn float @llvm.pow.f32(float [[X:%.*]], float 0xBFD5555560000000)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = call afn float @llvm.pow.f32(float %x, float 0xbfd5555560000000)
+ ret float %pow
+}
+
+define double @pow_libcall_negthird_fast(double %x) {
+; CHECK-LABEL: @pow_libcall_negthird_fast(
+; CHECK-NEXT: [[POW:%.*]] = call fast double @pow(double [[X:%.*]], double 0xBFD5555555555555)
+; CHECK-NEXT: ret double [[POW]]
+;
+ %pow = call fast double @pow(double %x, double 0xbfd5555555555555)
+ ret double %pow
+}
+
+define float @powf_libcall_negthird_fast(float %x) {
+; CHECK-LABEL: @powf_libcall_negthird_fast(
+; CHECK-NEXT: [[POW:%.*]] = call fast float @powf(float [[X:%.*]], float 0xBFD5555560000000)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = call fast float @powf(float %x, float 0xbfd5555560000000)
+ ret float %pow
+}
+
+declare double @llvm.pow.f64(double, double) #0
+declare float @llvm.pow.f32(float, float) #0
+declare double @pow(double, double)
+declare float @powf(float, float)
+
+attributes #0 = { nounwind readnone speculatable }
diff --git a/test/Transforms/InstCombine/pow-sqrt.ll b/test/Transforms/InstCombine/pow-sqrt.ll
index c07a82ccedda..3b885ad5bdae 100644
--- a/test/Transforms/InstCombine/pow-sqrt.ll
+++ b/test/Transforms/InstCombine/pow-sqrt.ll
@@ -20,9 +20,9 @@ define <2 x double> @pow_intrinsic_half_approx(<2 x double> %x) {
define double @pow_libcall_half_approx(double %x) {
; CHECK-LABEL: @pow_libcall_half_approx(
-; CHECK-NEXT: [[SQRT:%.*]] = call double @sqrt(double %x)
-; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[SQRT]])
-; CHECK-NEXT: [[TMP2:%.*]] = fcmp oeq double %x, 0xFFF0000000000000
+; CHECK-NEXT: [[SQRT:%.*]] = call afn double @sqrt(double %x)
+; CHECK-NEXT: [[TMP1:%.*]] = call afn double @llvm.fabs.f64(double [[SQRT]])
+; CHECK-NEXT: [[TMP2:%.*]] = fcmp afn oeq double %x, 0xFFF0000000000000
; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], double 0x7FF0000000000000, double [[TMP1]]
; CHECK-NEXT: ret double [[TMP3]]
;
diff --git a/test/Transforms/InstCombine/select-binop-icmp.ll b/test/Transforms/InstCombine/select-binop-icmp.ll
new file mode 100644
index 000000000000..a2ac68fa21e3
--- /dev/null
+++ b/test/Transforms/InstCombine/select-binop-icmp.ll
@@ -0,0 +1,391 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare void @use(<2 x i1>)
+
+define i32 @select_xor_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_xor_icmp(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 0
+ %B = xor i32 %x, %z
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_xor_icmp2(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_xor_icmp2(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp ne i32 %x, 0
+ %B = xor i32 %x, %z
+ %C = select i1 %A, i32 %y, i32 %B
+ ret i32 %C
+}
+
+define i32 @select_xor_icmp_meta(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_xor_icmp_meta(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]], !prof !0
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 0
+ %B = xor i32 %x, %z
+ %C = select i1 %A, i32 %B, i32 %y, !prof !0
+ ret i32 %C
+}
+
+define i32 @select_mul_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_mul_icmp(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 1
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 1
+ %B = mul i32 %x, %z
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_add_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_add_icmp(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 0
+ %B = add i32 %x, %z
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_or_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_or_icmp(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 0
+ %B = or i32 %x, %z
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_and_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], -1
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, -1
+ %B = and i32 %x, %z
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define <2 x i8> @select_xor_icmp_vec(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) {
+; CHECK-LABEL: @select_xor_icmp_vec(
+; CHECK-NEXT: [[A:%.*]] = icmp eq <2 x i8> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: [[C:%.*]] = select <2 x i1> [[A]], <2 x i8> [[Z:%.*]], <2 x i8> [[Y:%.*]]
+; CHECK-NEXT: ret <2 x i8> [[C]]
+;
+ %A = icmp eq <2 x i8> %x, <i8 0, i8 0>
+ %B = xor <2 x i8> %x, %z
+ %C = select <2 x i1> %A, <2 x i8> %B, <2 x i8> %y
+ ret <2 x i8> %C
+}
+
+define <2 x i8> @select_xor_icmp_vec_use(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) {
+; CHECK-LABEL: @select_xor_icmp_vec_use(
+; CHECK-NEXT: [[A:%.*]] = icmp ne <2 x i8> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: call void @use(<2 x i1> [[A]])
+; CHECK-NEXT: [[C:%.*]] = select <2 x i1> [[A]], <2 x i8> [[Y:%.*]], <2 x i8> [[Z:%.*]]
+; CHECK-NEXT: ret <2 x i8> [[C]]
+;
+ %A = icmp ne <2 x i8> %x, <i8 0, i8 0>
+ call void @use(<2 x i1> %A)
+ %B = xor <2 x i8> %x, %z
+ %C = select <2 x i1> %A, <2 x i8> %y, <2 x i8> %B
+ ret <2 x i8> %C
+}
+
+define i32 @select_xor_inv_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_xor_inv_icmp(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 0
+ %B = xor i32 %z, %x
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_xor_inv_icmp2(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_xor_inv_icmp2(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp ne i32 %x, 0
+ %B = xor i32 %x, %z
+ %C = select i1 %A, i32 %y, i32 %B
+ ret i32 %C
+}
+
+; Negative tests
+define i32 @select_xor_icmp_bad_1(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_xor_icmp_bad_1(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], [[K:%.*]]
+; CHECK-NEXT: [[B:%.*]] = xor i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, %k
+ %B = xor i32 %x, %z
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_xor_icmp_bad_2(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_xor_icmp_bad_2(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[B:%.*]] = xor i32 [[K:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 0
+ %B = xor i32 %k, %z
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_xor_icmp_bad_3(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_xor_icmp_bad_3(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 3
+; CHECK-NEXT: [[B:%.*]] = xor i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 3
+ %B = xor i32 %x, %z
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_xor_icmp_bad_4(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_xor_icmp_bad_4(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], [[K:%.*]]
+; CHECK-NEXT: [[B:%.*]] = xor i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, %k
+ %B = xor i32 %x, %z
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_xor_icmp_bad_5(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_xor_icmp_bad_5(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[B:%.*]] = xor i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Y:%.*]], i32 [[B]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp ne i32 %x, 0
+ %B = xor i32 %x, %z
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_xor_icmp_bad_6(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_xor_icmp_bad_6(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 1
+; CHECK-NEXT: [[B:%.*]] = xor i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp ne i32 %x, 1
+ %B = xor i32 %x, %z
+ %C = select i1 %A, i32 %y, i32 %B
+ ret i32 %C
+}
+
+define <2 x i8> @select_xor_icmp_vec_bad(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) {
+; CHECK-LABEL: @select_xor_icmp_vec_bad(
+; CHECK-NEXT: [[A:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 5, i8 3>
+; CHECK-NEXT: [[B:%.*]] = xor <2 x i8> [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select <2 x i1> [[A]], <2 x i8> [[B]], <2 x i8> [[Y:%.*]]
+; CHECK-NEXT: ret <2 x i8> [[C]]
+;
+ %A = icmp eq <2 x i8> %x, <i8 5, i8 3>
+ %B = xor <2 x i8> %x, %z
+ %C = select <2 x i1> %A, <2 x i8> %B, <2 x i8> %y
+ ret <2 x i8> %C
+}
+
+; TODO: support for undefs, check for an identity constant does not handle them yet
+define <2 x i8> @select_xor_icmp_vec_bad_2(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) {
+; CHECK-LABEL: @select_xor_icmp_vec_bad_2(
+; CHECK-NEXT: [[A:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 0, i8 undef>
+; CHECK-NEXT: [[B:%.*]] = xor <2 x i8> [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select <2 x i1> [[A]], <2 x i8> [[B]], <2 x i8> [[Y:%.*]]
+; CHECK-NEXT: ret <2 x i8> [[C]]
+;
+ %A = icmp eq <2 x i8> %x, <i8 0, i8 undef>
+ %B = xor <2 x i8> %x, %z
+ %C = select <2 x i1> %A, <2 x i8> %B, <2 x i8> %y
+ ret <2 x i8> %C
+}
+
+define i32 @select_mul_icmp_bad(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_mul_icmp_bad(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 3
+; CHECK-NEXT: [[B:%.*]] = mul i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 3
+ %B = mul i32 %x, %z
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_add_icmp_bad(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_add_icmp_bad(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 1
+; CHECK-NEXT: [[B:%.*]] = add i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 1
+ %B = add i32 %x, %z
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_and_icmp_bad(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp_bad(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[B:%.*]] = and i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 0
+ %B = and i32 %x, %z
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_or_icmp_bad(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_or_icmp_bad(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 3
+; CHECK-NEXT: [[B:%.*]] = or i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 3
+ %B = or i32 %x, %z
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+; TODO: Support for non-commutative opcodes
+define i32 @select_sub_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_sub_icmp(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[B:%.*]] = sub i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 0
+ %B = sub i32 %x, %z
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_shl_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_shl_icmp(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[B:%.*]] = shl i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 0
+ %B = shl i32 %x, %z
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_lshr_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_lshr_icmp(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[B:%.*]] = lshr i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 0
+ %B = lshr i32 %x, %z
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_ashr_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_ashr_icmp(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[B:%.*]] = ashr i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 0
+ %B = ashr i32 %x, %z
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+; TODO: Support for FP opcodes
+define float @select_fadd_icmp(float %x, float %y, float %z) {
+; CHECK-LABEL: @select_fadd_icmp(
+; CHECK-NEXT: [[A:%.*]] = fcmp oeq float [[X:%.*]], -0.000000e+00
+; CHECK-NEXT: [[B:%.*]] = fadd float [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[B]], float [[Y:%.*]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp oeq float %x, -0.0
+ %B = fadd float %x, %z
+ %C = select i1 %A, float %B, float %y
+ ret float %C
+}
+
+define float @select_fadd_icmp2(float %x, float %y, float %z) {
+; CHECK-LABEL: @select_fadd_icmp2(
+; CHECK-NEXT: [[A:%.*]] = fcmp ueq float [[X:%.*]], -0.000000e+00
+; CHECK-NEXT: [[B:%.*]] = fadd float [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[B]], float [[Y:%.*]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp ueq float %x, -0.0
+ %B = fadd float %x, %z
+ %C = select i1 %A, float %B, float %y
+ ret float %C
+}
+
+define float @select_fmul_icmp(float %x, float %y, float %z) {
+; CHECK-LABEL: @select_fmul_icmp(
+; CHECK-NEXT: [[A:%.*]] = fcmp oeq float [[X:%.*]], 1.000000e+00
+; CHECK-NEXT: [[B:%.*]] = fmul float [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[B]], float [[Y:%.*]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp oeq float %x, 1.0
+ %B = fmul float %x, %z
+ %C = select i1 %A, float %B, float %y
+ ret float %C
+}
+
+!0 = !{!"branch_weights", i32 2, i32 10}
diff --git a/test/Transforms/InstCombine/sub-not.ll b/test/Transforms/InstCombine/sub-not.ll
index 5fc42367dad4..cd1f8f3bd52d 100644
--- a/test/Transforms/InstCombine/sub-not.ll
+++ b/test/Transforms/InstCombine/sub-not.ll
@@ -40,8 +40,8 @@ define <2 x i8> @sub_not_vec(<2 x i8> %x, <2 x i8> %y) {
define i8 @dec_sub(i8 %x, i8 %y) {
; CHECK-LABEL: @dec_sub(
-; CHECK-NEXT: [[S:%.*]] = sub i8 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[R:%.*]] = add i8 [[S]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[Y:%.*]], -1
+; CHECK-NEXT: [[R:%.*]] = add i8 [[TMP1]], [[X:%.*]]
; CHECK-NEXT: ret i8 [[R]]
;
%s = sub i8 %x, %y
@@ -64,8 +64,8 @@ define i8 @dec_sub_extra_use(i8 %x, i8 %y) {
define <2 x i8> @dec_sub_vec(<2 x i8> %x, <2 x i8> %y) {
; CHECK-LABEL: @dec_sub_vec(
-; CHECK-NEXT: [[S:%.*]] = sub <2 x i8> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[S]], <i8 -1, i8 undef>
+; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[Y:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[TMP1]], [[X:%.*]]
; CHECK-NEXT: ret <2 x i8> [[R]]
;
%s = sub <2 x i8> %x, %y
@@ -75,8 +75,8 @@ define <2 x i8> @dec_sub_vec(<2 x i8> %x, <2 x i8> %y) {
define i8 @sub_inc(i8 %x, i8 %y) {
; CHECK-LABEL: @sub_inc(
-; CHECK-NEXT: [[S:%.*]] = add i8 [[X:%.*]], 1
-; CHECK-NEXT: [[R:%.*]] = sub i8 [[Y:%.*]], [[S]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT: [[R:%.*]] = add i8 [[TMP1]], [[Y:%.*]]
; CHECK-NEXT: ret i8 [[R]]
;
%s = add i8 %x, 1
@@ -99,8 +99,8 @@ define i8 @sub_inc_extra_use(i8 %x, i8 %y) {
define <2 x i8> @sub_inc_vec(<2 x i8> %x, <2 x i8> %y) {
; CHECK-LABEL: @sub_inc_vec(
-; CHECK-NEXT: [[S:%.*]] = add <2 x i8> [[X:%.*]], <i8 undef, i8 1>
-; CHECK-NEXT: [[R:%.*]] = sub <2 x i8> [[Y:%.*]], [[S]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[X:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[TMP1]], [[Y:%.*]]
; CHECK-NEXT: ret <2 x i8> [[R]]
;
%s = add <2 x i8> %x, <i8 undef, i8 1>
@@ -108,3 +108,38 @@ define <2 x i8> @sub_inc_vec(<2 x i8> %x, <2 x i8> %y) {
ret <2 x i8> %r
}
+define i8 @sub_dec(i8 %x, i8 %y) {
+; CHECK-LABEL: @sub_dec(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[Y:%.*]], -1
+; CHECK-NEXT: [[R:%.*]] = add i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %s = add i8 %x, -1
+ %r = sub i8 %s, %y
+ ret i8 %r
+}
+
+define i8 @sub_dec_extra_use(i8 %x, i8 %y) {
+; CHECK-LABEL: @sub_dec_extra_use(
+; CHECK-NEXT: [[S:%.*]] = add i8 [[X:%.*]], -1
+; CHECK-NEXT: [[R:%.*]] = sub i8 [[S]], [[Y:%.*]]
+; CHECK-NEXT: call void @use(i8 [[S]])
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %s = add i8 %x, -1
+ %r = sub i8 %s, %y
+ call void @use(i8 %s)
+ ret i8 %r
+}
+
+define <2 x i8> @sub_dec_vec(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @sub_dec_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[Y:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %s = add <2 x i8> %x, <i8 undef, i8 -1>
+ %r = sub <2 x i8> %s, %y
+ ret <2 x i8> %r
+}
+
diff --git a/test/Transforms/InstCombine/xor.ll b/test/Transforms/InstCombine/xor.ll
index 0724af1f4221..2fc5270587fc 100644
--- a/test/Transforms/InstCombine/xor.ll
+++ b/test/Transforms/InstCombine/xor.ll
@@ -328,17 +328,6 @@ define i32 @test25(i32 %g, i32 %h) {
ret i32 %t4
}
-define i32 @test26(i32 %a, i32 %b) {
-; CHECK-LABEL: @test26(
-; CHECK-NEXT: [[T4:%.*]] = and i32 %b, %a
-; CHECK-NEXT: ret i32 [[T4]]
-;
- %b2 = xor i32 %b, -1
- %t2 = xor i32 %a, %b2
- %t4 = and i32 %t2, %a
- ret i32 %t4
-}
-
define i32 @test27(i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @test27(
; CHECK-NEXT: [[T6:%.*]] = icmp eq i32 %b, %c
diff --git a/test/Transforms/InstSimplify/AndOrXor.ll b/test/Transforms/InstSimplify/AndOrXor.ll
index 251b4dea63b5..ed68f1121278 100644
--- a/test/Transforms/InstSimplify/AndOrXor.ll
+++ b/test/Transforms/InstSimplify/AndOrXor.ll
@@ -999,28 +999,26 @@ define i64 @shl_or_and2(i32 %a, i1 %b) {
ret i64 %tmp5
}
-define i32 @shl_or_and3(i32 %a, i32 %b) {
; concatinate two 32-bit integers and extract lower 32-bit
+define i64 @shl_or_and3(i32 %a, i32 %b) {
; CHECK-LABEL: @shl_or_and3(
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[A:%.*]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[B:%.*]] to i64
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 32
; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 4294967295
-; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
-; CHECK-NEXT: ret i32 [[TMP6]]
+; CHECK-NEXT: ret i64 [[TMP5]]
;
%tmp1 = zext i32 %a to i64
%tmp2 = zext i32 %b to i64
%tmp3 = shl nuw i64 %tmp1, 32
%tmp4 = or i64 %tmp2, %tmp3
%tmp5 = and i64 %tmp4, 4294967295
- %tmp6 = trunc i64 %tmp5 to i32
- ret i32 %tmp6
+ ret i64 %tmp5
}
-define i32 @shl_or_and4(i16 %a, i16 %b) {
; concatinate two 16-bit integers and extract higher 16-bit
+define i32 @shl_or_and4(i16 %a, i16 %b) {
; CHECK-LABEL: @shl_or_and4(
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
@@ -1037,27 +1035,25 @@ define i32 @shl_or_and4(i16 %a, i16 %b) {
ret i32 %tmp5
}
-define i64 @shl_or_and5(i64 %a, i1 %b) {
+define i128 @shl_or_and5(i64 %a, i1 %b) {
; CHECK-LABEL: @shl_or_and5(
; CHECK-NEXT: [[TMP1:%.*]] = zext i64 [[A:%.*]] to i128
; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[B:%.*]] to i128
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i128 [[TMP1]], 64
; CHECK-NEXT: [[TMP4:%.*]] = or i128 [[TMP2]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = and i128 [[TMP4]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64
-; CHECK-NEXT: ret i64 [[TMP6]]
+; CHECK-NEXT: ret i128 [[TMP5]]
;
%tmp1 = zext i64 %a to i128
%tmp2 = zext i1 %b to i128
%tmp3 = shl nuw i128 %tmp1, 64
%tmp4 = or i128 %tmp2, %tmp3
%tmp5 = and i128 %tmp4, 1
- %tmp6 = trunc i128 %tmp5 to i64
- ret i64 %tmp6
+ ret i128 %tmp5
}
+; A variation of above test cases; it fails due to the mask value
define i32 @shl_or_and6(i16 %a, i16 %b) {
-; A variation of above test case, but fails due to the mask value
; CHECK-LABEL: @shl_or_and6(
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
@@ -1074,8 +1070,8 @@ define i32 @shl_or_and6(i16 %a, i16 %b) {
ret i32 %tmp5
}
+; A variation of above test cases; it fails due to the mask value
define i32 @shl_or_and7(i16 %a, i16 %b) {
-; A variation of above test case, but fails due to the mask value
; CHECK-LABEL: @shl_or_and7(
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
@@ -1092,8 +1088,8 @@ define i32 @shl_or_and7(i16 %a, i16 %b) {
ret i32 %tmp5
}
+; A variation of above test cases; it fails due to the mask value
define i32 @shl_or_and8(i16 %a, i16 %b) {
-; A variation of above test case, but fails due to the mask value
; CHECK-LABEL: @shl_or_and8(
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
@@ -1109,3 +1105,55 @@ define i32 @shl_or_and8(i16 %a, i16 %b) {
%tmp5 = and i32 %tmp4, 131071 ; mask with 0x1FFFF
ret i32 %tmp5
}
+
+define <2 x i64> @shl_or_and1v(<2 x i32> %a, <2 x i1> %b) {
+; CHECK-LABEL: @shl_or_and1v(
+; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64>
+; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i64>
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <2 x i64> [[TMP1]], <i64 32, i64 32>
+; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i64> [[TMP3]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i64> [[TMP4]], <i64 1, i64 1>
+; CHECK-NEXT: ret <2 x i64> [[TMP5]]
+;
+ %tmp1 = zext <2 x i32> %a to <2 x i64>
+ %tmp2 = zext <2 x i1> %b to <2 x i64>
+ %tmp3 = shl nuw <2 x i64> %tmp1, <i64 32, i64 32>
+ %tmp4 = or <2 x i64> %tmp3, %tmp2
+ %tmp5 = and <2 x i64> %tmp4, <i64 1, i64 1>
+ ret <2 x i64> %tmp5
+}
+
+define <2 x i64> @shl_or_and2v(<2 x i32> %a, <2 x i1> %b) {
+; CHECK-LABEL: @shl_or_and2v(
+; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i64>
+; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64>
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <2 x i64> [[TMP1]], <i64 32, i64 32>
+; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i64> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i64> [[TMP4]], <i64 4294967296, i64 4294967296>
+; CHECK-NEXT: ret <2 x i64> [[TMP5]]
+;
+ %tmp1 = zext <2 x i1> %b to <2 x i64>
+ %tmp2 = zext <2 x i32> %a to <2 x i64>
+ %tmp3 = shl nuw <2 x i64> %tmp1, <i64 32, i64 32>
+ %tmp4 = or <2 x i64> %tmp2, %tmp3
+ %tmp5 = and <2 x i64> %tmp4, <i64 4294967296, i64 4294967296>
+ ret <2 x i64> %tmp5
+}
+
+define <2 x i32> @shl_or_and3v(<2 x i16> %a, <2 x i16> %b) {
+; A variation of above test case, but fails due to the mask value
+; CHECK-LABEL: @shl_or_and3v(
+; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i16> [[A:%.*]] to <2 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i16> [[B:%.*]] to <2 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <2 x i32> [[TMP1]], <i32 16, i32 16>
+; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i32> [[TMP4]], <i32 -65535, i32 -65535>
+; CHECK-NEXT: ret <2 x i32> [[TMP5]]
+;
+ %tmp1 = zext <2 x i16> %a to <2 x i32>
+ %tmp2 = zext <2 x i16> %b to <2 x i32>
+ %tmp3 = shl nuw <2 x i32> %tmp1, <i32 16, i32 16>
+ %tmp4 = or <2 x i32> %tmp2, %tmp3
+ %tmp5 = and <2 x i32> %tmp4, <i32 4294901761, i32 4294901761> ; mask with 0xFFFF0001
+ ret <2 x i32> %tmp5
+}
diff --git a/test/Transforms/InstSimplify/call.ll b/test/Transforms/InstSimplify/call.ll
index 080d3ed22219..1e581dd4d7c7 100644
--- a/test/Transforms/InstSimplify/call.ll
+++ b/test/Transforms/InstSimplify/call.ll
@@ -431,22 +431,72 @@ declare <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>*, i32, <8 x i1>, <8
declare double @llvm.powi.f64(double, i32)
declare <2 x double> @llvm.powi.v2f64(<2 x double>, i32)
-define double @constant_fold_powi() nounwind uwtable ssp {
+define double @constant_fold_powi() {
; CHECK-LABEL: @constant_fold_powi(
-; CHECK-NEXT: entry:
; CHECK-NEXT: ret double 9.000000e+00
;
-entry:
- %0 = call double @llvm.powi.f64(double 3.00000e+00, i32 2)
- ret double %0
+ %t0 = call double @llvm.powi.f64(double 3.00000e+00, i32 2)
+ ret double %t0
}
-define <2 x double> @constant_fold_powi_vec() nounwind uwtable ssp {
+define <2 x double> @constant_fold_powi_vec() {
; CHECK-LABEL: @constant_fold_powi_vec(
-; CHECK-NEXT: entry:
; CHECK-NEXT: ret <2 x double> <double 9.000000e+00, double 2.500000e+01>
;
-entry:
- %0 = call <2 x double> @llvm.powi.v2f64(<2 x double> <double 3.00000e+00, double 5.00000e+00>, i32 2)
- ret <2 x double> %0
+ %t0 = call <2 x double> @llvm.powi.v2f64(<2 x double> <double 3.00000e+00, double 5.00000e+00>, i32 2)
+ ret <2 x double> %t0
+}
+
+declare i8 @llvm.fshl.i8(i8, i8, i8)
+declare i9 @llvm.fshr.i9(i9, i9, i9)
+declare <2 x i7> @llvm.fshl.v2i7(<2 x i7>, <2 x i7>, <2 x i7>)
+declare <2 x i8> @llvm.fshr.v2i8(<2 x i8>, <2 x i8>, <2 x i8>)
+
+define i8 @fshl_no_shift(i8 %x, i8 %y) {
+; CHECK-LABEL: @fshl_no_shift(
+; CHECK-NEXT: ret i8 [[X:%.*]]
+;
+ %z = call i8 @llvm.fshl.i8(i8 %x, i8 %y, i8 0)
+ ret i8 %z
+}
+
+define i9 @fshr_no_shift(i9 %x, i9 %y) {
+; CHECK-LABEL: @fshr_no_shift(
+; CHECK-NEXT: ret i9 [[Y:%.*]]
+;
+ %z = call i9 @llvm.fshr.i9(i9 %x, i9 %y, i9 0)
+ ret i9 %z
+}
+
+define i8 @fshl_no_shift_modulo_bitwidth(i8 %x, i8 %y) {
+; CHECK-LABEL: @fshl_no_shift_modulo_bitwidth(
+; CHECK-NEXT: ret i8 [[X:%.*]]
+;
+ %z = call i8 @llvm.fshl.i8(i8 %x, i8 %y, i8 40)
+ ret i8 %z
+}
+
+define i9 @fshr_no_shift_modulo_bitwidth(i9 %x, i9 %y) {
+; CHECK-LABEL: @fshr_no_shift_modulo_bitwidth(
+; CHECK-NEXT: ret i9 [[Y:%.*]]
+;
+ %z = call i9 @llvm.fshr.i9(i9 %x, i9 %y, i9 189)
+ ret i9 %z
+}
+
+define <2 x i7> @fshl_no_shift_modulo_bitwidth_splat(<2 x i7> %x, <2 x i7> %y) {
+; CHECK-LABEL: @fshl_no_shift_modulo_bitwidth_splat(
+; CHECK-NEXT: ret <2 x i7> [[X:%.*]]
+;
+ %z = call <2 x i7> @llvm.fshl.v2i7(<2 x i7> %x, <2 x i7> %y, <2 x i7> <i7 21, i7 21>)
+ ret <2 x i7> %z
+}
+
+define <2 x i8> @fshr_no_shift_modulo_bitwidth_splat(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @fshr_no_shift_modulo_bitwidth_splat(
+; CHECK-NEXT: ret <2 x i8> [[Y:%.*]]
+;
+ %z = call <2 x i8> @llvm.fshr.v2i8(<2 x i8> %x, <2 x i8> %y, <2 x i8> <i8 72, i8 72>)
+ ret <2 x i8> %z
}
+
diff --git a/test/Transforms/InstSimplify/select-and-cmp.ll b/test/Transforms/InstSimplify/select-and-cmp.ll
new file mode 100644
index 000000000000..7153972c79c8
--- /dev/null
+++ b/test/Transforms/InstSimplify/select-and-cmp.ll
@@ -0,0 +1,339 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+define i32 @select_and_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp(
+; CHECK-NEXT: ret i32 [[X:%.*]]
+;
+ %A = icmp eq i32 %x, %z
+ %B = icmp eq i32 %y, %z
+ %C = and i1 %A, %B
+ %D = select i1 %C, i32 %z, i32 %x
+ ret i32 %D
+}
+
+define <2 x i8> @select_and_icmp_vec(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) {
+; CHECK-LABEL: @select_and_icmp_vec(
+; CHECK-NEXT: ret <2 x i8> [[X:%.*]]
+;
+ %A = icmp eq <2 x i8> %x, %z
+ %B = icmp eq <2 x i8> %y, %z
+ %C = and <2 x i1> %A, %B
+ %D = select <2 x i1> %C, <2 x i8> %z, <2 x i8> %x
+ ret <2 x i8> %D
+}
+
+define i32 @select_and_icmp2(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp2(
+; CHECK-NEXT: ret i32 [[Y:%.*]]
+;
+ %A = icmp eq i32 %x, %z
+ %B = icmp eq i32 %y, %z
+ %C = and i1 %A, %B
+ %D = select i1 %C, i32 %z, i32 %y
+ ret i32 %D
+}
+
+define i32 @select_and_icmp_alt(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp_alt(
+; CHECK-NEXT: ret i32 [[Z:%.*]]
+;
+ %A = icmp eq i32 %x, %z
+ %B = icmp eq i32 %y, %z
+ %C = and i1 %A, %B
+ %D = select i1 %C, i32 %x, i32 %z
+ ret i32 %D
+}
+
+define i32 @select_and_icmp_alt2(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp_alt2(
+; CHECK-NEXT: ret i32 [[Z:%.*]]
+;
+ %A = icmp eq i32 %x, %z
+ %B = icmp eq i32 %y, %z
+ %C = and i1 %A, %B
+ %D = select i1 %C, i32 %y, i32 %z
+ ret i32 %D
+}
+
+define i32 @select_and_icmp_inv_alt(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp_inv_alt(
+; CHECK-NEXT: ret i32 [[Z:%.*]]
+;
+ %A = icmp eq i32 %z, %x
+ %B = icmp eq i32 %z, %y
+ %C = and i1 %A, %B
+ %D = select i1 %C, i32 %x, i32 %z
+ ret i32 %D
+}
+
+define i32 @select_and_inv_icmp_alt(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_inv_icmp_alt(
+; CHECK-NEXT: ret i32 [[Z:%.*]]
+;
+ %A = icmp eq i32 %x, %z
+ %B = icmp eq i32 %y, %z
+ %C = and i1 %B, %A
+ %D = select i1 %C, i32 %x, i32 %z
+ ret i32 %D
+}
+
+define i32 @select_and_inv_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_inv_icmp(
+; CHECK-NEXT: ret i32 [[X:%.*]]
+;
+ %A = icmp eq i32 %x, %z
+ %B = icmp eq i32 %y, %z
+ %C = and i1 %B , %A
+ %D = select i1 %C, i32 %z, i32 %x
+ ret i32 %D
+}
+
+define <2 x i8> @select_and_icmp_alt_vec(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) {
+; CHECK-LABEL: @select_and_icmp_alt_vec(
+; CHECK-NEXT: ret <2 x i8> [[Z:%.*]]
+;
+ %A = icmp eq <2 x i8> %x, %z
+ %B = icmp eq <2 x i8> %y, %z
+ %C = and <2 x i1> %A, %B
+ %D = select <2 x i1> %C, <2 x i8> %x, <2 x i8> %z
+ ret <2 x i8> %D
+}
+
+
+define i32 @select_and_icmp_inv(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp_inv(
+; CHECK-NEXT: ret i32 [[X:%.*]]
+;
+ %A = icmp eq i32 %z, %x
+ %B = icmp eq i32 %z, %y
+ %C = and i1 %A, %B
+ %D = select i1 %C, i32 %z, i32 %x
+ ret i32 %D
+}
+
+; Negative tests
+define i32 @select_and_icmp_pred_bad_1(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp_pred_bad_1(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[B:%.*]] = icmp ne i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[Z]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %A = icmp eq i32 %x, %z
+ %B = icmp ne i32 %y, %z
+ %C = and i1 %A, %B
+ %D = select i1 %C, i32 %z, i32 %x
+ ret i32 %D
+}
+
+define i32 @select_and_icmp_pred_bad_2(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp_pred_bad_2(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[Z]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %A = icmp ne i32 %x, %z
+ %B = icmp eq i32 %y, %z
+ %C = and i1 %A, %B
+ %D = select i1 %C, i32 %z, i32 %x
+ ret i32 %D
+}
+
+define i32 @select_and_icmp_pred_bad_3(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp_pred_bad_3(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[B:%.*]] = icmp ne i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[Z]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %A = icmp ne i32 %x, %z
+ %B = icmp ne i32 %y, %z
+ %C = and i1 %A, %B
+ %D = select i1 %C, i32 %z, i32 %x
+ ret i32 %D
+}
+
+define i32 @select_and_icmp_pred_bad_4(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp_pred_bad_4(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT: [[C:%.*]] = or i1 [[A]], [[B]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[Z]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %A = icmp eq i32 %x, %z
+ %B = icmp eq i32 %y, %z
+ %C = or i1 %A, %B
+ %D = select i1 %C, i32 %z, i32 %x
+ ret i32 %D
+}
+
+define i32 @select_and_icmp_bad_true_val(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_and_icmp_bad_true_val(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[K:%.*]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %A = icmp eq i32 %x, %z
+ %B = icmp eq i32 %y, %z
+ %C = and i1 %A, %B
+ %D = select i1 %C, i32 %k, i32 %x
+ ret i32 %D
+}
+
+define i32 @select_and_icmp_bad_false_val(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_and_icmp_bad_false_val(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[Z]], i32 [[K:%.*]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %A = icmp eq i32 %x, %z
+ %B = icmp eq i32 %y, %z
+ %C = and i1 %A, %B
+ %D = select i1 %C, i32 %z, i32 %k
+ ret i32 %D
+}
+
+define i32 @select_and_icmp_bad_op(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_and_icmp_bad_op(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[K:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[Z]], i32 [[X:%.*]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %A = icmp eq i32 %k, %z
+ %B = icmp eq i32 %y, %z
+ %C = and i1 %A, %B
+ %D = select i1 %C, i32 %z, i32 %x
+ ret i32 %D
+}
+
+define i32 @select_and_icmp_bad_op_2(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_and_icmp_bad_op_2(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], [[K:%.*]]
+; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[Z]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %A = icmp eq i32 %x, %k
+ %B = icmp eq i32 %y, %z
+ %C = and i1 %A, %B
+ %D = select i1 %C, i32 %z, i32 %x
+ ret i32 %D
+}
+
+define i32 @select_and_icmp_alt_bad_1(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp_alt_bad_1(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[B:%.*]] = icmp ne i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[X]], i32 [[Z]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %A = icmp eq i32 %x, %z
+ %B = icmp ne i32 %y, %z
+ %C = and i1 %A, %B
+ %D = select i1 %C, i32 %x, i32 %z
+ ret i32 %D
+}
+
+define i32 @select_and_icmp_alt_bad_2(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp_alt_bad_2(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[X]], i32 [[Z]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %A = icmp ne i32 %x, %z
+ %B = icmp eq i32 %y, %z
+ %C = and i1 %A, %B
+ %D = select i1 %C, i32 %x, i32 %z
+ ret i32 %D
+}
+
+define i32 @select_and_icmp_alt_bad_3(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp_alt_bad_3(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[B:%.*]] = icmp ne i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[X]], i32 [[Z]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %A = icmp ne i32 %x, %z
+ %B = icmp ne i32 %y, %z
+ %C = and i1 %A, %B
+ %D = select i1 %C, i32 %x, i32 %z
+ ret i32 %D
+}
+
+define i32 @select_and_icmp_alt_bad_4(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp_alt_bad_4(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT: [[C:%.*]] = or i1 [[A]], [[B]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[X]], i32 [[Z]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %A = icmp eq i32 %x, %z
+ %B = icmp eq i32 %y, %z
+ %C = or i1 %A, %B
+ %D = select i1 %C, i32 %x, i32 %z
+ ret i32 %D
+}
+
+define i32 @select_and_icmp_alt_bad_5(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_and_icmp_alt_bad_5(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], [[K:%.*]]
+; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = or i1 [[A]], [[B]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[X]], i32 [[Z]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %A = icmp eq i32 %x, %k
+ %B = icmp eq i32 %y, %z
+ %C = or i1 %A, %B
+ %D = select i1 %C, i32 %x, i32 %z
+ ret i32 %D
+}
+
+define i32 @select_and_icmp_alt_bad_true_val(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_and_icmp_alt_bad_true_val(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[K:%.*]], i32 [[Z]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %A = icmp eq i32 %x, %z
+ %B = icmp eq i32 %y, %z
+ %C = and i1 %A, %B
+ %D = select i1 %C, i32 %k, i32 %z
+ ret i32 %D
+}
+
+define i32 @select_and_icmp_alt_bad_false_val(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_and_icmp_alt_bad_false_val(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[X]], i32 [[K:%.*]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %A = icmp eq i32 %x, %z
+ %B = icmp eq i32 %y, %z
+ %C = and i1 %A, %B
+ %D = select i1 %C, i32 %x, i32 %k
+ ret i32 %D
+}
diff --git a/test/Transforms/InstSimplify/select-or-cmp.ll b/test/Transforms/InstSimplify/select-or-cmp.ll
new file mode 100644
index 000000000000..ea29bff7d1c4
--- /dev/null
+++ b/test/Transforms/InstSimplify/select-or-cmp.ll
@@ -0,0 +1,339 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+define i32 @select_or_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_or_icmp(
+; CHECK-NEXT: ret i32 [[Z:%.*]]
+;
+ %A = icmp ne i32 %x, %z
+ %B = icmp ne i32 %y, %z
+ %C = or i1 %A, %B
+ %D = select i1 %C, i32 %z, i32 %x
+ ret i32 %D
+}
+
+define <2 x i8> @select_or_icmp_vec(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) {
+; CHECK-LABEL: @select_or_icmp_vec(
+; CHECK-NEXT: ret <2 x i8> [[Z:%.*]]
+;
+ %A = icmp ne <2 x i8> %x, %z
+ %B = icmp ne <2 x i8> %y, %z
+ %C = or <2 x i1> %A, %B
+ %D = select <2 x i1> %C, <2 x i8> %z, <2 x i8> %x
+ ret <2 x i8> %D
+}
+
+define i32 @select_or_icmp2(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_or_icmp2(
+; CHECK-NEXT: ret i32 [[Z:%.*]]
+;
+ %A = icmp ne i32 %x, %z
+ %B = icmp ne i32 %y, %z
+ %C = or i1 %A, %B
+ %D = select i1 %C, i32 %z, i32 %y
+ ret i32 %D
+}
+
+define i32 @select_or_icmp_alt(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_or_icmp_alt(
+; CHECK-NEXT: ret i32 [[X:%.*]]
+;
+ %A = icmp ne i32 %x, %z
+ %B = icmp ne i32 %y, %z
+ %C = or i1 %A, %B
+ %D = select i1 %C, i32 %x, i32 %z
+ ret i32 %D
+}
+
+define i32 @select_or_icmp_alt2(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_or_icmp_alt2(
+; CHECK-NEXT: ret i32 [[Y:%.*]]
+;
+ %A = icmp ne i32 %x, %z
+ %B = icmp ne i32 %y, %z
+ %C = or i1 %A, %B
+ %D = select i1 %C, i32 %y, i32 %z
+ ret i32 %D
+}
+
+define i32 @select_or_icmp_inv_alt(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_or_icmp_inv_alt(
+; CHECK-NEXT: ret i32 [[X:%.*]]
+;
+ %A = icmp ne i32 %z, %x
+ %B = icmp ne i32 %z, %y
+ %C = or i1 %A, %B
+ %D = select i1 %C, i32 %x, i32 %z
+ ret i32 %D
+}
+
+define i32 @select_or_inv_icmp_alt(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_or_inv_icmp_alt(
+; CHECK-NEXT: ret i32 [[X:%.*]]
+;
+ %A = icmp ne i32 %z, %x
+ %B = icmp ne i32 %z, %y
+ %C = or i1 %B, %A
+ %D = select i1 %C, i32 %x, i32 %z
+ ret i32 %D
+}
+
+define <2 x i8> @select_or_icmp_alt_vec(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) {
+; CHECK-LABEL: @select_or_icmp_alt_vec(
+; CHECK-NEXT: ret <2 x i8> [[X:%.*]]
+;
+ %A = icmp ne <2 x i8> %x, %z
+ %B = icmp ne <2 x i8> %y, %z
+ %C = or <2 x i1> %A, %B
+ %D = select <2 x i1> %C, <2 x i8> %x, <2 x i8> %z
+ ret <2 x i8> %D
+}
+
+define i32 @select_or_inv_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_or_inv_icmp(
+; CHECK-NEXT: ret i32 [[Z:%.*]]
+;
+ %A = icmp ne i32 %x, %z
+ %B = icmp ne i32 %y, %z
+ %C = or i1 %B , %A
+ %D = select i1 %C, i32 %z, i32 %x
+ ret i32 %D
+}
+
+define i32 @select_or_icmp_inv(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_or_icmp_inv(
+; CHECK-NEXT: ret i32 [[Z:%.*]]
+;
+ %A = icmp ne i32 %z, %x
+ %B = icmp ne i32 %z, %y
+ %C = or i1 %A, %B
+ %D = select i1 %C, i32 %z, i32 %x
+ ret i32 %D
+}
+
+; Negative tests
+define i32 @select_and_icmp_pred_bad_1(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp_pred_bad_1(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[B:%.*]] = icmp ne i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT: [[C:%.*]] = or i1 [[A]], [[B]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[Z]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %A = icmp eq i32 %x, %z
+ %B = icmp ne i32 %y, %z
+ %C = or i1 %A, %B
+ %D = select i1 %C, i32 %z, i32 %x
+ ret i32 %D
+}
+
+define i32 @select_and_icmp_pred_bad_2(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp_pred_bad_2(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT: [[C:%.*]] = or i1 [[A]], [[B]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[Z]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %A = icmp ne i32 %x, %z
+ %B = icmp eq i32 %y, %z
+ %C = or i1 %A, %B
+ %D = select i1 %C, i32 %z, i32 %x
+ ret i32 %D
+}
+
+define i32 @select_and_icmp_pred_bad_3(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp_pred_bad_3(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT: [[C:%.*]] = or i1 [[A]], [[B]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[Z]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %A = icmp eq i32 %x, %z
+ %B = icmp eq i32 %y, %z
+ %C = or i1 %A, %B
+ %D = select i1 %C, i32 %z, i32 %x
+ ret i32 %D
+}
+
+define i32 @select_and_icmp_pred_bad_4(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp_pred_bad_4(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[B:%.*]] = icmp ne i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[Z]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %A = icmp ne i32 %x, %z
+ %B = icmp ne i32 %y, %z
+ %C = and i1 %A, %B
+ %D = select i1 %C, i32 %z, i32 %x
+ ret i32 %D
+}
+
+define i32 @select_or_icmp_bad_true_val(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_or_icmp_bad_true_val(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[B:%.*]] = icmp ne i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT: [[C:%.*]] = or i1 [[A]], [[B]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[K:%.*]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %A = icmp ne i32 %x, %z
+ %B = icmp ne i32 %y, %z
+ %C = or i1 %A, %B
+ %D = select i1 %C, i32 %k, i32 %x
+ ret i32 %D
+}
+
+define i32 @select_or_icmp_bad_false_val(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_or_icmp_bad_false_val(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[B:%.*]] = icmp ne i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT: [[C:%.*]] = or i1 [[A]], [[B]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[Z]], i32 [[K:%.*]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %A = icmp ne i32 %x, %z
+ %B = icmp ne i32 %y, %z
+ %C = or i1 %A, %B
+ %D = select i1 %C, i32 %z, i32 %k
+ ret i32 %D
+}
+
+define i32 @select_or_icmp_bad_op(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_or_icmp_bad_op(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i32 [[K:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[B:%.*]] = icmp ne i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT: [[C:%.*]] = or i1 [[A]], [[B]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[Z]], i32 [[X:%.*]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %A = icmp ne i32 %k, %z
+ %B = icmp ne i32 %y, %z
+ %C = or i1 %A, %B
+ %D = select i1 %C, i32 %z, i32 %x
+ ret i32 %D
+}
+
+
+define i32 @select_or_icmp_bad_op_2(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_or_icmp_bad_op_2(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i32 [[X:%.*]], [[K:%.*]]
+; CHECK-NEXT: [[B:%.*]] = icmp ne i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = or i1 [[A]], [[B]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[Z]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %A = icmp ne i32 %x, %k
+ %B = icmp ne i32 %y, %z
+ %C = or i1 %A, %B
+ %D = select i1 %C, i32 %z, i32 %x
+ ret i32 %D
+}
+
+define i32 @select_or_icmp_alt_bad_1(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_or_icmp_alt_bad_1(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[B:%.*]] = icmp ne i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT: [[C:%.*]] = or i1 [[A]], [[B]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[X]], i32 [[Z]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %A = icmp eq i32 %x, %z
+ %B = icmp ne i32 %y, %z
+ %C = or i1 %A, %B
+ %D = select i1 %C, i32 %x, i32 %z
+ ret i32 %D
+}
+
+define i32 @select_or_icmp_alt_bad_2(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_or_icmp_alt_bad_2(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT: [[C:%.*]] = or i1 [[A]], [[B]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[X]], i32 [[Z]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %A = icmp ne i32 %x, %z
+ %B = icmp eq i32 %y, %z
+ %C = or i1 %A, %B
+ %D = select i1 %C, i32 %x, i32 %z
+ ret i32 %D
+}
+
+define i32 @select_or_icmp_alt_bad_3(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_or_icmp_alt_bad_3(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT: [[C:%.*]] = or i1 [[A]], [[B]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[X]], i32 [[Z]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %A = icmp eq i32 %x, %z
+ %B = icmp eq i32 %y, %z
+ %C = or i1 %A, %B
+ %D = select i1 %C, i32 %x, i32 %z
+ ret i32 %D
+}
+
+define i32 @select_or_icmp_alt_bad_4(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_or_icmp_alt_bad_4(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[B:%.*]] = icmp ne i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[X]], i32 [[Z]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %A = icmp ne i32 %x, %z
+ %B = icmp ne i32 %y, %z
+ %C = and i1 %A, %B
+ %D = select i1 %C, i32 %x, i32 %z
+ ret i32 %D
+}
+
+define i32 @select_or_icmp_alt_bad_5(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_or_icmp_alt_bad_5(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i32 [[X:%.*]], [[K:%.*]]
+; CHECK-NEXT: [[B:%.*]] = icmp ne i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = or i1 [[A]], [[B]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[X]], i32 [[Z]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %A = icmp ne i32 %x, %k
+ %B = icmp ne i32 %y, %z
+ %C = or i1 %A, %B
+ %D = select i1 %C, i32 %x, i32 %z
+ ret i32 %D
+}
+
+define i32 @select_or_icmp_alt_bad_true_val(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_or_icmp_alt_bad_true_val(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[B:%.*]] = icmp ne i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT: [[C:%.*]] = or i1 [[A]], [[B]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[K:%.*]], i32 [[Z]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %A = icmp ne i32 %x, %z
+ %B = icmp ne i32 %y, %z
+ %C = or i1 %A, %B
+ %D = select i1 %C, i32 %k, i32 %z
+ ret i32 %D
+}
+
+define i32 @select_or_icmp_alt_bad_false_val(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_or_icmp_alt_bad_false_val(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[B:%.*]] = icmp ne i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT: [[C:%.*]] = or i1 [[A]], [[B]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[X]], i32 [[K:%.*]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %A = icmp ne i32 %x, %z
+ %B = icmp ne i32 %y, %z
+ %C = or i1 %A, %B
+ %D = select i1 %C, i32 %x, i32 %k
+ ret i32 %D
+}
diff --git a/test/Transforms/InstSimplify/shift.ll b/test/Transforms/InstSimplify/shift.ll
index 7a09ef971514..cbffd371853b 100644
--- a/test/Transforms/InstSimplify/shift.ll
+++ b/test/Transforms/InstSimplify/shift.ll
@@ -175,41 +175,65 @@ define <2 x i8> @shl_by_sext_bool_vec(<2 x i1> %x, <2 x i8> %y) {
ret <2 x i8> %r
}
-define i32 @shl_or_shr(i32 %a, i32 %b) {
+define i64 @shl_or_shr(i32 %a, i32 %b) {
; CHECK-LABEL: @shl_or_shr(
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[A:%.*]] to i64
-; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[B:%.*]] to i64
-; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 32
-; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP4]], 32
-; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
-; CHECK-NEXT: ret i32 [[TMP6]]
+; CHECK-NEXT: ret i64 [[TMP1]]
;
%tmp1 = zext i32 %a to i64
%tmp2 = zext i32 %b to i64
%tmp3 = shl nuw i64 %tmp1, 32
%tmp4 = or i64 %tmp2, %tmp3
%tmp5 = lshr i64 %tmp4, 32
- %tmp6 = trunc i64 %tmp5 to i32
- ret i32 %tmp6
+ ret i64 %tmp5
}
-define i32 @shl_or_shr2(i32 %a, i32 %b) {
; Since shift count of shl is smaller than the size of %b, OR cannot be eliminated.
+define i64 @shl_or_shr2(i32 %a, i32 %b) {
; CHECK-LABEL: @shl_or_shr2(
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[A:%.*]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[B:%.*]] to i64
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 31
; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP4]], 31
-; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
-; CHECK-NEXT: ret i32 [[TMP6]]
+; CHECK-NEXT: ret i64 [[TMP5]]
;
%tmp1 = zext i32 %a to i64
%tmp2 = zext i32 %b to i64
%tmp3 = shl nuw i64 %tmp1, 31
%tmp4 = or i64 %tmp2, %tmp3
%tmp5 = lshr i64 %tmp4, 31
- %tmp6 = trunc i64 %tmp5 to i32
- ret i32 %tmp6
+ ret i64 %tmp5
+}
+
+; Unit test for vector integer
+define <2 x i64> @shl_or_shr1v(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: @shl_or_shr1v(
+; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %tmp1 = zext <2 x i32> %a to <2 x i64>
+ %tmp2 = zext <2 x i32> %b to <2 x i64>
+ %tmp3 = shl nuw <2 x i64> %tmp1, <i64 32, i64 32>
+ %tmp4 = or <2 x i64> %tmp3, %tmp2
+ %tmp5 = lshr <2 x i64> %tmp4, <i64 32, i64 32>
+ ret <2 x i64> %tmp5
+}
+
+; Negative unit test for vector integer
+define <2 x i64> @shl_or_shr2v(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: @shl_or_shr2v(
+; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64>
+; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[B:%.*]] to <2 x i64>
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <2 x i64> [[TMP1]], <i64 31, i64 31>
+; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i64> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = lshr <2 x i64> [[TMP4]], <i64 31, i64 31>
+; CHECK-NEXT: ret <2 x i64> [[TMP5]]
+;
+ %tmp1 = zext <2 x i32> %a to <2 x i64>
+ %tmp2 = zext <2 x i32> %b to <2 x i64>
+ %tmp3 = shl nuw <2 x i64> %tmp1, <i64 31, i64 31>
+ %tmp4 = or <2 x i64> %tmp2, %tmp3
+ %tmp5 = lshr <2 x i64> %tmp4, <i64 31, i64 31>
+ ret <2 x i64> %tmp5
}
diff --git a/test/Transforms/LCSSA/basictest.ll b/test/Transforms/LCSSA/basictest.ll
index 910e6f8f607d..7ca552039b63 100644
--- a/test/Transforms/LCSSA/basictest.ll
+++ b/test/Transforms/LCSSA/basictest.ll
@@ -1,6 +1,6 @@
; RUN: opt < %s -lcssa -S | FileCheck %s
; RUN: opt < %s -passes=lcssa -S | FileCheck %s
-; RUN: opt < %s -debugify -lcssa -S | FileCheck -check-prefix=CHECK2 %s
+; RUN: opt < %s -debugify -lcssa -S | FileCheck -check-prefix=DEBUGIFY %s
define void @lcssa(i1 %S2) {
; CHECK-LABEL: @lcssa
@@ -19,9 +19,12 @@ post.if: ; preds = %if.false, %if.true
br i1 %S2, label %loop.exit, label %loop.interior
loop.exit: ; preds = %post.if
; CHECK: %X3.lcssa = phi i32
-; CHECK2: call void @llvm.dbg.value(metadata i32 %X3.lcssa
+; DEBUGIFY: %X3.lcssa = phi i32 {{.*}}, !dbg ![[DbgLoc:[0-9]+]]
+; DEBUGIFY-NEXT: call void @llvm.dbg.value(metadata i32 %X3.lcssa
; CHECK: %X4 = add i32 3, %X3.lcssa
%X4 = add i32 3, %X3 ; <i32> [#uses=0]
ret void
}
+; Make sure the lcssa phi has %X3's debug location
+; DEBUGIFY: ![[DbgLoc]] = !DILocation(line: 7
diff --git a/test/Transforms/SCCP/preserve-analysis.ll b/test/Transforms/SCCP/preserve-analysis.ll
index 52d2941d81ba..8d34e7195b95 100644
--- a/test/Transforms/SCCP/preserve-analysis.ll
+++ b/test/Transforms/SCCP/preserve-analysis.ll
@@ -7,11 +7,9 @@
; CHECK: Globals Alias Analysis
; CHECK: Dominator Tree Construction
; CHECK: Natural Loop Information
-; CHECK: Basic Alias Analysis (stateless AA impl)
; CHECK: Sparse Conditional Constant Propagation
; CHECK-NOT: Dominator Tree Construction
; CHECK-NOT: Natural Loop Information
-; CHECK-NOT: Basic Alias Analysis (stateless AA impl)
; CHECK-NOT: Globals Alias Analysis
; CHECK: Loop Vectorization
diff --git a/test/Transforms/SLPVectorizer/AArch64/PR38339.ll b/test/Transforms/SLPVectorizer/AArch64/PR38339.ll
new file mode 100644
index 000000000000..1ab4a13260ed
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/AArch64/PR38339.ll
@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -slp-vectorizer -S -mtriple=aarch64-apple-ios -mcpu=cyclone -o - %s | FileCheck %s
+
+define void @f1(<2 x i16> %x, i16* %a) {
+; CHECK-LABEL: @f1(
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[X:%.*]], <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
+; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
+; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1
+; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2
+; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[SHUFFLE]], i32 0
+; CHECK-NEXT: store i16 [[TMP1]], i16* [[A:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[PTR0]] to <4 x i16>*
+; CHECK-NEXT: store <4 x i16> [[SHUFFLE]], <4 x i16>* [[TMP2]], align 2
+; CHECK-NEXT: ret void
+;
+ %t2 = extractelement <2 x i16> %x, i32 0
+ %t3 = extractelement <2 x i16> %x, i32 1
+ %ptr0 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
+ %ptr1 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1
+ %ptr2 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2
+ %ptr3 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3
+ store i16 %t2, i16* %a
+ store i16 %t2, i16* %ptr0
+ store i16 %t3, i16* %ptr1
+ store i16 %t3, i16* %ptr2
+ store i16 %t2, i16* %ptr3
+ ret void
+}
diff --git a/test/Transforms/SimplifyCFG/merge-cond-stores.ll b/test/Transforms/SimplifyCFG/merge-cond-stores.ll
index f730ef2aed3c..613eedde96bb 100644
--- a/test/Transforms/SimplifyCFG/merge-cond-stores.ll
+++ b/test/Transforms/SimplifyCFG/merge-cond-stores.ll
@@ -5,15 +5,15 @@
define void @test_simple(i32* %p, i32 %a, i32 %b) {
; CHECK-LABEL: @test_simple(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = icmp ne i32 [[A:%.*]], 0
+; CHECK-NEXT: [[X1:%.*]] = icmp ne i32 [[A:%.*]], 0
; CHECK-NEXT: [[X2:%.*]] = icmp eq i32 [[B:%.*]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = xor i1 [[X2]], true
-; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
-; CHECK-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
+; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[X2]], true
+; CHECK-NEXT: [[TMP1:%.*]] = or i1 [[X1]], [[TMP0]]
+; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]]
; CHECK: [[NOT_X2:%.*]] = xor i1 [[X2]], true
-; CHECK-NEXT: [[DOT:%.*]] = zext i1 [[NOT_X2]] to i32
-; CHECK-NEXT: store i32 [[DOT]], i32* [[P:%.*]], align 4
-; CHECK-NEXT: br label [[TMP4]]
+; CHECK-NEXT: [[SPEC_SELECT:%.*]] = zext i1 [[NOT_X2]] to i32
+; CHECK-NEXT: store i32 [[SPEC_SELECT]], i32* [[P:%.*]], align 4
+; CHECK-NEXT: br label [[TMP3]]
; CHECK: ret void
;
entry:
@@ -44,8 +44,8 @@ define void @test_simple_commuted(i32* %p, i32 %a, i32 %b) {
; CHECK-NEXT: [[X2:%.*]] = icmp eq i32 [[B:%.*]], 0
; CHECK-NEXT: [[TMP0:%.*]] = or i1 [[X1]], [[X2]]
; CHECK-NEXT: br i1 [[TMP0]], label [[TMP1:%.*]], label [[TMP2:%.*]]
-; CHECK: [[DOT:%.*]] = zext i1 [[X2]] to i32
-; CHECK-NEXT: store i32 [[DOT]], i32* [[P:%.*]], align 4
+; CHECK: [[SPEC_SELECT:%.*]] = zext i1 [[X2]] to i32
+; CHECK-NEXT: store i32 [[SPEC_SELECT]], i32* [[P:%.*]], align 4
; CHECK-NEXT: br label [[TMP2]]
; CHECK: ret void
;
@@ -76,16 +76,16 @@ define void @test_recursive(i32* %p, i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-NEXT: [[TMP0:%.*]] = or i32 [[B:%.*]], [[A:%.*]]
; CHECK-NEXT: [[X4:%.*]] = icmp eq i32 [[D:%.*]], 0
; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[TMP0]], [[C:%.*]]
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = xor i1 [[X4]], true
-; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[TMP2]]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[X4]], true
+; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]]
; CHECK-NEXT: br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP6:%.*]]
; CHECK: [[X3:%.*]] = icmp eq i32 [[C]], 0
-; CHECK-NEXT: [[NOT_X2:%.*]] = icmp ne i32 [[B]], 0
-; CHECK-NEXT: [[DOT:%.*]] = zext i1 [[NOT_X2]] to i32
-; CHECK-NEXT: [[DOT_:%.*]] = select i1 [[X3]], i32 [[DOT]], i32 2
-; CHECK-NEXT: [[DOT__:%.*]] = select i1 [[X4]], i32 [[DOT_]], i32 3
-; CHECK-NEXT: store i32 [[DOT__]], i32* [[P:%.*]], align 4
+; CHECK-NEXT: [[X2:%.*]] = icmp ne i32 [[B]], 0
+; CHECK-NEXT: [[SPEC_SELECT:%.*]] = zext i1 [[X2]] to i32
+; CHECK-NEXT: [[SPEC_SELECT1:%.*]] = select i1 [[X3]], i32 [[SPEC_SELECT]], i32 2
+; CHECK-NEXT: [[SPEC_SELECT2:%.*]] = select i1 [[X4]], i32 [[SPEC_SELECT1]], i32 3
+; CHECK-NEXT: store i32 [[SPEC_SELECT2]], i32* [[P:%.*]], align 4
; CHECK-NEXT: br label [[TMP6]]
; CHECK: ret void
;
@@ -265,8 +265,7 @@ define i32 @test_diamond_simple(i32* %p, i32* %q, i32 %a, i32 %b) {
; CHECK-LABEL: @test_diamond_simple(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[X1:%.*]] = icmp eq i32 [[A:%.*]], 0
-; CHECK-NEXT: [[Z1:%.*]] = add i32 [[A]], [[B:%.*]]
-; CHECK-NEXT: [[Z2:%.*]] = select i1 [[X1]], i32 [[Z1]], i32 0
+; CHECK-NEXT: [[Z2:%.*]] = select i1 [[X1]], i32 [[B:%.*]], i32 0
; CHECK-NEXT: [[X2:%.*]] = icmp eq i32 [[B]], 0
; CHECK-NEXT: [[Z3:%.*]] = sub i32 [[Z2]], [[B]]
; CHECK-NEXT: [[Z4:%.*]] = select i1 [[X2]], i32 [[Z3]], i32 3
diff --git a/test/tools/dsymutil/X86/accelerator.test b/test/tools/dsymutil/X86/accelerator.test
index 906b0e645cf4..96fc58ee5683 100644
--- a/test/tools/dsymutil/X86/accelerator.test
+++ b/test/tools/dsymutil/X86/accelerator.test
@@ -1,7 +1,3 @@
-UNSUPPORTED: system-windows
-Windows does not like renaming files that have open handles to them. We
-need to use TempFile::keep to move them in a portable way.
-
RUN: dsymutil -accelerator=Dwarf -oso-prepend-path=%p/.. %p/../Inputs/basic.macho.x86_64 -o %t.dwarf.dSYM
RUN: dsymutil -accelerator=Apple -oso-prepend-path=%p/.. %p/../Inputs/basic.macho.x86_64 -o %t.apple.dSYM
diff --git a/test/tools/dsymutil/X86/update-one-CU.test b/test/tools/dsymutil/X86/update-one-CU.test
index 5d36ce7135f2..09f49ca89c48 100644
--- a/test/tools/dsymutil/X86/update-one-CU.test
+++ b/test/tools/dsymutil/X86/update-one-CU.test
@@ -1,7 +1,3 @@
-UNSUPPORTED: system-windows
-Windows does not like renaming files that have open handles to them. We
-need to use TempFile::keep to move them in a portable way.
-
RUN: dsymutil -oso-prepend-path=%p/.. %p/../Inputs/objc.macho.x86_64 -o %t.dSYM
RUN: dsymutil -update %t.dSYM
RUN: llvm-dwarfdump -apple-types -apple-objc %t.dSYM | FileCheck %s
diff --git a/test/tools/dsymutil/X86/update.test b/test/tools/dsymutil/X86/update.test
index cbfff63d6a87..804091ab2943 100644
--- a/test/tools/dsymutil/X86/update.test
+++ b/test/tools/dsymutil/X86/update.test
@@ -1,7 +1,3 @@
-UNSUPPORTED: system-windows
-Windows does not like renaming files that have open handles to them. We
-need to use TempFile::keep to move them in a portable way.
-
RUN: rm -rf %t.dir
RUN: mkdir -p %t.dir
RUN: cat %p/../Inputs/basic.macho.x86_64 > %t.dir/basic
diff --git a/test/tools/llvm-dwarfdump/X86/debug_addr.s b/test/tools/llvm-dwarfdump/X86/debug_addr.s
new file mode 100644
index 000000000000..b0ee8be29544
--- /dev/null
+++ b/test/tools/llvm-dwarfdump/X86/debug_addr.s
@@ -0,0 +1,38 @@
+# RUN: llvm-mc %s -filetype obj -triple i386-pc-linux -o %t.o
+# RUN: llvm-dwarfdump -debug-addr %t.o | FileCheck %s
+
+# CHECK: .debug_addr contents
+
+# CHECK-NEXT: length = 0x0000000c, version = 0x0005, addr_size = 0x04, seg_size = 0x00
+# CHECK-NEXT: Addrs: [
+# CHECK-NEXT: 0x00000000
+# CHECK-NEXT: 0x00000001
+# CHECK-NEXT: ]
+# CHECK-NEXT: length = 0x00000004, version = 0x0005, addr_size = 0x04, seg_size = 0x00
+# CHECK-NOT: {{.}}
+
+ .section .debug_abbrev,"",@progbits
+ .byte 1 # Abbreviation Code
+ .section .debug_info,"",@progbits
+.Lcu_begin0:
+ .long 8 # Length of Unit
+ .short 5 # DWARF version number
+ .byte 1 # DWARF unit type
+ .byte 4 # Address Size (in bytes)
+ .long .debug_abbrev # Offset Into Abbrev. Section
+
+ .section .debug_addr,"",@progbits
+.Ldebug_addr0:
+ .long 12 # unit_length = .short + .byte + .byte + .long + .long
+ .short 5 # version
+ .byte 4 # address_size
+ .byte 0 # segment_selector_size
+ .long 0x00000000
+ .long 0x00000001
+
+ .section .debug_addr,"",@progbits
+.Ldebug_addr1:
+ .long 4 # unit_length = .short + .byte + .byte
+ .short 5 # version
+ .byte 4 # address_size
+ .byte 0 # segment_selector_size
diff --git a/test/tools/llvm-dwarfdump/X86/debug_addr_64bit_address.s b/test/tools/llvm-dwarfdump/X86/debug_addr_64bit_address.s
new file mode 100644
index 000000000000..616d726d6575
--- /dev/null
+++ b/test/tools/llvm-dwarfdump/X86/debug_addr_64bit_address.s
@@ -0,0 +1,29 @@
+# RUN: llvm-mc %s -filetype obj -triple x86_64-pc-linux -o %t.o
+# RUN: llvm-dwarfdump -debug-addr %t.o | FileCheck %s
+
+# CHECK: .debug_addr contents
+# CHECK-NEXT: length = 0x00000014, version = 0x0005, addr_size = 0x08, seg_size = 0x00
+# CHECK-NEXT: Addrs: [
+# CHECK-NEXT: 0x0000000100000000
+# CHECK-NEXT: 0x0000000100000001
+# CHECK-NEXT: ]
+# CHECK-NOT: {{.}}
+
+ .section .debug_abbrev,"",@progbits
+ .byte 1 # Abbreviation Code
+ .section .debug_info,"",@progbits
+.Lcu_begin0:
+ .long 8 # Length of Unit
+ .short 5 # DWARF version number
+ .byte 1 # DWARF unit type
+ .byte 8 # Address Size (in bytes)
+ .long .debug_abbrev # Offset Into Abbrev. Section
+
+ .section .debug_addr,"",@progbits
+.Ldebug_addr0:
+ .long 20 # unit_length = .short + .byte + .byte + .quad + .quad
+ .short 5 # version
+ .byte 8 # address_size
+ .byte 0 # segment_selector_size
+ .quad 0x0000000100000000
+ .quad 0x0000000100000001
diff --git a/test/tools/llvm-dwarfdump/X86/debug_addr_absent.s b/test/tools/llvm-dwarfdump/X86/debug_addr_absent.s
new file mode 100644
index 000000000000..1965fef91cda
--- /dev/null
+++ b/test/tools/llvm-dwarfdump/X86/debug_addr_absent.s
@@ -0,0 +1,4 @@
+# RUN: llvm-mc %s -filetype obj -triple i386-pc-linux -o - | \
+# RUN: llvm-dwarfdump -debug-addr - 2>&1 | FileCheck %s
+# CHECK: .debug_addr contents:
+# CHECK-NOT: {{.}}
diff --git a/test/tools/llvm-dwarfdump/X86/debug_addr_address_size_mismatch.s b/test/tools/llvm-dwarfdump/X86/debug_addr_address_size_mismatch.s
new file mode 100644
index 000000000000..49e694a9f1cc
--- /dev/null
+++ b/test/tools/llvm-dwarfdump/X86/debug_addr_address_size_mismatch.s
@@ -0,0 +1,42 @@
+# RUN: llvm-mc %s -filetype obj -triple i386-pc-linux -o - | \
+# RUN: llvm-dwarfdump -debug-addr - 2> %t.err | FileCheck %s
+# RUN: FileCheck %s -input-file %t.err -check-prefix=ERR
+
+# ERR: .debug_addr table at offset 0x0 has address size 8 which is different from CU address size 4
+# ERR-NOT: {{.}}
+
+# CHECK: .debug_addr contents
+# CHECK-NEXT: length = 0x0000000c, version = 0x0005, addr_size = 0x04, seg_size = 0x00
+# CHECK-NEXT: Addrs: [
+# CHECK-NEXT: 0x00000000
+# CHECK-NEXT: 0x00000001
+# CHECK-NEXT: ]
+# CHECK-NOT: {{.}}
+
+ .section .debug_abbrev,"",@progbits
+ .byte 1 # Abbreviation Code
+ .section .debug_info,"",@progbits
+.Lcu_begin0:
+ .long 8 # Length of Unit
+ .short 5 # DWARF version number
+ .byte 1 # DWARF unit type
+ .byte 4 # Address Size (in bytes)
+ .long .debug_abbrev # Offset Into Abbrev. Section
+
+ .section .debug_addr,"",@progbits
+.Ldebug_addr0:
+ .long 12 # unit_length = .short + .byte + .byte + .long + .long
+ .short 5 # version
+ .byte 8 # address_size
+ .byte 0 # segment_selector_size
+ .long 0x00000000
+ .long 0x00000001
+
+ .section .debug_addr,"",@progbits
+.Ldebug_addr1:
+ .long 12 # unit_length = .short + .byte + .byte + .long + .long
+ .short 5 # version
+ .byte 4 # address_size
+ .byte 0 # segment_selector_size
+ .long 0x00000000
+ .long 0x00000001
diff --git a/test/tools/llvm-dwarfdump/X86/debug_addr_address_size_not_multiple.s b/test/tools/llvm-dwarfdump/X86/debug_addr_address_size_not_multiple.s
new file mode 100644
index 000000000000..e8835e087962
--- /dev/null
+++ b/test/tools/llvm-dwarfdump/X86/debug_addr_address_size_not_multiple.s
@@ -0,0 +1,18 @@
+# RUN: llvm-mc %s -filetype obj -triple i386-pc-linux -o - | \
+# RUN: llvm-dwarfdump -debug-addr - 2> %t.err | FileCheck %s
+# RUN: FileCheck %s -input-file %t.err -check-prefix=ERR
+
+# CHECK: .debug_addr contents:
+# CHECK-NOT: {{.}}
+# ERR: .debug_addr table at offset 0x0 contains data of size 7 which is not a multiple of addr size 4
+# ERR-NOT: {{.}}
+
+# data size is not multiple of address_size
+ .section .debug_addr,"",@progbits
+.Ldebug_addr0:
+ .long 11 # unit_length = .short + .byte + .byte + .long + .long - 1
+ .short 5 # version
+ .byte 4 # address_size
+ .byte 0 # segment_selector_size
+ .long 0x00000000
+ .long 0x00000001
diff --git a/test/tools/llvm-dwarfdump/X86/debug_addr_dwarf4.s b/test/tools/llvm-dwarfdump/X86/debug_addr_dwarf4.s
new file mode 100644
index 000000000000..57e9dd3c5193
--- /dev/null
+++ b/test/tools/llvm-dwarfdump/X86/debug_addr_dwarf4.s
@@ -0,0 +1,20 @@
+# RUN: llvm-mc %s -filetype obj -triple i386-pc-linux -o %t.o
+# RUN: llvm-dwarfdump --debug-addr %t.o | FileCheck %s
+
+# CHECK: .debug_addr contents
+# CHECK-NEXT: length = 0x00000000, version = 0x0004, addr_size = 0x04, seg_size = 0x00
+# CHECK-NEXT: Addrs: [
+# CHECK-NEXT: 0x00000000
+# CHECK-NEXT: 0x00000001
+
+ .section .debug_abbrev,"",@progbits
+ .byte 1 # Abbreviation Code
+ .section .debug_info,"",@progbits
+.Lcu_begin0:
+ .long 7 # Length of Unit
+ .short 4 # DWARF version number
+ .long .debug_abbrev # Offset Into Abbrev. Section
+ .byte 4 # Address Size (in bytes)
+ .section .debug_addr,"",@progbits
+ .long 0x00000000
+ .long 0x00000001
diff --git a/test/tools/llvm-dwarfdump/X86/debug_addr_dwarf64.s b/test/tools/llvm-dwarfdump/X86/debug_addr_dwarf64.s
new file mode 100644
index 000000000000..bed41952df05
--- /dev/null
+++ b/test/tools/llvm-dwarfdump/X86/debug_addr_dwarf64.s
@@ -0,0 +1,19 @@
+# RUN: llvm-mc %s -filetype obj -triple i386-pc-linux -o - | \
+# RUN: llvm-dwarfdump -debug-addr - 2> %t.err | FileCheck %s
+# RUN: FileCheck %s -input-file %t.err -check-prefix=ERR
+
+# CHECK: .debug_addr contents:
+# CHECK-NOT: {{.}}
+# ERR: DWARF64 is not supported in .debug_addr at offset 0x0
+# ERR-NOT: {{.}}
+
+# DWARF64 table
+ .section .debug_addr,"",@progbits
+.Ldebug_addr0:
+ .long 0xffffffff # unit_length DWARF64 mark
+ .quad 12 # unit_length
+ .short 5 # version
+ .byte 3 # address_size
+ .byte 0 # segment_selector_size
+ .long 0x00000000
+ .long 0x00000001
diff --git a/test/tools/llvm-dwarfdump/X86/debug_addr_empty.s b/test/tools/llvm-dwarfdump/X86/debug_addr_empty.s
new file mode 100644
index 000000000000..e80cf8f280a9
--- /dev/null
+++ b/test/tools/llvm-dwarfdump/X86/debug_addr_empty.s
@@ -0,0 +1,7 @@
+# RUN: llvm-mc %s -filetype obj -triple i386-pc-linux -o - | \
+# RUN: llvm-dwarfdump -debug-addr - | FileCheck %s
+# CHECK: .debug_addr contents:
+# CHECK-NOT: Addr
+# CHECK-NOT: error:
+
+.section .debug_addr,"",@progbits
diff --git a/test/tools/llvm-dwarfdump/X86/debug_addr_invalid_addr_size.s b/test/tools/llvm-dwarfdump/X86/debug_addr_invalid_addr_size.s
new file mode 100644
index 000000000000..1ba1afebf5da
--- /dev/null
+++ b/test/tools/llvm-dwarfdump/X86/debug_addr_invalid_addr_size.s
@@ -0,0 +1,18 @@
+# RUN: llvm-mc %s -filetype obj -triple i386-pc-linux -o - | \
+# RUN: llvm-dwarfdump -debug-addr - 2> %t.err | FileCheck %s
+# RUN: FileCheck %s -input-file %t.err -check-prefix=ERR
+
+# CHECK: .debug_addr contents:
+# CHECK-NOT: {{.}}
+# ERR: unsupported address size 3
+# ERR-NOT: {{.}}
+
+# invalid addr size
+ .section .debug_addr,"",@progbits
+.Ldebug_addr0:
+ .long 12 # unit_length = .short + .byte + .byte + .long + .long
+ .short 5 # version
+ .byte 3 # address_size
+ .byte 0 # segment_selector_size
+ .long 0x00000000
+ .long 0x00000001
diff --git a/test/tools/llvm-dwarfdump/X86/debug_addr_segment_selector.s b/test/tools/llvm-dwarfdump/X86/debug_addr_segment_selector.s
new file mode 100644
index 000000000000..21f0322fd2ec
--- /dev/null
+++ b/test/tools/llvm-dwarfdump/X86/debug_addr_segment_selector.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc %s -filetype obj -triple i386-pc-linux -o - | \
+# RUN: llvm-dwarfdump -debug-addr - 2> %t.err | FileCheck %s
+# RUN: FileCheck %s -input-file %t.err -check-prefix=ERR
+
+# CHECK: .debug_addr contents:
+# CHECK-NOT: {{.}}
+# ERR: .debug_addr table at offset 0x0 has unsupported segment selector size 1
+# ERR-NOT: {{.}}
+
+# non-zero segment_selector_size
+# TODO: make this valid
+ .section .debug_addr,"",@progbits
+.Ldebug_addr0:
+ .long 4 # unit_length = .short + .byte + .byte
+ .short 5 # version
+ .byte 4 # address_size
+ .byte 1 # segment_selector_size
diff --git a/test/tools/llvm-dwarfdump/X86/debug_addr_small_length_field.s b/test/tools/llvm-dwarfdump/X86/debug_addr_small_length_field.s
new file mode 100644
index 000000000000..cbecb98e6bec
--- /dev/null
+++ b/test/tools/llvm-dwarfdump/X86/debug_addr_small_length_field.s
@@ -0,0 +1,18 @@
+# RUN: llvm-mc %s -filetype obj -triple i386-pc-linux -o - | \
+# RUN: llvm-dwarfdump -debug-addr - 2> %t.err | FileCheck %s
+# RUN: FileCheck %s -input-file %t.err -check-prefix=ERR
+
+# CHECK: .debug_addr contents:
+# CHECK-NOT: {{.}}
+# ERR: .debug_addr table at offset 0x0 has too small length (0x5) to contain a complete header
+# ERR-NOT: {{.}}
+
+# too small length value
+ .section .debug_addr,"",@progbits
+.Ldebug_addr0:
+ .long 1 # unit_length
+ .short 5 # version
+ .byte 4 # address_size
+ .byte 0 # segment_selector_size
+ .long 0x00000000
+ .long 0x00000001
diff --git a/test/tools/llvm-dwarfdump/X86/debug_addr_too_small_for_length_field.s b/test/tools/llvm-dwarfdump/X86/debug_addr_too_small_for_length_field.s
new file mode 100644
index 000000000000..c26bfcb02e0a
--- /dev/null
+++ b/test/tools/llvm-dwarfdump/X86/debug_addr_too_small_for_length_field.s
@@ -0,0 +1,13 @@
+# RUN: llvm-mc %s -filetype obj -triple i386-pc-linux -o - | \
+# RUN: llvm-dwarfdump -debug-addr - 2> %t.err | FileCheck %s
+# RUN: FileCheck %s -input-file %t.err -check-prefix=ERR
+
+# CHECK: .debug_addr contents:
+# CHECK-NOT: {{.}}
+# ERR: section is not large enough to contain a .debug_addr table length at offset 0x0
+# ERR-NOT: {{.}}
+
+# too small section to contain length field
+ .section .debug_addr,"",@progbits
+.Ldebug_addr0:
+ .short 1 # unit_length
diff --git a/test/tools/llvm-dwarfdump/X86/debug_addr_too_small_for_section.s b/test/tools/llvm-dwarfdump/X86/debug_addr_too_small_for_section.s
new file mode 100644
index 000000000000..facffee69fb6
--- /dev/null
+++ b/test/tools/llvm-dwarfdump/X86/debug_addr_too_small_for_section.s
@@ -0,0 +1,16 @@
+# RUN: llvm-mc %s -filetype obj -triple i386-pc-linux -o - | \
+# RUN: llvm-dwarfdump -debug-addr - 2> %t.err | FileCheck %s
+# RUN: FileCheck %s -input-file %t.err -check-prefix=ERR
+
+# CHECK: .debug_addr contents:
+# CHECK-NOT: {{.}}
+# ERR: section is not large enough to contain a .debug_addr table of length 0x10 at offset 0x0
+# ERR-NOT: {{.}}
+
+# too small section to contain section of given length
+ .section .debug_addr,"",@progbits
+.Ldebug_addr0:
+ .long 12 # unit_length
+ .short 5 # version
+ .byte 4 # address_size
+ .byte 0 # segment_selector_size
diff --git a/test/tools/llvm-dwarfdump/X86/debug_addr_unsupported_version.s b/test/tools/llvm-dwarfdump/X86/debug_addr_unsupported_version.s
new file mode 100644
index 000000000000..f30dd8f0b979
--- /dev/null
+++ b/test/tools/llvm-dwarfdump/X86/debug_addr_unsupported_version.s
@@ -0,0 +1,42 @@
+# RUN: llvm-mc %s -filetype obj -triple i386-pc-linux -o - | \
+# RUN: llvm-dwarfdump -debug-addr - 2> %t.err | FileCheck %s
+# RUN: FileCheck %s -input-file %t.err -check-prefix=ERR
+
+# ERR: version 6 of .debug_addr section at offset 0x0 is not supported
+# ERR-NOT: {{.}}
+
+# CHECK: .debug_addr contents
+# CHECK-NEXT: length = 0x0000000c, version = 0x0005, addr_size = 0x04, seg_size = 0x00
+# CHECK-NEXT: Addrs: [
+# CHECK-NEXT: 0x00000002
+# CHECK-NEXT: 0x00000003
+# CHECK-NEXT: ]
+# CHECK-NOT: {{.}}
+
+ .section .debug_abbrev,"",@progbits
+ .byte 1 # Abbreviation Code
+ .section .debug_info,"",@progbits
+.Lcu_begin0:
+ .long 8 # Length of Unit
+ .short 5 # DWARF version number
+ .byte 1 # DWARF unit type
+ .byte 4 # Address Size (in bytes)
+ .long .debug_abbrev # Offset Into Abbrev. Section
+
+ .section .debug_addr,"",@progbits
+.Ldebug_addr0:
+ .long 12 # unit_length = .short + .byte + .byte + .long + .long
+ .short 6 # version
+ .byte 4 # address_size
+ .byte 0 # segment_selector_size
+ .long 0x00000000
+ .long 0x00000001
+
+ .section .debug_addr,"",@progbits
+.Ldebug_addr1:
+ .long 12 # unit_length = .short + .byte + .byte + .long + .long
+ .short 5 # version
+ .byte 4 # address_size
+ .byte 0 # segment_selector_size
+ .long 0x00000002
+ .long 0x00000003
diff --git a/test/tools/llvm-dwarfdump/X86/debug_addr_version_mismatch.s b/test/tools/llvm-dwarfdump/X86/debug_addr_version_mismatch.s
new file mode 100644
index 000000000000..e349f3386a6c
--- /dev/null
+++ b/test/tools/llvm-dwarfdump/X86/debug_addr_version_mismatch.s
@@ -0,0 +1,42 @@
+# RUN: llvm-mc %s -filetype obj -triple i386-pc-linux -o - | \
+# RUN: llvm-dwarfdump -debug-addr - 2> %t.err | FileCheck %s
+# RUN: FileCheck %s -input-file %t.err -check-prefix=ERR
+
+# ERR: .debug_addr table at offset 0x0 has version 4 which is different from the version suggested by the DWARF unit header: 5
+# ERR-NOT: {{.}}
+
+# CHECK: .debug_addr contents
+# CHECK-NEXT: length = 0x0000000c, version = 0x0005, addr_size = 0x04, seg_size = 0x00
+# CHECK-NEXT: Addrs: [
+# CHECK-NEXT: 0x00000000
+# CHECK-NEXT: 0x00000001
+# CHECK-NEXT: ]
+# CHECK-NOT: {{.}}
+
+ .section .debug_abbrev,"",@progbits
+ .byte 1 # Abbreviation Code
+ .section .debug_info,"",@progbits
+.Lcu_begin0:
+ .long 8 # Length of Unit
+ .short 5 # DWARF version number
+ .byte 1 # DWARF unit type
+ .byte 4 # Address Size (in bytes)
+ .long .debug_abbrev # Offset Into Abbrev. Section
+
+ .section .debug_addr,"",@progbits
+.Ldebug_addr0:
+ .long 12 # unit_length = .short + .byte + .byte + .long + .long
+ .short 4 # version
+ .byte 4 # address_size
+ .byte 0 # segment_selector_size
+ .long 0x00000000
+ .long 0x00000001
+
+ .section .debug_addr,"",@progbits
+.Ldebug_addr1:
+ .long 12 # unit_length = .short + .byte + .byte + .long + .long
+ .short 5 # version
+ .byte 4 # address_size
+ .byte 0 # segment_selector_size
+ .long 0x00000000
+ .long 0x00000001
diff --git a/test/tools/llvm-mca/X86/Atom/resources-x86_32.s b/test/tools/llvm-mca/X86/Atom/resources-x86_32.s
new file mode 100644
index 000000000000..f913a2f3082f
--- /dev/null
+++ b/test/tools/llvm-mca/X86/Atom/resources-x86_32.s
@@ -0,0 +1,72 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=i686-unknown-unknown -mcpu=atom -instruction-tables < %s | FileCheck %s
+
+aaa
+
+aad
+aad $7
+
+aam
+aam $7
+
+aas
+
+bound %bx, (%eax)
+bound %ebx, (%eax)
+
+daa
+
+das
+
+into
+
+leave
+
+salc
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 13 6.50 aaa
+# CHECK-NEXT: 1 7 3.50 aad
+# CHECK-NEXT: 1 7 3.50 aad $7
+# CHECK-NEXT: 1 21 10.50 aam
+# CHECK-NEXT: 1 21 10.50 aam $7
+# CHECK-NEXT: 1 13 6.50 aas
+# CHECK-NEXT: 1 11 5.50 U bound %bx, (%eax)
+# CHECK-NEXT: 1 11 5.50 U bound %ebx, (%eax)
+# CHECK-NEXT: 1 18 9.00 daa
+# CHECK-NEXT: 1 20 10.00 das
+# CHECK-NEXT: 1 6 3.00 U into
+# CHECK-NEXT: 1 2 1.00 * leave
+# CHECK-NEXT: 1 1 0.50 U salc
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - AtomPort0
+# CHECK-NEXT: [1] - AtomPort1
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1]
+# CHECK-NEXT: 75.50 75.50
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] Instructions:
+# CHECK-NEXT: 6.50 6.50 aaa
+# CHECK-NEXT: 3.50 3.50 aad
+# CHECK-NEXT: 3.50 3.50 aad $7
+# CHECK-NEXT: 10.50 10.50 aam
+# CHECK-NEXT: 10.50 10.50 aam $7
+# CHECK-NEXT: 6.50 6.50 aas
+# CHECK-NEXT: 5.50 5.50 bound %bx, (%eax)
+# CHECK-NEXT: 5.50 5.50 bound %ebx, (%eax)
+# CHECK-NEXT: 9.00 9.00 daa
+# CHECK-NEXT: 10.00 10.00 das
+# CHECK-NEXT: 3.00 3.00 into
+# CHECK-NEXT: 1.00 1.00 leave
+# CHECK-NEXT: 0.50 0.50 salc
diff --git a/test/tools/llvm-mca/X86/Atom/resources-x86_64.s b/test/tools/llvm-mca/X86/Atom/resources-x86_64.s
index 32823eff8471..bd8578bf8e1a 100644
--- a/test/tools/llvm-mca/X86/Atom/resources-x86_64.s
+++ b/test/tools/llvm-mca/X86/Atom/resources-x86_64.s
@@ -217,6 +217,11 @@ cmpq %rsi, %rdi
cmpq %rsi, (%rax)
cmpq (%rax), %rdi
+cmpsb
+cmpsw
+cmpsl
+cmpsq
+
decb %dil
decb (%rax)
decw %di
@@ -285,6 +290,16 @@ incq (%rax)
lahf
+lodsb
+lodsw
+lodsl
+lodsq
+
+movsb
+movsw
+movsl
+movsq
+
movsbw %al, %di
movzbw %al, %di
movsbw (%rax), %di
@@ -622,6 +637,14 @@ shrdq $7, %rsi, %rdi
shldq $7, %rsi, (%rax)
shrdq $7, %rsi, (%rax)
+stc
+std
+
+stosb
+stosw
+stosl
+stosq
+
subb $7, %al
subb $7, %dil
subb $7, (%rax)
@@ -890,6 +913,10 @@ xorq (%rax), %rdi
# CHECK-NEXT: 1 1 0.50 cmpq %rsi, %rdi
# CHECK-NEXT: 1 1 1.00 * cmpq %rsi, (%rax)
# CHECK-NEXT: 1 1 1.00 * cmpq (%rax), %rdi
+# CHECK-NEXT: 1 3 1.50 U cmpsb %es:(%rdi), (%rsi)
+# CHECK-NEXT: 1 3 1.50 U cmpsw %es:(%rdi), (%rsi)
+# CHECK-NEXT: 1 3 1.50 U cmpsl %es:(%rdi), (%rsi)
+# CHECK-NEXT: 1 3 1.50 U cmpsq %es:(%rdi), (%rsi)
# CHECK-NEXT: 1 1 0.50 decb %dil
# CHECK-NEXT: 1 1 1.00 * * decb (%rax)
# CHECK-NEXT: 1 1 0.50 decw %di
@@ -949,6 +976,14 @@ xorq (%rax), %rdi
# CHECK-NEXT: 1 1 0.50 incq %rdi
# CHECK-NEXT: 1 1 1.00 * * incq (%rax)
# CHECK-NEXT: 1 2 1.00 lahf
+# CHECK-NEXT: 1 2 1.00 U lodsb (%rsi), %al
+# CHECK-NEXT: 1 2 1.00 U lodsw (%rsi), %ax
+# CHECK-NEXT: 1 2 1.00 U lodsl (%rsi), %eax
+# CHECK-NEXT: 1 2 1.00 U lodsq (%rsi), %rax
+# CHECK-NEXT: 1 3 1.50 U movsb (%rsi), %es:(%rdi)
+# CHECK-NEXT: 1 3 1.50 U movsw (%rsi), %es:(%rdi)
+# CHECK-NEXT: 1 3 1.50 U movsl (%rsi), %es:(%rdi)
+# CHECK-NEXT: 1 3 1.50 U movsq (%rsi), %es:(%rdi)
# CHECK-NEXT: 1 2 1.00 movsbw %al, %di
# CHECK-NEXT: 1 2 1.00 movzbw %al, %di
# CHECK-NEXT: 1 3 1.50 * movsbw (%rax), %di
@@ -1255,6 +1290,12 @@ xorq (%rax), %rdi
# CHECK-NEXT: 1 9 4.50 shrdq $7, %rsi, %rdi
# CHECK-NEXT: 1 9 4.50 * * shldq $7, %rsi, (%rax)
# CHECK-NEXT: 1 9 4.50 * * shrdq $7, %rsi, (%rax)
+# CHECK-NEXT: 1 1 0.50 U stc
+# CHECK-NEXT: 1 21 10.50 U std
+# CHECK-NEXT: 1 1 0.50 U stosb %al, %es:(%rdi)
+# CHECK-NEXT: 1 1 0.50 U stosw %ax, %es:(%rdi)
+# CHECK-NEXT: 1 1 0.50 U stosl %eax, %es:(%rdi)
+# CHECK-NEXT: 1 1 0.50 U stosq %rax, %es:(%rdi)
# CHECK-NEXT: 1 1 0.50 subb $7, %al
# CHECK-NEXT: 1 1 0.50 subb $7, %dil
# CHECK-NEXT: 1 1 1.00 * * subb $7, (%rax)
@@ -1322,7 +1363,7 @@ xorq (%rax), %rdi
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1]
-# CHECK-NEXT: 1418.00 1088.00
+# CHECK-NEXT: 1447.00 1117.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] Instructions:
@@ -1517,6 +1558,10 @@ xorq (%rax), %rdi
# CHECK-NEXT: 0.50 0.50 cmpq %rsi, %rdi
# CHECK-NEXT: 1.00 - cmpq %rsi, (%rax)
# CHECK-NEXT: 1.00 - cmpq (%rax), %rdi
+# CHECK-NEXT: 1.50 1.50 cmpsb %es:(%rdi), (%rsi)
+# CHECK-NEXT: 1.50 1.50 cmpsw %es:(%rdi), (%rsi)
+# CHECK-NEXT: 1.50 1.50 cmpsl %es:(%rdi), (%rsi)
+# CHECK-NEXT: 1.50 1.50 cmpsq %es:(%rdi), (%rsi)
# CHECK-NEXT: 0.50 0.50 decb %dil
# CHECK-NEXT: 1.00 - decb (%rax)
# CHECK-NEXT: 0.50 0.50 decw %di
@@ -1576,6 +1621,14 @@ xorq (%rax), %rdi
# CHECK-NEXT: 0.50 0.50 incq %rdi
# CHECK-NEXT: 1.00 - incq (%rax)
# CHECK-NEXT: 1.00 1.00 lahf
+# CHECK-NEXT: 1.00 1.00 lodsb (%rsi), %al
+# CHECK-NEXT: 1.00 1.00 lodsw (%rsi), %ax
+# CHECK-NEXT: 1.00 1.00 lodsl (%rsi), %eax
+# CHECK-NEXT: 1.00 1.00 lodsq (%rsi), %rax
+# CHECK-NEXT: 1.50 1.50 movsb (%rsi), %es:(%rdi)
+# CHECK-NEXT: 1.50 1.50 movsw (%rsi), %es:(%rdi)
+# CHECK-NEXT: 1.50 1.50 movsl (%rsi), %es:(%rdi)
+# CHECK-NEXT: 1.50 1.50 movsq (%rsi), %es:(%rdi)
# CHECK-NEXT: 1.00 1.00 movsbw %al, %di
# CHECK-NEXT: 1.00 1.00 movzbw %al, %di
# CHECK-NEXT: 1.50 1.50 movsbw (%rax), %di
@@ -1882,6 +1935,12 @@ xorq (%rax), %rdi
# CHECK-NEXT: 4.50 4.50 shrdq $7, %rsi, %rdi
# CHECK-NEXT: 4.50 4.50 shldq $7, %rsi, (%rax)
# CHECK-NEXT: 4.50 4.50 shrdq $7, %rsi, (%rax)
+# CHECK-NEXT: 0.50 0.50 stc
+# CHECK-NEXT: 10.50 10.50 std
+# CHECK-NEXT: 0.50 0.50 stosb %al, %es:(%rdi)
+# CHECK-NEXT: 0.50 0.50 stosw %ax, %es:(%rdi)
+# CHECK-NEXT: 0.50 0.50 stosl %eax, %es:(%rdi)
+# CHECK-NEXT: 0.50 0.50 stosq %rax, %es:(%rdi)
# CHECK-NEXT: 0.50 0.50 subb $7, %al
# CHECK-NEXT: 0.50 0.50 subb $7, %dil
# CHECK-NEXT: 1.00 - subb $7, (%rax)
diff --git a/test/tools/llvm-mca/X86/Broadwell/resources-x86_32.s b/test/tools/llvm-mca/X86/Broadwell/resources-x86_32.s
new file mode 100644
index 000000000000..b880fe5b8103
--- /dev/null
+++ b/test/tools/llvm-mca/X86/Broadwell/resources-x86_32.s
@@ -0,0 +1,80 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=i686-unknown-unknown -mcpu=broadwell -instruction-tables < %s | FileCheck %s
+
+aaa
+
+aad
+aad $7
+
+aam
+aam $7
+
+aas
+
+bound %bx, (%eax)
+bound %ebx, (%eax)
+
+daa
+
+das
+
+into
+
+leave
+
+salc
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 100 0.25 aaa
+# CHECK-NEXT: 1 100 0.25 aad
+# CHECK-NEXT: 1 100 0.25 aad $7
+# CHECK-NEXT: 1 100 0.25 aam
+# CHECK-NEXT: 1 100 0.25 aam $7
+# CHECK-NEXT: 1 100 0.25 aas
+# CHECK-NEXT: 1 100 0.25 U bound %bx, (%eax)
+# CHECK-NEXT: 1 100 0.25 U bound %ebx, (%eax)
+# CHECK-NEXT: 1 100 0.25 daa
+# CHECK-NEXT: 1 100 0.25 das
+# CHECK-NEXT: 1 100 0.25 U into
+# CHECK-NEXT: 3 7 0.50 * leave
+# CHECK-NEXT: 1 1 0.25 U salc
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - BWDivider
+# CHECK-NEXT: [1] - BWFPDivider
+# CHECK-NEXT: [2] - BWPort0
+# CHECK-NEXT: [3] - BWPort1
+# CHECK-NEXT: [4] - BWPort2
+# CHECK-NEXT: [5] - BWPort3
+# CHECK-NEXT: [6] - BWPort4
+# CHECK-NEXT: [7] - BWPort5
+# CHECK-NEXT: [8] - BWPort6
+# CHECK-NEXT: [9] - BWPort7
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
+# CHECK-NEXT: - - 3.50 3.50 0.50 0.50 - 3.50 3.50 -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - aaa
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - aad
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - aad $7
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - aam
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - aam $7
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - aas
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - bound %bx, (%eax)
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - bound %ebx, (%eax)
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - daa
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - das
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - into
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 0.50 0.50 - leave
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - salc
diff --git a/test/tools/llvm-mca/X86/Broadwell/resources-x86_64.s b/test/tools/llvm-mca/X86/Broadwell/resources-x86_64.s
index ea6d7c6052b0..1083227070ce 100644
--- a/test/tools/llvm-mca/X86/Broadwell/resources-x86_64.s
+++ b/test/tools/llvm-mca/X86/Broadwell/resources-x86_64.s
@@ -217,6 +217,11 @@ cmpq %rsi, %rdi
cmpq %rsi, (%rax)
cmpq (%rax), %rdi
+cmpsb
+cmpsw
+cmpsl
+cmpsq
+
decb %dil
decb (%rax)
decw %di
@@ -285,6 +290,16 @@ incq (%rax)
lahf
+lodsb
+lodsw
+lodsl
+lodsq
+
+movsb
+movsw
+movsl
+movsq
+
movsbw %al, %di
movzbw %al, %di
movsbw (%rax), %di
@@ -622,6 +637,14 @@ shrdq $7, %rsi, %rdi
shldq $7, %rsi, (%rax)
shrdq $7, %rsi, (%rax)
+stc
+std
+
+stosb
+stosw
+stosl
+stosq
+
subb $7, %al
subb $7, %dil
subb $7, (%rax)
@@ -890,6 +913,10 @@ xorq (%rax), %rdi
# CHECK-NEXT: 1 1 0.25 cmpq %rsi, %rdi
# CHECK-NEXT: 2 6 0.50 * cmpq %rsi, (%rax)
# CHECK-NEXT: 2 6 0.50 * cmpq (%rax), %rdi
+# CHECK-NEXT: 1 100 0.25 U cmpsb %es:(%rdi), (%rsi)
+# CHECK-NEXT: 1 100 0.25 U cmpsw %es:(%rdi), (%rsi)
+# CHECK-NEXT: 1 100 0.25 U cmpsl %es:(%rdi), (%rsi)
+# CHECK-NEXT: 1 100 0.25 U cmpsq %es:(%rdi), (%rsi)
# CHECK-NEXT: 1 1 0.25 decb %dil
# CHECK-NEXT: 3 7 1.00 * * decb (%rax)
# CHECK-NEXT: 1 1 0.25 decw %di
@@ -949,6 +976,14 @@ xorq (%rax), %rdi
# CHECK-NEXT: 1 1 0.25 incq %rdi
# CHECK-NEXT: 3 7 1.00 * * incq (%rax)
# CHECK-NEXT: 1 1 0.50 lahf
+# CHECK-NEXT: 1 100 0.25 U lodsb (%rsi), %al
+# CHECK-NEXT: 1 100 0.25 U lodsw (%rsi), %ax
+# CHECK-NEXT: 1 100 0.25 U lodsl (%rsi), %eax
+# CHECK-NEXT: 1 100 0.25 U lodsq (%rsi), %rax
+# CHECK-NEXT: 1 100 0.25 U movsb (%rsi), %es:(%rdi)
+# CHECK-NEXT: 1 100 0.25 U movsw (%rsi), %es:(%rdi)
+# CHECK-NEXT: 1 100 0.25 U movsl (%rsi), %es:(%rdi)
+# CHECK-NEXT: 1 100 0.25 U movsq (%rsi), %es:(%rdi)
# CHECK-NEXT: 1 1 0.25 movsbw %al, %di
# CHECK-NEXT: 1 1 0.25 movzbw %al, %di
# CHECK-NEXT: 1 5 0.50 * movsbw (%rax), %di
@@ -1255,6 +1290,12 @@ xorq (%rax), %rdi
# CHECK-NEXT: 1 3 1.00 shrdq $7, %rsi, %rdi
# CHECK-NEXT: 4 9 1.00 * * shldq $7, %rsi, (%rax)
# CHECK-NEXT: 4 9 1.00 * * shrdq $7, %rsi, (%rax)
+# CHECK-NEXT: 1 1 0.25 U stc
+# CHECK-NEXT: 6 6 1.50 U std
+# CHECK-NEXT: 3 2 1.00 U stosb %al, %es:(%rdi)
+# CHECK-NEXT: 3 2 1.00 U stosw %ax, %es:(%rdi)
+# CHECK-NEXT: 3 2 1.00 U stosl %eax, %es:(%rdi)
+# CHECK-NEXT: 3 2 1.00 U stosq %rax, %es:(%rdi)
# CHECK-NEXT: 1 1 0.25 subb $7, %al
# CHECK-NEXT: 1 1 0.25 subb $7, %dil
# CHECK-NEXT: 3 7 1.00 * * subb $7, (%rax)
@@ -1330,7 +1371,7 @@ xorq (%rax), %rdi
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: 50.00 - 377.00 267.50 218.00 218.00 167.00 161.00 343.50 69.00
+# CHECK-NEXT: 50.00 - 382.50 273.00 219.33 219.33 171.00 166.50 350.00 70.33
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -1525,6 +1566,10 @@ xorq (%rax), %rdi
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - cmpq %rsi, %rdi
# CHECK-NEXT: - - 0.25 0.25 0.50 0.50 - 0.25 0.25 - cmpq %rsi, (%rax)
# CHECK-NEXT: - - 0.25 0.25 0.50 0.50 - 0.25 0.25 - cmpq (%rax), %rdi
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - cmpsb %es:(%rdi), (%rsi)
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - cmpsw %es:(%rdi), (%rsi)
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - cmpsl %es:(%rdi), (%rsi)
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - cmpsq %es:(%rdi), (%rsi)
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - decb %dil
# CHECK-NEXT: - - 0.25 0.25 0.83 0.83 1.00 0.25 0.25 0.33 decb (%rax)
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - decw %di
@@ -1584,6 +1629,14 @@ xorq (%rax), %rdi
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - incq %rdi
# CHECK-NEXT: - - 0.25 0.25 0.83 0.83 1.00 0.25 0.25 0.33 incq (%rax)
# CHECK-NEXT: - - 0.50 - - - - - 0.50 - lahf
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - lodsb (%rsi), %al
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - lodsw (%rsi), %ax
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - lodsl (%rsi), %eax
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - lodsq (%rsi), %rax
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - movsb (%rsi), %es:(%rdi)
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - movsw (%rsi), %es:(%rdi)
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - movsl (%rsi), %es:(%rdi)
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - movsq (%rsi), %es:(%rdi)
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - movsbw %al, %di
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - movzbw %al, %di
# CHECK-NEXT: - - - - 0.50 0.50 - - - - movsbw (%rax), %di
@@ -1890,6 +1943,12 @@ xorq (%rax), %rdi
# CHECK-NEXT: - - - 1.00 - - - - - - shrdq $7, %rsi, %rdi
# CHECK-NEXT: - - 0.25 1.25 0.83 0.83 - 0.25 0.25 0.33 shldq $7, %rsi, (%rax)
# CHECK-NEXT: - - 0.25 1.25 0.83 0.83 - 0.25 0.25 0.33 shrdq $7, %rsi, (%rax)
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - stc
+# CHECK-NEXT: - - 1.25 1.25 - - - 1.25 2.25 - std
+# CHECK-NEXT: - - 0.25 0.25 0.33 0.33 1.00 0.25 0.25 0.33 stosb %al, %es:(%rdi)
+# CHECK-NEXT: - - 0.25 0.25 0.33 0.33 1.00 0.25 0.25 0.33 stosw %ax, %es:(%rdi)
+# CHECK-NEXT: - - 0.25 0.25 0.33 0.33 1.00 0.25 0.25 0.33 stosl %eax, %es:(%rdi)
+# CHECK-NEXT: - - 0.25 0.25 0.33 0.33 1.00 0.25 0.25 0.33 stosq %rax, %es:(%rdi)
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - subb $7, %al
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - subb $7, %dil
# CHECK-NEXT: - - 0.25 0.25 0.83 0.83 1.00 0.25 0.25 0.33 subb $7, (%rax)
diff --git a/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-cmp.s b/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-cmp.s
index bc5ceb5b72e7..001fb8ed6612 100644
--- a/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-cmp.s
+++ b/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-cmp.s
@@ -11,9 +11,9 @@ cmovae %ebx, %eax
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 3000
-# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total Cycles: 1504
# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: IPC: 1.99
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Instruction Info:
@@ -54,14 +54,14 @@ cmovae %ebx, %eax
# CHECK-NEXT: 1.00 - - - - - - - - - - - - - cmovael %ebx, %eax
# CHECK: Timeline view:
-# CHECK-NEXT: Index 012345678
+# CHECK-NEXT: Index 0123456
-# CHECK: [0,0] DeER . . cmpl %eax, %eax
-# CHECK-NEXT: [0,1] D=eER. . cmovael %ebx, %eax
-# CHECK-NEXT: [1,0] .D=eER . cmpl %eax, %eax
-# CHECK-NEXT: [1,1] .D==eER . cmovael %ebx, %eax
-# CHECK-NEXT: [2,0] . D==eER. cmpl %eax, %eax
-# CHECK-NEXT: [2,1] . D===eER cmovael %ebx, %eax
+# CHECK: [0,0] DeER .. cmpl %eax, %eax
+# CHECK-NEXT: [0,1] D=eER.. cmovael %ebx, %eax
+# CHECK-NEXT: [1,0] .DeER.. cmpl %eax, %eax
+# CHECK-NEXT: [1,1] .D=eER. cmovael %ebx, %eax
+# CHECK-NEXT: [2,0] . DeER. cmpl %eax, %eax
+# CHECK-NEXT: [2,1] . D=eER cmovael %ebx, %eax
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -70,5 +70,5 @@ cmovae %ebx, %eax
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
-# CHECK-NEXT: 0. 3 2.0 0.3 0.0 cmpl %eax, %eax
-# CHECK-NEXT: 1. 3 3.0 0.0 0.0 cmovael %ebx, %eax
+# CHECK-NEXT: 0. 3 1.0 1.0 0.0 cmpl %eax, %eax
+# CHECK-NEXT: 1. 3 2.0 0.0 0.0 cmovael %ebx, %eax
diff --git a/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpeq.s b/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpeq.s
index 97bf501e577a..04007f24e42b 100644
--- a/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpeq.s
+++ b/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpeq.s
@@ -14,9 +14,9 @@ vpcmpeqq %xmm3, %xmm3, %xmm0
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 6000
-# CHECK-NEXT: Total Cycles: 6003
+# CHECK-NEXT: Total Cycles: 3003
# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: IPC: 2.00
# CHECK-NEXT: Block RThroughput: 2.0
# CHECK: Instruction Info:
@@ -61,21 +61,20 @@ vpcmpeqq %xmm3, %xmm3, %xmm0
# CHECK-NEXT: - - - - - 1.00 - - - - - 1.00 - - vpcmpeqq %xmm3, %xmm3, %xmm0
# CHECK: Timeline view:
-# CHECK-NEXT: 01234
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 012345678
-# CHECK: [0,0] DeER . . . vpcmpeqb %xmm0, %xmm0, %xmm1
-# CHECK-NEXT: [0,1] D=eER. . . vpcmpeqw %xmm1, %xmm1, %xmm2
-# CHECK-NEXT: [0,2] .D=eER . . vpcmpeqd %xmm2, %xmm2, %xmm3
-# CHECK-NEXT: [0,3] .D==eER . . vpcmpeqq %xmm3, %xmm3, %xmm0
-# CHECK-NEXT: [1,0] . D==eER . . vpcmpeqb %xmm0, %xmm0, %xmm1
-# CHECK-NEXT: [1,1] . D===eER . . vpcmpeqw %xmm1, %xmm1, %xmm2
-# CHECK-NEXT: [1,2] . D===eER. . vpcmpeqd %xmm2, %xmm2, %xmm3
-# CHECK-NEXT: [1,3] . D====eER . vpcmpeqq %xmm3, %xmm3, %xmm0
-# CHECK-NEXT: [2,0] . D====eER . vpcmpeqb %xmm0, %xmm0, %xmm1
-# CHECK-NEXT: [2,1] . D=====eER . vpcmpeqw %xmm1, %xmm1, %xmm2
-# CHECK-NEXT: [2,2] . D=====eER. vpcmpeqd %xmm2, %xmm2, %xmm3
-# CHECK-NEXT: [2,3] . D======eER vpcmpeqq %xmm3, %xmm3, %xmm0
+# CHECK: [0,0] DeER . . vpcmpeqb %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [0,1] DeER . . vpcmpeqw %xmm1, %xmm1, %xmm2
+# CHECK-NEXT: [0,2] .DeER. . vpcmpeqd %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: [0,3] .DeER. . vpcmpeqq %xmm3, %xmm3, %xmm0
+# CHECK-NEXT: [1,0] . DeER . vpcmpeqb %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [1,1] . DeER . vpcmpeqw %xmm1, %xmm1, %xmm2
+# CHECK-NEXT: [1,2] . DeER . vpcmpeqd %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: [1,3] . DeER . vpcmpeqq %xmm3, %xmm3, %xmm0
+# CHECK-NEXT: [2,0] . DeER. vpcmpeqb %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [2,1] . DeER. vpcmpeqw %xmm1, %xmm1, %xmm2
+# CHECK-NEXT: [2,2] . DeER vpcmpeqd %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: [2,3] . DeER vpcmpeqq %xmm3, %xmm3, %xmm0
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -84,7 +83,7 @@ vpcmpeqq %xmm3, %xmm3, %xmm0
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
-# CHECK-NEXT: 0. 3 3.0 0.3 0.0 vpcmpeqb %xmm0, %xmm0, %xmm1
-# CHECK-NEXT: 1. 3 4.0 0.0 0.0 vpcmpeqw %xmm1, %xmm1, %xmm2
-# CHECK-NEXT: 2. 3 4.0 0.0 0.0 vpcmpeqd %xmm2, %xmm2, %xmm3
-# CHECK-NEXT: 3. 3 5.0 0.0 0.0 vpcmpeqq %xmm3, %xmm3, %xmm0
+# CHECK-NEXT: 0. 3 1.0 1.0 0.0 vpcmpeqb %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: 1. 3 1.0 1.0 0.0 vpcmpeqw %xmm1, %xmm1, %xmm2
+# CHECK-NEXT: 2. 3 1.0 1.0 0.0 vpcmpeqd %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: 3. 3 1.0 1.0 0.0 vpcmpeqq %xmm3, %xmm3, %xmm0
diff --git a/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-2.s b/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-2.s
index 00b88954e489..da94624fd551 100644
--- a/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-2.s
+++ b/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-2.s
@@ -13,9 +13,9 @@ sbb %eax, %eax
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 4500
-# CHECK-NEXT: Total Cycles: 6745
+# CHECK-NEXT: Total Cycles: 3007
# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 0.67
+# CHECK-NEXT: IPC: 1.50
# CHECK-NEXT: Block RThroughput: 2.0
# CHECK: Instruction Info:
@@ -49,27 +49,27 @@ sbb %eax, %eax
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
-# CHECK-NEXT: 2.01 1.99 - - - - - - 1.00 - - - - -
+# CHECK-NEXT: 2.00 2.00 - - - - - - 1.00 - - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - imull %edx, %eax
-# CHECK-NEXT: 0.99 0.01 - - - - - - - - - - - - addl %edx, %edx
-# CHECK-NEXT: 1.01 0.99 - - - - - - - - - - - - sbbl %eax, %eax
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - addl %edx, %edx
+# CHECK-NEXT: 2.00 - - - - - - - - - - - - - sbbl %eax, %eax
# CHECK: Timeline view:
-# CHECK-NEXT: 012345
+# CHECK-NEXT: 01
# CHECK-NEXT: Index 0123456789
-# CHECK: [0,0] DeeeER . . imull %edx, %eax
-# CHECK-NEXT: [0,1] .DeE-R . . addl %edx, %edx
-# CHECK-NEXT: [0,2] .D==eER . . sbbl %eax, %eax
-# CHECK-NEXT: [1,0] . D===eeeER . imull %edx, %eax
-# CHECK-NEXT: [1,1] . DeE----R . addl %edx, %edx
-# CHECK-NEXT: [1,2] . D=====eER . sbbl %eax, %eax
-# CHECK-NEXT: [2,0] . D=====eeeER. imull %edx, %eax
-# CHECK-NEXT: [2,1] . DeE------R. addl %edx, %edx
-# CHECK-NEXT: [2,2] . D=======eER sbbl %eax, %eax
+# CHECK: [0,0] DeeeER .. imull %edx, %eax
+# CHECK-NEXT: [0,1] .DeE-R .. addl %edx, %edx
+# CHECK-NEXT: [0,2] .D=eE-R .. sbbl %eax, %eax
+# CHECK-NEXT: [1,0] . D==eeeER.. imull %edx, %eax
+# CHECK-NEXT: [1,1] . DeE---R.. addl %edx, %edx
+# CHECK-NEXT: [1,2] . D=eE---R. sbbl %eax, %eax
+# CHECK-NEXT: [2,0] . D=eeeER. imull %edx, %eax
+# CHECK-NEXT: [2,1] . D=eE--R addl %edx, %edx
+# CHECK-NEXT: [2,2] . D==eE-R sbbl %eax, %eax
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -78,6 +78,6 @@ sbb %eax, %eax
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
-# CHECK-NEXT: 0. 3 3.7 0.7 0.0 imull %edx, %eax
-# CHECK-NEXT: 1. 3 1.0 1.0 3.7 addl %edx, %edx
-# CHECK-NEXT: 2. 3 5.7 0.0 0.0 sbbl %eax, %eax
+# CHECK-NEXT: 0. 3 2.0 0.7 0.0 imull %edx, %eax
+# CHECK-NEXT: 1. 3 1.3 1.3 2.0 addl %edx, %edx
+# CHECK-NEXT: 2. 3 2.3 0.0 1.7 sbbl %eax, %eax
diff --git a/test/tools/llvm-mca/X86/BtVer2/one-idioms.s b/test/tools/llvm-mca/X86/BtVer2/one-idioms.s
index 3beaf829c1a4..3c20002d9c8c 100644
--- a/test/tools/llvm-mca/X86/BtVer2/one-idioms.s
+++ b/test/tools/llvm-mca/X86/BtVer2/one-idioms.s
@@ -1,9 +1,11 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
-# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -timeline -register-file-stats -iterations=1 < %s | FileCheck %s
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -timeline -timeline-max-iterations=1 -register-file-stats < %s | FileCheck %s
# These are dependency-breaking one-idioms.
# Much like zero-idioms, but they produce ones, and do consume resources.
+# perf stats reports a throughput of 2.00 IPC.
+
pcmpeqb %mm2, %mm2
pcmpeqd %mm2, %mm2
pcmpeqw %mm2, %mm2
@@ -25,11 +27,11 @@ vpcmpeqw %xmm3, %xmm3, %xmm5
# FIXME: their handling is broken in llvm-mca.
-# CHECK: Iterations: 1
-# CHECK-NEXT: Instructions: 15
-# CHECK-NEXT: Total Cycles: 12
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1500
+# CHECK-NEXT: Total Cycles: 753
# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 1.25
+# CHECK-NEXT: IPC: 1.99
# CHECK-NEXT: Block RThroughput: 7.5
# CHECK: Instruction Info:
@@ -58,13 +60,13 @@ vpcmpeqw %xmm3, %xmm3, %xmm5
# CHECK-NEXT: 1 1 0.50 vpcmpeqw %xmm3, %xmm3, %xmm5
# CHECK: Register File statistics:
-# CHECK-NEXT: Total number of mappings created: 15
-# CHECK-NEXT: Max number of mappings used: 8
+# CHECK-NEXT: Total number of mappings created: 1500
+# CHECK-NEXT: Max number of mappings used: 6
# CHECK: * Register File #1 -- JFpuPRF:
# CHECK-NEXT: Number of physical registers: 72
-# CHECK-NEXT: Total number of mappings created: 15
-# CHECK-NEXT: Max number of mappings used: 8
+# CHECK-NEXT: Total number of mappings created: 1500
+# CHECK-NEXT: Max number of mappings used: 6
# CHECK: * Register File #2 -- JIntegerPRF:
# CHECK-NEXT: Number of physical registers: 64
@@ -89,45 +91,45 @@ vpcmpeqw %xmm3, %xmm3, %xmm5
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
-# CHECK-NEXT: - - - - - 7.00 8.00 - - - - 7.00 8.00 -
+# CHECK-NEXT: - - - - - 7.50 7.50 - - - - 7.50 7.50 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
-# CHECK-NEXT: - - - - - - 1.00 - - - - - 1.00 - pcmpeqb %mm2, %mm2
-# CHECK-NEXT: - - - - - 1.00 - - - - - 1.00 - - pcmpeqd %mm2, %mm2
-# CHECK-NEXT: - - - - - 1.00 - - - - - 1.00 - - pcmpeqw %mm2, %mm2
-# CHECK-NEXT: - - - - - - 1.00 - - - - - 1.00 - pcmpeqb %xmm2, %xmm2
-# CHECK-NEXT: - - - - - - 1.00 - - - - - 1.00 - pcmpeqd %xmm2, %xmm2
-# CHECK-NEXT: - - - - - 1.00 - - - - - 1.00 - - pcmpeqq %xmm2, %xmm2
-# CHECK-NEXT: - - - - - 1.00 - - - - - 1.00 - - pcmpeqw %xmm2, %xmm2
-# CHECK-NEXT: - - - - - - 1.00 - - - - - 1.00 - vpcmpeqb %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: - - - - - - 1.00 - - - - - 1.00 - vpcmpeqd %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: - - - - - 1.00 - - - - - 1.00 - - vpcmpeqq %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: - - - - - - 1.00 - - - - - 1.00 - vpcmpeqw %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: - - - - - 1.00 - - - - - 1.00 - - vpcmpeqb %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: - - - - - - 1.00 - - - - - 1.00 - vpcmpeqd %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: - - - - - 1.00 - - - - - 1.00 - - vpcmpeqq %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: - - - - - - 1.00 - - - - - 1.00 - vpcmpeqw %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - pcmpeqb %mm2, %mm2
+# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - pcmpeqd %mm2, %mm2
+# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - pcmpeqw %mm2, %mm2
+# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - pcmpeqb %xmm2, %xmm2
+# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - pcmpeqd %xmm2, %xmm2
+# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - pcmpeqq %xmm2, %xmm2
+# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - pcmpeqw %xmm2, %xmm2
+# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - vpcmpeqb %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - vpcmpeqd %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - vpcmpeqq %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - vpcmpeqw %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - vpcmpeqb %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - vpcmpeqd %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - vpcmpeqq %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - vpcmpeqw %xmm3, %xmm3, %xmm5
# CHECK: Timeline view:
-# CHECK-NEXT: 01
+# CHECK-NEXT: 0
# CHECK-NEXT: Index 0123456789
-# CHECK: [0,0] DeER . .. pcmpeqb %mm2, %mm2
-# CHECK-NEXT: [0,1] D=eER. .. pcmpeqd %mm2, %mm2
-# CHECK-NEXT: [0,2] .D=eER .. pcmpeqw %mm2, %mm2
-# CHECK-NEXT: [0,3] .DeE-R .. pcmpeqb %xmm2, %xmm2
-# CHECK-NEXT: [0,4] . DeE-R .. pcmpeqd %xmm2, %xmm2
-# CHECK-NEXT: [0,5] . D=eER .. pcmpeqq %xmm2, %xmm2
-# CHECK-NEXT: [0,6] . D=eER .. pcmpeqw %xmm2, %xmm2
-# CHECK-NEXT: [0,7] . DeE-R .. vpcmpeqb %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,8] . DeE-R .. vpcmpeqd %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,9] . D=eER .. vpcmpeqq %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,10] . D=eER.. vpcmpeqw %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,11] . D==eER. vpcmpeqb %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,12] . .D=eER. vpcmpeqd %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,13] . .D==eER vpcmpeqq %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,14] . . D=eER vpcmpeqw %xmm3, %xmm3, %xmm5
+# CHECK: [0,0] DeER . . pcmpeqb %mm2, %mm2
+# CHECK-NEXT: [0,1] DeER . . pcmpeqd %mm2, %mm2
+# CHECK-NEXT: [0,2] .DeER. . pcmpeqw %mm2, %mm2
+# CHECK-NEXT: [0,3] .DeER. . pcmpeqb %xmm2, %xmm2
+# CHECK-NEXT: [0,4] . DeER . pcmpeqd %xmm2, %xmm2
+# CHECK-NEXT: [0,5] . DeER . pcmpeqq %xmm2, %xmm2
+# CHECK-NEXT: [0,6] . DeER . pcmpeqw %xmm2, %xmm2
+# CHECK-NEXT: [0,7] . DeER . vpcmpeqb %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,8] . DeER . vpcmpeqd %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,9] . DeER . vpcmpeqq %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,10] . DeER . vpcmpeqw %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,11] . DeER . vpcmpeqb %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,12] . .DeER. vpcmpeqd %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,13] . .DeER. vpcmpeqq %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,14] . . DeER vpcmpeqw %xmm3, %xmm3, %xmm5
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -137,17 +139,17 @@ vpcmpeqw %xmm3, %xmm3, %xmm5
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 pcmpeqb %mm2, %mm2
-# CHECK-NEXT: 1. 1 2.0 0.0 0.0 pcmpeqd %mm2, %mm2
-# CHECK-NEXT: 2. 1 2.0 0.0 0.0 pcmpeqw %mm2, %mm2
-# CHECK-NEXT: 3. 1 1.0 1.0 1.0 pcmpeqb %xmm2, %xmm2
-# CHECK-NEXT: 4. 1 1.0 0.0 1.0 pcmpeqd %xmm2, %xmm2
-# CHECK-NEXT: 5. 1 2.0 0.0 0.0 pcmpeqq %xmm2, %xmm2
-# CHECK-NEXT: 6. 1 2.0 0.0 0.0 pcmpeqw %xmm2, %xmm2
-# CHECK-NEXT: 7. 1 1.0 1.0 1.0 vpcmpeqb %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 8. 1 1.0 0.0 1.0 vpcmpeqd %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 9. 1 2.0 0.0 0.0 vpcmpeqq %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 10. 1 2.0 0.0 0.0 vpcmpeqw %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 11. 1 3.0 0.0 0.0 vpcmpeqb %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 12. 1 2.0 0.0 0.0 vpcmpeqd %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 13. 1 3.0 1.0 0.0 vpcmpeqq %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 14. 1 2.0 1.0 0.0 vpcmpeqw %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 1. 1 1.0 1.0 0.0 pcmpeqd %mm2, %mm2
+# CHECK-NEXT: 2. 1 1.0 1.0 0.0 pcmpeqw %mm2, %mm2
+# CHECK-NEXT: 3. 1 1.0 1.0 0.0 pcmpeqb %xmm2, %xmm2
+# CHECK-NEXT: 4. 1 1.0 1.0 0.0 pcmpeqd %xmm2, %xmm2
+# CHECK-NEXT: 5. 1 1.0 1.0 0.0 pcmpeqq %xmm2, %xmm2
+# CHECK-NEXT: 6. 1 1.0 1.0 0.0 pcmpeqw %xmm2, %xmm2
+# CHECK-NEXT: 7. 1 1.0 1.0 0.0 vpcmpeqb %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 8. 1 1.0 1.0 0.0 vpcmpeqd %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 9. 1 1.0 1.0 0.0 vpcmpeqq %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 10. 1 1.0 1.0 0.0 vpcmpeqw %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 11. 1 1.0 1.0 0.0 vpcmpeqb %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 12. 1 1.0 1.0 0.0 vpcmpeqd %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 13. 1 1.0 1.0 0.0 vpcmpeqq %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 14. 1 1.0 1.0 0.0 vpcmpeqw %xmm3, %xmm3, %xmm5
diff --git a/test/tools/llvm-mca/X86/BtVer2/resources-x86_32.s b/test/tools/llvm-mca/X86/BtVer2/resources-x86_32.s
new file mode 100644
index 000000000000..5556e78a0155
--- /dev/null
+++ b/test/tools/llvm-mca/X86/BtVer2/resources-x86_32.s
@@ -0,0 +1,84 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=i686-unknown-unknown -mcpu=btver2 -instruction-tables < %s | FileCheck %s
+
+aaa
+
+aad
+aad $7
+
+aam
+aam $7
+
+aas
+
+bound %bx, (%eax)
+bound %ebx, (%eax)
+
+daa
+
+das
+
+into
+
+leave
+
+salc
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 100 0.50 aaa
+# CHECK-NEXT: 1 100 0.50 aad
+# CHECK-NEXT: 1 100 0.50 aad $7
+# CHECK-NEXT: 1 100 0.50 aam
+# CHECK-NEXT: 1 100 0.50 aam $7
+# CHECK-NEXT: 1 100 0.50 aas
+# CHECK-NEXT: 1 100 0.50 U bound %bx, (%eax)
+# CHECK-NEXT: 1 100 0.50 U bound %ebx, (%eax)
+# CHECK-NEXT: 1 100 0.50 daa
+# CHECK-NEXT: 1 100 0.50 das
+# CHECK-NEXT: 1 100 0.50 U into
+# CHECK-NEXT: 1 1 0.50 * leave
+# CHECK-NEXT: 1 1 0.50 U salc
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - JALU0
+# CHECK-NEXT: [1] - JALU1
+# CHECK-NEXT: [2] - JDiv
+# CHECK-NEXT: [3] - JFPA
+# CHECK-NEXT: [4] - JFPM
+# CHECK-NEXT: [5] - JFPU0
+# CHECK-NEXT: [6] - JFPU1
+# CHECK-NEXT: [7] - JLAGU
+# CHECK-NEXT: [8] - JMul
+# CHECK-NEXT: [9] - JSAGU
+# CHECK-NEXT: [10] - JSTC
+# CHECK-NEXT: [11] - JVALU0
+# CHECK-NEXT: [12] - JVALU1
+# CHECK-NEXT: [13] - JVIMUL
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
+# CHECK-NEXT: 6.50 6.50 - - - - - - - - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - aaa
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - aad
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - aad $7
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - aam
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - aam $7
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - aas
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - bound %bx, (%eax)
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - bound %ebx, (%eax)
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - daa
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - das
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - into
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - leave
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - salc
diff --git a/test/tools/llvm-mca/X86/BtVer2/resources-x86_64.s b/test/tools/llvm-mca/X86/BtVer2/resources-x86_64.s
index 8a99598a5762..c5ffa6ced328 100644
--- a/test/tools/llvm-mca/X86/BtVer2/resources-x86_64.s
+++ b/test/tools/llvm-mca/X86/BtVer2/resources-x86_64.s
@@ -217,6 +217,11 @@ cmpq %rsi, %rdi
cmpq %rsi, (%rax)
cmpq (%rax), %rdi
+cmpsb
+cmpsw
+cmpsl
+cmpsq
+
decb %dil
decb (%rax)
decw %di
@@ -285,6 +290,16 @@ incq (%rax)
lahf
+lodsb
+lodsw
+lodsl
+lodsq
+
+movsb
+movsw
+movsl
+movsq
+
movsbw %al, %di
movzbw %al, %di
movsbw (%rax), %di
@@ -622,6 +637,14 @@ shrdq $7, %rsi, %rdi
shldq $7, %rsi, (%rax)
shrdq $7, %rsi, (%rax)
+stc
+std
+
+stosb
+stosw
+stosl
+stosq
+
subb $7, %al
subb $7, %dil
subb $7, (%rax)
@@ -890,6 +913,10 @@ xorq (%rax), %rdi
# CHECK-NEXT: 1 1 0.50 cmpq %rsi, %rdi
# CHECK-NEXT: 1 4 1.00 * cmpq %rsi, (%rax)
# CHECK-NEXT: 1 4 1.00 * cmpq (%rax), %rdi
+# CHECK-NEXT: 1 100 0.50 U cmpsb %es:(%rdi), (%rsi)
+# CHECK-NEXT: 1 100 0.50 U cmpsw %es:(%rdi), (%rsi)
+# CHECK-NEXT: 1 100 0.50 U cmpsl %es:(%rdi), (%rsi)
+# CHECK-NEXT: 1 100 0.50 U cmpsq %es:(%rdi), (%rsi)
# CHECK-NEXT: 1 1 0.50 decb %dil
# CHECK-NEXT: 2 5 1.00 * * decb (%rax)
# CHECK-NEXT: 1 1 0.50 decw %di
@@ -949,6 +976,14 @@ xorq (%rax), %rdi
# CHECK-NEXT: 1 1 0.50 incq %rdi
# CHECK-NEXT: 2 5 1.00 * * incq (%rax)
# CHECK-NEXT: 1 1 0.50 lahf
+# CHECK-NEXT: 1 100 0.50 U lodsb (%rsi), %al
+# CHECK-NEXT: 1 100 0.50 U lodsw (%rsi), %ax
+# CHECK-NEXT: 1 100 0.50 U lodsl (%rsi), %eax
+# CHECK-NEXT: 1 100 0.50 U lodsq (%rsi), %rax
+# CHECK-NEXT: 1 100 0.50 U movsb (%rsi), %es:(%rdi)
+# CHECK-NEXT: 1 100 0.50 U movsw (%rsi), %es:(%rdi)
+# CHECK-NEXT: 1 100 0.50 U movsl (%rsi), %es:(%rdi)
+# CHECK-NEXT: 1 100 0.50 U movsq (%rsi), %es:(%rdi)
# CHECK-NEXT: 1 1 0.50 movsbw %al, %di
# CHECK-NEXT: 1 1 0.50 movzbw %al, %di
# CHECK-NEXT: 1 4 1.00 * movsbw (%rax), %di
@@ -1255,6 +1290,12 @@ xorq (%rax), %rdi
# CHECK-NEXT: 6 3 3.00 shrdq $7, %rsi, %rdi
# CHECK-NEXT: 8 9 11.00 * * shldq $7, %rsi, (%rax)
# CHECK-NEXT: 8 9 11.00 * * shrdq $7, %rsi, (%rax)
+# CHECK-NEXT: 1 1 0.50 U stc
+# CHECK-NEXT: 1 1 0.50 U std
+# CHECK-NEXT: 1 100 0.50 U stosb %al, %es:(%rdi)
+# CHECK-NEXT: 1 100 0.50 U stosw %ax, %es:(%rdi)
+# CHECK-NEXT: 1 100 0.50 U stosl %eax, %es:(%rdi)
+# CHECK-NEXT: 1 100 0.50 U stosq %rax, %es:(%rdi)
# CHECK-NEXT: 1 1 0.50 subb $7, %al
# CHECK-NEXT: 1 1 0.50 subb $7, %dil
# CHECK-NEXT: 2 5 1.00 * * subb $7, (%rax)
@@ -1334,7 +1375,7 @@ xorq (%rax), %rdi
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
-# CHECK-NEXT: 493.00 543.00 380.00 - - - - 295.00 64.00 195.00 - - - -
+# CHECK-NEXT: 502.00 552.00 380.00 - - - - 295.00 64.00 195.00 - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
@@ -1529,6 +1570,10 @@ xorq (%rax), %rdi
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - cmpq %rsi, %rdi
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - - - cmpq %rsi, (%rax)
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - - - cmpq (%rax), %rdi
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - cmpsb %es:(%rdi), (%rsi)
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - cmpsw %es:(%rdi), (%rsi)
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - cmpsl %es:(%rdi), (%rsi)
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - cmpsq %es:(%rdi), (%rsi)
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - decb %dil
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - - - decb (%rax)
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - decw %di
@@ -1588,6 +1633,14 @@ xorq (%rax), %rdi
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - incq %rdi
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - - - incq (%rax)
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - lahf
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - lodsb (%rsi), %al
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - lodsw (%rsi), %ax
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - lodsl (%rsi), %eax
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - lodsq (%rsi), %rax
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movsb (%rsi), %es:(%rdi)
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movsw (%rsi), %es:(%rdi)
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movsl (%rsi), %es:(%rdi)
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movsq (%rsi), %es:(%rdi)
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movsbw %al, %di
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movzbw %al, %di
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - - - movsbw (%rax), %di
@@ -1894,6 +1947,12 @@ xorq (%rax), %rdi
# CHECK-NEXT: 3.00 3.00 - - - - - - - - - - - - shrdq $7, %rsi, %rdi
# CHECK-NEXT: 11.00 11.00 - - - - - 1.00 - - - - - - shldq $7, %rsi, (%rax)
# CHECK-NEXT: 11.00 11.00 - - - - - 1.00 - - - - - - shrdq $7, %rsi, (%rax)
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - stc
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - std
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - stosb %al, %es:(%rdi)
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - stosw %ax, %es:(%rdi)
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - stosl %eax, %es:(%rdi)
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - stosq %rax, %es:(%rdi)
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - subb $7, %al
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - subb $7, %dil
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - - - subb $7, (%rax)
diff --git a/test/tools/llvm-mca/X86/Generic/resources-x86_32.s b/test/tools/llvm-mca/X86/Generic/resources-x86_32.s
new file mode 100644
index 000000000000..b4672620cf43
--- /dev/null
+++ b/test/tools/llvm-mca/X86/Generic/resources-x86_32.s
@@ -0,0 +1,78 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=i686-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
+
+aaa
+
+aad
+aad $7
+
+aam
+aam $7
+
+aas
+
+bound %bx, (%eax)
+bound %ebx, (%eax)
+
+daa
+
+das
+
+into
+
+leave
+
+salc
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 100 0.33 aaa
+# CHECK-NEXT: 1 100 0.33 aad
+# CHECK-NEXT: 1 100 0.33 aad $7
+# CHECK-NEXT: 1 100 0.33 aam
+# CHECK-NEXT: 1 100 0.33 aam $7
+# CHECK-NEXT: 1 100 0.33 aas
+# CHECK-NEXT: 1 100 0.33 U bound %bx, (%eax)
+# CHECK-NEXT: 1 100 0.33 U bound %ebx, (%eax)
+# CHECK-NEXT: 1 100 0.33 daa
+# CHECK-NEXT: 1 100 0.33 das
+# CHECK-NEXT: 1 100 0.33 U into
+# CHECK-NEXT: 3 7 0.67 * leave
+# CHECK-NEXT: 1 1 0.33 U salc
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 4.67 4.67 - 4.67 0.50 0.50
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - aaa
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - aad
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - aad $7
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - aam
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - aam $7
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - aas
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - bound %bx, (%eax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - bound %ebx, (%eax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - daa
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - das
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - into
+# CHECK-NEXT: - - 0.67 0.67 - 0.67 0.50 0.50 leave
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - salc
diff --git a/test/tools/llvm-mca/X86/Generic/resources-x86_64.s b/test/tools/llvm-mca/X86/Generic/resources-x86_64.s
index e27b322fa9b1..c04a56673db9 100644
--- a/test/tools/llvm-mca/X86/Generic/resources-x86_64.s
+++ b/test/tools/llvm-mca/X86/Generic/resources-x86_64.s
@@ -217,6 +217,11 @@ cmpq %rsi, %rdi
cmpq %rsi, (%rax)
cmpq (%rax), %rdi
+cmpsb
+cmpsw
+cmpsl
+cmpsq
+
decb %dil
decb (%rax)
decw %di
@@ -285,6 +290,16 @@ incq (%rax)
lahf
+lodsb
+lodsw
+lodsl
+lodsq
+
+movsb
+movsw
+movsl
+movsq
+
movsbw %al, %di
movzbw %al, %di
movsbw (%rax), %di
@@ -622,6 +637,14 @@ shrdq $7, %rsi, %rdi
shldq $7, %rsi, (%rax)
shrdq $7, %rsi, (%rax)
+stc
+std
+
+stosb
+stosw
+stosl
+stosq
+
subb $7, %al
subb $7, %dil
subb $7, (%rax)
@@ -890,6 +913,10 @@ xorq (%rax), %rdi
# CHECK-NEXT: 1 1 0.33 cmpq %rsi, %rdi
# CHECK-NEXT: 2 6 0.50 * cmpq %rsi, (%rax)
# CHECK-NEXT: 2 6 0.50 * cmpq (%rax), %rdi
+# CHECK-NEXT: 5 8 1.00 U cmpsb %es:(%rdi), (%rsi)
+# CHECK-NEXT: 5 8 1.00 U cmpsw %es:(%rdi), (%rsi)
+# CHECK-NEXT: 5 8 1.00 U cmpsl %es:(%rdi), (%rsi)
+# CHECK-NEXT: 5 8 1.00 U cmpsq %es:(%rdi), (%rsi)
# CHECK-NEXT: 1 1 0.33 decb %dil
# CHECK-NEXT: 3 7 1.00 * * decb (%rax)
# CHECK-NEXT: 1 1 0.33 decw %di
@@ -949,6 +976,14 @@ xorq (%rax), %rdi
# CHECK-NEXT: 1 1 0.33 incq %rdi
# CHECK-NEXT: 3 7 1.00 * * incq (%rax)
# CHECK-NEXT: 1 1 0.50 lahf
+# CHECK-NEXT: 3 7 0.67 U lodsb (%rsi), %al
+# CHECK-NEXT: 3 7 0.67 U lodsw (%rsi), %ax
+# CHECK-NEXT: 2 6 0.50 U lodsl (%rsi), %eax
+# CHECK-NEXT: 2 6 0.50 U lodsq (%rsi), %rax
+# CHECK-NEXT: 5 8 1.00 U movsb (%rsi), %es:(%rdi)
+# CHECK-NEXT: 5 8 1.00 U movsw (%rsi), %es:(%rdi)
+# CHECK-NEXT: 5 8 1.00 U movsl (%rsi), %es:(%rdi)
+# CHECK-NEXT: 5 8 1.00 U movsq (%rsi), %es:(%rdi)
# CHECK-NEXT: 1 1 0.33 movsbw %al, %di
# CHECK-NEXT: 1 1 0.33 movzbw %al, %di
# CHECK-NEXT: 1 5 0.50 * movsbw (%rax), %di
@@ -1255,6 +1290,12 @@ xorq (%rax), %rdi
# CHECK-NEXT: 2 2 0.67 shrdq $7, %rsi, %rdi
# CHECK-NEXT: 5 8 1.00 * * shldq $7, %rsi, (%rax)
# CHECK-NEXT: 5 8 1.00 * * shrdq $7, %rsi, (%rax)
+# CHECK-NEXT: 1 1 0.33 U stc
+# CHECK-NEXT: 1 1 0.33 U std
+# CHECK-NEXT: 3 5 1.00 U stosb %al, %es:(%rdi)
+# CHECK-NEXT: 3 5 1.00 U stosw %ax, %es:(%rdi)
+# CHECK-NEXT: 3 5 1.00 U stosl %eax, %es:(%rdi)
+# CHECK-NEXT: 3 5 1.00 U stosq %rax, %es:(%rdi)
# CHECK-NEXT: 1 1 0.33 subb $7, %al
# CHECK-NEXT: 1 1 0.33 subb $7, %dil
# CHECK-NEXT: 3 7 1.00 * * subb $7, (%rax)
@@ -1328,7 +1369,7 @@ xorq (%rax), %rdi
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
-# CHECK-NEXT: 160.00 - 365.50 171.00 210.00 356.50 254.00 254.00
+# CHECK-NEXT: 160.00 - 376.17 181.67 218.00 367.17 266.00 266.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
@@ -1523,6 +1564,10 @@ xorq (%rax), %rdi
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - cmpq %rsi, %rdi
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 cmpq %rsi, (%rax)
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 cmpq (%rax), %rdi
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 1.00 1.00 cmpsb %es:(%rdi), (%rsi)
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 1.00 1.00 cmpsw %es:(%rdi), (%rsi)
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 1.00 1.00 cmpsl %es:(%rdi), (%rsi)
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 1.00 1.00 cmpsq %es:(%rdi), (%rsi)
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - decb %dil
# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 decb (%rax)
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - decw %di
@@ -1582,6 +1627,14 @@ xorq (%rax), %rdi
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - incq %rdi
# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 incq (%rax)
# CHECK-NEXT: - - 0.50 - - 0.50 - - lahf
+# CHECK-NEXT: - - 0.67 0.67 - 0.67 0.50 0.50 lodsb (%rsi), %al
+# CHECK-NEXT: - - 0.67 0.67 - 0.67 0.50 0.50 lodsw (%rsi), %ax
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 lodsl (%rsi), %eax
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 lodsq (%rsi), %rax
+# CHECK-NEXT: - - 0.67 0.67 1.00 0.67 1.00 1.00 movsb (%rsi), %es:(%rdi)
+# CHECK-NEXT: - - 0.67 0.67 1.00 0.67 1.00 1.00 movsw (%rsi), %es:(%rdi)
+# CHECK-NEXT: - - 0.67 0.67 1.00 0.67 1.00 1.00 movsl (%rsi), %es:(%rdi)
+# CHECK-NEXT: - - 0.67 0.67 1.00 0.67 1.00 1.00 movsq (%rsi), %es:(%rdi)
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movsbw %al, %di
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movzbw %al, %di
# CHECK-NEXT: - - - - - - 0.50 0.50 movsbw (%rax), %di
@@ -1888,6 +1941,12 @@ xorq (%rax), %rdi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - shrdq $7, %rsi, %rdi
# CHECK-NEXT: - - 0.83 0.33 1.00 0.83 1.00 1.00 shldq $7, %rsi, (%rax)
# CHECK-NEXT: - - 0.83 0.33 1.00 0.83 1.00 1.00 shrdq $7, %rsi, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - stc
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - std
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 0.50 0.50 stosb %al, %es:(%rdi)
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 0.50 0.50 stosw %ax, %es:(%rdi)
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 0.50 0.50 stosl %eax, %es:(%rdi)
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 0.50 0.50 stosq %rax, %es:(%rdi)
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - subb $7, %al
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - subb $7, %dil
# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 subb $7, (%rax)
diff --git a/test/tools/llvm-mca/X86/Haswell/resources-x86_32.s b/test/tools/llvm-mca/X86/Haswell/resources-x86_32.s
new file mode 100644
index 000000000000..2a2db69888ce
--- /dev/null
+++ b/test/tools/llvm-mca/X86/Haswell/resources-x86_32.s
@@ -0,0 +1,80 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=i686-unknown-unknown -mcpu=haswell -instruction-tables < %s | FileCheck %s
+
+aaa
+
+aad
+aad $7
+
+aam
+aam $7
+
+aas
+
+bound %bx, (%eax)
+bound %ebx, (%eax)
+
+daa
+
+das
+
+into
+
+leave
+
+salc
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 100 0.25 aaa
+# CHECK-NEXT: 1 100 0.25 aad
+# CHECK-NEXT: 1 100 0.25 aad $7
+# CHECK-NEXT: 1 100 0.25 aam
+# CHECK-NEXT: 1 100 0.25 aam $7
+# CHECK-NEXT: 1 100 0.25 aas
+# CHECK-NEXT: 15 1 3.75 U bound %bx, (%eax)
+# CHECK-NEXT: 15 1 3.75 U bound %ebx, (%eax)
+# CHECK-NEXT: 1 100 0.25 daa
+# CHECK-NEXT: 1 100 0.25 das
+# CHECK-NEXT: 4 1 1.00 U into
+# CHECK-NEXT: 3 7 0.50 * leave
+# CHECK-NEXT: 1 1 0.25 U salc
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - HWDivider
+# CHECK-NEXT: [1] - HWFPDivider
+# CHECK-NEXT: [2] - HWPort0
+# CHECK-NEXT: [3] - HWPort1
+# CHECK-NEXT: [4] - HWPort2
+# CHECK-NEXT: [5] - HWPort3
+# CHECK-NEXT: [6] - HWPort4
+# CHECK-NEXT: [7] - HWPort5
+# CHECK-NEXT: [8] - HWPort6
+# CHECK-NEXT: [9] - HWPort7
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
+# CHECK-NEXT: - - 2.75 2.75 0.50 0.50 - 2.75 2.75 -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - aaa
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - aad
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - aad $7
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - aam
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - aam $7
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - aas
+# CHECK-NEXT: - - - - - - - - - - bound %bx, (%eax)
+# CHECK-NEXT: - - - - - - - - - - bound %ebx, (%eax)
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - daa
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - das
+# CHECK-NEXT: - - - - - - - - - - into
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 0.50 0.50 - leave
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - salc
diff --git a/test/tools/llvm-mca/X86/Haswell/resources-x86_64.s b/test/tools/llvm-mca/X86/Haswell/resources-x86_64.s
index 7a48e799abc2..9d6083a6c853 100644
--- a/test/tools/llvm-mca/X86/Haswell/resources-x86_64.s
+++ b/test/tools/llvm-mca/X86/Haswell/resources-x86_64.s
@@ -217,6 +217,11 @@ cmpq %rsi, %rdi
cmpq %rsi, (%rax)
cmpq (%rax), %rdi
+cmpsb
+cmpsw
+cmpsl
+cmpsq
+
decb %dil
decb (%rax)
decw %di
@@ -285,6 +290,16 @@ incq (%rax)
lahf
+lodsb
+lodsw
+lodsl
+lodsq
+
+movsb
+movsw
+movsl
+movsq
+
movsbw %al, %di
movzbw %al, %di
movsbw (%rax), %di
@@ -622,6 +637,14 @@ shrdq $7, %rsi, %rdi
shldq $7, %rsi, (%rax)
shrdq $7, %rsi, (%rax)
+stc
+std
+
+stosb
+stosw
+stosl
+stosq
+
subb $7, %al
subb $7, %dil
subb $7, (%rax)
@@ -890,6 +913,10 @@ xorq (%rax), %rdi
# CHECK-NEXT: 1 1 0.25 cmpq %rsi, %rdi
# CHECK-NEXT: 2 6 0.50 * cmpq %rsi, (%rax)
# CHECK-NEXT: 2 6 0.50 * cmpq (%rax), %rdi
+# CHECK-NEXT: 5 4 1.00 U cmpsb %es:(%rdi), (%rsi)
+# CHECK-NEXT: 5 4 1.00 U cmpsw %es:(%rdi), (%rsi)
+# CHECK-NEXT: 5 4 1.00 U cmpsl %es:(%rdi), (%rsi)
+# CHECK-NEXT: 5 4 1.00 U cmpsq %es:(%rdi), (%rsi)
# CHECK-NEXT: 1 1 0.25 decb %dil
# CHECK-NEXT: 3 7 1.00 * * decb (%rax)
# CHECK-NEXT: 1 1 0.25 decw %di
@@ -949,6 +976,14 @@ xorq (%rax), %rdi
# CHECK-NEXT: 1 1 0.25 incq %rdi
# CHECK-NEXT: 3 7 1.00 * * incq (%rax)
# CHECK-NEXT: 1 1 0.50 lahf
+# CHECK-NEXT: 3 1 0.50 U lodsb (%rsi), %al
+# CHECK-NEXT: 3 1 0.50 U lodsw (%rsi), %ax
+# CHECK-NEXT: 2 1 0.50 U lodsl (%rsi), %eax
+# CHECK-NEXT: 2 1 0.50 U lodsq (%rsi), %rax
+# CHECK-NEXT: 5 4 1.00 U movsb (%rsi), %es:(%rdi)
+# CHECK-NEXT: 5 4 1.00 U movsw (%rsi), %es:(%rdi)
+# CHECK-NEXT: 5 4 1.00 U movsl (%rsi), %es:(%rdi)
+# CHECK-NEXT: 5 4 1.00 U movsq (%rsi), %es:(%rdi)
# CHECK-NEXT: 1 1 0.25 movsbw %al, %di
# CHECK-NEXT: 1 1 0.25 movzbw %al, %di
# CHECK-NEXT: 1 5 0.50 * movsbw (%rax), %di
@@ -1255,6 +1290,12 @@ xorq (%rax), %rdi
# CHECK-NEXT: 1 3 1.00 shrdq $7, %rsi, %rdi
# CHECK-NEXT: 4 10 1.00 * * shldq $7, %rsi, (%rax)
# CHECK-NEXT: 4 10 1.00 * * shrdq $7, %rsi, (%rax)
+# CHECK-NEXT: 1 1 0.25 U stc
+# CHECK-NEXT: 6 6 1.50 U std
+# CHECK-NEXT: 3 2 1.00 U stosb %al, %es:(%rdi)
+# CHECK-NEXT: 3 2 1.00 U stosw %ax, %es:(%rdi)
+# CHECK-NEXT: 3 2 1.00 U stosl %eax, %es:(%rdi)
+# CHECK-NEXT: 3 2 1.00 U stosq %rax, %es:(%rdi)
# CHECK-NEXT: 1 1 0.25 subb $7, %al
# CHECK-NEXT: 1 1 0.25 subb $7, %dil
# CHECK-NEXT: 3 7 1.00 * * subb $7, (%rax)
@@ -1330,7 +1371,7 @@ xorq (%rax), %rdi
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: 80.00 - 428.00 288.50 209.00 209.00 158.00 184.00 423.50 66.00
+# CHECK-NEXT: 80.00 - 437.00 297.50 220.33 220.33 166.00 193.00 433.50 67.33
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -1525,6 +1566,10 @@ xorq (%rax), %rdi
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - cmpq %rsi, %rdi
# CHECK-NEXT: - - 0.25 0.25 0.50 0.50 - 0.25 0.25 - cmpq %rsi, (%rax)
# CHECK-NEXT: - - 0.25 0.25 0.50 0.50 - 0.25 0.25 - cmpq (%rax), %rdi
+# CHECK-NEXT: - - 0.75 0.75 1.00 1.00 - 0.75 0.75 - cmpsb %es:(%rdi), (%rsi)
+# CHECK-NEXT: - - 0.75 0.75 1.00 1.00 - 0.75 0.75 - cmpsw %es:(%rdi), (%rsi)
+# CHECK-NEXT: - - 0.75 0.75 1.00 1.00 - 0.75 0.75 - cmpsl %es:(%rdi), (%rsi)
+# CHECK-NEXT: - - 0.75 0.75 1.00 1.00 - 0.75 0.75 - cmpsq %es:(%rdi), (%rsi)
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - decb %dil
# CHECK-NEXT: - - 0.25 0.25 0.83 0.83 1.00 0.25 0.25 0.33 decb (%rax)
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - decw %di
@@ -1584,6 +1629,14 @@ xorq (%rax), %rdi
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - incq %rdi
# CHECK-NEXT: - - 0.25 0.25 0.83 0.83 1.00 0.25 0.25 0.33 incq (%rax)
# CHECK-NEXT: - - 0.50 - - - - - 0.50 - lahf
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 0.50 0.50 - lodsb (%rsi), %al
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 0.50 0.50 - lodsw (%rsi), %ax
+# CHECK-NEXT: - - 0.25 0.25 0.50 0.50 - 0.25 0.25 - lodsl (%rsi), %eax
+# CHECK-NEXT: - - 0.25 0.25 0.50 0.50 - 0.25 0.25 - lodsq (%rsi), %rax
+# CHECK-NEXT: - - 0.50 0.50 1.00 1.00 1.00 0.50 0.50 - movsb (%rsi), %es:(%rdi)
+# CHECK-NEXT: - - 0.50 0.50 1.00 1.00 1.00 0.50 0.50 - movsw (%rsi), %es:(%rdi)
+# CHECK-NEXT: - - 0.50 0.50 1.00 1.00 1.00 0.50 0.50 - movsl (%rsi), %es:(%rdi)
+# CHECK-NEXT: - - 0.50 0.50 1.00 1.00 1.00 0.50 0.50 - movsq (%rsi), %es:(%rdi)
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - movsbw %al, %di
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - movzbw %al, %di
# CHECK-NEXT: - - - - 0.50 0.50 - - - - movsbw (%rax), %di
@@ -1890,6 +1943,12 @@ xorq (%rax), %rdi
# CHECK-NEXT: - - - 1.00 - - - - - - shrdq $7, %rsi, %rdi
# CHECK-NEXT: - - 0.25 1.25 0.83 0.83 - 0.25 0.25 0.33 shldq $7, %rsi, (%rax)
# CHECK-NEXT: - - 0.25 1.25 0.83 0.83 - 0.25 0.25 0.33 shrdq $7, %rsi, (%rax)
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - stc
+# CHECK-NEXT: - - 1.25 1.25 - - - 1.25 2.25 - std
+# CHECK-NEXT: - - 0.25 0.25 0.33 0.33 1.00 0.25 0.25 0.33 stosb %al, %es:(%rdi)
+# CHECK-NEXT: - - 0.25 0.25 0.33 0.33 1.00 0.25 0.25 0.33 stosw %ax, %es:(%rdi)
+# CHECK-NEXT: - - 0.25 0.25 0.33 0.33 1.00 0.25 0.25 0.33 stosl %eax, %es:(%rdi)
+# CHECK-NEXT: - - 0.25 0.25 0.33 0.33 1.00 0.25 0.25 0.33 stosq %rax, %es:(%rdi)
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - subb $7, %al
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - subb $7, %dil
# CHECK-NEXT: - - 0.25 0.25 0.83 0.83 1.00 0.25 0.25 0.33 subb $7, (%rax)
diff --git a/test/tools/llvm-mca/X86/SLM/resources-x86_32.s b/test/tools/llvm-mca/X86/SLM/resources-x86_32.s
new file mode 100644
index 000000000000..bc194ed6237b
--- /dev/null
+++ b/test/tools/llvm-mca/X86/SLM/resources-x86_32.s
@@ -0,0 +1,78 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=i686-unknown-unknown -mcpu=slm -instruction-tables < %s | FileCheck %s
+
+aaa
+
+aad
+aad $7
+
+aam
+aam $7
+
+aas
+
+bound %bx, (%eax)
+bound %ebx, (%eax)
+
+daa
+
+das
+
+into
+
+leave
+
+salc
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 100 1.00 aaa
+# CHECK-NEXT: 1 100 1.00 aad
+# CHECK-NEXT: 1 100 1.00 aad $7
+# CHECK-NEXT: 1 100 1.00 aam
+# CHECK-NEXT: 1 100 1.00 aam $7
+# CHECK-NEXT: 1 100 1.00 aas
+# CHECK-NEXT: 1 100 1.00 U bound %bx, (%eax)
+# CHECK-NEXT: 1 100 1.00 U bound %ebx, (%eax)
+# CHECK-NEXT: 1 100 1.00 daa
+# CHECK-NEXT: 1 100 1.00 das
+# CHECK-NEXT: 1 100 1.00 U into
+# CHECK-NEXT: 1 1 0.50 * leave
+# CHECK-NEXT: 1 1 0.50 U salc
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SLMDivider
+# CHECK-NEXT: [1] - SLMFPDivider
+# CHECK-NEXT: [2] - SLMFPMultiplier
+# CHECK-NEXT: [3] - SLM_FPC_RSV0
+# CHECK-NEXT: [4] - SLM_FPC_RSV1
+# CHECK-NEXT: [5] - SLM_IEC_RSV0
+# CHECK-NEXT: [6] - SLM_IEC_RSV1
+# CHECK-NEXT: [7] - SLM_MEC_RSV
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
+# CHECK-NEXT: - - - 11.00 - 1.00 1.00 -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - - aaa
+# CHECK-NEXT: - - - 1.00 - - - - aad
+# CHECK-NEXT: - - - 1.00 - - - - aad $7
+# CHECK-NEXT: - - - 1.00 - - - - aam
+# CHECK-NEXT: - - - 1.00 - - - - aam $7
+# CHECK-NEXT: - - - 1.00 - - - - aas
+# CHECK-NEXT: - - - 1.00 - - - - bound %bx, (%eax)
+# CHECK-NEXT: - - - 1.00 - - - - bound %ebx, (%eax)
+# CHECK-NEXT: - - - 1.00 - - - - daa
+# CHECK-NEXT: - - - 1.00 - - - - das
+# CHECK-NEXT: - - - 1.00 - - - - into
+# CHECK-NEXT: - - - - - 0.50 0.50 - leave
+# CHECK-NEXT: - - - - - 0.50 0.50 - salc
diff --git a/test/tools/llvm-mca/X86/SLM/resources-x86_64.s b/test/tools/llvm-mca/X86/SLM/resources-x86_64.s
index 0c50dd08bc23..c272cf69004f 100644
--- a/test/tools/llvm-mca/X86/SLM/resources-x86_64.s
+++ b/test/tools/llvm-mca/X86/SLM/resources-x86_64.s
@@ -217,6 +217,11 @@ cmpq %rsi, %rdi
cmpq %rsi, (%rax)
cmpq (%rax), %rdi
+cmpsb
+cmpsw
+cmpsl
+cmpsq
+
decb %dil
decb (%rax)
decw %di
@@ -285,6 +290,16 @@ incq (%rax)
lahf
+lodsb
+lodsw
+lodsl
+lodsq
+
+movsb
+movsw
+movsl
+movsq
+
movsbw %al, %di
movzbw %al, %di
movsbw (%rax), %di
@@ -622,6 +637,14 @@ shrdq $7, %rsi, %rdi
shldq $7, %rsi, (%rax)
shrdq $7, %rsi, (%rax)
+stc
+std
+
+stosb
+stosw
+stosl
+stosq
+
subb $7, %al
subb $7, %dil
subb $7, (%rax)
@@ -890,6 +913,10 @@ xorq (%rax), %rdi
# CHECK-NEXT: 1 1 0.50 cmpq %rsi, %rdi
# CHECK-NEXT: 1 4 1.00 * cmpq %rsi, (%rax)
# CHECK-NEXT: 1 4 1.00 * cmpq (%rax), %rdi
+# CHECK-NEXT: 1 100 1.00 U cmpsb %es:(%rdi), (%rsi)
+# CHECK-NEXT: 1 100 1.00 U cmpsw %es:(%rdi), (%rsi)
+# CHECK-NEXT: 1 100 1.00 U cmpsl %es:(%rdi), (%rsi)
+# CHECK-NEXT: 1 100 1.00 U cmpsq %es:(%rdi), (%rsi)
# CHECK-NEXT: 1 1 0.50 decb %dil
# CHECK-NEXT: 2 5 2.00 * * decb (%rax)
# CHECK-NEXT: 1 1 0.50 decw %di
@@ -949,6 +976,14 @@ xorq (%rax), %rdi
# CHECK-NEXT: 1 1 0.50 incq %rdi
# CHECK-NEXT: 2 5 2.00 * * incq (%rax)
# CHECK-NEXT: 1 1 0.50 lahf
+# CHECK-NEXT: 1 100 1.00 U lodsb (%rsi), %al
+# CHECK-NEXT: 1 100 1.00 U lodsw (%rsi), %ax
+# CHECK-NEXT: 1 100 1.00 U lodsl (%rsi), %eax
+# CHECK-NEXT: 1 100 1.00 U lodsq (%rsi), %rax
+# CHECK-NEXT: 1 100 1.00 U movsb (%rsi), %es:(%rdi)
+# CHECK-NEXT: 1 100 1.00 U movsw (%rsi), %es:(%rdi)
+# CHECK-NEXT: 1 100 1.00 U movsl (%rsi), %es:(%rdi)
+# CHECK-NEXT: 1 100 1.00 U movsq (%rsi), %es:(%rdi)
# CHECK-NEXT: 1 1 0.50 movsbw %al, %di
# CHECK-NEXT: 1 1 0.50 movzbw %al, %di
# CHECK-NEXT: 1 4 1.00 * movsbw (%rax), %di
@@ -1255,6 +1290,12 @@ xorq (%rax), %rdi
# CHECK-NEXT: 1 1 1.00 shrdq $7, %rsi, %rdi
# CHECK-NEXT: 2 4 2.00 * * shldq $7, %rsi, (%rax)
# CHECK-NEXT: 2 4 2.00 * * shrdq $7, %rsi, (%rax)
+# CHECK-NEXT: 1 1 0.50 U stc
+# CHECK-NEXT: 1 1 0.50 U std
+# CHECK-NEXT: 1 100 1.00 U stosb %al, %es:(%rdi)
+# CHECK-NEXT: 1 100 1.00 U stosw %ax, %es:(%rdi)
+# CHECK-NEXT: 1 100 1.00 U stosl %eax, %es:(%rdi)
+# CHECK-NEXT: 1 100 1.00 U stosq %rax, %es:(%rdi)
# CHECK-NEXT: 1 1 0.50 subb $7, %al
# CHECK-NEXT: 1 1 0.50 subb $7, %dil
# CHECK-NEXT: 2 5 2.00 * * subb $7, (%rax)
@@ -1328,7 +1369,7 @@ xorq (%rax), %rdi
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
-# CHECK-NEXT: 400.00 - - - - 544.00 386.00 502.00
+# CHECK-NEXT: 400.00 - - 16.00 - 545.00 387.00 502.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
@@ -1523,6 +1564,10 @@ xorq (%rax), %rdi
# CHECK-NEXT: - - - - - 0.50 0.50 - cmpq %rsi, %rdi
# CHECK-NEXT: - - - - - 0.50 0.50 1.00 cmpq %rsi, (%rax)
# CHECK-NEXT: - - - - - 0.50 0.50 1.00 cmpq (%rax), %rdi
+# CHECK-NEXT: - - - 1.00 - - - - cmpsb %es:(%rdi), (%rsi)
+# CHECK-NEXT: - - - 1.00 - - - - cmpsw %es:(%rdi), (%rsi)
+# CHECK-NEXT: - - - 1.00 - - - - cmpsl %es:(%rdi), (%rsi)
+# CHECK-NEXT: - - - 1.00 - - - - cmpsq %es:(%rdi), (%rsi)
# CHECK-NEXT: - - - - - 0.50 0.50 - decb %dil
# CHECK-NEXT: - - - - - 1.00 1.00 2.00 decb (%rax)
# CHECK-NEXT: - - - - - 0.50 0.50 - decw %di
@@ -1582,6 +1627,14 @@ xorq (%rax), %rdi
# CHECK-NEXT: - - - - - 0.50 0.50 - incq %rdi
# CHECK-NEXT: - - - - - 1.00 1.00 2.00 incq (%rax)
# CHECK-NEXT: - - - - - 0.50 0.50 - lahf
+# CHECK-NEXT: - - - 1.00 - - - - lodsb (%rsi), %al
+# CHECK-NEXT: - - - 1.00 - - - - lodsw (%rsi), %ax
+# CHECK-NEXT: - - - 1.00 - - - - lodsl (%rsi), %eax
+# CHECK-NEXT: - - - 1.00 - - - - lodsq (%rsi), %rax
+# CHECK-NEXT: - - - 1.00 - - - - movsb (%rsi), %es:(%rdi)
+# CHECK-NEXT: - - - 1.00 - - - - movsw (%rsi), %es:(%rdi)
+# CHECK-NEXT: - - - 1.00 - - - - movsl (%rsi), %es:(%rdi)
+# CHECK-NEXT: - - - 1.00 - - - - movsq (%rsi), %es:(%rdi)
# CHECK-NEXT: - - - - - 0.50 0.50 - movsbw %al, %di
# CHECK-NEXT: - - - - - 0.50 0.50 - movzbw %al, %di
# CHECK-NEXT: - - - - - 0.50 0.50 1.00 movsbw (%rax), %di
@@ -1888,6 +1941,12 @@ xorq (%rax), %rdi
# CHECK-NEXT: - - - - - 1.00 - - shrdq $7, %rsi, %rdi
# CHECK-NEXT: - - - - - 1.00 - 2.00 shldq $7, %rsi, (%rax)
# CHECK-NEXT: - - - - - 1.00 - 2.00 shrdq $7, %rsi, (%rax)
+# CHECK-NEXT: - - - - - 0.50 0.50 - stc
+# CHECK-NEXT: - - - - - 0.50 0.50 - std
+# CHECK-NEXT: - - - 1.00 - - - - stosb %al, %es:(%rdi)
+# CHECK-NEXT: - - - 1.00 - - - - stosw %ax, %es:(%rdi)
+# CHECK-NEXT: - - - 1.00 - - - - stosl %eax, %es:(%rdi)
+# CHECK-NEXT: - - - 1.00 - - - - stosq %rax, %es:(%rdi)
# CHECK-NEXT: - - - - - 0.50 0.50 - subb $7, %al
# CHECK-NEXT: - - - - - 0.50 0.50 - subb $7, %dil
# CHECK-NEXT: - - - - - 1.00 1.00 2.00 subb $7, (%rax)
diff --git a/test/tools/llvm-mca/X86/SandyBridge/resources-x86_32.s b/test/tools/llvm-mca/X86/SandyBridge/resources-x86_32.s
new file mode 100644
index 000000000000..4441391fd9bb
--- /dev/null
+++ b/test/tools/llvm-mca/X86/SandyBridge/resources-x86_32.s
@@ -0,0 +1,78 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=i686-unknown-unknown -mcpu=sandybridge -instruction-tables < %s | FileCheck %s
+
+aaa
+
+aad
+aad $7
+
+aam
+aam $7
+
+aas
+
+bound %bx, (%eax)
+bound %ebx, (%eax)
+
+daa
+
+das
+
+into
+
+leave
+
+salc
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 100 0.33 aaa
+# CHECK-NEXT: 1 100 0.33 aad
+# CHECK-NEXT: 1 100 0.33 aad $7
+# CHECK-NEXT: 1 100 0.33 aam
+# CHECK-NEXT: 1 100 0.33 aam $7
+# CHECK-NEXT: 1 100 0.33 aas
+# CHECK-NEXT: 1 100 0.33 U bound %bx, (%eax)
+# CHECK-NEXT: 1 100 0.33 U bound %ebx, (%eax)
+# CHECK-NEXT: 1 100 0.33 daa
+# CHECK-NEXT: 1 100 0.33 das
+# CHECK-NEXT: 1 100 0.33 U into
+# CHECK-NEXT: 3 7 0.67 * leave
+# CHECK-NEXT: 1 1 0.33 U salc
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 4.67 4.67 - 4.67 0.50 0.50
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - aaa
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - aad
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - aad $7
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - aam
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - aam $7
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - aas
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - bound %bx, (%eax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - bound %ebx, (%eax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - daa
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - das
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - into
+# CHECK-NEXT: - - 0.67 0.67 - 0.67 0.50 0.50 leave
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - salc
diff --git a/test/tools/llvm-mca/X86/SandyBridge/resources-x86_64.s b/test/tools/llvm-mca/X86/SandyBridge/resources-x86_64.s
index a9627e85b60f..cda2e7e6b5aa 100644
--- a/test/tools/llvm-mca/X86/SandyBridge/resources-x86_64.s
+++ b/test/tools/llvm-mca/X86/SandyBridge/resources-x86_64.s
@@ -217,6 +217,11 @@ cmpq %rsi, %rdi
cmpq %rsi, (%rax)
cmpq (%rax), %rdi
+cmpsb
+cmpsw
+cmpsl
+cmpsq
+
decb %dil
decb (%rax)
decw %di
@@ -285,6 +290,16 @@ incq (%rax)
lahf
+lodsb
+lodsw
+lodsl
+lodsq
+
+movsb
+movsw
+movsl
+movsq
+
movsbw %al, %di
movzbw %al, %di
movsbw (%rax), %di
@@ -622,6 +637,14 @@ shrdq $7, %rsi, %rdi
shldq $7, %rsi, (%rax)
shrdq $7, %rsi, (%rax)
+stc
+std
+
+stosb
+stosw
+stosl
+stosq
+
subb $7, %al
subb $7, %dil
subb $7, (%rax)
@@ -890,6 +913,10 @@ xorq (%rax), %rdi
# CHECK-NEXT: 1 1 0.33 cmpq %rsi, %rdi
# CHECK-NEXT: 2 6 0.50 * cmpq %rsi, (%rax)
# CHECK-NEXT: 2 6 0.50 * cmpq (%rax), %rdi
+# CHECK-NEXT: 5 8 1.00 U cmpsb %es:(%rdi), (%rsi)
+# CHECK-NEXT: 5 8 1.00 U cmpsw %es:(%rdi), (%rsi)
+# CHECK-NEXT: 5 8 1.00 U cmpsl %es:(%rdi), (%rsi)
+# CHECK-NEXT: 5 8 1.00 U cmpsq %es:(%rdi), (%rsi)
# CHECK-NEXT: 1 1 0.33 decb %dil
# CHECK-NEXT: 3 7 1.00 * * decb (%rax)
# CHECK-NEXT: 1 1 0.33 decw %di
@@ -949,6 +976,14 @@ xorq (%rax), %rdi
# CHECK-NEXT: 1 1 0.33 incq %rdi
# CHECK-NEXT: 3 7 1.00 * * incq (%rax)
# CHECK-NEXT: 1 1 0.50 lahf
+# CHECK-NEXT: 3 7 0.67 U lodsb (%rsi), %al
+# CHECK-NEXT: 3 7 0.67 U lodsw (%rsi), %ax
+# CHECK-NEXT: 2 6 0.50 U lodsl (%rsi), %eax
+# CHECK-NEXT: 2 6 0.50 U lodsq (%rsi), %rax
+# CHECK-NEXT: 5 8 1.00 U movsb (%rsi), %es:(%rdi)
+# CHECK-NEXT: 5 8 1.00 U movsw (%rsi), %es:(%rdi)
+# CHECK-NEXT: 5 8 1.00 U movsl (%rsi), %es:(%rdi)
+# CHECK-NEXT: 5 8 1.00 U movsq (%rsi), %es:(%rdi)
# CHECK-NEXT: 1 1 0.33 movsbw %al, %di
# CHECK-NEXT: 1 1 0.33 movzbw %al, %di
# CHECK-NEXT: 1 5 0.50 * movsbw (%rax), %di
@@ -1255,6 +1290,12 @@ xorq (%rax), %rdi
# CHECK-NEXT: 2 2 0.67 shrdq $7, %rsi, %rdi
# CHECK-NEXT: 5 8 1.00 * * shldq $7, %rsi, (%rax)
# CHECK-NEXT: 5 8 1.00 * * shrdq $7, %rsi, (%rax)
+# CHECK-NEXT: 1 1 0.33 U stc
+# CHECK-NEXT: 1 1 0.33 U std
+# CHECK-NEXT: 3 5 1.00 U stosb %al, %es:(%rdi)
+# CHECK-NEXT: 3 5 1.00 U stosw %ax, %es:(%rdi)
+# CHECK-NEXT: 3 5 1.00 U stosl %eax, %es:(%rdi)
+# CHECK-NEXT: 3 5 1.00 U stosq %rax, %es:(%rdi)
# CHECK-NEXT: 1 1 0.33 subb $7, %al
# CHECK-NEXT: 1 1 0.33 subb $7, %dil
# CHECK-NEXT: 3 7 1.00 * * subb $7, (%rax)
@@ -1328,7 +1369,7 @@ xorq (%rax), %rdi
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
-# CHECK-NEXT: 160.00 - 365.50 171.00 210.00 356.50 254.00 254.00
+# CHECK-NEXT: 160.00 - 376.17 181.67 218.00 367.17 266.00 266.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
@@ -1523,6 +1564,10 @@ xorq (%rax), %rdi
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - cmpq %rsi, %rdi
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 cmpq %rsi, (%rax)
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 cmpq (%rax), %rdi
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 1.00 1.00 cmpsb %es:(%rdi), (%rsi)
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 1.00 1.00 cmpsw %es:(%rdi), (%rsi)
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 1.00 1.00 cmpsl %es:(%rdi), (%rsi)
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 1.00 1.00 cmpsq %es:(%rdi), (%rsi)
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - decb %dil
# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 decb (%rax)
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - decw %di
@@ -1582,6 +1627,14 @@ xorq (%rax), %rdi
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - incq %rdi
# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 incq (%rax)
# CHECK-NEXT: - - 0.50 - - 0.50 - - lahf
+# CHECK-NEXT: - - 0.67 0.67 - 0.67 0.50 0.50 lodsb (%rsi), %al
+# CHECK-NEXT: - - 0.67 0.67 - 0.67 0.50 0.50 lodsw (%rsi), %ax
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 lodsl (%rsi), %eax
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 lodsq (%rsi), %rax
+# CHECK-NEXT: - - 0.67 0.67 1.00 0.67 1.00 1.00 movsb (%rsi), %es:(%rdi)
+# CHECK-NEXT: - - 0.67 0.67 1.00 0.67 1.00 1.00 movsw (%rsi), %es:(%rdi)
+# CHECK-NEXT: - - 0.67 0.67 1.00 0.67 1.00 1.00 movsl (%rsi), %es:(%rdi)
+# CHECK-NEXT: - - 0.67 0.67 1.00 0.67 1.00 1.00 movsq (%rsi), %es:(%rdi)
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movsbw %al, %di
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movzbw %al, %di
# CHECK-NEXT: - - - - - - 0.50 0.50 movsbw (%rax), %di
@@ -1888,6 +1941,12 @@ xorq (%rax), %rdi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - shrdq $7, %rsi, %rdi
# CHECK-NEXT: - - 0.83 0.33 1.00 0.83 1.00 1.00 shldq $7, %rsi, (%rax)
# CHECK-NEXT: - - 0.83 0.33 1.00 0.83 1.00 1.00 shrdq $7, %rsi, (%rax)
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - stc
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - std
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 0.50 0.50 stosb %al, %es:(%rdi)
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 0.50 0.50 stosw %ax, %es:(%rdi)
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 0.50 0.50 stosl %eax, %es:(%rdi)
+# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 0.50 0.50 stosq %rax, %es:(%rdi)
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - subb $7, %al
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - subb $7, %dil
# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 subb $7, (%rax)
diff --git a/test/tools/llvm-mca/X86/SkylakeClient/resources-x86_32.s b/test/tools/llvm-mca/X86/SkylakeClient/resources-x86_32.s
new file mode 100644
index 000000000000..103cc3ced847
--- /dev/null
+++ b/test/tools/llvm-mca/X86/SkylakeClient/resources-x86_32.s
@@ -0,0 +1,80 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=i686-unknown-unknown -mcpu=skylake -instruction-tables < %s | FileCheck %s
+
+aaa
+
+aad
+aad $7
+
+aam
+aam $7
+
+aas
+
+bound %bx, (%eax)
+bound %ebx, (%eax)
+
+daa
+
+das
+
+into
+
+leave
+
+salc
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 100 0.25 aaa
+# CHECK-NEXT: 1 100 0.25 aad
+# CHECK-NEXT: 1 100 0.25 aad $7
+# CHECK-NEXT: 1 100 0.25 aam
+# CHECK-NEXT: 1 100 0.25 aam $7
+# CHECK-NEXT: 1 100 0.25 aas
+# CHECK-NEXT: 1 100 0.25 U bound %bx, (%eax)
+# CHECK-NEXT: 1 100 0.25 U bound %ebx, (%eax)
+# CHECK-NEXT: 1 100 0.25 daa
+# CHECK-NEXT: 1 100 0.25 das
+# CHECK-NEXT: 1 100 0.25 U into
+# CHECK-NEXT: 3 7 0.50 * leave
+# CHECK-NEXT: 1 1 0.25 U salc
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SKLDivider
+# CHECK-NEXT: [1] - SKLFPDivider
+# CHECK-NEXT: [2] - SKLPort0
+# CHECK-NEXT: [3] - SKLPort1
+# CHECK-NEXT: [4] - SKLPort2
+# CHECK-NEXT: [5] - SKLPort3
+# CHECK-NEXT: [6] - SKLPort4
+# CHECK-NEXT: [7] - SKLPort5
+# CHECK-NEXT: [8] - SKLPort6
+# CHECK-NEXT: [9] - SKLPort7
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
+# CHECK-NEXT: - - 3.50 3.50 0.50 0.50 - 3.50 3.50 -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - aaa
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - aad
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - aad $7
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - aam
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - aam $7
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - aas
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - bound %bx, (%eax)
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - bound %ebx, (%eax)
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - daa
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - das
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - into
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 0.50 0.50 - leave
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - salc
diff --git a/test/tools/llvm-mca/X86/SkylakeClient/resources-x86_64.s b/test/tools/llvm-mca/X86/SkylakeClient/resources-x86_64.s
index e21c4085aad2..5f88e2cf7dc8 100644
--- a/test/tools/llvm-mca/X86/SkylakeClient/resources-x86_64.s
+++ b/test/tools/llvm-mca/X86/SkylakeClient/resources-x86_64.s
@@ -217,6 +217,11 @@ cmpq %rsi, %rdi
cmpq %rsi, (%rax)
cmpq (%rax), %rdi
+cmpsb
+cmpsw
+cmpsl
+cmpsq
+
decb %dil
decb (%rax)
decw %di
@@ -285,6 +290,16 @@ incq (%rax)
lahf
+lodsb
+lodsw
+lodsl
+lodsq
+
+movsb
+movsw
+movsl
+movsq
+
movsbw %al, %di
movzbw %al, %di
movsbw (%rax), %di
@@ -622,6 +637,14 @@ shrdq $7, %rsi, %rdi
shldq $7, %rsi, (%rax)
shrdq $7, %rsi, (%rax)
+stc
+std
+
+stosb
+stosw
+stosl
+stosq
+
subb $7, %al
subb $7, %dil
subb $7, (%rax)
@@ -890,6 +913,10 @@ xorq (%rax), %rdi
# CHECK-NEXT: 1 1 0.25 cmpq %rsi, %rdi
# CHECK-NEXT: 2 6 0.50 * cmpq %rsi, (%rax)
# CHECK-NEXT: 2 6 0.50 * cmpq (%rax), %rdi
+# CHECK-NEXT: 1 100 0.25 U cmpsb %es:(%rdi), (%rsi)
+# CHECK-NEXT: 1 100 0.25 U cmpsw %es:(%rdi), (%rsi)
+# CHECK-NEXT: 1 100 0.25 U cmpsl %es:(%rdi), (%rsi)
+# CHECK-NEXT: 1 100 0.25 U cmpsq %es:(%rdi), (%rsi)
# CHECK-NEXT: 1 1 0.25 decb %dil
# CHECK-NEXT: 3 7 1.00 * * decb (%rax)
# CHECK-NEXT: 1 1 0.25 decw %di
@@ -949,6 +976,14 @@ xorq (%rax), %rdi
# CHECK-NEXT: 1 1 0.25 incq %rdi
# CHECK-NEXT: 3 7 1.00 * * incq (%rax)
# CHECK-NEXT: 1 1 0.50 lahf
+# CHECK-NEXT: 1 100 0.25 U lodsb (%rsi), %al
+# CHECK-NEXT: 1 100 0.25 U lodsw (%rsi), %ax
+# CHECK-NEXT: 1 100 0.25 U lodsl (%rsi), %eax
+# CHECK-NEXT: 1 100 0.25 U lodsq (%rsi), %rax
+# CHECK-NEXT: 1 100 0.25 U movsb (%rsi), %es:(%rdi)
+# CHECK-NEXT: 1 100 0.25 U movsw (%rsi), %es:(%rdi)
+# CHECK-NEXT: 1 100 0.25 U movsl (%rsi), %es:(%rdi)
+# CHECK-NEXT: 1 100 0.25 U movsq (%rsi), %es:(%rdi)
# CHECK-NEXT: 1 1 0.25 movsbw %al, %di
# CHECK-NEXT: 1 1 0.25 movzbw %al, %di
# CHECK-NEXT: 1 5 0.50 * movsbw (%rax), %di
@@ -1255,6 +1290,12 @@ xorq (%rax), %rdi
# CHECK-NEXT: 1 3 1.00 shrdq $7, %rsi, %rdi
# CHECK-NEXT: 4 9 1.00 * * shldq $7, %rsi, (%rax)
# CHECK-NEXT: 4 9 1.00 * * shrdq $7, %rsi, (%rax)
+# CHECK-NEXT: 1 1 0.25 U stc
+# CHECK-NEXT: 6 6 1.50 U std
+# CHECK-NEXT: 3 2 1.00 U stosb %al, %es:(%rdi)
+# CHECK-NEXT: 3 2 1.00 U stosw %ax, %es:(%rdi)
+# CHECK-NEXT: 3 2 1.00 U stosl %eax, %es:(%rdi)
+# CHECK-NEXT: 3 2 1.00 U stosq %rax, %es:(%rdi)
# CHECK-NEXT: 1 1 0.25 subb $7, %al
# CHECK-NEXT: 1 1 0.25 subb $7, %dil
# CHECK-NEXT: 3 7 1.00 * * subb $7, (%rax)
@@ -1330,7 +1371,7 @@ xorq (%rax), %rdi
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: 60.00 - 444.50 248.50 218.00 218.00 167.00 197.00 430.00 69.00
+# CHECK-NEXT: 60.00 - 450.00 254.00 219.33 219.33 171.00 202.50 436.50 70.33
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -1525,6 +1566,10 @@ xorq (%rax), %rdi
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - cmpq %rsi, %rdi
# CHECK-NEXT: - - 0.25 0.25 0.50 0.50 - 0.25 0.25 - cmpq %rsi, (%rax)
# CHECK-NEXT: - - 0.25 0.25 0.50 0.50 - 0.25 0.25 - cmpq (%rax), %rdi
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - cmpsb %es:(%rdi), (%rsi)
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - cmpsw %es:(%rdi), (%rsi)
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - cmpsl %es:(%rdi), (%rsi)
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - cmpsq %es:(%rdi), (%rsi)
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - decb %dil
# CHECK-NEXT: - - 0.25 0.25 0.83 0.83 1.00 0.25 0.25 0.33 decb (%rax)
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - decw %di
@@ -1584,6 +1629,14 @@ xorq (%rax), %rdi
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - incq %rdi
# CHECK-NEXT: - - 0.25 0.25 0.83 0.83 1.00 0.25 0.25 0.33 incq (%rax)
# CHECK-NEXT: - - 0.50 - - - - - 0.50 - lahf
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - lodsb (%rsi), %al
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - lodsw (%rsi), %ax
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - lodsl (%rsi), %eax
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - lodsq (%rsi), %rax
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - movsb (%rsi), %es:(%rdi)
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - movsw (%rsi), %es:(%rdi)
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - movsl (%rsi), %es:(%rdi)
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - movsq (%rsi), %es:(%rdi)
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - movsbw %al, %di
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - movzbw %al, %di
# CHECK-NEXT: - - - - 0.50 0.50 - - - - movsbw (%rax), %di
@@ -1890,6 +1943,12 @@ xorq (%rax), %rdi
# CHECK-NEXT: - - - 1.00 - - - - - - shrdq $7, %rsi, %rdi
# CHECK-NEXT: - - 0.25 1.25 0.83 0.83 - 0.25 0.25 0.33 shldq $7, %rsi, (%rax)
# CHECK-NEXT: - - 0.25 1.25 0.83 0.83 - 0.25 0.25 0.33 shrdq $7, %rsi, (%rax)
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - stc
+# CHECK-NEXT: - - 1.25 1.25 - - - 1.25 2.25 - std
+# CHECK-NEXT: - - 0.25 0.25 0.33 0.33 1.00 0.25 0.25 0.33 stosb %al, %es:(%rdi)
+# CHECK-NEXT: - - 0.25 0.25 0.33 0.33 1.00 0.25 0.25 0.33 stosw %ax, %es:(%rdi)
+# CHECK-NEXT: - - 0.25 0.25 0.33 0.33 1.00 0.25 0.25 0.33 stosl %eax, %es:(%rdi)
+# CHECK-NEXT: - - 0.25 0.25 0.33 0.33 1.00 0.25 0.25 0.33 stosq %rax, %es:(%rdi)
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - subb $7, %al
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - subb $7, %dil
# CHECK-NEXT: - - 0.25 0.25 0.83 0.83 1.00 0.25 0.25 0.33 subb $7, (%rax)
diff --git a/test/tools/llvm-mca/X86/SkylakeServer/resources-x86_32.s b/test/tools/llvm-mca/X86/SkylakeServer/resources-x86_32.s
new file mode 100644
index 000000000000..ebb503cdf379
--- /dev/null
+++ b/test/tools/llvm-mca/X86/SkylakeServer/resources-x86_32.s
@@ -0,0 +1,80 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=i686-unknown-unknown -mcpu=skylake-avx512 -instruction-tables < %s | FileCheck %s
+
+aaa
+
+aad
+aad $7
+
+aam
+aam $7
+
+aas
+
+bound %bx, (%eax)
+bound %ebx, (%eax)
+
+daa
+
+das
+
+into
+
+leave
+
+salc
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 100 0.25 aaa
+# CHECK-NEXT: 1 100 0.25 aad
+# CHECK-NEXT: 1 100 0.25 aad $7
+# CHECK-NEXT: 1 100 0.25 aam
+# CHECK-NEXT: 1 100 0.25 aam $7
+# CHECK-NEXT: 1 100 0.25 aas
+# CHECK-NEXT: 1 100 0.25 U bound %bx, (%eax)
+# CHECK-NEXT: 1 100 0.25 U bound %ebx, (%eax)
+# CHECK-NEXT: 1 100 0.25 daa
+# CHECK-NEXT: 1 100 0.25 das
+# CHECK-NEXT: 1 100 0.25 U into
+# CHECK-NEXT: 3 7 0.50 * leave
+# CHECK-NEXT: 1 1 0.25 U salc
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SKXDivider
+# CHECK-NEXT: [1] - SKXFPDivider
+# CHECK-NEXT: [2] - SKXPort0
+# CHECK-NEXT: [3] - SKXPort1
+# CHECK-NEXT: [4] - SKXPort2
+# CHECK-NEXT: [5] - SKXPort3
+# CHECK-NEXT: [6] - SKXPort4
+# CHECK-NEXT: [7] - SKXPort5
+# CHECK-NEXT: [8] - SKXPort6
+# CHECK-NEXT: [9] - SKXPort7
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
+# CHECK-NEXT: - - 3.50 3.50 0.50 0.50 - 3.50 3.50 -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - aaa
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - aad
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - aad $7
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - aam
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - aam $7
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - aas
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - bound %bx, (%eax)
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - bound %ebx, (%eax)
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - daa
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - das
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - into
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 0.50 0.50 - leave
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - salc
diff --git a/test/tools/llvm-mca/X86/SkylakeServer/resources-x86_64.s b/test/tools/llvm-mca/X86/SkylakeServer/resources-x86_64.s
index 7671bb0e21ea..6cd2ae1198e8 100644
--- a/test/tools/llvm-mca/X86/SkylakeServer/resources-x86_64.s
+++ b/test/tools/llvm-mca/X86/SkylakeServer/resources-x86_64.s
@@ -217,6 +217,11 @@ cmpq %rsi, %rdi
cmpq %rsi, (%rax)
cmpq (%rax), %rdi
+cmpsb
+cmpsw
+cmpsl
+cmpsq
+
decb %dil
decb (%rax)
decw %di
@@ -285,6 +290,16 @@ incq (%rax)
lahf
+lodsb
+lodsw
+lodsl
+lodsq
+
+movsb
+movsw
+movsl
+movsq
+
movsbw %al, %di
movzbw %al, %di
movsbw (%rax), %di
@@ -622,6 +637,14 @@ shrdq $7, %rsi, %rdi
shldq $7, %rsi, (%rax)
shrdq $7, %rsi, (%rax)
+stc
+std
+
+stosb
+stosw
+stosl
+stosq
+
subb $7, %al
subb $7, %dil
subb $7, (%rax)
@@ -890,6 +913,10 @@ xorq (%rax), %rdi
# CHECK-NEXT: 1 1 0.25 cmpq %rsi, %rdi
# CHECK-NEXT: 2 6 0.50 * cmpq %rsi, (%rax)
# CHECK-NEXT: 2 6 0.50 * cmpq (%rax), %rdi
+# CHECK-NEXT: 1 100 0.25 U cmpsb %es:(%rdi), (%rsi)
+# CHECK-NEXT: 1 100 0.25 U cmpsw %es:(%rdi), (%rsi)
+# CHECK-NEXT: 1 100 0.25 U cmpsl %es:(%rdi), (%rsi)
+# CHECK-NEXT: 1 100 0.25 U cmpsq %es:(%rdi), (%rsi)
# CHECK-NEXT: 1 1 0.25 decb %dil
# CHECK-NEXT: 3 7 1.00 * * decb (%rax)
# CHECK-NEXT: 1 1 0.25 decw %di
@@ -949,6 +976,14 @@ xorq (%rax), %rdi
# CHECK-NEXT: 1 1 0.25 incq %rdi
# CHECK-NEXT: 3 7 1.00 * * incq (%rax)
# CHECK-NEXT: 1 1 0.50 lahf
+# CHECK-NEXT: 1 100 0.25 U lodsb (%rsi), %al
+# CHECK-NEXT: 1 100 0.25 U lodsw (%rsi), %ax
+# CHECK-NEXT: 1 100 0.25 U lodsl (%rsi), %eax
+# CHECK-NEXT: 1 100 0.25 U lodsq (%rsi), %rax
+# CHECK-NEXT: 1 100 0.25 U movsb (%rsi), %es:(%rdi)
+# CHECK-NEXT: 1 100 0.25 U movsw (%rsi), %es:(%rdi)
+# CHECK-NEXT: 1 100 0.25 U movsl (%rsi), %es:(%rdi)
+# CHECK-NEXT: 1 100 0.25 U movsq (%rsi), %es:(%rdi)
# CHECK-NEXT: 1 1 0.25 movsbw %al, %di
# CHECK-NEXT: 1 1 0.25 movzbw %al, %di
# CHECK-NEXT: 1 5 0.50 * movsbw (%rax), %di
@@ -1255,6 +1290,12 @@ xorq (%rax), %rdi
# CHECK-NEXT: 1 3 1.00 shrdq $7, %rsi, %rdi
# CHECK-NEXT: 4 9 1.00 * * shldq $7, %rsi, (%rax)
# CHECK-NEXT: 4 9 1.00 * * shrdq $7, %rsi, (%rax)
+# CHECK-NEXT: 1 1 0.25 U stc
+# CHECK-NEXT: 6 6 1.50 U std
+# CHECK-NEXT: 3 2 1.00 U stosb %al, %es:(%rdi)
+# CHECK-NEXT: 3 2 1.00 U stosw %ax, %es:(%rdi)
+# CHECK-NEXT: 3 2 1.00 U stosl %eax, %es:(%rdi)
+# CHECK-NEXT: 3 2 1.00 U stosq %rax, %es:(%rdi)
# CHECK-NEXT: 1 1 0.25 subb $7, %al
# CHECK-NEXT: 1 1 0.25 subb $7, %dil
# CHECK-NEXT: 3 7 1.00 * * subb $7, (%rax)
@@ -1330,7 +1371,7 @@ xorq (%rax), %rdi
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: 60.00 - 444.75 248.75 218.00 218.00 167.00 197.25 430.25 69.00
+# CHECK-NEXT: 60.00 - 450.25 254.25 219.33 219.33 171.00 202.75 436.75 70.33
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -1525,6 +1566,10 @@ xorq (%rax), %rdi
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - cmpq %rsi, %rdi
# CHECK-NEXT: - - 0.25 0.25 0.50 0.50 - 0.25 0.25 - cmpq %rsi, (%rax)
# CHECK-NEXT: - - 0.25 0.25 0.50 0.50 - 0.25 0.25 - cmpq (%rax), %rdi
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - cmpsb %es:(%rdi), (%rsi)
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - cmpsw %es:(%rdi), (%rsi)
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - cmpsl %es:(%rdi), (%rsi)
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - cmpsq %es:(%rdi), (%rsi)
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - decb %dil
# CHECK-NEXT: - - 0.25 0.25 0.83 0.83 1.00 0.25 0.25 0.33 decb (%rax)
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - decw %di
@@ -1584,6 +1629,14 @@ xorq (%rax), %rdi
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - incq %rdi
# CHECK-NEXT: - - 0.25 0.25 0.83 0.83 1.00 0.25 0.25 0.33 incq (%rax)
# CHECK-NEXT: - - 0.50 - - - - - 0.50 - lahf
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - lodsb (%rsi), %al
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - lodsw (%rsi), %ax
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - lodsl (%rsi), %eax
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - lodsq (%rsi), %rax
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - movsb (%rsi), %es:(%rdi)
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - movsw (%rsi), %es:(%rdi)
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - movsl (%rsi), %es:(%rdi)
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - movsq (%rsi), %es:(%rdi)
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - movsbw %al, %di
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - movzbw %al, %di
# CHECK-NEXT: - - - - 0.50 0.50 - - - - movsbw (%rax), %di
@@ -1890,6 +1943,12 @@ xorq (%rax), %rdi
# CHECK-NEXT: - - - 1.00 - - - - - - shrdq $7, %rsi, %rdi
# CHECK-NEXT: - - 0.25 1.25 0.83 0.83 - 0.25 0.25 0.33 shldq $7, %rsi, (%rax)
# CHECK-NEXT: - - 0.25 1.25 0.83 0.83 - 0.25 0.25 0.33 shrdq $7, %rsi, (%rax)
+# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - stc
+# CHECK-NEXT: - - 1.25 1.25 - - - 1.25 2.25 - std
+# CHECK-NEXT: - - 0.25 0.25 0.33 0.33 1.00 0.25 0.25 0.33 stosb %al, %es:(%rdi)
+# CHECK-NEXT: - - 0.25 0.25 0.33 0.33 1.00 0.25 0.25 0.33 stosw %ax, %es:(%rdi)
+# CHECK-NEXT: - - 0.25 0.25 0.33 0.33 1.00 0.25 0.25 0.33 stosl %eax, %es:(%rdi)
+# CHECK-NEXT: - - 0.25 0.25 0.33 0.33 1.00 0.25 0.25 0.33 stosq %rax, %es:(%rdi)
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - subb $7, %al
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - subb $7, %dil
# CHECK-NEXT: - - 0.25 0.25 0.83 0.83 1.00 0.25 0.25 0.33 subb $7, (%rax)
diff --git a/test/tools/llvm-mca/X86/Znver1/resources-x86_32.s b/test/tools/llvm-mca/X86/Znver1/resources-x86_32.s
new file mode 100644
index 000000000000..85bd0dc9de4b
--- /dev/null
+++ b/test/tools/llvm-mca/X86/Znver1/resources-x86_32.s
@@ -0,0 +1,82 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=i686-unknown-unknown -mcpu=znver1 -instruction-tables < %s | FileCheck %s
+
+aaa
+
+aad
+aad $7
+
+aam
+aam $7
+
+aas
+
+bound %bx, (%eax)
+bound %ebx, (%eax)
+
+daa
+
+das
+
+into
+
+leave
+
+salc
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 100 0.25 aaa
+# CHECK-NEXT: 1 100 0.25 aad
+# CHECK-NEXT: 1 100 0.25 aad $7
+# CHECK-NEXT: 1 100 0.25 aam
+# CHECK-NEXT: 1 100 0.25 aam $7
+# CHECK-NEXT: 1 100 0.25 aas
+# CHECK-NEXT: 1 100 0.25 U bound %bx, (%eax)
+# CHECK-NEXT: 1 100 0.25 U bound %ebx, (%eax)
+# CHECK-NEXT: 1 100 0.25 daa
+# CHECK-NEXT: 1 100 0.25 das
+# CHECK-NEXT: 1 100 0.25 U into
+# CHECK-NEXT: 2 8 0.50 * leave
+# CHECK-NEXT: 1 1 0.25 U salc
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - ZnAGU0
+# CHECK-NEXT: [1] - ZnAGU1
+# CHECK-NEXT: [2] - ZnALU0
+# CHECK-NEXT: [3] - ZnALU1
+# CHECK-NEXT: [4] - ZnALU2
+# CHECK-NEXT: [5] - ZnALU3
+# CHECK-NEXT: [6] - ZnDivider
+# CHECK-NEXT: [7] - ZnFPU0
+# CHECK-NEXT: [8] - ZnFPU1
+# CHECK-NEXT: [9] - ZnFPU2
+# CHECK-NEXT: [10] - ZnFPU3
+# CHECK-NEXT: [11] - ZnMultiplier
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
+# CHECK-NEXT: 0.50 0.50 0.50 0.50 0.50 0.50 - - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
+# CHECK-NEXT: - - - - - - - - - - - - aaa
+# CHECK-NEXT: - - - - - - - - - - - - aad
+# CHECK-NEXT: - - - - - - - - - - - - aad $7
+# CHECK-NEXT: - - - - - - - - - - - - aam
+# CHECK-NEXT: - - - - - - - - - - - - aam $7
+# CHECK-NEXT: - - - - - - - - - - - - aas
+# CHECK-NEXT: - - - - - - - - - - - - bound %bx, (%eax)
+# CHECK-NEXT: - - - - - - - - - - - - bound %ebx, (%eax)
+# CHECK-NEXT: - - - - - - - - - - - - daa
+# CHECK-NEXT: - - - - - - - - - - - - das
+# CHECK-NEXT: - - - - - - - - - - - - into
+# CHECK-NEXT: 0.50 0.50 0.25 0.25 0.25 0.25 - - - - - - leave
+# CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - salc
diff --git a/test/tools/llvm-mca/X86/Znver1/resources-x86_64.s b/test/tools/llvm-mca/X86/Znver1/resources-x86_64.s
index 261328b15dc5..c2b6cf785601 100644
--- a/test/tools/llvm-mca/X86/Znver1/resources-x86_64.s
+++ b/test/tools/llvm-mca/X86/Znver1/resources-x86_64.s
@@ -217,6 +217,11 @@ cmpq %rsi, %rdi
cmpq %rsi, (%rax)
cmpq (%rax), %rdi
+cmpsb
+cmpsw
+cmpsl
+cmpsq
+
decb %dil
decb (%rax)
decw %di
@@ -285,6 +290,16 @@ incq (%rax)
lahf
+lodsb
+lodsw
+lodsl
+lodsq
+
+movsb
+movsw
+movsl
+movsq
+
movsbw %al, %di
movzbw %al, %di
movsbw (%rax), %di
@@ -622,6 +637,14 @@ shrdq $7, %rsi, %rdi
shldq $7, %rsi, (%rax)
shrdq $7, %rsi, (%rax)
+stc
+std
+
+stosb
+stosw
+stosl
+stosq
+
subb $7, %al
subb $7, %dil
subb $7, (%rax)
@@ -890,6 +913,10 @@ xorq (%rax), %rdi
# CHECK-NEXT: 1 1 0.25 cmpq %rsi, %rdi
# CHECK-NEXT: 2 5 0.50 * cmpq %rsi, (%rax)
# CHECK-NEXT: 2 5 0.50 * cmpq (%rax), %rdi
+# CHECK-NEXT: 1 100 0.25 U cmpsb %es:(%rdi), (%rsi)
+# CHECK-NEXT: 1 100 0.25 U cmpsw %es:(%rdi), (%rsi)
+# CHECK-NEXT: 1 100 0.25 U cmpsl %es:(%rdi), (%rsi)
+# CHECK-NEXT: 1 100 0.25 U cmpsq %es:(%rdi), (%rsi)
# CHECK-NEXT: 1 1 0.25 decb %dil
# CHECK-NEXT: 2 5 0.50 * * decb (%rax)
# CHECK-NEXT: 1 1 0.25 decw %di
@@ -949,6 +976,14 @@ xorq (%rax), %rdi
# CHECK-NEXT: 1 1 0.25 incq %rdi
# CHECK-NEXT: 2 5 0.50 * * incq (%rax)
# CHECK-NEXT: 1 100 0.25 lahf
+# CHECK-NEXT: 1 100 0.25 U lodsb (%rsi), %al
+# CHECK-NEXT: 1 100 0.25 U lodsw (%rsi), %ax
+# CHECK-NEXT: 1 100 0.25 U lodsl (%rsi), %eax
+# CHECK-NEXT: 1 100 0.25 U lodsq (%rsi), %rax
+# CHECK-NEXT: 1 100 0.25 U movsb (%rsi), %es:(%rdi)
+# CHECK-NEXT: 1 100 0.25 U movsw (%rsi), %es:(%rdi)
+# CHECK-NEXT: 1 100 0.25 U movsl (%rsi), %es:(%rdi)
+# CHECK-NEXT: 1 100 0.25 U movsq (%rsi), %es:(%rdi)
# CHECK-NEXT: 1 1 0.25 movsbw %al, %di
# CHECK-NEXT: 1 1 0.25 movzbw %al, %di
# CHECK-NEXT: 2 5 0.50 * movsbw (%rax), %di
@@ -1255,6 +1290,12 @@ xorq (%rax), %rdi
# CHECK-NEXT: 1 1 0.25 shrdq $7, %rsi, %rdi
# CHECK-NEXT: 2 5 0.50 * * shldq $7, %rsi, (%rax)
# CHECK-NEXT: 2 5 0.50 * * shrdq $7, %rsi, (%rax)
+# CHECK-NEXT: 1 1 0.25 U stc
+# CHECK-NEXT: 1 1 0.25 U std
+# CHECK-NEXT: 1 100 0.25 U stosb %al, %es:(%rdi)
+# CHECK-NEXT: 1 100 0.25 U stosw %ax, %es:(%rdi)
+# CHECK-NEXT: 1 100 0.25 U stosl %eax, %es:(%rdi)
+# CHECK-NEXT: 1 100 0.25 U stosq %rax, %es:(%rdi)
# CHECK-NEXT: 1 1 0.25 subb $7, %al
# CHECK-NEXT: 1 1 0.25 subb $7, %dil
# CHECK-NEXT: 2 5 0.50 * * subb $7, (%rax)
@@ -1332,7 +1373,7 @@ xorq (%rax), %rdi
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
-# CHECK-NEXT: 147.50 147.50 131.00 165.00 147.00 131.00 392.00 - - - - 34.00
+# CHECK-NEXT: 147.50 147.50 131.50 165.50 147.50 131.50 392.00 - - - - 34.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
@@ -1527,6 +1568,10 @@ xorq (%rax), %rdi
# CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - cmpq %rsi, %rdi
# CHECK-NEXT: 0.50 0.50 0.25 0.25 0.25 0.25 - - - - - - cmpq %rsi, (%rax)
# CHECK-NEXT: 0.50 0.50 0.25 0.25 0.25 0.25 - - - - - - cmpq (%rax), %rdi
+# CHECK-NEXT: - - - - - - - - - - - - cmpsb %es:(%rdi), (%rsi)
+# CHECK-NEXT: - - - - - - - - - - - - cmpsw %es:(%rdi), (%rsi)
+# CHECK-NEXT: - - - - - - - - - - - - cmpsl %es:(%rdi), (%rsi)
+# CHECK-NEXT: - - - - - - - - - - - - cmpsq %es:(%rdi), (%rsi)
# CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - decb %dil
# CHECK-NEXT: 0.50 0.50 0.25 0.25 0.25 0.25 - - - - - - decb (%rax)
# CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - decw %di
@@ -1586,6 +1631,14 @@ xorq (%rax), %rdi
# CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - incq %rdi
# CHECK-NEXT: 0.50 0.50 0.25 0.25 0.25 0.25 - - - - - - incq (%rax)
# CHECK-NEXT: - - - - - - - - - - - - lahf
+# CHECK-NEXT: - - - - - - - - - - - - lodsb (%rsi), %al
+# CHECK-NEXT: - - - - - - - - - - - - lodsw (%rsi), %ax
+# CHECK-NEXT: - - - - - - - - - - - - lodsl (%rsi), %eax
+# CHECK-NEXT: - - - - - - - - - - - - lodsq (%rsi), %rax
+# CHECK-NEXT: - - - - - - - - - - - - movsb (%rsi), %es:(%rdi)
+# CHECK-NEXT: - - - - - - - - - - - - movsw (%rsi), %es:(%rdi)
+# CHECK-NEXT: - - - - - - - - - - - - movsl (%rsi), %es:(%rdi)
+# CHECK-NEXT: - - - - - - - - - - - - movsq (%rsi), %es:(%rdi)
# CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - movsbw %al, %di
# CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - movzbw %al, %di
# CHECK-NEXT: 0.50 0.50 0.25 0.25 0.25 0.25 - - - - - - movsbw (%rax), %di
@@ -1892,6 +1945,12 @@ xorq (%rax), %rdi
# CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - shrdq $7, %rsi, %rdi
# CHECK-NEXT: 0.50 0.50 0.25 0.25 0.25 0.25 - - - - - - shldq $7, %rsi, (%rax)
# CHECK-NEXT: 0.50 0.50 0.25 0.25 0.25 0.25 - - - - - - shrdq $7, %rsi, (%rax)
+# CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - stc
+# CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - std
+# CHECK-NEXT: - - - - - - - - - - - - stosb %al, %es:(%rdi)
+# CHECK-NEXT: - - - - - - - - - - - - stosw %ax, %es:(%rdi)
+# CHECK-NEXT: - - - - - - - - - - - - stosl %eax, %es:(%rdi)
+# CHECK-NEXT: - - - - - - - - - - - - stosq %rax, %es:(%rdi)
# CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - subb $7, %al
# CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - subb $7, %dil
# CHECK-NEXT: 0.50 0.50 0.25 0.25 0.25 0.25 - - - - - - subb $7, (%rax)
diff --git a/test/tools/llvm-objcopy/strip-debug.test b/test/tools/llvm-objcopy/strip-debug.test
index c8f90d9ca19e..6c833f301c7f 100644
--- a/test/tools/llvm-objcopy/strip-debug.test
+++ b/test/tools/llvm-objcopy/strip-debug.test
@@ -67,7 +67,7 @@
# RUN: llvm-objcopy --strip-debug %t.thin.a %t2.thin.a
# RUN: cat %t.thin.a | FileCheck %s --check-prefix=VERIFY-THIN-ARCHIVE
# RUN: cat %t2.thin.a | FileCheck %s --check-prefix=VERIFY-THIN-ARCHIVE
-
+
# VERIFY-THIN-ARCHIVE: !<thin>
# Verify that the member of a thin archive was properly modified.
@@ -94,6 +94,12 @@ Sections:
- Name: .debugfoo
Type: SHT_PROGBITS
Content: "00000000"
+ - Name: .zdebugfoo
+ Type: SHT_PROGBITS
+ Content: "00000000"
+ - Name: .gdb_index
+ Type: SHT_PROGBITS
+ Content: "00000000"
- Name: .text
Type: SHT_PROGBITS
Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
diff --git a/tools/dsymutil/DwarfLinker.cpp b/tools/dsymutil/DwarfLinker.cpp
index e685a59277ba..430e8e063e3c 100644
--- a/tools/dsymutil/DwarfLinker.cpp
+++ b/tools/dsymutil/DwarfLinker.cpp
@@ -756,6 +756,50 @@ void DwarfLinker::keepDIEAndDependencies(
}
}
+namespace {
+/// This class represents an item in the work list. In addition to its obvious
+/// purpose of representing the state associated with a particular run of the
+/// work loop, it also serves as a marker to indicate that we should run the
+/// "continuation" code.
+///
+/// Originally, the latter was a lambda which allowed arbitrary code to be run.
+/// Because we always need to run the exact same code, it made more sense to
+/// use a boolean and repurpose the already existing DIE field.
+struct WorklistItem {
+ DWARFDie Die;
+ unsigned Flags;
+ bool IsContinuation;
+ CompileUnit::DIEInfo *ChildInfo = nullptr;
+
+ /// Construct a classic worklist item.
+ WorklistItem(DWARFDie Die, unsigned Flags)
+ : Die(Die), Flags(Flags), IsContinuation(false){};
+
+ /// Creates a continuation marker.
+ WorklistItem(DWARFDie Die) : Die(Die), IsContinuation(true){};
+};
+} // namespace
+
+// Helper that updates the completeness of the current DIE. It depends on the
+// fact that the incompleteness of its children is already computed.
+static void updateIncompleteness(const DWARFDie &Die,
+ CompileUnit::DIEInfo &ChildInfo,
+ CompileUnit &CU) {
+ // Only propagate incomplete members.
+ if (Die.getTag() != dwarf::DW_TAG_structure_type &&
+ Die.getTag() != dwarf::DW_TAG_class_type)
+ return;
+
+ unsigned Idx = CU.getOrigUnit().getDIEIndex(Die);
+ CompileUnit::DIEInfo &MyInfo = CU.getInfo(Idx);
+
+ if (MyInfo.Incomplete)
+ return;
+
+ if (ChildInfo.Incomplete || ChildInfo.Prune)
+ MyInfo.Incomplete = true;
+}
+
/// Recursively walk the \p DIE tree and look for DIEs to
/// keep. Store that information in \p CU's DIEInfo.
///
@@ -770,58 +814,80 @@ void DwarfLinker::keepDIEAndDependencies(
/// traversal we are currently doing.
///
/// The return value indicates whether the DIE is incomplete.
-bool DwarfLinker::lookForDIEsToKeep(RelocationManager &RelocMgr,
+void DwarfLinker::lookForDIEsToKeep(RelocationManager &RelocMgr,
RangesTy &Ranges, const UnitListTy &Units,
const DWARFDie &Die,
const DebugMapObject &DMO, CompileUnit &CU,
unsigned Flags) {
- unsigned Idx = CU.getOrigUnit().getDIEIndex(Die);
- CompileUnit::DIEInfo &MyInfo = CU.getInfo(Idx);
- bool AlreadyKept = MyInfo.Keep;
- if (MyInfo.Prune)
- return true;
+ // LIFO work list.
+ SmallVector<WorklistItem, 4> Worklist;
+ Worklist.emplace_back(Die, Flags);
+
+ while (!Worklist.empty()) {
+ WorklistItem Current = Worklist.back();
+ Worklist.pop_back();
+
+ if (Current.IsContinuation) {
+ updateIncompleteness(Current.Die, *Current.ChildInfo, CU);
+ continue;
+ }
+
+ unsigned Idx = CU.getOrigUnit().getDIEIndex(Current.Die);
+ CompileUnit::DIEInfo &MyInfo = CU.getInfo(Idx);
+
+ // At this point we are guaranteed to have a continuation marker before us
+ // in the worklist, except for the last DIE.
+ if (!Worklist.empty())
+ Worklist.back().ChildInfo = &MyInfo;
+
+ if (MyInfo.Prune)
+ continue;
+
+ // If the Keep flag is set, we are marking a required DIE's dependencies.
+ // If our target is already marked as kept, we're all set.
+ bool AlreadyKept = MyInfo.Keep;
+ if ((Current.Flags & TF_DependencyWalk) && AlreadyKept)
+ continue;
- // If the Keep flag is set, we are marking a required DIE's
- // dependencies. If our target is already marked as kept, we're all
- // set.
- if ((Flags & TF_DependencyWalk) && AlreadyKept)
- return MyInfo.Incomplete;
-
- // We must not call shouldKeepDIE while called from keepDIEAndDependencies,
- // because it would screw up the relocation finding logic.
- if (!(Flags & TF_DependencyWalk))
- Flags = shouldKeepDIE(RelocMgr, Ranges, Die, DMO, CU, MyInfo, Flags);
-
- // If it is a newly kept DIE mark it as well as all its dependencies as kept.
- if (!AlreadyKept && (Flags & TF_Keep)) {
- bool UseOdr = (Flags & TF_DependencyWalk) ? (Flags & TF_ODR) : CU.hasODR();
- keepDIEAndDependencies(RelocMgr, Ranges, Units, Die, MyInfo, DMO, CU,
- UseOdr);
- }
- // The TF_ParentWalk flag tells us that we are currently walking up
- // the parent chain of a required DIE, and we don't want to mark all
- // the children of the parents as kept (consider for example a
- // DW_TAG_namespace node in the parent chain). There are however a
- // set of DIE types for which we want to ignore that directive and still
- // walk their children.
- if (dieNeedsChildrenToBeMeaningful(Die.getTag()))
- Flags &= ~TF_ParentWalk;
-
- if (!Die.hasChildren() || (Flags & TF_ParentWalk))
- return MyInfo.Incomplete;
-
- bool Incomplete = false;
- for (auto Child : Die.children()) {
- Incomplete |=
- lookForDIEsToKeep(RelocMgr, Ranges, Units, Child, DMO, CU, Flags);
-
- // If any of the members are incomplete we propagate the incompleteness.
- if (!MyInfo.Incomplete && Incomplete &&
- (Die.getTag() == dwarf::DW_TAG_structure_type ||
- Die.getTag() == dwarf::DW_TAG_class_type))
- MyInfo.Incomplete = true;
- }
- return MyInfo.Incomplete;
+ // We must not call shouldKeepDIE while called from keepDIEAndDependencies,
+ // because it would screw up the relocation finding logic.
+ if (!(Current.Flags & TF_DependencyWalk))
+ Current.Flags = shouldKeepDIE(RelocMgr, Ranges, Current.Die, DMO, CU,
+ MyInfo, Current.Flags);
+
+ // If it is a newly kept DIE mark it as well as all its dependencies as
+ // kept.
+ if (!AlreadyKept && (Current.Flags & TF_Keep)) {
+ bool UseOdr = (Current.Flags & TF_DependencyWalk)
+ ? (Current.Flags & TF_ODR)
+ : CU.hasODR();
+ keepDIEAndDependencies(RelocMgr, Ranges, Units, Current.Die, MyInfo, DMO,
+ CU, UseOdr);
+ }
+
+ // The TF_ParentWalk flag tells us that we are currently walking up
+ // the parent chain of a required DIE, and we don't want to mark all
+ // the children of the parents as kept (consider for example a
+ // DW_TAG_namespace node in the parent chain). There are however a
+ // set of DIE types for which we want to ignore that directive and still
+ // walk their children.
+ if (dieNeedsChildrenToBeMeaningful(Current.Die.getTag()))
+ Current.Flags &= ~TF_ParentWalk;
+
+ if (!Current.Die.hasChildren() || (Current.Flags & TF_ParentWalk))
+ continue;
+
+ // Add children in reverse order to the worklist to effectively process
+ // them in order.
+ for (auto Child : reverse(Current.Die.children())) {
+      // Add a continuation marker before every child to calculate incompleteness
+ // after the last child is processed. We can't store this information in
+ // the same item because we might have to process other continuations
+ // first.
+ Worklist.emplace_back(Current.Die);
+ Worklist.emplace_back(Child, Current.Flags);
+ }
+ }
}
/// Assign an abbreviation number to \p Abbrev.
diff --git a/tools/dsymutil/DwarfLinker.h b/tools/dsymutil/DwarfLinker.h
index 4097acc568ad..b1a950ff0fa2 100644
--- a/tools/dsymutil/DwarfLinker.h
+++ b/tools/dsymutil/DwarfLinker.h
@@ -183,7 +183,7 @@ private:
/// keep. Store that information in \p CU's DIEInfo.
///
/// The return value indicates whether the DIE is incomplete.
- bool lookForDIEsToKeep(RelocationManager &RelocMgr, RangesTy &Ranges,
+ void lookForDIEsToKeep(RelocationManager &RelocMgr, RangesTy &Ranges,
const UnitListTy &Units, const DWARFDie &DIE,
const DebugMapObject &DMO, CompileUnit &CU,
unsigned Flags);
diff --git a/tools/dsymutil/MachOUtils.cpp b/tools/dsymutil/MachOUtils.cpp
index eda530b810c0..fc498cc49c19 100644
--- a/tools/dsymutil/MachOUtils.cpp
+++ b/tools/dsymutil/MachOUtils.cpp
@@ -27,6 +27,27 @@ namespace llvm {
namespace dsymutil {
namespace MachOUtils {
+llvm::Error ArchAndFile::createTempFile() {
+ llvm::SmallString<128> TmpModel;
+ llvm::sys::path::system_temp_directory(true, TmpModel);
+ llvm::sys::path::append(TmpModel, "dsym.tmp%%%%%.dwarf");
+ Expected<sys::fs::TempFile> T = sys::fs::TempFile::create(TmpModel);
+
+ if (!T)
+ return T.takeError();
+
+ File = llvm::Optional<sys::fs::TempFile>(std::move(*T));
+ return Error::success();
+}
+
+llvm::StringRef ArchAndFile::path() const { return File->TmpName; }
+
+ArchAndFile::~ArchAndFile() {
+ if (File)
+ if (auto E = File->discard())
+ llvm::consumeError(std::move(E));
+}
+
std::string getArchName(StringRef Arch) {
if (Arch.startswith("thumb"))
return (llvm::Twine("arm") + Arch.drop_front(5)).str();
@@ -53,21 +74,16 @@ static bool runLipo(StringRef SDKPath, SmallVectorImpl<StringRef> &Args) {
return true;
}
-bool generateUniversalBinary(SmallVectorImpl<ArchAndFilename> &ArchFiles,
+bool generateUniversalBinary(SmallVectorImpl<ArchAndFile> &ArchFiles,
StringRef OutputFileName,
const LinkOptions &Options, StringRef SDKPath) {
- // No need to merge one file into a universal fat binary. First, try
- // to move it (rename) to the final location. If that fails because
- // of cross-device link issues then copy and delete.
+ // No need to merge one file into a universal fat binary.
if (ArchFiles.size() == 1) {
- StringRef From(ArchFiles.front().Path);
- if (sys::fs::rename(From, OutputFileName)) {
- if (std::error_code EC = sys::fs::copy_file(From, OutputFileName)) {
- WithColor::error() << "while copying " << From << " to "
- << OutputFileName << ": " << EC.message() << "\n";
- return false;
- }
- sys::fs::remove(From);
+ if (auto E = ArchFiles.front().File->keep(OutputFileName)) {
+ WithColor::error() << "while keeping " << ArchFiles.front().path()
+ << " as " << OutputFileName << ": "
+ << toString(std::move(E)) << "\n";
+ return false;
}
return true;
}
@@ -77,7 +93,7 @@ bool generateUniversalBinary(SmallVectorImpl<ArchAndFilename> &ArchFiles,
Args.push_back("-create");
for (auto &Thin : ArchFiles)
- Args.push_back(Thin.Path);
+ Args.push_back(Thin.path());
// Align segments to match dsymutil-classic alignment
for (auto &Thin : ArchFiles) {
diff --git a/tools/dsymutil/MachOUtils.h b/tools/dsymutil/MachOUtils.h
index 0db8ed1a1e31..a8be89e906b5 100644
--- a/tools/dsymutil/MachOUtils.h
+++ b/tools/dsymutil/MachOUtils.h
@@ -10,6 +10,7 @@
#define LLVM_TOOLS_DSYMUTIL_MACHOUTILS_H
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/FileSystem.h"
#include <string>
namespace llvm {
@@ -20,12 +21,20 @@ class DebugMap;
struct LinkOptions;
namespace MachOUtils {
-struct ArchAndFilename {
- std::string Arch, Path;
- ArchAndFilename(StringRef Arch, StringRef Path) : Arch(Arch), Path(Path) {}
+struct ArchAndFile {
+ std::string Arch;
+ // Optional because TempFile has no default constructor.
+ Optional<llvm::sys::fs::TempFile> File;
+
+ llvm::Error createTempFile();
+ llvm::StringRef path() const;
+
+ ArchAndFile(StringRef Arch) : Arch(Arch) {}
+ ArchAndFile(ArchAndFile &&A) = default;
+ ~ArchAndFile();
};
-bool generateUniversalBinary(SmallVectorImpl<ArchAndFilename> &ArchFiles,
+bool generateUniversalBinary(SmallVectorImpl<ArchAndFile> &ArchFiles,
StringRef OutputFileName, const LinkOptions &,
StringRef SDKPath);
diff --git a/tools/dsymutil/dsymutil.cpp b/tools/dsymutil/dsymutil.cpp
index fc447b30be98..c0e6d505941f 100644
--- a/tools/dsymutil/dsymutil.cpp
+++ b/tools/dsymutil/dsymutil.cpp
@@ -313,13 +313,6 @@ static std::string getOutputFileName(llvm::StringRef InputFile) {
return BundleDir.str();
}
-static Expected<sys::fs::TempFile> createTempFile() {
- llvm::SmallString<128> TmpModel;
- llvm::sys::path::system_temp_directory(true, TmpModel);
- llvm::sys::path::append(TmpModel, "dsym.tmp%%%%%.dwarf");
- return sys::fs::TempFile::create(TmpModel);
-}
-
/// Parses the command line options into the LinkOptions struct and performs
/// some sanity checking. Returns an error in case the latter fails.
static Expected<LinkOptions> getOptions() {
@@ -400,18 +393,6 @@ static Expected<std::vector<std::string>> getInputs(bool DsymAsInput) {
return Inputs;
}
-namespace {
-struct TempFileVector {
- std::vector<sys::fs::TempFile> Files;
- ~TempFileVector() {
- for (sys::fs::TempFile &Tmp : Files) {
- if (Error E = Tmp.discard())
- errs() << toString(std::move(E));
- }
- }
-};
-} // namespace
-
int main(int argc, char **argv) {
InitLLVM X(argc, argv);
@@ -523,8 +504,7 @@ int main(int argc, char **argv) {
!DumpDebugMap && (OutputFileOpt != "-") &&
(DebugMapPtrsOrErr->size() != 1 || OptionsOrErr->Update);
- llvm::SmallVector<MachOUtils::ArchAndFilename, 4> TempFiles;
- TempFileVector TempFileStore;
+ llvm::SmallVector<MachOUtils::ArchAndFile, 4> TempFiles;
std::atomic_char AllOK(1);
for (auto &Map : *DebugMapPtrsOrErr) {
if (Verbose || DumpDebugMap)
@@ -543,16 +523,18 @@ int main(int argc, char **argv) {
std::shared_ptr<raw_fd_ostream> OS;
std::string OutputFile = getOutputFileName(InputFile);
if (NeedsTempFiles) {
- Expected<sys::fs::TempFile> T = createTempFile();
- if (!T) {
- errs() << toString(T.takeError());
+ TempFiles.emplace_back(Map->getTriple().getArchName().str());
+
+ auto E = TempFiles.back().createTempFile();
+ if (E) {
+ errs() << toString(std::move(E));
return 1;
}
- OS = std::make_shared<raw_fd_ostream>(T->FD, /*shouldClose*/ false);
- OutputFile = T->TmpName;
- TempFileStore.Files.push_back(std::move(*T));
- TempFiles.emplace_back(Map->getTriple().getArchName().str(),
- OutputFile);
+
+ auto &TempFile = *(TempFiles.back().File);
+ OS = std::make_shared<raw_fd_ostream>(TempFile.FD,
+ /*shouldClose*/ false);
+ OutputFile = TempFile.TmpName;
} else {
std::error_code EC;
OS = std::make_shared<raw_fd_ostream>(NoOutput ? "-" : OutputFile, EC,
diff --git a/tools/llvm-mca/DispatchStage.cpp b/tools/llvm-mca/DispatchStage.cpp
index be6f1f89be5c..1f508886c298 100644
--- a/tools/llvm-mca/DispatchStage.cpp
+++ b/tools/llvm-mca/DispatchStage.cpp
@@ -107,17 +107,21 @@ void DispatchStage::dispatch(InstRef IR) {
// instruction. A dependency-breaking instruction is a zero-latency
// instruction that doesn't consume hardware resources.
// An example of dependency-breaking instruction on X86 is a zero-idiom XOR.
- if (!Desc.isZeroLatency())
- for (std::unique_ptr<ReadState> &RS : IS.getUses())
+ bool IsDependencyBreaking = IS.isDependencyBreaking();
+ for (std::unique_ptr<ReadState> &RS : IS.getUses())
+ if (RS->isImplicitRead() || !IsDependencyBreaking)
updateRAWDependencies(*RS, STI);
// By default, a dependency-breaking zero-latency instruction is expected to
// be optimized at register renaming stage. That means, no physical register
// is allocated to the instruction.
+ bool ShouldAllocateRegisters =
+ !(Desc.isZeroLatency() && IsDependencyBreaking);
SmallVector<unsigned, 4> RegisterFiles(PRF.getNumRegisterFiles());
- for (std::unique_ptr<WriteState> &WS : IS.getDefs())
+ for (std::unique_ptr<WriteState> &WS : IS.getDefs()) {
PRF.addRegisterWrite(WriteRef(IR.first, WS.get()), RegisterFiles,
- !Desc.isZeroLatency());
+ ShouldAllocateRegisters);
+ }
// Reserve slots in the RCU, and notify the instruction that it has been
// dispatched to the schedulers for execution.
diff --git a/tools/llvm-mca/DispatchStage.h b/tools/llvm-mca/DispatchStage.h
index f21789a29c50..4262a241c08c 100644
--- a/tools/llvm-mca/DispatchStage.h
+++ b/tools/llvm-mca/DispatchStage.h
@@ -38,7 +38,7 @@ class Scheduler;
// the following conditions are met:
// 1) There are enough entries in the reorder buffer (see class
// RetireControlUnit) to write the opcodes associated with the instruction.
-// 2) There are enough temporaries to rename output register operands.
+// 2) There are enough physical registers to rename output register operands.
// 3) There are enough entries available in the used buffered resource(s).
//
// The number of micro opcodes that can be dispatched in one cycle is limited by
diff --git a/tools/llvm-mca/InstrBuilder.cpp b/tools/llvm-mca/InstrBuilder.cpp
index dbd457196f9d..053b7b4e8175 100644
--- a/tools/llvm-mca/InstrBuilder.cpp
+++ b/tools/llvm-mca/InstrBuilder.cpp
@@ -443,6 +443,10 @@ InstrBuilder::createInstruction(const MCInst &MCI) {
// register writes implicitly clear the upper portion of a super-register.
MCIA.clearsSuperRegisters(MRI, MCI, WriteMask);
+ // Check if this is a dependency breaking instruction.
+ if (MCIA.isDependencyBreaking(STI, MCI))
+ NewIS->setDependencyBreaking();
+
// Initialize writes.
unsigned WriteIndex = 0;
for (const WriteDescriptor &WD : D.Writes) {
diff --git a/tools/llvm-mca/Instruction.h b/tools/llvm-mca/Instruction.h
index ddf5c3a5e33f..3b2f90528f2e 100644
--- a/tools/llvm-mca/Instruction.h
+++ b/tools/llvm-mca/Instruction.h
@@ -170,8 +170,6 @@ class ReadState {
bool IsReady;
public:
- bool isReady() const { return IsReady; }
-
ReadState(const ReadDescriptor &Desc, unsigned RegID)
: RD(Desc), RegisterID(RegID), DependentWrites(0),
CyclesLeft(UNKNOWN_CYCLES), TotalCycles(0), IsReady(true) {}
@@ -182,6 +180,9 @@ public:
unsigned getSchedClass() const { return RD.SchedClassID; }
unsigned getRegisterID() const { return RegisterID; }
+ bool isReady() const { return IsReady; }
+ bool isImplicitRead() const { return RD.isImplicitRead(); }
+
void cycleEvent();
void writeStartEvent(unsigned Cycles);
void setDependentWrites(unsigned Writes) {
@@ -299,6 +300,8 @@ class Instruction {
// Retire Unit token ID for this instruction.
unsigned RCUTokenID;
+ bool IsDepBreaking;
+
using UniqueDef = std::unique_ptr<WriteState>;
using UniqueUse = std::unique_ptr<ReadState>;
using VecDefs = std::vector<UniqueDef>;
@@ -314,7 +317,8 @@ class Instruction {
public:
Instruction(const InstrDesc &D)
- : Desc(D), Stage(IS_INVALID), CyclesLeft(UNKNOWN_CYCLES) {}
+ : Desc(D), Stage(IS_INVALID), CyclesLeft(UNKNOWN_CYCLES), RCUTokenID(0),
+ IsDepBreaking(false) {}
Instruction(const Instruction &Other) = delete;
Instruction &operator=(const Instruction &Other) = delete;
@@ -326,6 +330,9 @@ public:
unsigned getRCUTokenID() const { return RCUTokenID; }
int getCyclesLeft() const { return CyclesLeft; }
+ bool isDependencyBreaking() const { return IsDepBreaking; }
+ void setDependencyBreaking() { IsDepBreaking = true; }
+
unsigned getNumUsers() const {
unsigned NumUsers = 0;
for (const UniqueDef &Def : Defs)
diff --git a/tools/llvm-mca/README.txt b/tools/llvm-mca/README.txt
deleted file mode 100644
index 8b1670db0fca..000000000000
--- a/tools/llvm-mca/README.txt
+++ /dev/null
@@ -1,865 +0,0 @@
-llvm-mca - LLVM Machine Code Analyzer
--------------------------------------
-
-llvm-mca is a performance analysis tool that uses information which is already
-available in LLVM (e.g., scheduling models) to statically measure the
-performance of machine code in a specific cpu.
-
-Performance is measured in terms of throughput as well as processor resource
-consumption. The tool currently works for processors with an out-of-order
-backend, for which there is a scheduling model available in LLVM.
-
-The main goal of this tool is not just to predict the performance of the code
-when run on the target, but also help with diagnosing potential performance
-issues.
-
-Given an assembly code sequence, llvm-mca estimates the IPC (instructions Per
-cycle), as well as hardware resources pressure. The analysis and reporting style
-were inspired by the IACA tool from Intel.
-
-The presence of long data dependency chains, as well as poor usage of hardware
-resources may lead to bottlenecks in the backend. The tool is able to generate
-a detailed report which should help with identifying and analyzing sources of
-bottlenecks.
-
-Scheduling models are mostly used to compute instruction latencies, to obtain
-read-advance information, and understand how processor resources are used by
-instructions. By design, the quality of the performance analysis conducted by
-the tool is inevitably affected by the quality of the target scheduling models.
-However, scheduling models intentionally do not describe all processor details,
-since the goal is just to enable the scheduling of machine instructions during
-compilation. That means, there are processor details which are not important for
-the purpose of scheduling instructions (and therefore not described by the
-scheduling model), but are very important for this tool.
-
-A few examples of details that are missing in scheduling models are:
- - Actual dispatch width (it often differs from the issue width).
- - Number of read/write ports in the register file(s).
- - Length of the load/store queue in the LSUnit.
-
-It is also very difficult to find a "good" abstract model to describe the
-behavior of out-of-order processors. So, we have to keep in mind that all of
-these aspects are going to affect the quality of the static analysis performed
-by the tool.
-
-An extensive list of known limitations is reported in one of the last sections
-of this document. There is also a section related to design problems which must
-be addressed (hopefully with the help of the community). At the moment, the
-tool has been mostly tested for x86 targets, but there are still several
-limitations, some of which could be overcome by integrating extra information
-into the scheduling models.
-
-How the tool works
-------------------
-
-The tool takes assembly code as input. Assembly code is parsed into a sequence
-of MCInst with the help of the existing LLVM target assembly parsers. The parsed
-sequence of MCInst is then analyzed by a 'Pipeline' module to generate a
-performance report.
-
-The Pipeline module internally emulates the execution of the machine code
-sequence in a loop of iterations (which by default is 100). At the end of this
-process, the pipeline collects a number of statistics which are then printed out
-in the form of a report.
-
-Here is an example of performance report generated by the tool for a dot-product
-of two packed float vectors of four elements. The analysis is conducted for
-target x86, cpu btver2:
-
-///////////////////
-
-Iterations: 300
-Instructions: 900
-Total Cycles: 610
-Dispatch Width: 2
-IPC: 1.48
-
-
-Resources:
-[0] - JALU0
-[1] - JALU1
-[2] - JDiv
-[3] - JFPM
-[4] - JFPU0
-[5] - JFPU1
-[6] - JLAGU
-[7] - JSAGU
-[8] - JSTC
-[9] - JVIMUL
-
-
-Resource pressure per iteration:
-[0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
- - - - - 2.00 1.00 - - - -
-
-Resource pressure by instruction:
-[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
- - - - - - 1.00 - - - - vmulps %xmm0, %xmm1, %xmm2
- - - - - 1.00 - - - - - vhaddps %xmm2, %xmm2, %xmm3
- - - - - 1.00 - - - - - vhaddps %xmm3, %xmm3, %xmm4
-
-
-Instruction Info:
-[1]: #uOps
-[2]: Latency
-[3]: RThroughput
-[4]: MayLoad
-[5]: MayStore
-[6]: HasSideEffects
-
-[1] [2] [3] [4] [5] [6] Instructions:
- 1 2 1.00 vmulps %xmm0, %xmm1, %xmm2
- 1 3 1.00 vhaddps %xmm2, %xmm2, %xmm3
- 1 3 1.00 vhaddps %xmm3, %xmm3, %xmm4
-
-///////////////////
-
-According to this report, the dot-product kernel has been executed 300 times,
-for a total of 900 instructions dynamically executed.
-
-The report is structured in three main sections. A first section collects a few
-performance numbers; the goal of this section is to give a very quick overview
-of the performance throughput. In this example, the two important performance
-indicators are a) the predicted total number of cycles, and b) the IPC.
-IPC is probably the most important throughput indicator. A big delta between the
-Dispatch Width and the computed IPC is an indicator of potential performance
-issues.
-
-The second section is the so-called "resource pressure view". This view reports
-the average number of resource cycles consumed every iteration by instructions
-for every processor resource unit available on the target. Information is
-structured in two tables. The first table reports the number of resource cycles
-spent on average every iteration. The second table correlates the resource
-cycles to the machine instruction in the sequence. For example, every iteration
-of the dot-product, instruction 'vmulps' always executes on resource unit [5]
-(JFPU1 - floating point pipeline #1), consuming an average of 1 resource cycle
-per iteration. Note that on Jaguar, vector FP multiply can only be issued to
-pipeline JFPU1, while horizontal FP adds can only be issued to pipeline JFPU0.
-
-The third (and last) section of the report shows the latency and reciprocal
-throughput of every instruction in the sequence. That section also reports extra
-information related to the number of micro opcodes, and opcode properties (i.e.,
-'MayLoad', 'MayStore', and 'UnmodeledSideEffects').
-
-The resource pressure view helps with identifying bottlenecks caused by high
-usage of specific hardware resources. Situations with resource pressure mainly
-concentrated on a few resources should, in general, be avoided. Ideally,
-pressure should be uniformly distributed between multiple resources.
-
-Timeline View
--------------
-
-A detailed report of each instruction's state transitions over time can be
-enabled using the command line flag '-timeline'. This prints an extra section
-in the report which contains the so-called "timeline view". Below is the
-timeline view for the dot-product example from the previous section.
-
-///////////////
-Timeline view:
- 012345
-Index 0123456789
-
-[0,0] DeeER. . . vmulps %xmm0, %xmm1, %xmm2
-[0,1] D==eeeER . . vhaddps %xmm2, %xmm2, %xmm3
-[0,2] .D====eeeER . vhaddps %xmm3, %xmm3, %xmm4
-
-[1,0] .DeeE-----R . vmulps %xmm0, %xmm1, %xmm2
-[1,1] . D=eeeE---R . vhaddps %xmm2, %xmm2, %xmm3
-[1,2] . D====eeeER . vhaddps %xmm3, %xmm3, %xmm4
-
-[2,0] . DeeE-----R . vmulps %xmm0, %xmm1, %xmm2
-[2,1] . D====eeeER . vhaddps %xmm2, %xmm2, %xmm3
-[2,2] . D======eeeER vhaddps %xmm3, %xmm3, %xmm4
-
-
-Average Wait times (based on the timeline view):
-[0]: Executions
-[1]: Average time spent waiting in a scheduler's queue
-[2]: Average time spent waiting in a scheduler's queue while ready
-[3]: Average time elapsed from WB until retire stage
-
- [0] [1] [2] [3]
-0. 3 1.0 1.0 3.3 vmulps %xmm0, %xmm1, %xmm2
-1. 3 3.3 0.7 1.0 vhaddps %xmm2, %xmm2, %xmm3
-2. 3 5.7 0.0 0.0 vhaddps %xmm3, %xmm3, %xmm4
-///////////////
-
-The timeline view is very interesting because it shows how instructions changed
-in state during execution. It also gives an idea of how the tool "sees"
-instructions executed on the target.
-
-The timeline view is structured in two tables. The first table shows how
-instructions change in state over time (measured in cycles); the second table
-(named "Average Wait times") reports useful timing statistics which should help
-diagnose performance bottlenecks caused by long data dependencies and
-sub-optimal usage of hardware resources.
-
-An instruction in the timeline view is identified by a pair of indices, where
-the 'first' index identifies an iteration, and the 'second' index is the actual
-instruction index (i.e., where it appears in the code sequence).
-
-Excluding the first and last column, the remaining columns are in cycles.
-Cycles are numbered sequentially starting from 0. The following characters are
-used to describe the state of an instruction:
-
- D : Instruction dispatched.
- e : Instruction executing.
- E : Instruction executed.
- R : Instruction retired.
- = : Instruction already dispatched, waiting to be executed.
- - : Instruction executed, waiting to be retired.
-
-Based on the timeline view from the example, we know that:
- - Instruction [1, 0] was dispatched at cycle 1.
- - Instruction [1, 0] started executing at cycle 2.
- - Instruction [1, 0] reached the write back stage at cycle 4.
- - Instruction [1, 0] was retired at cycle 10.
-
-Instruction [1, 0] (i.e., the vmulps from iteration #1) doesn't have to wait in
-the scheduler's queue for the operands to become available. By the time the
-vmulps is dispatched, operands are already available, and pipeline JFPU1 is
-ready to serve another instruction. So the instruction can be immediately
-issued on the JFPU1 pipeline. That is demonstrated by the fact that the
-instruction only spent 1cy in the scheduler's queue.
-
-There is a gap of 5 cycles between the write-back stage and the retire event.
-That is because instructions must retire in program order, so [1,0] has to wait
-for [0, 2] to be retired first (i.e., it has to wait until cycle 10).
-
-In the dot-product example, all instructions are in a RAW (Read After Write)
-dependency chain. Register %xmm2 written by the vmulps is immediately used by
-the first vhaddps, and register %xmm3 written by the first vhaddps is used by
-the second vhaddps. Long data dependencies negatively affect the ILP
-(Instruction Level Parallelism).
-
-In the dot-product example, there are anti-dependencies introduced by
-instructions from different iterations. However, those dependencies can be
-removed at register renaming stage (at the cost of allocating register aliases,
-and therefore consuming temporary registers).
-
-Table "Average Wait times" helps diagnose performance issues that are caused by
-the presence of long latency instructions and potentially long data dependencies
-which may limit the ILP. Note that the tool by default assumes at least 1cy
-between the dispatch event and the issue event.
-
-When the performance is limited by data dependencies and/or long latency
-instructions, the number of cycles spent while in the "ready" state is expected
-to be very small when compared with the total number of cycles spent in the
-scheduler's queue. So the difference between the two counters is a good
-indicator of how big of an impact data dependencies had on the execution of
-instructions. When performance is mostly limited by the lack of hardware
-resources, the delta between the two counters is small. However, the number of
-cycles spent in the queue tends to be bigger (i.e., more than 1-3cy) especially
-when compared with other low latency instructions.
-
-Extra statistics to further diagnose performance issues.
---------------------------------------------------------
-
-Flag '-verbose' enables extra statistics and performance counters for the
-dispatch logic, the reorder buffer, the retire control unit and the register
-file.
-
-Below is an example of verbose output generated by the tool for the dot-product
-example discussed in the previous sections.
-
-///////////////////
-Iterations: 300
-Instructions: 900
-Total Cycles: 610
-Dispatch Width: 2
-IPC: 1.48
-
-
-Dynamic Dispatch Stall Cycles:
-RAT - Register unavailable: 0
-RCU - Retire tokens unavailable: 0
-SCHEDQ - Scheduler full: 272
-LQ - Load queue full: 0
-SQ - Store queue full: 0
-GROUP - Static restrictions on the dispatch group: 0
-
-
-Register Alias Table:
-Total number of mappings created: 900
-Max number of mappings used: 35
-
-
-Dispatch Logic - number of cycles where we saw N instructions dispatched:
-[# dispatched], [# cycles]
- 0, 24 (3.9%)
- 1, 272 (44.6%)
- 2, 314 (51.5%)
-
-
-Schedulers - number of cycles where we saw N instructions issued:
-[# issued], [# cycles]
- 0, 7 (1.1%)
- 1, 306 (50.2%)
- 2, 297 (48.7%)
-
-
-Retire Control Unit - number of cycles where we saw N instructions retired:
-[# retired], [# cycles]
- 0, 109 (17.9%)
- 1, 102 (16.7%)
- 2, 399 (65.4%)
-
-
-Scheduler's queue usage:
-JALU01, 0/20
-JFPU01, 18/18
-JLSAGU, 0/12
-///////////////////
-
-Based on the verbose report, the pipeline was only able to dispatch two
-instructions 51.5% of the time. The dispatch group was limited to one
-instruction 44.6% of the cycles, which corresponds to 272 cycles.
-
-If we look at section "Dynamic Dispatch Stall Cycles", we can see how counter
-SCHEDQ reports 272 cycles. Counter SCHEDQ is incremented every time the
-dispatch logic is unable to dispatch a full group of two instructions because
-the scheduler's queue is full.
-
-Section "Scheduler's queue usage" shows how the maximum number of buffer entries
-(i.e., scheduler's queue entries) used at runtime for resource JFPU01 reached
-its maximum. Note that AMD Jaguar implements three schedulers:
- * JALU01 - A scheduler for ALU instructions
- * JLSAGU - A scheduler for address generation
- * JFPU01 - A scheduler for floating point operations.
-
-The dot-product is a kernel of three floating point instructions (a vector
-multiply followed by two horizontal adds). That explains why only the floating
-point scheduler appears to be used according to section "Scheduler's queue
-usage".
-
-A full scheduler's queue is either caused by data dependency chains, or by a
-sub-optimal usage of hardware resources. Sometimes, resource pressure can be
-mitigated by rewriting the kernel using different instructions that consume
-different scheduler resources. Schedulers with a small queue are less resilient
-to bottlenecks caused by the presence of long data dependencies.
-
-In this example, we can conclude that the IPC is mostly limited by data
-dependencies, and not by resource pressure.
-
-LLVM-MCA instruction flow
--------------------------
-
-This section describes the instruction flow through the out-of-order backend,
-as well as the functional units involved in the process.
-
-An instruction goes through a default sequence of stages:
- - Dispatch (Instruction is dispatched to the schedulers).
- - Issue (Instruction is issued to the processor pipelines).
- - Write Back (Instruction is executed, and results are written back).
- - Retire (Instruction is retired; writes are architecturally committed).
-
-The tool only models the out-of-order portion of a processor. Therefore, the
-instruction fetch and decode stages are not modeled. Performance bottlenecks in
-the frontend are not diagnosed by this tool. The tool assumes that instructions
-have all been decoded and placed in a queue. Also, the tool doesn't know
-anything about branch prediction.
-
-The long term plan is to make the process customizable, so that processors can
-define their own. This is future work.
-
-Instruction Dispatch
---------------------
-
-During the Dispatch stage, instructions are picked in program order from a queue
-of already decoded instructions, and dispatched in groups to the hardware
-schedulers. The dispatch logic is implemented by class DispatchStage in file
-DispatchStage.h.
-
-The size of a dispatch group depends on the availability of hardware resources,
-and it cannot exceed the value of field 'DispatchWidth' in class DispatchStage.
-Note that field DispatchWidth defaults to the value of field 'IssueWidth' from
-the scheduling model.
-
-Users can override the DispatchWidth value with flag "-dispatch=<N>" (where 'N'
-is an unsigned quantity).
-
-An instruction can be dispatched if:
- - The size of the dispatch group is smaller than DispatchWidth
- - There are enough entries in the reorder buffer
- - There are enough temporary registers to do register renaming
- - Schedulers are not full.
-
-Since r329067, scheduling models can now optionally specify which register
-files are available on the processor. Class DispatchStage (see DispatchStage.h)
-would use that information to initialize register file descriptors.
-
-By default, if the model doesn't describe register files, the tool
-(optimistically) assumes a single register file with an unbounded number of
-temporary registers. Users can limit the number of temporary registers that
-are globally available for register renaming using flag
-`-register-file-size=<N>`, where N is the number of temporaries. A value of
-zero for N means 'unbounded'. Knowing how many temporaries are available for
-register renaming, the tool can predict dispatch stalls caused by the lack of
-temporaries.
-
-The number of reorder buffer entries consumed by an instruction depends on the
-number of micro-opcodes it specifies in the target scheduling model (see field
-'NumMicroOpcodes' of TableGen class ProcWriteResources and its derived classes;
-TargetSchedule.td).
-
-The reorder buffer is implemented by class RetireControlUnit (see
-DispatchStage.h). Its goal is to track the progress of instructions that are
-"in-flight", and retire instructions in program order. The number of entries
-in the reorder buffer defaults to the value of field 'MicroOpBufferSize' from
-the target scheduling model.
-
-Instructions that are dispatched to the schedulers consume scheduler buffer
-entries. The tool queries the scheduling model to figure out the set of
-buffered resources consumed by an instruction. Buffered resources are treated
-like "scheduler" resources, and the field 'BufferSize' (from the processor
-resource TableGen definition) defines the size of the scheduler's queue.
-
-Zero latency instructions (for example NOP instructions) don't consume scheduler
-resources. However, those instructions still reserve a number of slots in the
-reorder buffer.
-
-Instruction Issue
------------------
-
-As mentioned in the previous section, each scheduler resource implements a queue
-of instructions. An instruction has to wait in the scheduler's queue until
-input register operands become available. Only at that point does the
-instruction become eligible for execution and may be issued (potentially
-out-of-order) to a pipeline for execution.
-
-Instruction latencies can be computed by the tool with the help of the
-scheduling model; latency values are defined by the scheduling model through
-ProcWriteResources objects.
-
-Class Scheduler (see file Scheduler.h) knows how to emulate multiple processor
-schedulers. A Scheduler is responsible for tracking data dependencies, and for
-dynamically selecting which processor resources are consumed/used by instructions.
-
-Internally, the Scheduler class delegates the management of processor resource
-units and resource groups to the ResourceManager class. ResourceManager is also
-responsible for selecting resource units that are effectively consumed by
-instructions. For example, if an instruction consumes 1cy of a resource group,
-the ResourceManager object selects one of the available units from the group; by
-default, it uses a round-robin selector to guarantee that resource usage is
-uniformly distributed between all units of a group.
-
-Internally, class Scheduler implements three instruction queues:
- - WaitQueue: a queue of instructions whose operands are not ready yet.
- - ReadyQueue: a queue of instructions ready to execute.
- - IssuedQueue: a queue of instructions executing.
-
-Depending on the operands availability, instructions that are dispatched to the
-Scheduler are either placed into the WaitQueue or into the ReadyQueue.
-
-Every cycle, class Scheduler checks if instructions can be moved from the
-WaitQueue to the ReadyQueue, and if instructions from the ReadyQueue can be
-issued to the underlying pipelines. The algorithm prioritizes older
-instructions over younger instructions.
-
-Objects of class ResourceState (see Scheduler.h) describe processor resources.
-There is an instance of class ResourceState for each single processor resource
-specified by the scheduling model. A ResourceState object for a processor
-resource with multiple units dynamically tracks the availability of every single
-unit. For example, the ResourceState of a resource group tracks the
-availability of every resource in that group. Internally, ResourceState
-implements a round-robin selector to dynamically pick the next unit to use from
-the group.
-
-Write-Back and Retire Stage
----------------------------
-
-Issued instructions are moved from the ReadyQueue to the IssuedQueue. There,
-instructions wait until they reach the write-back stage. At that point, they
-get removed from the queue and the retire control unit is notified.
-
-On the event of "instruction executed", the retire control unit flags the
-instruction as "ready to retire".
-
-Instructions are retired in program order; an "instruction retired" event is sent
-to the register file which frees the temporary registers allocated for the
-instruction at register renaming stage.
-
-Load/Store Unit and Memory Consistency Model
---------------------------------------------
-
-The tool attempts to emulate out-of-order execution of memory operations. Class
-LSUnit (see file LSUnit.h) emulates a load/store unit implementing queues for
-speculative execution of loads and stores.
-
-Each load (or store) consumes an entry in the load (or store) queue. The number
-of slots in the load/store queues is unknown by the tool, since there is no
-mention of it in the scheduling model. In practice, users can specify flag
-`-lqueue=N` (viz. `-squeue=N`) to limit the number of entries in the queue to be
-equal to exactly N (an unsigned value). If N is zero, then the tool assumes an
-unbounded queue (this is the default).
-
-LSUnit implements a relaxed consistency model for memory loads and stores. The
-rules are:
-1) A younger load is allowed to pass an older load only if there is no
- intervening store in between the two loads.
-2) A younger store is not allowed to pass an older store.
-3) A younger store is not allowed to pass an older load.
-4) A younger load is allowed to pass an older store provided that the load does
- not alias with the store.
-
-By default, this class conservatively (i.e., pessimistically) assumes that loads
-always may-alias store operations. Essentially, this LSUnit doesn't perform
-any sort of alias analysis to rule out cases where loads and stores don't
-overlap with each other. The downside of this approach however is that younger
-loads are never allowed to pass older stores. To make it possible for a
-younger load to pass an older store, users can use the command line flag
--noalias. Under 'noalias', a younger load is always allowed to pass an older
-store.
-
-Note that, in the case of write-combining memory, rule 2. could be relaxed a bit
-to allow reordering of non-aliasing store operations. That being said, at the
-moment, there is no way to further relax the memory model (flag -noalias is the
-only option). Essentially, there is no option to specify a different memory
-type (for example: write-back, write-combining, write-through; etc.) and
-consequently to weaken or strengthen the memory model.
-
-Other limitations are:
- * LSUnit doesn't know when store-to-load forwarding may occur.
- * LSUnit doesn't know anything about the cache hierarchy and memory types.
- * LSUnit doesn't know how to identify serializing operations and memory fences.
-
-No assumption is made on the store buffer size. As mentioned before, LSUnit
-conservatively assumes a may-alias relation between loads and stores, and it
-doesn't attempt to identify cases where store-to-load forwarding would occur in
-practice.
-
-LSUnit doesn't attempt to predict whether a load or store hits or misses the L1
-cache. It only knows if an instruction "MayLoad" and/or "MayStore". For loads,
-the scheduling model provides an "optimistic" load-to-use latency (which usually
-matches the load-to-use latency for when there is a hit in the L1D).
-
-Class MCInstrDesc in LLVM doesn't know about serializing operations, nor
-memory-barrier like instructions. LSUnit conservatively assumes that an
-instruction which has both 'MayLoad' and 'UnmodeledSideEffects' behaves like a
-"soft" load-barrier. That means, it serializes loads without forcing a flush of
-the load queue. Similarly, instructions flagged with both 'MayStore' and
-'UnmodeledSideEffects' are treated like store barriers. A full memory barrier
-is a 'MayLoad' and 'MayStore' instruction with 'UnmodeledSideEffects'. This is
-inaccurate, but it is the best that we can do at the moment with the current
-information available in LLVM.
-
-A load/store barrier consumes one entry of the load/store queue. A load/store
-barrier enforces ordering of loads/stores. A younger load cannot pass a load
-barrier. Also, a younger store cannot pass a store barrier. A younger load has
-to wait for the memory/load barrier to execute. A load/store barrier is
-"executed" when it becomes the oldest entry in the load/store queue(s). That
-also means, by construction, all the older loads/stores have been executed.
-
-In conclusion the full set of rules is:
- 1. A store may not pass a previous store.
- 2. A load may not pass a previous store unless flag 'NoAlias' is set.
- 3. A load may pass a previous load.
- 4. A store may not pass a previous load (regardless of flag 'NoAlias').
- 5. A load has to wait until an older load barrier is fully executed.
- 6. A store has to wait until an older store barrier is fully executed.
-
-Known limitations
------------------
-Previous sections described cases where the tool is missing information to give
-an accurate report. For example, the first sections of this document explained
-how the lack of knowledge about the processor negatively affects the performance
-analysis. The lack of knowledge is often a consequence of how scheduling models
-are defined; as mentioned before, scheduling models intentionally don't describe
-processors in fine details. That being said, the LLVM machine model can be
-extended to expose more details, as long as they are opt-in for targets.
-
-The accuracy of the performance analysis is also affected by assumptions made by
-the processor model used by the tool.
-
-Most recent Intel and AMD processors implement dedicated LoopBuffer/OpCache in
-the hardware frontend to speedup the throughput in the presence of tight loops.
-The presence of these buffers complicates the decoding logic, and requires
-knowledge on the branch predictor too. Class 'SchedMachineModel' in TableGen
-provides a field named 'LoopMicroOpBufferSize' which is used to describe loop
-buffers. However, the purpose of that field is to enable loop unrolling of
-tight loops; essentially, it affects the cost model used by pass loop-unroll.
-
-At the current state, the tool only describes the out-of-order portion of a
-processor, and consequently doesn't try to predict the frontend throughput. That
-being said, this tool could be definitely extended in future to also account for
-the hardware frontend when doing performance analysis. This would inevitably
-require extra (extensive) processor knowledge related to all the available
-decoding paths in the hardware frontend, as well as branch prediction.
-
-Currently, the tool assumes a zero-latency "perfect" fetch&decode
-stage; the full sequence of decoded instructions is immediately visible to the
-dispatch logic from the start.
-
-The tool doesn't know about simultaneous multithreading. According to the tool,
-processor resources are not statically/dynamically partitioned. Processor
-resources are fully available to the hardware thread executing the
-microbenchmark.
-
-The execution model implemented by this tool assumes that instructions are
-firstly dispatched in groups to hardware schedulers, and then issued to
-pipelines for execution. The model assumes dynamic scheduling of instructions.
-Instructions are placed in a queue and potentially executed out-of-order (based
-on the operand availability). The dispatch stage is definitely distinct from the
-issue stage. This will change in future; as mentioned in the first section, the
-end goal is to let processors customize the process.
-
-This model doesn't correctly describe processors where the dispatch/issue is a
-single stage. This is what happens for example in VLIW processors, where
-instructions are packaged and statically scheduled at compile time; it is up to
-the compiler to predict the latency of instructions and package issue groups
-accordingly. For such targets, there is no dynamic scheduling done by the
-hardware.
-
-Existing classes (DispatchStage, Scheduler, etc.) could be extended/adapted to
-support processors with a single dispatch/issue stage. The execution flow would
-require some changes in the way how existing components (i.e., DispatchStage,
-Scheduler, etc.) interact. This can be a future development.
-
-The following sections describe other known limitations. The goal is not to
-provide an extensive list of limitations; we want to report what we believe are
-the most important limitations, and suggest possible methods to overcome them.
-
-Load/Store barrier instructions and serializing operations
-----------------------------------------------------------
-Section "Load/Store Unit and Memory Consistency Model" already mentioned how
-LLVM doesn't know about serializing operations and memory barriers. Most of it
-boils down to the fact that class MCInstrDesc (intentionally) doesn't expose
-those properties. Instead, both serializing operations and memory barriers
-"have side-effects" according to MCInstrDesc. That is because, at least for
-scheduling purposes, knowing that an instruction has unmodeled side effects is
-often enough to treat the instruction like a compiler scheduling barrier.
-
-A performance analysis tool could use the extra knowledge on barriers and
-serializing operations to generate a more accurate performance report. One way
-to improve this is by reserving a couple of bits in field 'Flags' from class
-MCInstrDesc: one bit for barrier operations, and another bit to mark
-instructions as serializing operations.
-
-Lack of support for instruction itineraries
--------------------------------------------
-The current version of the tool doesn't know how to process instruction
-itineraries. This is probably one of the most important limitations, since it
-affects a few out-of-order processors in LLVM.
-
-As mentioned in section 'Instruction Issue', class Scheduler delegates to an
-instance of class ResourceManager the handling of processor resources.
-ResourceManager is where most of the scheduling logic is implemented.
-
-Adding support for instruction itineraries requires that we teach
-ResourceManager how to handle functional units and instruction stages. This
-development can be a future extension, and it would probably require a few
-changes to the ResourceManager interface.
-
-Instructions that affect control flow are not correctly modeled
----------------------------------------------------------------
-Examples of instructions that affect the control flow are: return, indirect
-branches, calls, etc. The tool doesn't try to predict/evaluate branch targets.
-In particular, the tool doesn't model any sort of branch prediction, nor does it
-attempt to track changes to the program counter. The tool always assumes that
-the input assembly sequence is the body of a microbenchmark (a simple loop
-executed for a number of iterations). The "next" instruction in sequence is
-always the next instruction to dispatch.
-
-Call instructions default to an arbitrary high latency of 100cy. A warning is
-generated if the tool encounters a call instruction in the sequence. Return
-instructions are not evaluated, and therefore control flow is not affected.
-However, the tool still queries the processor scheduling model to obtain latency
-information for instructions that affect the control flow.
-
-Known limitations on X86 processors
------------------------------------
-
-1) Partial register updates versus full register updates.
-
-On x86-64, a 32-bit GPR write fully updates the super-register. Example:
- add %edi %eax ## eax += edi
-
-Here, register %eax aliases the lower half of 64-bit register %rax. On x86-64,
-register %rax is fully updated by the 'add' (the upper half of %rax is zeroed).
-Essentially, it "kills" any previous definition of (the upper half of) register
-%rax.
-
-On the other hand, 8/16 bit register writes only perform a so-called "partial
-register update". Example:
- add %di, %ax ## ax += di
-
-Here, register %eax is only partially updated. To be more specific, the lower
-half of %eax is set, and the upper half is left unchanged. There is also no
-change in the upper 48 bits of register %rax.
-
-To get accurate performance analysis, the tool has to know which instructions
-perform a partial register update, and which instructions fully update the
-destination's super-register.
-
-One way to expose this information is (again) via TableGen. For example, we
-could add a flag in the TableGen instruction class to tag instructions that
-perform partial register updates. Something like this: 'bit
-hasPartialRegisterUpdate = 1'. However, this would force a `let
-hasPartialRegisterUpdate = 0` on several instruction definitions.
-
-Another approach is to have a MCSubtargetInfo hook similar to this:
- virtual bool updatesSuperRegisters(unsigned short opcode) { return false; }
-
-Targets will be able to override this method if needed. Again, this is just an
-idea. But the plan is to have this fixed as a future development.
-
-2) Macro Op fusion.
-
-The tool doesn't know about macro-op fusion. On modern x86 processors, a
-'cmp/test' followed by a 'jmp' is fused into a single macro operation. The
-advantage is that the fused pair only consumes a single slot in the dispatch
-group.
-
-As a future development, the tool should be extended to address macro-fusion.
-Ideally, we could have LLVM generate a table enumerating all the opcode pairs
-that can be fused together. That table could be exposed to the tool via the
-MCSubtargetInfo interface. This is just an idea; there may be better ways to
-implement this.
-
-3) Intel processors: mixing legacy SSE with AVX instructions.
-
-On modern Intel processors with AVX, mixing legacy SSE code with AVX code
-negatively impacts the performance. The tool is not aware of this issue, and
-the performance penalty is not accounted when doing the analysis. This is
-something that we would like to improve in future.
-
-4) Zero-latency register moves and Zero-idioms.
-
-Most modern AMD/Intel processors know how to optimize out register-register
-moves and zero idioms at register renaming stage. The tool doesn't know
-about these patterns, and this may negatively impact the performance analysis.
-
-Known design problems
----------------------
-This section describes two design issues that are currently affecting the tool.
-The long term plan is to "fix" these issues.
-Both limitations would be easily fixed if we teach the tool how to directly
-manipulate MachineInstr objects (instead of MCInst objects).
-
-1) Variant instructions not correctly modeled.
-
-The tool doesn't know how to analyze instructions with a "variant" scheduling
-class descriptor. A variant scheduling class needs to be resolved dynamically.
-The "actual" scheduling class often depends on the subtarget, as well as
-properties of the specific MachineInstr object.
-
-Unfortunately, the tool manipulates MCInst, and it doesn't know anything about
-MachineInstr. As a consequence, the tool cannot use the existing machine
-subtarget hooks that are normally used to resolve the variant scheduling class.
-This is a major design issue which mostly affects ARM/AArch64 targets. It
-mostly boils down to the fact that the existing scheduling framework was meant
-to work for MachineInstr.
-
-When the tool encounters a "variant" instruction, it assumes a generic 1cy
-latency. However, the tool would not be able to tell which processor resources
-are effectively consumed by the variant instruction.
-
-2) MCInst and MCInstrDesc.
-
-Performance analysis tools require data dependency information to correctly
-predict the runtime performance of the code. This tool must always be able to
-obtain the set of implicit/explicit register defs/uses for every instruction of
-the input assembly sequence.
-
-In the first section of this document, it was mentioned how the tool takes as
-input an assembly sequence. That sequence is parsed into a MCInst sequence with
-the help of assembly parsers available from the targets.
-
-A MCInst is a very low-level instruction representation. The tool can inspect
-the MCOperand sequence of an MCInst to identify register operands. However,
-there is no way to tell register operands that are definitions from register
-operands that are uses.
-
-In LLVM, class MCInstrDesc is used to fully describe target instructions and
-their operands. The opcode of a machine instruction (a MachineInstr object) can
-be used to query the instruction set through method `MCInstrInfo::get' to obtain
-the associated MCInstrDesc object.
-
-However class MCInstrDesc describes properties and operands of MachineInstr
-objects. Essentially, MCInstrDesc is not meant to be used to describe MCInst
-objects. To be more specific, MCInstrDesc objects are automatically generated
-via TableGen from the instruction set description in the target .td files. For
-example, field `MCInstrDesc::NumDefs' is always equal to the cardinality of the
-`(outs)` set from the TableGen instruction definition.
-
-By construction, register definitions always appear at the beginning of the
-MachineOperands list in MachineInstr. Basically, the (outs) are the first
-operands of a MachineInstr, and the (ins) will come after in the machine operand
-list. Knowing the number of register definitions is enough to identify
-all the register operands that are definitions.
-
-In a normal compilation process, MCInst objects are generated from MachineInstr
-objects through a lowering step. By default the lowering logic simply iterates
-over the machine operands of a MachineInstr, and converts/expands them into
-equivalent MCOperand objects.
-
-The default lowering strategy has the advantage of preserving all of the above
-mentioned assumptions on the machine operand sequence. That means, register
-definitions would still be at the beginning of the MCOperand sequence, and
-register uses would come after.
-
-Targets may still define custom lowering routines for specific opcodes. Some of
-these routines may lower operands in a way that potentially breaks (some of) the
-assumptions on the machine operand sequence which were valid for MachineInstr.
-Luckily, this is not the most common form of lowering done by the targets, and
-the vast majority of the MachineInstr are lowered based on the default strategy
-which preserves the original machine operand sequence. This is especially true
-for x86, where the custom lowering logic always preserves the original (i.e.,
-from the MachineInstr) operand sequence.
-
-This tool currently works under the strong (and potentially incorrect)
-assumption that register def/uses in a MCInst can always be identified by
-querying the machine instruction descriptor for the opcode. This assumption made
-it possible to develop this tool and get good numbers at least for the
-processors available in the x86 backend.
-
-That being said, the analysis is still potentially incorrect for other targets.
-So we plan (with the help of the community) to find a proper mechanism to map
-when possible MCOperand indices back to MachineOperand indices of the equivalent
-MachineInstr. This would be equivalent to describing changes made by the
-lowering step which affected the operand sequence. For example, we could have an
-index for every register MCOperand (or -1, if the operand didn't exist in the
-original MachineInstr). The mapping could look like this <0,1,3,2>. Here,
-MCOperand #2 was obtained from the lowering of MachineOperand #3. etc.
-
-This information could be automatically generated via TableGen for all the
-instructions whose custom lowering step breaks assumptions made by the tool on
-the register operand sequence (In general, these instructions should be the
-minority of a target's instruction set). Unfortunately, we don't have that
-information now. As a consequence, we assume that the number of explicit
-register definitions is the same number specified in MCInstrDesc. We also
-assume that register definitions always come first in the operand sequence.
-
-In conclusion: these are for now the strong assumptions made by the tool:
- * The number of explicit and implicit register definitions in a MCInst
- matches the number of explicit and implicit definitions specified by the
- MCInstrDesc object.
- * Register uses always come after register definitions.
- * If an opcode specifies an optional definition, then the optional
- definition is always the last register operand in the sequence.
-
-Note that some of the information accessible from the MCInstrDesc is always
-valid for MCInst. For example: implicit register defs, implicit register uses
-and 'MayLoad/MayStore/HasUnmodeledSideEffects' opcode properties still apply to
-MCInst. The tool knows about this, and uses that information during its
-analysis.
-
-Future work
------------
- * Address limitations (described in section "Known limitations").
- * Let processors specify the selection strategy for processor resource groups
- and resources with multiple units. The tool currently uses a round-robin
- selector to pick the next resource to use.
- * Address limitations specifically described in section "Known limitations on
- X86 processors".
- * Address design issues identified in section "Known design problems".
- * Define a standard interface for "Views". This would let users customize the
- performance report generated by the tool.
-
-When interfaces are mature/stable:
- * Move the logic into a library. This will enable a number of other
- interesting use cases.
-
-Work is currently tracked on https://bugs.llvm.org. llvm-mca bugs are tagged
-with prefix [llvm-mca]. You can easily find the full list of open bugs if you
-search for that tag.
diff --git a/tools/llvm-mca/RetireControlUnit.h b/tools/llvm-mca/RetireControlUnit.h
index 3530ff21ba0d..8acc8bcc98fe 100644
--- a/tools/llvm-mca/RetireControlUnit.h
+++ b/tools/llvm-mca/RetireControlUnit.h
@@ -31,7 +31,7 @@ namespace mca {
/// this RetireControlUnit (RCU) gets notified.
///
/// On instruction retired, register updates are all architecturally
-/// committed, and any temporary registers originally allocated for the
+/// committed, and any physical registers previously allocated for the
/// retired instruction are freed.
struct RetireControlUnit : public HardwareUnit {
// A RUToken is created by the RCU for every instruction dispatched to the
diff --git a/tools/llvm-mca/RetireStage.cpp b/tools/llvm-mca/RetireStage.cpp
index 386ec54d7ba3..55c3b887e478 100644
--- a/tools/llvm-mca/RetireStage.cpp
+++ b/tools/llvm-mca/RetireStage.cpp
@@ -45,10 +45,12 @@ void RetireStage::cycleStart() {
void RetireStage::notifyInstructionRetired(const InstRef &IR) {
LLVM_DEBUG(dbgs() << "[E] Instruction Retired: #" << IR << '\n');
SmallVector<unsigned, 4> FreedRegs(PRF.getNumRegisterFiles());
- const InstrDesc &Desc = IR.getInstruction()->getDesc();
+ const Instruction &Inst = *IR.getInstruction();
+ const InstrDesc &Desc = Inst.getDesc();
- for (const std::unique_ptr<WriteState> &WS : IR.getInstruction()->getDefs())
- PRF.removeRegisterWrite(*WS.get(), FreedRegs, !Desc.isZeroLatency());
+ bool ShouldFreeRegs = !(Desc.isZeroLatency() && Inst.isDependencyBreaking());
+ for (const std::unique_ptr<WriteState> &WS : Inst.getDefs())
+ PRF.removeRegisterWrite(*WS.get(), FreedRegs, ShouldFreeRegs);
notifyEvent<HWInstructionEvent>(HWInstructionRetiredEvent(IR, FreedRegs));
}
diff --git a/tools/llvm-mca/llvm-mca.cpp b/tools/llvm-mca/llvm-mca.cpp
index 2d292f375e6e..897ff232a36d 100644
--- a/tools/llvm-mca/llvm-mca.cpp
+++ b/tools/llvm-mca/llvm-mca.cpp
@@ -96,7 +96,7 @@ static cl::opt<unsigned>
static cl::opt<unsigned>
RegisterFileSize("register-file-size",
- cl::desc("Maximum number of temporary registers which can "
+ cl::desc("Maximum number of physical registers which can "
"be used for register mappings"),
cl::cat(ToolOptions), cl::init(0));
diff --git a/tools/llvm-objcopy/llvm-objcopy.cpp b/tools/llvm-objcopy/llvm-objcopy.cpp
index 4ccc67cc75db..21a1622db765 100644
--- a/tools/llvm-objcopy/llvm-objcopy.cpp
+++ b/tools/llvm-objcopy/llvm-objcopy.cpp
@@ -185,6 +185,11 @@ LLVM_ATTRIBUTE_NORETURN void reportError(StringRef File, Error E) {
} // end namespace objcopy
} // end namespace llvm
+static bool IsDebugSection(const SectionBase &Sec) {
+ return Sec.Name.startswith(".debug") || Sec.Name.startswith(".zdebug") ||
+ Sec.Name == ".gdb_index";
+}
+
static bool IsDWOSection(const SectionBase &Sec) {
return Sec.Name.endswith(".dwo");
}
@@ -316,8 +321,7 @@ static void HandleArgs(const CopyConfig &Config, Object &Obj,
// Removes:
if (!Config.ToRemove.empty()) {
RemovePred = [&Config](const SectionBase &Sec) {
- return std::find(std::begin(Config.ToRemove), std::end(Config.ToRemove),
- Sec.Name) != std::end(Config.ToRemove);
+ return find(Config.ToRemove, Sec.Name) != Config.ToRemove.end();
};
}
@@ -346,7 +350,7 @@ static void HandleArgs(const CopyConfig &Config, Object &Obj,
case SHT_STRTAB:
return true;
}
- return Sec.Name.startswith(".debug");
+ return IsDebugSection(Sec);
};
if (Config.StripSections) {
@@ -357,7 +361,7 @@ static void HandleArgs(const CopyConfig &Config, Object &Obj,
if (Config.StripDebug) {
RemovePred = [RemovePred](const SectionBase &Sec) {
- return RemovePred(Sec) || Sec.Name.startswith(".debug");
+ return RemovePred(Sec) || IsDebugSection(Sec);
};
}
@@ -385,8 +389,7 @@ static void HandleArgs(const CopyConfig &Config, Object &Obj,
if (!Config.OnlyKeep.empty()) {
RemovePred = [&Config, RemovePred, &Obj](const SectionBase &Sec) {
// Explicitly keep these sections regardless of previous removes.
- if (std::find(std::begin(Config.OnlyKeep), std::end(Config.OnlyKeep),
- Sec.Name) != std::end(Config.OnlyKeep))
+ if (find(Config.OnlyKeep, Sec.Name) != Config.OnlyKeep.end())
return false;
// Allow all implicit removes.
@@ -408,8 +411,7 @@ static void HandleArgs(const CopyConfig &Config, Object &Obj,
if (!Config.Keep.empty()) {
RemovePred = [Config, RemovePred](const SectionBase &Sec) {
// Explicitly keep these sections regardless of previous removes.
- if (std::find(std::begin(Config.Keep), std::end(Config.Keep), Sec.Name) !=
- std::end(Config.Keep))
+ if (find(Config.Keep, Sec.Name) != Config.Keep.end())
return false;
// Otherwise defer to RemovePred.
return RemovePred(Sec);
diff --git a/tools/llvm-pdbutil/MinimalSymbolDumper.cpp b/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
index b454ab345456..f4e38a32a511 100644
--- a/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
+++ b/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
@@ -760,3 +760,9 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, UDTSym &UDT) {
P.formatLine("original type = {0}", UDT.Type);
return Error::success();
}
+
+Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
+ UsingNamespaceSym &UN) {
+ P.format(" `{0}`", UN.Name);
+ return Error::success();
+}
diff --git a/unittests/DebugInfo/CodeView/TypeIndexDiscoveryTest.cpp b/unittests/DebugInfo/CodeView/TypeIndexDiscoveryTest.cpp
index 60ca56b0d143..7ee56042f4be 100644
--- a/unittests/DebugInfo/CodeView/TypeIndexDiscoveryTest.cpp
+++ b/unittests/DebugInfo/CodeView/TypeIndexDiscoveryTest.cpp
@@ -600,4 +600,11 @@ TEST_F(TypeIndexIteratorTest, VariableSizeIntegers) {
BaseClassRecord BaseClass2(MemberAccess::Public, TypeIndex(48), 1);
writeFieldList(BaseClass1, BaseClass2);
checkTypeReferences(0, TypeIndex(47), TypeIndex(48));
-} \ No newline at end of file
+}
+
+TEST_F(TypeIndexIteratorTest, UsingNamespace) {
+ UsingNamespaceSym UN(SymbolRecordKind::UsingNamespaceSym);
+ UN.Name = "std";
+ writeSymbolRecords(UN);
+ checkTypeReferences(0);
+}
diff --git a/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp b/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp
index 442dea3c52f7..273809fcbd32 100644
--- a/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp
+++ b/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp
@@ -1122,26 +1122,57 @@ TEST(DWARFDebugInfo, TestRelations) {
EXPECT_EQ(C1.getParent(), C);
EXPECT_EQ(C2.getParent(), C);
- // Make sure bidirectional iterator works as expected.
- auto Begin = A.begin();
- auto End = A.end();
- auto It = A.begin();
-
- EXPECT_EQ(It, Begin);
- EXPECT_EQ(*It, B);
- ++It;
- EXPECT_EQ(*It, C);
- ++It;
- EXPECT_EQ(*It, D);
- ++It;
- EXPECT_EQ(It, End);
- --It;
- EXPECT_EQ(*It, D);
- --It;
- EXPECT_EQ(*It, C);
- --It;
- EXPECT_EQ(*It, B);
- EXPECT_EQ(It, Begin);
+ // Make sure iterators work as expected.
+ EXPECT_THAT(std::vector<DWARFDie>(A.begin(), A.end()),
+ testing::ElementsAre(B, C, D));
+ EXPECT_THAT(std::vector<DWARFDie>(A.rbegin(), A.rend()),
+ testing::ElementsAre(D, C, B));
+
+ // Make sure iterator is bidirectional.
+ {
+ auto Begin = A.begin();
+ auto End = A.end();
+ auto It = A.begin();
+
+ EXPECT_EQ(It, Begin);
+ EXPECT_EQ(*It, B);
+ ++It;
+ EXPECT_EQ(*It, C);
+ ++It;
+ EXPECT_EQ(*It, D);
+ ++It;
+ EXPECT_EQ(It, End);
+ --It;
+ EXPECT_EQ(*It, D);
+ --It;
+ EXPECT_EQ(*It, C);
+ --It;
+ EXPECT_EQ(*It, B);
+ EXPECT_EQ(It, Begin);
+ }
+
+ // Make sure reverse iterator is bidirectional.
+ {
+ auto Begin = A.rbegin();
+ auto End = A.rend();
+ auto It = A.rbegin();
+
+ EXPECT_EQ(It, Begin);
+ EXPECT_EQ(*It, D);
+ ++It;
+ EXPECT_EQ(*It, C);
+ ++It;
+ EXPECT_EQ(*It, B);
+ ++It;
+ EXPECT_EQ(It, End);
+ --It;
+ EXPECT_EQ(*It, B);
+ --It;
+ EXPECT_EQ(*It, C);
+ --It;
+ EXPECT_EQ(*It, D);
+ EXPECT_EQ(It, Begin);
+ }
}
TEST(DWARFDebugInfo, TestDWARFDie) {
diff --git a/unittests/ExecutionEngine/Orc/RPCUtilsTest.cpp b/unittests/ExecutionEngine/Orc/RPCUtilsTest.cpp
index 7fe449b70169..c884aaa718a9 100644
--- a/unittests/ExecutionEngine/Orc/RPCUtilsTest.cpp
+++ b/unittests/ExecutionEngine/Orc/RPCUtilsTest.cpp
@@ -133,10 +133,10 @@ namespace DummyRPCAPI {
};
class AllTheTypes
- : public Function<AllTheTypes,
- void(int8_t, uint8_t, int16_t, uint16_t, int32_t,
- uint32_t, int64_t, uint64_t, bool, std::string,
- std::vector<int>)> {
+ : public Function<AllTheTypes, void(int8_t, uint8_t, int16_t, uint16_t,
+ int32_t, uint32_t, int64_t, uint64_t,
+ bool, std::string, std::vector<int>,
+ std::set<int>, std::map<int, bool>)> {
public:
static const char* getName() { return "AllTheTypes"; }
};
@@ -451,43 +451,50 @@ TEST(DummyRPC, TestSerialization) {
DummyRPCEndpoint Server(*Channels.second);
std::thread ServerThread([&]() {
- Server.addHandler<DummyRPCAPI::AllTheTypes>(
- [&](int8_t S8, uint8_t U8, int16_t S16, uint16_t U16,
- int32_t S32, uint32_t U32, int64_t S64, uint64_t U64,
- bool B, std::string S, std::vector<int> V) {
-
- EXPECT_EQ(S8, -101) << "int8_t serialization broken";
- EXPECT_EQ(U8, 250) << "uint8_t serialization broken";
- EXPECT_EQ(S16, -10000) << "int16_t serialization broken";
- EXPECT_EQ(U16, 10000) << "uint16_t serialization broken";
- EXPECT_EQ(S32, -1000000000) << "int32_t serialization broken";
- EXPECT_EQ(U32, 1000000000ULL) << "uint32_t serialization broken";
- EXPECT_EQ(S64, -10000000000) << "int64_t serialization broken";
- EXPECT_EQ(U64, 10000000000ULL) << "uint64_t serialization broken";
- EXPECT_EQ(B, true) << "bool serialization broken";
- EXPECT_EQ(S, "foo") << "std::string serialization broken";
- EXPECT_EQ(V, std::vector<int>({42, 7}))
- << "std::vector serialization broken";
- return Error::success();
- });
-
- {
- // Poke the server to handle the negotiate call.
- auto Err = Server.handleOne();
- EXPECT_FALSE(!!Err) << "Server failed to handle call to negotiate";
- }
-
- {
- // Poke the server to handle the AllTheTypes call.
- auto Err = Server.handleOne();
- EXPECT_FALSE(!!Err) << "Server failed to handle call to void(bool)";
- }
+ Server.addHandler<DummyRPCAPI::AllTheTypes>([&](int8_t S8, uint8_t U8,
+ int16_t S16, uint16_t U16,
+ int32_t S32, uint32_t U32,
+ int64_t S64, uint64_t U64,
+ bool B, std::string S,
+ std::vector<int> V,
+ std::set<int> S2,
+ std::map<int, bool> M) {
+ EXPECT_EQ(S8, -101) << "int8_t serialization broken";
+ EXPECT_EQ(U8, 250) << "uint8_t serialization broken";
+ EXPECT_EQ(S16, -10000) << "int16_t serialization broken";
+ EXPECT_EQ(U16, 10000) << "uint16_t serialization broken";
+ EXPECT_EQ(S32, -1000000000) << "int32_t serialization broken";
+ EXPECT_EQ(U32, 1000000000ULL) << "uint32_t serialization broken";
+ EXPECT_EQ(S64, -10000000000) << "int64_t serialization broken";
+ EXPECT_EQ(U64, 10000000000ULL) << "uint64_t serialization broken";
+ EXPECT_EQ(B, true) << "bool serialization broken";
+ EXPECT_EQ(S, "foo") << "std::string serialization broken";
+ EXPECT_EQ(V, std::vector<int>({42, 7}))
+ << "std::vector serialization broken";
+ EXPECT_EQ(S2, std::set<int>({7, 42})) << "std::set serialization broken";
+ EXPECT_EQ(M, (std::map<int, bool>({{7, false}, {42, true}})))
+ << "std::map serialization broken";
+ return Error::success();
});
+ {
+ // Poke the server to handle the negotiate call.
+ auto Err = Server.handleOne();
+ EXPECT_FALSE(!!Err) << "Server failed to handle call to negotiate";
+ }
+
+ {
+ // Poke the server to handle the AllTheTypes call.
+ auto Err = Server.handleOne();
+ EXPECT_FALSE(!!Err) << "Server failed to handle call to void(bool)";
+ }
+ });
{
// Make an async call.
- std::vector<int> v({42, 7});
+ std::vector<int> V({42, 7});
+ std::set<int> S({7, 42});
+ std::map<int, bool> M({{7, false}, {42, true}});
auto Err = Client.callAsync<DummyRPCAPI::AllTheTypes>(
[](Error Err) {
EXPECT_FALSE(!!Err) << "Async AllTheTypes response handler failed";
@@ -497,7 +504,7 @@ TEST(DummyRPC, TestSerialization) {
static_cast<int16_t>(-10000), static_cast<uint16_t>(10000),
static_cast<int32_t>(-1000000000), static_cast<uint32_t>(1000000000),
static_cast<int64_t>(-10000000000), static_cast<uint64_t>(10000000000),
- true, std::string("foo"), v);
+ true, std::string("foo"), V, S, M);
EXPECT_FALSE(!!Err) << "Client.callAsync failed for AllTheTypes";
}
diff --git a/unittests/Transforms/Vectorize/CMakeLists.txt b/unittests/Transforms/Vectorize/CMakeLists.txt
index 4f2b8e9a139c..5a9142d17b1e 100644
--- a/unittests/Transforms/Vectorize/CMakeLists.txt
+++ b/unittests/Transforms/Vectorize/CMakeLists.txt
@@ -6,6 +6,8 @@ set(LLVM_LINK_COMPONENTS
)
add_llvm_unittest(VectorizeTests
+ VPlanDominatorTreeTest.cpp
+ VPlanLoopInfoTest.cpp
VPlanTest.cpp
VPlanHCFGTest.cpp
)
diff --git a/unittests/Transforms/Vectorize/VPlanDominatorTreeTest.cpp b/unittests/Transforms/Vectorize/VPlanDominatorTreeTest.cpp
new file mode 100644
index 000000000000..57f07392d8b8
--- /dev/null
+++ b/unittests/Transforms/Vectorize/VPlanDominatorTreeTest.cpp
@@ -0,0 +1,196 @@
+//===- llvm/unittests/Transforms/Vectorize/VPlanDominatorTreeTest.cpp -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../lib/Transforms/Vectorize/VPlanHCFGBuilder.h"
+#include "VPlanTestBase.h"
+#include "gtest/gtest.h"
+
+namespace llvm {
+namespace {
+
+class VPlanDominatorTreeTest : public VPlanTestBase {};
+
+TEST_F(VPlanDominatorTreeTest, BasicVPBBDomination) {
+ const char *ModuleString =
+ "define void @f(i32* %a, i32* %b, i32* %c, i32 %N, i32 %M, i32 %K) {\n"
+ "entry:\n"
+ " br label %for.body\n"
+ "for.body:\n"
+ " %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]\n"
+ " br i1 true, label %if.then, label %if.else\n"
+ "if.then:\n"
+ " br label %for.inc\n"
+ "if.else:\n"
+ " br label %for.inc\n"
+ "for.inc:\n"
+ " %iv.next = add nuw nsw i64 %iv, 1\n"
+ " %exitcond = icmp eq i64 %iv.next, 300\n"
+ " br i1 %exitcond, label %for.end, label %for.body\n"
+ "for.end:\n"
+ " ret void\n"
+ "}\n";
+
+ Module &M = parseModule(ModuleString);
+
+ Function *F = M.getFunction("f");
+ BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
+ auto Plan = buildPlainCFG(LoopHeader);
+
+ // Build VPlan domination tree analysis.
+ VPRegionBlock *TopRegion = cast<VPRegionBlock>(Plan->getEntry());
+ VPDominatorTree VPDT;
+ VPDT.recalculate(*TopRegion);
+
+ VPBlockBase *PH = TopRegion->getEntry();
+ VPBlockBase *H = PH->getSingleSuccessor();
+ VPBlockBase *IfThen = H->getSuccessors()[0];
+ VPBlockBase *IfElse = H->getSuccessors()[1];
+ VPBlockBase *Latch = IfThen->getSingleSuccessor();
+ VPBlockBase *Exit = Latch->getSuccessors()[0] != H
+ ? Latch->getSuccessors()[0]
+ : Latch->getSuccessors()[1];
+ // Reachability.
+ EXPECT_TRUE(VPDT.isReachableFromEntry(PH));
+ EXPECT_TRUE(VPDT.isReachableFromEntry(H));
+ EXPECT_TRUE(VPDT.isReachableFromEntry(IfThen));
+ EXPECT_TRUE(VPDT.isReachableFromEntry(IfElse));
+ EXPECT_TRUE(VPDT.isReachableFromEntry(Latch));
+ EXPECT_TRUE(VPDT.isReachableFromEntry(Exit));
+
+ // VPBB dominance.
+ EXPECT_TRUE(VPDT.dominates(PH, PH));
+ EXPECT_TRUE(VPDT.dominates(PH, H));
+ EXPECT_TRUE(VPDT.dominates(PH, IfThen));
+ EXPECT_TRUE(VPDT.dominates(PH, IfElse));
+ EXPECT_TRUE(VPDT.dominates(PH, Latch));
+ EXPECT_TRUE(VPDT.dominates(PH, Exit));
+
+ EXPECT_FALSE(VPDT.dominates(H, PH));
+ EXPECT_TRUE(VPDT.dominates(H, H));
+ EXPECT_TRUE(VPDT.dominates(H, IfThen));
+ EXPECT_TRUE(VPDT.dominates(H, IfElse));
+ EXPECT_TRUE(VPDT.dominates(H, Latch));
+ EXPECT_TRUE(VPDT.dominates(H, Exit));
+
+ EXPECT_FALSE(VPDT.dominates(IfThen, PH));
+ EXPECT_FALSE(VPDT.dominates(IfThen, H));
+ EXPECT_TRUE(VPDT.dominates(IfThen, IfThen));
+ EXPECT_FALSE(VPDT.dominates(IfThen, IfElse));
+ EXPECT_FALSE(VPDT.dominates(IfThen, Latch));
+ EXPECT_FALSE(VPDT.dominates(IfThen, Exit));
+
+ EXPECT_FALSE(VPDT.dominates(IfElse, PH));
+ EXPECT_FALSE(VPDT.dominates(IfElse, H));
+ EXPECT_FALSE(VPDT.dominates(IfElse, IfThen));
+ EXPECT_TRUE(VPDT.dominates(IfElse, IfElse));
+ EXPECT_FALSE(VPDT.dominates(IfElse, Latch));
+ EXPECT_FALSE(VPDT.dominates(IfElse, Exit));
+
+ EXPECT_FALSE(VPDT.dominates(Latch, PH));
+ EXPECT_FALSE(VPDT.dominates(Latch, H));
+ EXPECT_FALSE(VPDT.dominates(Latch, IfThen));
+ EXPECT_FALSE(VPDT.dominates(Latch, IfElse));
+ EXPECT_TRUE(VPDT.dominates(Latch, Latch));
+ EXPECT_TRUE(VPDT.dominates(Latch, Exit));
+
+ EXPECT_FALSE(VPDT.dominates(Exit, PH));
+ EXPECT_FALSE(VPDT.dominates(Exit, H));
+ EXPECT_FALSE(VPDT.dominates(Exit, IfThen));
+ EXPECT_FALSE(VPDT.dominates(Exit, IfElse));
+ EXPECT_FALSE(VPDT.dominates(Exit, Latch));
+ EXPECT_TRUE(VPDT.dominates(Exit, Exit));
+
+ // VPBB proper dominance.
+ EXPECT_FALSE(VPDT.properlyDominates(PH, PH));
+ EXPECT_TRUE(VPDT.properlyDominates(PH, H));
+ EXPECT_TRUE(VPDT.properlyDominates(PH, IfThen));
+ EXPECT_TRUE(VPDT.properlyDominates(PH, IfElse));
+ EXPECT_TRUE(VPDT.properlyDominates(PH, Latch));
+ EXPECT_TRUE(VPDT.properlyDominates(PH, Exit));
+
+ EXPECT_FALSE(VPDT.properlyDominates(H, PH));
+ EXPECT_FALSE(VPDT.properlyDominates(H, H));
+ EXPECT_TRUE(VPDT.properlyDominates(H, IfThen));
+ EXPECT_TRUE(VPDT.properlyDominates(H, IfElse));
+ EXPECT_TRUE(VPDT.properlyDominates(H, Latch));
+ EXPECT_TRUE(VPDT.properlyDominates(H, Exit));
+
+ EXPECT_FALSE(VPDT.properlyDominates(IfThen, PH));
+ EXPECT_FALSE(VPDT.properlyDominates(IfThen, H));
+ EXPECT_FALSE(VPDT.properlyDominates(IfThen, IfThen));
+ EXPECT_FALSE(VPDT.properlyDominates(IfThen, IfElse));
+ EXPECT_FALSE(VPDT.properlyDominates(IfThen, Latch));
+ EXPECT_FALSE(VPDT.properlyDominates(IfThen, Exit));
+
+ EXPECT_FALSE(VPDT.properlyDominates(IfElse, PH));
+ EXPECT_FALSE(VPDT.properlyDominates(IfElse, H));
+ EXPECT_FALSE(VPDT.properlyDominates(IfElse, IfThen));
+ EXPECT_FALSE(VPDT.properlyDominates(IfElse, IfElse));
+ EXPECT_FALSE(VPDT.properlyDominates(IfElse, Latch));
+ EXPECT_FALSE(VPDT.properlyDominates(IfElse, Exit));
+
+ EXPECT_FALSE(VPDT.properlyDominates(Latch, PH));
+ EXPECT_FALSE(VPDT.properlyDominates(Latch, H));
+ EXPECT_FALSE(VPDT.properlyDominates(Latch, IfThen));
+ EXPECT_FALSE(VPDT.properlyDominates(Latch, IfElse));
+ EXPECT_FALSE(VPDT.properlyDominates(Latch, Latch));
+ EXPECT_TRUE(VPDT.properlyDominates(Latch, Exit));
+
+ EXPECT_FALSE(VPDT.properlyDominates(Exit, PH));
+ EXPECT_FALSE(VPDT.properlyDominates(Exit, H));
+ EXPECT_FALSE(VPDT.properlyDominates(Exit, IfThen));
+ EXPECT_FALSE(VPDT.properlyDominates(Exit, IfElse));
+ EXPECT_FALSE(VPDT.properlyDominates(Exit, Latch));
+ EXPECT_FALSE(VPDT.properlyDominates(Exit, Exit));
+
+ // VPBB nearest common dominator.
+ EXPECT_EQ(PH, VPDT.findNearestCommonDominator(PH, PH));
+ EXPECT_EQ(PH, VPDT.findNearestCommonDominator(PH, H));
+ EXPECT_EQ(PH, VPDT.findNearestCommonDominator(PH, IfThen));
+ EXPECT_EQ(PH, VPDT.findNearestCommonDominator(PH, IfElse));
+ EXPECT_EQ(PH, VPDT.findNearestCommonDominator(PH, Latch));
+ EXPECT_EQ(PH, VPDT.findNearestCommonDominator(PH, Exit));
+
+ EXPECT_EQ(PH, VPDT.findNearestCommonDominator(H, PH));
+ EXPECT_EQ(H, VPDT.findNearestCommonDominator(H, H));
+ EXPECT_EQ(H, VPDT.findNearestCommonDominator(H, IfThen));
+ EXPECT_EQ(H, VPDT.findNearestCommonDominator(H, IfElse));
+ EXPECT_EQ(H, VPDT.findNearestCommonDominator(H, Latch));
+ EXPECT_EQ(H, VPDT.findNearestCommonDominator(H, Exit));
+
+ EXPECT_EQ(PH, VPDT.findNearestCommonDominator(IfThen, PH));
+ EXPECT_EQ(H, VPDT.findNearestCommonDominator(IfThen, H));
+ EXPECT_EQ(IfThen, VPDT.findNearestCommonDominator(IfThen, IfThen));
+ EXPECT_EQ(H, VPDT.findNearestCommonDominator(IfThen, IfElse));
+ EXPECT_EQ(H, VPDT.findNearestCommonDominator(IfThen, Latch));
+ EXPECT_EQ(H, VPDT.findNearestCommonDominator(IfThen, Exit));
+
+ EXPECT_EQ(PH, VPDT.findNearestCommonDominator(IfElse, PH));
+ EXPECT_EQ(H, VPDT.findNearestCommonDominator(IfElse, H));
+ EXPECT_EQ(H, VPDT.findNearestCommonDominator(IfElse, IfThen));
+ EXPECT_EQ(IfElse, VPDT.findNearestCommonDominator(IfElse, IfElse));
+ EXPECT_EQ(H, VPDT.findNearestCommonDominator(IfElse, Latch));
+ EXPECT_EQ(H, VPDT.findNearestCommonDominator(IfElse, Exit));
+
+ EXPECT_EQ(PH, VPDT.findNearestCommonDominator(Latch, PH));
+ EXPECT_EQ(H, VPDT.findNearestCommonDominator(Latch, H));
+ EXPECT_EQ(H, VPDT.findNearestCommonDominator(Latch, IfThen));
+ EXPECT_EQ(H, VPDT.findNearestCommonDominator(Latch, IfElse));
+ EXPECT_EQ(Latch, VPDT.findNearestCommonDominator(Latch, Latch));
+ EXPECT_EQ(Latch, VPDT.findNearestCommonDominator(Latch, Exit));
+
+ EXPECT_EQ(PH, VPDT.findNearestCommonDominator(Exit, PH));
+ EXPECT_EQ(H, VPDT.findNearestCommonDominator(Exit, H));
+ EXPECT_EQ(H, VPDT.findNearestCommonDominator(Exit, IfThen));
+ EXPECT_EQ(H, VPDT.findNearestCommonDominator(Exit, IfElse));
+ EXPECT_EQ(Latch, VPDT.findNearestCommonDominator(Exit, Latch));
+ EXPECT_EQ(Exit, VPDT.findNearestCommonDominator(Exit, Exit));
+}
+} // namespace
+} // namespace llvm
diff --git a/unittests/Transforms/Vectorize/VPlanLoopInfoTest.cpp b/unittests/Transforms/Vectorize/VPlanLoopInfoTest.cpp
new file mode 100644
index 000000000000..55486ba0f14e
--- /dev/null
+++ b/unittests/Transforms/Vectorize/VPlanLoopInfoTest.cpp
@@ -0,0 +1,87 @@
+//===- llvm/unittests/Transforms/Vectorize/VPlanLoopInfoTest.cpp -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../lib/Transforms/Vectorize/VPlanLoopInfo.h"
+#include "VPlanTestBase.h"
+#include "gtest/gtest.h"
+
+namespace llvm {
+namespace {
+
+class VPlanLoopInfo : public VPlanTestBase {};
+
+TEST_F(VPlanLoopInfo, BasicLoopInfoTest) {
+ const char *ModuleString =
+ "define void @f(i32* %a, i32* %b, i32* %c, i32 %N, i32 %M, i32 %K) {\n"
+ "entry:\n"
+ " br label %for.body\n"
+ "for.body:\n"
+ " %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]\n"
+ " br i1 true, label %if.then, label %if.else\n"
+ "if.then:\n"
+ " br label %for.inc\n"
+ "if.else:\n"
+ " br label %for.inc\n"
+ "for.inc:\n"
+ " %iv.next = add nuw nsw i64 %iv, 1\n"
+ " %exitcond = icmp eq i64 %iv.next, 300\n"
+ " br i1 %exitcond, label %for.end, label %for.body\n"
+ "for.end:\n"
+ " ret void\n"
+ "}\n";
+
+ Module &M = parseModule(ModuleString);
+
+ Function *F = M.getFunction("f");
+ BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
+ auto Plan = buildHCFG(LoopHeader);
+
+ // Build VPlan domination tree and loop info analyses.
+ VPRegionBlock *TopRegion = cast<VPRegionBlock>(Plan->getEntry());
+ VPDominatorTree VPDT;
+ VPDT.recalculate(*TopRegion);
+ VPLoopInfo VPLI;
+ VPLI.analyze(VPDT);
+
+ VPBlockBase *PH = TopRegion->getEntry();
+ VPBlockBase *H = PH->getSingleSuccessor();
+ VPBlockBase *IfThen = H->getSuccessors()[0];
+ VPBlockBase *IfElse = H->getSuccessors()[1];
+ VPBlockBase *Latch = IfThen->getSingleSuccessor();
+ VPBlockBase *Exit = Latch->getSuccessors()[0] != H
+ ? Latch->getSuccessors()[0]
+ : Latch->getSuccessors()[1];
+
+ // Number of loops.
+ EXPECT_EQ(1, std::distance(VPLI.begin(), VPLI.end()));
+ VPLoop *VPLp = *VPLI.begin();
+
+ // VPBBs contained in VPLoop.
+ EXPECT_FALSE(VPLp->contains(PH));
+ EXPECT_EQ(nullptr, VPLI.getLoopFor(PH));
+ EXPECT_TRUE(VPLp->contains(H));
+ EXPECT_EQ(VPLp, VPLI.getLoopFor(H));
+ EXPECT_TRUE(VPLp->contains(IfThen));
+ EXPECT_EQ(VPLp, VPLI.getLoopFor(IfThen));
+ EXPECT_TRUE(VPLp->contains(IfElse));
+ EXPECT_EQ(VPLp, VPLI.getLoopFor(IfElse));
+ EXPECT_TRUE(VPLp->contains(Latch));
+ EXPECT_EQ(VPLp, VPLI.getLoopFor(Latch));
+ EXPECT_FALSE(VPLp->contains(Exit));
+ EXPECT_EQ(nullptr, VPLI.getLoopFor(Exit));
+
+ // VPLoop's parts.
+ EXPECT_EQ(PH, VPLp->getLoopPreheader());
+ EXPECT_EQ(H, VPLp->getHeader());
+ EXPECT_EQ(Latch, VPLp->getLoopLatch());
+ EXPECT_EQ(Latch, VPLp->getExitingBlock());
+ EXPECT_EQ(Exit, VPLp->getExitBlock());
+}
+} // namespace
+} // namespace llvm
diff --git a/unittests/Transforms/Vectorize/VPlanTestBase.h b/unittests/Transforms/Vectorize/VPlanTestBase.h
index da3b39f4df01..0fc4a9a21c88 100644
--- a/unittests/Transforms/Vectorize/VPlanTestBase.h
+++ b/unittests/Transforms/Vectorize/VPlanTestBase.h
@@ -50,8 +50,19 @@ protected:
doAnalysis(*LoopHeader->getParent());
auto Plan = llvm::make_unique<VPlan>();
- VPlanHCFGBuilder HCFGBuilder(LI->getLoopFor(LoopHeader), LI.get());
- HCFGBuilder.buildHierarchicalCFG(*Plan.get());
+ VPlanHCFGBuilder HCFGBuilder(LI->getLoopFor(LoopHeader), LI.get(), *Plan);
+ HCFGBuilder.buildHierarchicalCFG();
+ return Plan;
+ }
+
+ /// Build the VPlan plain CFG for the loop starting from \p LoopHeader.
+ VPlanPtr buildPlainCFG(BasicBlock *LoopHeader) {
+ doAnalysis(*LoopHeader->getParent());
+
+ auto Plan = llvm::make_unique<VPlan>();
+ VPlanHCFGBuilder HCFGBuilder(LI->getLoopFor(LoopHeader), LI.get(), *Plan);
+ VPRegionBlock *TopRegion = HCFGBuilder.buildPlainCFG();
+ Plan->setEntry(TopRegion);
return Plan;
}
};
diff --git a/utils/LLVMVisualizers/llvm.natvis b/utils/LLVMVisualizers/llvm.natvis
index 963c94f7e116..2310edf5f3da 100644
--- a/utils/LLVMVisualizers/llvm.natvis
+++ b/utils/LLVMVisualizers/llvm.natvis
@@ -68,6 +68,10 @@ For later versions of Visual Studio, no setup is required.
</Expand>
</Type>
+ <Type Name="StringView">
+ <DisplayString>{First,[Last - First]s}</DisplayString>
+ </Type>
+
<Type Name="llvm::StringRef">
<DisplayString>{Data,[Length]s}</DisplayString>
<StringView>Data,[Length]s</StringView>
diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp
index 1abe3a88bfbf..cc2b9d788980 100644
--- a/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -3946,6 +3946,24 @@ static bool ForceArbitraryInstResultType(TreePatternNode *N, TreePattern &TP) {
return false;
}
+// Promote xform function to be an explicit node wherever set.
+static TreePatternNodePtr PromoteXForms(TreePatternNodePtr N) {
+ if (Record *Xform = N->getTransformFn()) {
+ N->setTransformFn(nullptr);
+ std::vector<TreePatternNodePtr> Children;
+ Children.push_back(PromoteXForms(N));
+ return std::make_shared<TreePatternNode>(Xform, std::move(Children),
+ N->getNumTypes());
+ }
+
+ if (!N->isLeaf())
+ for (unsigned i = 0, e = N->getNumChildren(); i != e; ++i) {
+ TreePatternNodePtr Child = N->getChildShared(i);
+ N->setChild(i, PromoteXForms(Child));
+ }
+ return N;
+}
+
void CodeGenDAGPatterns::ParseOnePattern(Record *TheDef,
TreePattern &Pattern, TreePattern &Result,
const std::vector<Record *> &InstImpResults) {
@@ -4011,30 +4029,8 @@ void CodeGenDAGPatterns::ParseOnePattern(Record *TheDef,
Result.error("Could not infer all types in pattern result!");
}
- // Promote the xform function to be an explicit node if set.
- const TreePatternNodePtr &DstPattern = Result.getOnlyTree();
- std::vector<TreePatternNodePtr> ResultNodeOperands;
- for (unsigned ii = 0, ee = DstPattern->getNumChildren(); ii != ee; ++ii) {
- TreePatternNodePtr OpNode = DstPattern->getChildShared(ii);
- if (Record *Xform = OpNode->getTransformFn()) {
- OpNode->setTransformFn(nullptr);
- std::vector<TreePatternNodePtr> Children;
- Children.push_back(OpNode);
- OpNode = std::make_shared<TreePatternNode>(Xform, std::move(Children),
- OpNode->getNumTypes());
- }
- ResultNodeOperands.push_back(OpNode);
- }
-
- TreePatternNodePtr DstShared =
- DstPattern->isLeaf()
- ? DstPattern
- : std::make_shared<TreePatternNode>(DstPattern->getOperator(),
- std::move(ResultNodeOperands),
- DstPattern->getNumTypes());
-
- for (unsigned i = 0, e = Result.getOnlyTree()->getNumTypes(); i != e; ++i)
- DstShared->setType(i, Result.getOnlyTree()->getExtType(i));
+ // Promote xform function to be an explicit node wherever set.
+ TreePatternNodePtr DstShared = PromoteXForms(Result.getOnlyTree());
TreePattern Temp(Result.getRecord(), DstShared, false, *this);
Temp.InferAllTypes();