author     Dimitry Andric <dim@FreeBSD.org>   2017-07-01 13:22:02 +0000
committer  Dimitry Andric <dim@FreeBSD.org>   2017-07-01 13:22:02 +0000
commit     9df3605dea17e84f8183581f6103bd0c79e2a606 (patch)
tree       70a2f36ce9eb9bb213603cd7f2f120af53fc176f
parent     08bbd35a80bf7765fe0d3043f9eb5a2f2786b649 (diff)
download   src-9df3605dea17e84f8183581f6103bd0c79e2a606.tar.gz
           src-9df3605dea17e84f8183581f6103bd0c79e2a606.zip

Vendor import of llvm trunk r306956 (tag: vendor/llvm/llvm-trunk-r306956)

Notes:
    svn path=/vendor/llvm/dist/; revision=320533
    svn path=/vendor/llvm/llvm-trunk-r306956/; revision=320534; tag=vendor/llvm/llvm-trunk-r306956
-rw-r--r--CMakeLists.txt2
-rw-r--r--CODE_OWNERS.TXT2
-rw-r--r--CREDITS.TXT7
-rw-r--r--bindings/ocaml/target/target_ocaml.c2
-rw-r--r--docs/CMake.rst7
-rw-r--r--docs/CommandGuide/llvm-nm.rst3
-rw-r--r--docs/Docker.rst205
-rw-r--r--docs/ReleaseNotes.rst14
-rw-r--r--docs/XRay.rst2
-rw-r--r--docs/index.rst4
-rw-r--r--include/llvm-c/Transforms/Vectorize.h2
-rw-r--r--include/llvm/Analysis/AliasSetTracker.h13
-rw-r--r--include/llvm/Analysis/CFLAliasAnalysisUtils.h58
-rw-r--r--include/llvm/Analysis/CFLAndersAliasAnalysis.h27
-rw-r--r--include/llvm/Analysis/CFLSteensAliasAnalysis.h24
-rw-r--r--include/llvm/Analysis/IteratedDominanceFrontier.h1
-rw-r--r--include/llvm/Analysis/MemorySSA.h10
-rw-r--r--include/llvm/Analysis/OptimizationDiagnosticInfo.h6
-rw-r--r--include/llvm/Analysis/RegionInfo.h211
-rw-r--r--include/llvm/Analysis/RegionInfoImpl.h39
-rw-r--r--include/llvm/Analysis/RegionIterator.h77
-rw-r--r--include/llvm/Analysis/ScalarEvolution.h22
-rw-r--r--include/llvm/Analysis/ScalarEvolutionExpressions.h28
-rw-r--r--include/llvm/Analysis/TargetTransformInfo.h34
-rw-r--r--include/llvm/Analysis/TargetTransformInfoImpl.h11
-rw-r--r--include/llvm/BinaryFormat/COFF.h2
-rw-r--r--include/llvm/BinaryFormat/Dwarf.h3
-rw-r--r--include/llvm/BinaryFormat/Wasm.h11
-rw-r--r--include/llvm/Bitcode/BitcodeReader.h7
-rw-r--r--include/llvm/Bitcode/BitcodeWriter.h20
-rw-r--r--include/llvm/Bitcode/LLVMBitCodes.h8
-rw-r--r--include/llvm/CodeGen/BasicTTIImpl.h3
-rw-r--r--include/llvm/CodeGen/GlobalISel/CallLowering.h40
-rw-r--r--include/llvm/CodeGen/GlobalISel/IRTranslator.h30
-rw-r--r--include/llvm/CodeGen/GlobalISel/InstructionSelector.h15
-rw-r--r--include/llvm/CodeGen/GlobalISel/LegalizerInfo.h59
-rw-r--r--include/llvm/CodeGen/GlobalISel/RegBankSelect.h52
-rw-r--r--include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h44
-rw-r--r--include/llvm/CodeGen/GlobalISel/Types.h12
-rw-r--r--include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h2
-rw-r--r--include/llvm/CodeGen/MachinePassRegistry.h37
-rw-r--r--include/llvm/CodeGen/MachineScheduler.h2
-rw-r--r--include/llvm/CodeGen/MachineValueType.h24
-rw-r--r--include/llvm/CodeGen/MacroFusion.h23
-rw-r--r--include/llvm/CodeGen/PseudoSourceValue.h6
-rw-r--r--include/llvm/CodeGen/SelectionDAGAddressAnalysis.h2
-rw-r--r--include/llvm/CodeGen/SelectionDAGNodes.h2
-rw-r--r--include/llvm/CodeGen/TargetPassConfig.h42
-rw-r--r--include/llvm/CodeGen/ValueTypes.h18
-rw-r--r--include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h2
-rw-r--r--include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h32
-rw-r--r--include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h18
-rw-r--r--include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h31
-rw-r--r--include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h32
-rw-r--r--include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h25
-rw-r--r--include/llvm/DebugInfo/CodeView/DebugStringTableSubsection.h14
-rw-r--r--include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h30
-rw-r--r--include/llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h18
-rw-r--r--include/llvm/DebugInfo/CodeView/EnumTables.h11
-rw-r--r--include/llvm/DebugInfo/CodeView/Formatters.h24
-rw-r--r--include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h11
-rw-r--r--include/llvm/DebugInfo/CodeView/StringsAndChecksums.h22
-rw-r--r--include/llvm/DebugInfo/CodeView/SymbolDeserializer.h4
-rw-r--r--include/llvm/DebugInfo/CodeView/SymbolRecord.h19
-rw-r--r--include/llvm/DebugInfo/CodeView/SymbolSerializer.h32
-rw-r--r--include/llvm/DebugInfo/CodeView/SymbolVisitorCallbackPipeline.h8
-rw-r--r--include/llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h6
-rw-r--r--include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h2
-rw-r--r--include/llvm/DebugInfo/CodeView/TypeRecord.h30
-rw-r--r--include/llvm/DebugInfo/CodeView/TypeSerializer.h29
-rw-r--r--include/llvm/DebugInfo/CodeView/TypeServerHandler.h12
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h23
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h4
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFContext.h20
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h48
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h4
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFDebugLine.h13
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h11
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h4
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFFormValue.h11
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h2
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFUnit.h21
-rw-r--r--include/llvm/DebugInfo/PDB/IPDBDataStream.h16
-rw-r--r--include/llvm/DebugInfo/PDB/IPDBEnumChildren.h4
-rw-r--r--include/llvm/DebugInfo/PDB/Native/DbiModuleList.h22
-rw-r--r--include/llvm/DebugInfo/PDB/Native/Hash.h14
-rw-r--r--include/llvm/DebugInfo/PDB/Native/HashTable.h25
-rw-r--r--include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h23
-rw-r--r--include/llvm/DebugInfo/PDB/Native/ModuleDebugStreamBuilder.h0
-rw-r--r--include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h15
-rw-r--r--include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h12
-rw-r--r--include/llvm/DebugInfo/PDB/Native/NativeSession.h10
-rw-r--r--include/llvm/DebugInfo/PDB/PDB.h16
-rw-r--r--include/llvm/DebugInfo/PDB/PDBExtras.h18
-rw-r--r--include/llvm/DebugInfo/PDB/PDBTypes.h45
-rw-r--r--include/llvm/DebugInfo/PDB/UDTLayout.h43
-rw-r--r--include/llvm/ExecutionEngine/Orc/IRTransformLayer.h2
-rw-r--r--include/llvm/IR/Argument.h2
-rw-r--r--include/llvm/IR/BasicBlock.h2
-rw-r--r--include/llvm/IR/Constant.h2
-rw-r--r--include/llvm/IR/Constants.h4
-rw-r--r--include/llvm/IR/DerivedTypes.h16
-rw-r--r--include/llvm/IR/Dominators.h10
-rw-r--r--include/llvm/IR/Function.h2
-rw-r--r--include/llvm/IR/GlobalAlias.h2
-rw-r--r--include/llvm/IR/GlobalIFunc.h2
-rw-r--r--include/llvm/IR/GlobalIndirectSymbol.h2
-rw-r--r--include/llvm/IR/GlobalObject.h2
-rw-r--r--include/llvm/IR/GlobalVariable.h2
-rw-r--r--include/llvm/IR/InlineAsm.h2
-rw-r--r--include/llvm/IR/InstrTypes.h24
-rw-r--r--include/llvm/IR/Instruction.h2
-rw-r--r--include/llvm/IR/Instructions.h176
-rw-r--r--include/llvm/IR/IntrinsicInst.h68
-rw-r--r--include/llvm/IR/IntrinsicsWebAssembly.td4
-rw-r--r--include/llvm/IR/LLVMContext.h13
-rw-r--r--include/llvm/IR/LegacyPassNameParser.h10
-rw-r--r--include/llvm/IR/Metadata.h13
-rw-r--r--include/llvm/IR/ModuleSummaryIndexYAML.h3
-rw-r--r--include/llvm/IR/Operator.h30
-rw-r--r--include/llvm/IR/PatternMatch.h12
-rw-r--r--include/llvm/IR/Statepoint.h12
-rw-r--r--include/llvm/IR/User.h2
-rw-r--r--include/llvm/InitializePasses.h1
-rw-r--r--include/llvm/LinkAllPasses.h1
-rw-r--r--include/llvm/MC/MCAsmBackend.h11
-rw-r--r--include/llvm/MC/MCWinCOFFObjectWriter.h1
-rw-r--r--include/llvm/Object/Archive.h2
-rw-r--r--include/llvm/Object/COFF.h2
-rw-r--r--include/llvm/Object/COFFImportFile.h2
-rw-r--r--include/llvm/Object/ELFObjectFile.h4
-rw-r--r--include/llvm/Object/IRObjectFile.h2
-rw-r--r--include/llvm/Object/IRSymtab.h14
-rw-r--r--include/llvm/Object/MachOUniversal.h2
-rw-r--r--include/llvm/Object/ObjectFile.h2
-rw-r--r--include/llvm/Object/SymbolicFile.h2
-rw-r--r--include/llvm/Object/Wasm.h3
-rw-r--r--include/llvm/ObjectYAML/COFFYAML.h113
-rw-r--r--include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h32
-rw-r--r--include/llvm/ObjectYAML/CodeViewYAMLSymbols.h16
-rw-r--r--include/llvm/ObjectYAML/CodeViewYAMLTypes.h29
-rw-r--r--include/llvm/ObjectYAML/DWARFEmitter.h27
-rw-r--r--include/llvm/ObjectYAML/DWARFYAML.h21
-rw-r--r--include/llvm/ObjectYAML/ELFYAML.h22
-rw-r--r--include/llvm/ObjectYAML/MachOYAML.h45
-rw-r--r--include/llvm/ObjectYAML/ObjectYAML.h9
-rw-r--r--include/llvm/ObjectYAML/WasmYAML.h28
-rw-r--r--include/llvm/ObjectYAML/YAML.h30
-rw-r--r--include/llvm/Passes/PassBuilder.h2
-rw-r--r--include/llvm/ProfileData/Coverage/CoverageMapping.h23
-rw-r--r--include/llvm/ProfileData/InstrProf.h81
-rw-r--r--include/llvm/Support/CMakeLists.txt38
-rw-r--r--include/llvm/Support/Errno.h11
-rw-r--r--include/llvm/Support/GenericDomTree.h444
-rw-r--r--include/llvm/Support/GenericDomTreeConstruction.h659
-rw-r--r--include/llvm/Support/TargetParser.h3
-rw-r--r--include/llvm/Support/YAMLParser.h14
-rw-r--r--include/llvm/Support/YAMLTraits.h132
-rw-r--r--include/llvm/Target/GenericOpcodes.td34
-rw-r--r--include/llvm/Target/GlobalISel/SelectionDAGCompat.td2
-rw-r--r--include/llvm/Target/TargetLowering.h4
-rw-r--r--include/llvm/Target/TargetOpcodes.def14
-rw-r--r--include/llvm/Transforms/IPO/PassManagerBuilder.h1
-rw-r--r--include/llvm/Transforms/SampleProfile.h4
-rw-r--r--include/llvm/Transforms/Scalar/ConstantHoisting.h2
-rw-r--r--include/llvm/Transforms/Utils/LoopUtils.h11
-rw-r--r--include/llvm/Transforms/Utils/OrderedInstructions.h2
-rw-r--r--include/llvm/Transforms/Utils/PredicateInfo.h18
-rw-r--r--include/llvm/Transforms/Utils/ValueMapper.h2
-rw-r--r--include/llvm/Transforms/Vectorize.h7
-rw-r--r--lib/Analysis/CFLAndersAliasAnalysis.cpp15
-rw-r--r--lib/Analysis/CFLSteensAliasAnalysis.cpp29
-rw-r--r--lib/Analysis/InlineCost.cpp33
-rw-r--r--lib/Analysis/IteratedDominanceFrontier.cpp12
-rw-r--r--lib/Analysis/MemoryBuiltins.cpp4
-rw-r--r--lib/Analysis/OptimizationDiagnosticInfo.cpp13
-rw-r--r--lib/Analysis/RegionInfo.cpp40
-rw-r--r--lib/Analysis/ScalarEvolution.cpp243
-rw-r--r--lib/Analysis/TargetTransformInfo.cpp9
-rw-r--r--lib/Analysis/TypeBasedAliasAnalysis.cpp20
-rw-r--r--lib/BinaryFormat/Magic.cpp4
-rw-r--r--lib/Bitcode/Reader/BitcodeReader.cpp32
-rw-r--r--lib/Bitcode/Writer/BitcodeWriter.cpp44
-rw-r--r--lib/Bitcode/Writer/LLVMBuild.txt2
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinter.cpp4
-rw-r--r--lib/CodeGen/AsmPrinter/CodeViewDebug.cpp22
-rw-r--r--lib/CodeGen/AsmPrinter/CodeViewDebug.h1
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp20
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCompileUnit.h10
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfUnit.cpp20
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfUnit.h9
-rw-r--r--lib/CodeGen/CodeGenPrepare.cpp111
-rw-r--r--lib/CodeGen/GlobalISel/IRTranslator.cpp67
-rw-r--r--lib/CodeGen/GlobalISel/InstructionSelector.cpp20
-rw-r--r--lib/CodeGen/GlobalISel/LegalizerHelper.cpp68
-rw-r--r--lib/CodeGen/GlobalISel/LegalizerInfo.cpp37
-rw-r--r--lib/CodeGen/GlobalISel/MachineIRBuilder.cpp24
-rw-r--r--lib/CodeGen/GlobalISel/RegBankSelect.cpp57
-rw-r--r--lib/CodeGen/LiveRangeCalc.cpp68
-rw-r--r--lib/CodeGen/LiveRangeCalc.h4
-rw-r--r--lib/CodeGen/MIRParser/MIParser.cpp6
-rw-r--r--lib/CodeGen/MachineOptimizationRemarkEmitter.cpp10
-rw-r--r--lib/CodeGen/MacroFusion.cpp27
-rw-r--r--lib/CodeGen/PeepholeOptimizer.cpp170
-rw-r--r--lib/CodeGen/RegAllocGreedy.cpp2
-rw-r--r--lib/CodeGen/RegisterCoalescer.cpp9
-rw-r--r--lib/CodeGen/RenameIndependentSubregs.cpp6
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp118
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp21
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp34
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp2
-rw-r--r--lib/CodeGen/TargetPassConfig.cpp36
-rw-r--r--lib/CodeGen/TwoAddressInstructionPass.cpp11
-rw-r--r--lib/DebugInfo/CodeView/CVSymbolVisitor.cpp28
-rw-r--r--lib/DebugInfo/CodeView/DebugChecksumsSubsection.cpp16
-rw-r--r--lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp6
-rw-r--r--lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp14
-rw-r--r--lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp14
-rw-r--r--lib/DebugInfo/CodeView/DebugLinesSubsection.cpp13
-rw-r--r--lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp11
-rw-r--r--lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp14
-rw-r--r--lib/DebugInfo/CodeView/DebugSymbolRVASubsection.cpp7
-rw-r--r--lib/DebugInfo/CodeView/EnumTables.cpp24
-rw-r--r--lib/DebugInfo/CodeView/Formatters.cpp8
-rw-r--r--lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp18
-rw-r--r--lib/DebugInfo/CodeView/StringsAndChecksums.cpp8
-rw-r--r--lib/DebugInfo/CodeView/SymbolSerializer.cpp11
-rw-r--r--lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp19
-rw-r--r--lib/DebugInfo/CodeView/TypeSerializer.cpp36
-rw-r--r--lib/DebugInfo/DWARF/CMakeLists.txt1
-rw-r--r--lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp42
-rw-r--r--lib/DebugInfo/DWARF/DWARFContext.cpp87
-rw-r--r--lib/DebugInfo/DWARF/DWARFDataExtractor.cpp24
-rw-r--r--lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp4
-rw-r--r--lib/DebugInfo/DWARF/DWARFDebugLine.cpp22
-rw-r--r--lib/DebugInfo/DWARF/DWARFDebugLoc.cpp8
-rw-r--r--lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp11
-rw-r--r--lib/DebugInfo/DWARF/DWARFDie.cpp2
-rw-r--r--lib/DebugInfo/DWARF/DWARFFormValue.cpp20
-rw-r--r--lib/DebugInfo/DWARF/DWARFUnit.cpp42
-rw-r--r--lib/DebugInfo/DWARF/DWARFVerifier.cpp73
-rw-r--r--lib/DebugInfo/PDB/Native/DbiModuleList.cpp11
-rw-r--r--lib/DebugInfo/PDB/Native/Hash.cpp2
-rw-r--r--lib/DebugInfo/PDB/Native/HashTable.cpp16
-rw-r--r--lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp6
-rw-r--r--lib/DebugInfo/PDB/Native/ModuleDebugStreamBuilder.cpp0
-rw-r--r--lib/DebugInfo/PDB/Native/NamedStreamMap.cpp9
-rw-r--r--lib/DebugInfo/PDB/Native/NativeEnumModules.cpp4
-rw-r--r--lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp29
-rw-r--r--lib/DebugInfo/PDB/Native/NativeSession.cpp21
-rw-r--r--lib/DebugInfo/PDB/PDB.cpp12
-rw-r--r--lib/DebugInfo/PDB/PDBExtras.cpp4
-rw-r--r--lib/DebugInfo/PDB/UDTLayout.cpp18
-rw-r--r--lib/ExecutionEngine/Orc/OrcMCJITReplacement.h10
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp4
-rw-r--r--lib/IR/Constants.cpp2
-rw-r--r--lib/IR/Dominators.cpp18
-rw-r--r--lib/IR/LLVMContext.cpp15
-rw-r--r--lib/IR/LLVMContextImpl.h3
-rw-r--r--lib/LTO/LTO.cpp38
-rw-r--r--lib/MC/MCAssembler.cpp6
-rw-r--r--lib/MC/WasmObjectWriter.cpp24
-rw-r--r--lib/Object/CMakeLists.txt1
-rw-r--r--lib/Object/COFFObjectFile.cpp25
-rw-r--r--lib/Object/IRSymtab.cpp48
-rw-r--r--lib/Object/WasmObjectFile.cpp16
-rw-r--r--lib/Object/WindowsResource.cpp2
-rw-r--r--lib/ObjectYAML/COFFYAML.cpp62
-rw-r--r--lib/ObjectYAML/CodeViewYAMLDebugSections.cpp53
-rw-r--r--lib/ObjectYAML/CodeViewYAMLSymbols.cpp42
-rw-r--r--lib/ObjectYAML/CodeViewYAMLTypes.cpp73
-rw-r--r--lib/ObjectYAML/DWARFEmitter.cpp40
-rw-r--r--lib/ObjectYAML/DWARFYAML.cpp4
-rw-r--r--lib/ObjectYAML/ELFYAML.cpp18
-rw-r--r--lib/ObjectYAML/MachOYAML.cpp50
-rw-r--r--lib/ObjectYAML/ObjectYAML.cpp9
-rw-r--r--lib/ObjectYAML/WasmYAML.cpp10
-rw-r--r--lib/ObjectYAML/YAML.cpp5
-rw-r--r--lib/Passes/PassBuilder.cpp43
-rw-r--r--lib/ProfileData/Coverage/CoverageMapping.cpp54
-rw-r--r--lib/ProfileData/InstrProf.cpp20
-rw-r--r--lib/Support/AMDGPUCodeObjectMetadata.cpp2
-rw-r--r--lib/Support/Host.cpp10
-rw-r--r--lib/Support/MemoryBuffer.cpp13
-rw-r--r--lib/Support/TargetParser.cpp36
-rw-r--r--lib/Support/Unix/Path.inc12
-rw-r--r--lib/Support/Unix/Process.inc12
-rw-r--r--lib/Target/AArch64/AArch64CondBrTuning.cpp7
-rw-r--r--lib/Target/AArch64/AArch64ConditionalCompares.cpp48
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.cpp5
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.td4
-rw-r--r--lib/Target/AArch64/AArch64InstructionSelector.cpp9
-rw-r--r--lib/Target/AArch64/AArch64LegalizerInfo.cpp9
-rw-r--r--lib/Target/AArch64/AArch64MCInstLower.cpp15
-rw-r--r--lib/Target/AArch64/AArch64MCInstLower.h2
-rw-r--r--lib/Target/AArch64/AArch64RegisterInfo.cpp2
-rw-r--r--lib/Target/AArch64/AArch64SchedThunderX2T99.td1221
-rw-r--r--lib/Target/AArch64/AArch64TargetMachine.cpp4
-rw-r--r--lib/Target/AArch64/AArch64TargetMachine.h1
-rw-r--r--lib/Target/AArch64/AArch64TargetObjectFile.h3
-rw-r--r--lib/Target/AArch64/AArch64TargetTransformInfo.cpp77
-rw-r--r--lib/Target/AArch64/AArch64TargetTransformInfo.h6
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp32
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp123
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp4
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h38
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp4
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h5
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp18
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h2
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp65
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp37
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h43
-rw-r--r--lib/Target/AArch64/MCTargetDesc/CMakeLists.txt2
-rw-r--r--lib/Target/AMDGPU/AMDGPU.h1
-rw-r--r--lib/Target/AMDGPU/AMDGPU.td6
-rw-r--r--lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp4
-rw-r--r--lib/Target/AMDGPU/AMDGPUSubtarget.cpp2
-rw-r--r--lib/Target/AMDGPU/AMDGPUSubtarget.h6
-rw-r--r--lib/Target/AMDGPU/AMDGPUTargetMachine.cpp1
-rw-r--r--lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp2
-rw-r--r--lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h3
-rw-r--r--lib/Target/AMDGPU/CMakeLists.txt1
-rw-r--r--lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp4
-rw-r--r--lib/Target/AMDGPU/SIISelLowering.cpp60
-rw-r--r--lib/Target/AMDGPU/SIInstrInfo.cpp18
-rw-r--r--lib/Target/AMDGPU/SIInstrInfo.td4
-rw-r--r--lib/Target/AMDGPU/SIInstructions.td8
-rw-r--r--lib/Target/AMDGPU/SIPeepholeSDWA.cpp44
-rw-r--r--lib/Target/AMDGPU/SITypeRewriter.cpp156
-rw-r--r--lib/Target/ARM/ARM.td32
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp45
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp15
-rw-r--r--lib/Target/ARM/ARMInstrThumb.td8
-rw-r--r--lib/Target/ARM/ARMInstructionSelector.cpp50
-rw-r--r--lib/Target/ARM/ARMLegalizerInfo.cpp43
-rw-r--r--lib/Target/ARM/ARMRegisterBankInfo.cpp12
-rw-r--r--lib/Target/ARM/ARMRegisterInfo.td4
-rw-r--r--lib/Target/ARM/ARMSchedule.td1
-rw-r--r--lib/Target/ARM/ARMScheduleM3.td21
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp77
-rw-r--r--lib/Target/ARM/ARMSubtarget.h6
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp138
-rw-r--r--lib/Target/ARM/ARMTargetMachine.h3
-rw-r--r--lib/Target/ARM/ARMTargetObjectFile.cpp34
-rw-r--r--lib/Target/ARM/ARMTargetObjectFile.h2
-rw-r--r--lib/Target/ARM/Disassembler/ARMDisassembler.cpp2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp17
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h6
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h83
-rw-r--r--lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp47
-rw-r--r--lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h15
-rw-r--r--lib/Target/BPF/BPFISelDAGToDAG.cpp312
-rw-r--r--lib/Target/Hexagon/HexagonFrameLowering.cpp181
-rw-r--r--lib/Target/Hexagon/HexagonFrameLowering.h17
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.cpp77
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.cpp18
-rw-r--r--lib/Target/Hexagon/HexagonNewValueJump.cpp63
-rw-r--r--lib/Target/Hexagon/HexagonOptAddrMode.cpp10
-rw-r--r--lib/Target/Hexagon/HexagonTargetMachine.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonTargetObjectFile.cpp9
-rw-r--r--lib/Target/Hexagon/HexagonTargetObjectFile.h3
-rw-r--r--lib/Target/Hexagon/HexagonTargetTransformInfo.cpp17
-rw-r--r--lib/Target/Hexagon/HexagonTargetTransformInfo.h8
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp15
-rw-r--r--lib/Target/Mips/AsmParser/MipsAsmParser.cpp88
-rw-r--r--lib/Target/Mips/MicroMips64r6InstrInfo.td12
-rw-r--r--lib/Target/Mips/Mips64InstrInfo.td12
-rw-r--r--lib/Target/Mips/MipsDelaySlotFiller.cpp2
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp135
-rw-r--r--lib/Target/Mips/MipsSEISelDAGToDAG.cpp99
-rw-r--r--lib/Target/Mips/MipsSEISelDAGToDAG.h3
-rw-r--r--lib/Target/Mips/MipsSEISelLowering.cpp176
-rw-r--r--lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp4
-rw-r--r--lib/Target/NVPTX/NVPTXTargetTransformInfo.h3
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp11
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h34
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp4
-rw-r--r--lib/Target/PowerPC/PPC.h6
-rw-r--r--lib/Target/PowerPC/PPCCTRLoops.cpp108
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp9
-rw-r--r--lib/Target/PowerPC/PPCTLSDynamicCall.cpp24
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.cpp5
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.h1
-rw-r--r--lib/Target/PowerPC/PPCTargetTransformInfo.cpp4
-rw-r--r--lib/Target/PowerPC/PPCTargetTransformInfo.h3
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp12
-rw-r--r--lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp16
-rw-r--r--lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp6
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp7
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h1
-rw-r--r--lib/Target/SystemZ/README.txt5
-rw-r--r--lib/Target/SystemZ/SystemZ.td1
-rw-r--r--lib/Target/SystemZ/SystemZFeatures.td22
-rw-r--r--lib/Target/SystemZ/SystemZInstrFormats.td106
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.td86
-rw-r--r--lib/Target/SystemZ/SystemZInstrSystem.td517
-rw-r--r--lib/Target/SystemZ/SystemZRegisterInfo.td10
-rw-r--r--lib/Target/SystemZ/SystemZScheduleZ13.td193
-rw-r--r--lib/Target/SystemZ/SystemZScheduleZ196.td190
-rw-r--r--lib/Target/SystemZ/SystemZScheduleZEC12.td191
-rw-r--r--lib/Target/SystemZ/SystemZSubtarget.cpp5
-rw-r--r--lib/Target/SystemZ/SystemZSubtarget.h15
-rw-r--r--lib/Target/SystemZ/SystemZTargetTransformInfo.cpp2
-rw-r--r--lib/Target/SystemZ/SystemZTargetTransformInfo.h3
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrControl.td26
-rw-r--r--lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp2
-rw-r--r--lib/Target/X86/AsmParser/X86AsmParser.cpp112
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp20
-rw-r--r--lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp2
-rw-r--r--lib/Target/X86/X86.td30
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp106
-rw-r--r--lib/Target/X86/X86InstrAVX512.td645
-rw-r--r--lib/Target/X86/X86InstructionSelector.cpp53
-rw-r--r--lib/Target/X86/X86LegalizerInfo.cpp20
-rw-r--r--lib/Target/X86/X86Subtarget.cpp55
-rw-r--r--lib/Target/X86/X86Subtarget.h2
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp47
-rw-r--r--lib/Target/X86/X86TargetMachine.h1
-rw-r--r--lib/Transforms/Coroutines/CoroInstr.h44
-rw-r--r--lib/Transforms/IPO/PassManagerBuilder.cpp62
-rw-r--r--lib/Transforms/IPO/SampleProfile.cpp3
-rw-r--r--lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp2
-rw-r--r--lib/Transforms/InstCombine/InstCombineAndOrXor.cpp21
-rw-r--r--lib/Transforms/InstCombine/InstCombineCalls.cpp2
-rw-r--r--lib/Transforms/InstCombine/InstCombineCompares.cpp49
-rw-r--r--lib/Transforms/InstCombine/InstCombineInternal.h5
-rw-r--r--lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp24
-rw-r--r--lib/Transforms/InstCombine/InstCombineMulDivRem.cpp4
-rw-r--r--lib/Transforms/InstCombine/InstCombineSelect.cpp17
-rw-r--r--lib/Transforms/InstCombine/InstructionCombining.cpp8
-rw-r--r--lib/Transforms/Scalar/ConstantHoisting.cpp95
-rw-r--r--lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp6
-rw-r--r--lib/Transforms/Scalar/LoopUnrollPass.cpp30
-rw-r--r--lib/Transforms/Scalar/NewGVN.cpp3
-rw-r--r--lib/Transforms/Scalar/Reassociate.cpp2
-rw-r--r--lib/Transforms/Scalar/RewriteStatepointsForGC.cpp2
-rw-r--r--lib/Transforms/Scalar/SROA.cpp20
-rw-r--r--lib/Transforms/Utils/CodeExtractor.cpp27
-rw-r--r--lib/Transforms/Utils/LoopUnrollRuntime.cpp130
-rw-r--r--lib/Transforms/Utils/LoopUtils.cpp19
-rw-r--r--lib/Transforms/Utils/LowerMemIntrinsics.cpp19
-rw-r--r--lib/Transforms/Utils/OrderedInstructions.cpp3
-rw-r--r--lib/Transforms/Utils/PredicateInfo.cpp79
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp10
-rw-r--r--lib/Transforms/Utils/SimplifyIndVar.cpp10
-rw-r--r--lib/Transforms/Vectorize/BBVectorize.cpp3282
-rw-r--r--lib/Transforms/Vectorize/CMakeLists.txt1
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp675
-rw-r--r--lib/Transforms/Vectorize/SLPVectorizer.cpp84
-rw-r--r--lib/Transforms/Vectorize/Vectorize.cpp3
-rw-r--r--test/Analysis/Dominators/2006-10-02-BreakCritEdges.ll2
-rw-r--r--test/Analysis/ScalarEvolution/limit-depth.ll58
-rw-r--r--test/Bitcode/thinlto-alias.ll2
-rw-r--r--test/Bitcode/thinlto-function-summary-callgraph-pgo.ll2
-rw-r--r--test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll2
-rw-r--r--test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll2
-rw-r--r--test/Bitcode/thinlto-function-summary-callgraph.ll2
-rw-r--r--test/Bitcode/thinlto-function-summary-refgraph.ll2
-rw-r--r--test/Bitcode/thinlto-function-summary.ll2
-rw-r--r--test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll19
-rw-r--r--test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll65
-rw-r--r--test/CodeGen/AArch64/GlobalISel/call-translator.ll2
-rw-r--r--test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll2
-rw-r--r--test/CodeGen/AArch64/GlobalISel/legalize-combines.mir8
-rw-r--r--test/CodeGen/AArch64/GlobalISel/legalize-exceptions.ll7
-rw-r--r--test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir85
-rw-r--r--test/CodeGen/AArch64/GlobalISel/legalize-undef.mir15
-rw-r--r--test/CodeGen/AArch64/GlobalISel/select-trunc.mir4
-rw-r--r--test/CodeGen/AArch64/arm64-ccmp.ll6
-rw-r--r--test/CodeGen/AArch64/arm64-spill-remarks.ll27
-rw-r--r--test/CodeGen/AArch64/ccmp-successor-probs.mir46
-rw-r--r--test/CodeGen/AArch64/cond-br-tuning.ll8
-rw-r--r--test/CodeGen/AMDGPU/alignbit-pat.ll100
-rw-r--r--test/CodeGen/AMDGPU/bug-vopc-commute.ll6
-rw-r--r--test/CodeGen/AMDGPU/cgp-bitfield-extract.ll9
-rw-r--r--test/CodeGen/AMDGPU/code-object-metadata-from-llvm-ir-full.ll10
-rw-r--r--test/CodeGen/AMDGPU/combine-and-sext-bool.ll27
-rw-r--r--test/CodeGen/AMDGPU/combine-cond-add-sub.ll20
-rw-r--r--test/CodeGen/AMDGPU/fold-fmul-to-neg-abs.ll37
-rw-r--r--test/CodeGen/AMDGPU/llvm.SI.load.dword.ll22
-rw-r--r--test/CodeGen/AMDGPU/llvm.SI.tbuffer.store.ll20
-rw-r--r--test/CodeGen/AMDGPU/misched-killflags.mir45
-rw-r--r--test/CodeGen/AMDGPU/mubuf.ll22
-rw-r--r--test/CodeGen/AMDGPU/rename-independent-subregs-invalid-mac-operands.mir69
-rw-r--r--test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir155
-rw-r--r--test/CodeGen/AMDGPU/ret_jump.ll7
-rw-r--r--test/CodeGen/AMDGPU/scheduler-subrange-crash.ll12
-rw-r--r--test/CodeGen/AMDGPU/sdwa-peephole-instr.mir446
-rw-r--r--test/CodeGen/AMDGPU/select-vectors.ll2
-rw-r--r--test/CodeGen/AMDGPU/setcc-sext.ll292
-rw-r--r--test/CodeGen/AMDGPU/sgpr-copy.ll94
-rw-r--r--test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll22
-rw-r--r--test/CodeGen/AMDGPU/shift-i64-opts.ll74
-rw-r--r--test/CodeGen/AMDGPU/si-lod-bias.ll17
-rw-r--r--test/CodeGen/AMDGPU/si-sgpr-spill.ll398
-rw-r--r--test/CodeGen/AMDGPU/si-spill-cf.ll136
-rw-r--r--test/CodeGen/AMDGPU/smrd.ll48
-rw-r--r--test/CodeGen/AMDGPU/spill-to-smem-m0.ll22
-rw-r--r--test/CodeGen/AMDGPU/split-smrd.ll4
-rw-r--r--test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll20
-rw-r--r--test/CodeGen/ARM/2012-10-18-PR14099-ByvalFrameAddress.ll2
-rw-r--r--test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir73
-rw-r--r--test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll10
-rw-r--r--test/CodeGen/ARM/GlobalISel/arm-isel.ll20
-rw-r--r--test/CodeGen/ARM/GlobalISel/arm-legalizer.mir55
-rw-r--r--test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir31
-rw-r--r--test/CodeGen/ARM/Windows/chkstk-movw-movt-isel.ll2
-rw-r--r--test/CodeGen/ARM/Windows/no-arm-mode.ll10
-rw-r--r--test/CodeGen/ARM/Windows/tls.ll14
-rw-r--r--test/CodeGen/ARM/alloca.ll4
-rw-r--r--test/CodeGen/ARM/arg-copy-elide.ll4
-rw-r--r--test/CodeGen/ARM/arm-abi-attr.ll2
-rw-r--r--test/CodeGen/ARM/arm-and-tst-peephole.ll2
-rw-r--r--test/CodeGen/ARM/arm-position-independence-jump-table.ll2
-rw-r--r--test/CodeGen/ARM/arm-shrink-wrapping-linux.ll10
-rw-r--r--test/CodeGen/ARM/atomic-cmpxchg.ll4
-rw-r--r--test/CodeGen/ARM/bool-ext-inc.ll28
-rw-r--r--test/CodeGen/ARM/cmpxchg-O0-be.ll26
-rw-r--r--test/CodeGen/ARM/cmpxchg-weak.ll4
-rw-r--r--test/CodeGen/ARM/code-placement.ll5
-rw-r--r--test/CodeGen/ARM/constantfp.ll12
-rw-r--r--test/CodeGen/ARM/cortex-a57-misched-basic.ll6
-rw-r--r--test/CodeGen/ARM/cortexr52-misched-basic.ll4
-rw-r--r--test/CodeGen/ARM/ctor_order.ll2
-rw-r--r--test/CodeGen/ARM/ctors_dtors.ll2
-rw-r--r--test/CodeGen/ARM/cttz.ll4
-rw-r--r--test/CodeGen/ARM/cttz_vector.ll64
-rw-r--r--test/CodeGen/ARM/cxx-tlscc.ll2
-rw-r--r--test/CodeGen/ARM/execute-only-big-stack-frame.ll6
-rw-r--r--test/CodeGen/ARM/execute-only-section.ll6
-rw-r--r--test/CodeGen/ARM/execute-only.ll6
-rw-r--r--test/CodeGen/ARM/fp16-promote.ll29
-rw-r--r--test/CodeGen/ARM/fp16-v3.ll4
-rw-r--r--test/CodeGen/ARM/ifcvt7.ll2
-rw-r--r--test/CodeGen/ARM/illegal-bitfield-loadstore.ll6
-rw-r--r--test/CodeGen/ARM/indirectbr.ll4
-rw-r--r--test/CodeGen/ARM/jump-table-islands.ll2
-rw-r--r--test/CodeGen/ARM/jump-table-tbh.ll6
-rw-r--r--test/CodeGen/ARM/ldm-stm-i256.ll20
-rw-r--r--test/CodeGen/ARM/legalize-unaligned-load.ll2
-rw-r--r--test/CodeGen/ARM/long-setcc.ll2
-rw-r--r--test/CodeGen/ARM/long_shift.ll16
-rw-r--r--test/CodeGen/ARM/misched-fusion-aes.ll6
-rw-r--r--test/CodeGen/ARM/select_const.ll8
-rw-r--r--test/CodeGen/ARM/shift-i64.ll2
-rw-r--r--test/CodeGen/ARM/ssp-data-layout.ll2
-rw-r--r--test/CodeGen/ARM/str_pre-2.ll2
-rw-r--r--test/CodeGen/ARM/swifterror.ll52
-rw-r--r--test/CodeGen/ARM/thumb2-it-block.ll4
-rw-r--r--test/CodeGen/ARM/vcgt.ll4
-rw-r--r--test/CodeGen/ARM/vector-DAGCombine.ll10
-rw-r--r--test/CodeGen/ARM/vext.ll58
-rw-r--r--test/CodeGen/ARM/vfp.ll4
-rw-r--r--test/CodeGen/ARM/vld1.ll2
-rw-r--r--test/CodeGen/ARM/vld2.ll16
-rw-r--r--test/CodeGen/ARM/vld3.ll16
-rw-r--r--test/CodeGen/ARM/vld4.ll24
-rw-r--r--test/CodeGen/ARM/vlddup.ll54
-rw-r--r--test/CodeGen/ARM/vldlane.ll2
-rw-r--r--test/CodeGen/ARM/vpadd.ll22
-rw-r--r--test/CodeGen/ARM/vst1.ll2
-rw-r--r--test/CodeGen/ARM/vst4.ll8
-rw-r--r--test/CodeGen/ARM/vstlane.ll6
-rw-r--r--test/CodeGen/ARM/vuzp.ll269
-rw-r--r--test/CodeGen/BPF/remove_truncate_1.ll87
-rw-r--r--test/CodeGen/BPF/remove_truncate_2.ll65
-rw-r--r--test/CodeGen/Hexagon/addrmode-keepdeadphis.mir30
-rw-r--r--test/CodeGen/Hexagon/expand-condsets-undefvni.ll49
-rw-r--r--test/CodeGen/Hexagon/expand-vselect-kill.ll53
-rw-r--r--test/CodeGen/Hexagon/fpelim-basic.ll91
-rw-r--r--test/CodeGen/Hexagon/frame.ll23
-rw-r--r--test/CodeGen/Hexagon/jt-in-text.ll57
-rw-r--r--test/CodeGen/Hexagon/newvaluejump-kill2.mir18
-rw-r--r--test/CodeGen/Hexagon/newvaluejump2.ll2
-rw-r--r--test/CodeGen/Hexagon/regalloc-liveout-undef.mir35
-rw-r--r--test/CodeGen/MIR/Generic/multiRunPass.mir3
-rw-r--r--test/CodeGen/Mips/2008-06-05-Carry.ll13
-rw-r--r--test/CodeGen/Mips/dsp-patterns.ll4
-rw-r--r--test/CodeGen/Mips/llcarry.ll11
-rw-r--r--test/CodeGen/Mips/llvm-ir/add.ll394
-rw-r--r--test/CodeGen/Mips/llvm-ir/sub.ll174
-rw-r--r--test/CodeGen/Mips/madd-msub.ll81
-rw-r--r--test/CodeGen/NVPTX/lower-aggr-copies.ll4
-rw-r--r--test/CodeGen/PowerPC/anon_aggr.ll64
-rw-r--r--test/CodeGen/PowerPC/floatPSA.ll2
-rw-r--r--test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll32
-rw-r--r--test/CodeGen/PowerPC/memcmp.ll131
-rw-r--r--test/CodeGen/PowerPC/memcmpIR.ll90
-rw-r--r--test/CodeGen/PowerPC/merge_stores_dereferenceable.ll24
-rw-r--r--test/CodeGen/PowerPC/ppc64-align-long-double.ll24
-rw-r--r--test/CodeGen/PowerPC/tls.ll2
-rw-r--r--test/CodeGen/PowerPC/tls_get_addr_fence1.mir66
-rw-r--r--test/CodeGen/PowerPC/tls_get_addr_fence2.mir65
-rw-r--r--test/CodeGen/Thumb/long-setcc.ll2
-rw-r--r--test/CodeGen/Thumb2/constant-islands-new-island.ll6
-rw-r--r--test/CodeGen/Thumb2/ifcvt-no-branch-predictor.ll154
-rw-r--r--test/CodeGen/Thumb2/thumb2-ifcvt2.ll3
-rw-r--r--test/CodeGen/WebAssembly/exception.ll22
-rw-r--r--test/CodeGen/X86/GlobalISel/and-scalar.ll43
-rw-r--r--test/CodeGen/X86/GlobalISel/fadd-scalar.ll20
-rw-r--r--test/CodeGen/X86/GlobalISel/fdiv-scalar.ll20
-rw-r--r--test/CodeGen/X86/GlobalISel/fmul-scalar.ll20
-rw-r--r--test/CodeGen/X86/GlobalISel/fsub-scalar.ll20
-rw-r--r--test/CodeGen/X86/GlobalISel/legalize-and-scalar.mir124
-rw-r--r--test/CodeGen/X86/GlobalISel/legalize-fadd-scalar.mir74
-rw-r--r--test/CodeGen/X86/GlobalISel/legalize-fdiv-scalar.mir74
-rw-r--r--test/CodeGen/X86/GlobalISel/legalize-fmul-scalar.mir74
-rw-r--r--test/CodeGen/X86/GlobalISel/legalize-fsub-scalar.mir74
-rw-r--r--test/CodeGen/X86/GlobalISel/legalize-or-scalar.mir124
-rw-r--r--test/CodeGen/X86/GlobalISel/legalize-xor-scalar.mir124
-rw-r--r--test/CodeGen/X86/GlobalISel/or-scalar.ll43
-rw-r--r--test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir235
-rw-r--r--test/CodeGen/X86/GlobalISel/select-add.mir80
-rw-r--r--test/CodeGen/X86/GlobalISel/select-and-scalar.mir160
-rw-r--r--test/CodeGen/X86/GlobalISel/select-constant.mir21
-rw-r--r--test/CodeGen/X86/GlobalISel/select-fadd-scalar.mir119
-rw-r--r--test/CodeGen/X86/GlobalISel/select-fdiv-scalar.mir119
-rw-r--r--test/CodeGen/X86/GlobalISel/select-fmul-scalar.mir119
-rw-r--r--test/CodeGen/X86/GlobalISel/select-fsub-scalar.mir119
-rw-r--r--test/CodeGen/X86/GlobalISel/select-merge-vec256.mir52
-rw-r--r--test/CodeGen/X86/GlobalISel/select-merge-vec512.mir74
-rw-r--r--test/CodeGen/X86/GlobalISel/select-or-scalar.mir160
-rw-r--r--test/CodeGen/X86/GlobalISel/select-sub.mir77
-rw-r--r--test/CodeGen/X86/GlobalISel/select-xor-scalar.mir160
-rw-r--r--test/CodeGen/X86/GlobalISel/xor-scalar.ll43
-rw-r--r--test/CodeGen/X86/atom-call-reg-indirect.ll2
-rw-r--r--test/CodeGen/X86/atom-fixup-lea2.ll2
-rw-r--r--test/CodeGen/X86/atom-sched.ll1
-rw-r--r--test/CodeGen/X86/avx2-arith.ll106
-rw-r--r--test/CodeGen/X86/avx2-cmp.ll36
-rwxr-xr-xtest/CodeGen/X86/avx2-conversions.ll74
-rw-r--r--test/CodeGen/X86/avx2-fma-fneg-combine.ll32
-rw-r--r--test/CodeGen/X86/avx2-gather.ll28
-rw-r--r--test/CodeGen/X86/avx2-logic.ll34
-rw-r--r--test/CodeGen/X86/avx2-phaddsub.ll36
-rw-r--r--test/CodeGen/X86/avx2-shift.ll170
-rw-r--r--test/CodeGen/X86/avx2-vector-shifts.ll168
-rwxr-xr-xtest/CodeGen/X86/avx2-vperm.ll20
-rw-r--r--test/CodeGen/X86/avx512-arith.ll258
-rw-r--r--test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll404
-rw-r--r--test/CodeGen/X86/avx512vl-vec-masked-cmp.ll13485
-rw-r--r--test/CodeGen/X86/bswap-vector.ll11
-rw-r--r--test/CodeGen/X86/bswap-wide-int.ll173
-rw-r--r--test/CodeGen/X86/compress_expand.ll8
-rw-r--r--test/CodeGen/X86/cpus.ll2
-rw-r--r--test/CodeGen/X86/fp128-cast.ll16
-rw-r--r--test/CodeGen/X86/insertelement-zero.ll15
-rw-r--r--test/CodeGen/X86/lower-vec-shift.ll7
-rw-r--r--test/CodeGen/X86/lower-vec-shuffle-bug.ll11
-rw-r--r--test/CodeGen/X86/masked_memop.ll16
-rw-r--r--test/CodeGen/X86/memcmp.ll782
-rw-r--r--test/CodeGen/X86/palignr.ll259
-rw-r--r--test/CodeGen/X86/peephole-recurrence.mir232
-rw-r--r--test/CodeGen/X86/sbb.ll80
-rw-r--r--test/CodeGen/X86/vector-shuffle-512-v16.ll57
-rw-r--r--test/CodeGen/X86/vector-shuffle-512-v8.ll88
-rw-r--r--test/CodeGen/X86/vector-truncate-combine.ll35
-rw-r--r--test/CodeGen/X86/x86-interleaved-access.ll58
-rw-r--r--test/DebugInfo/COFF/lines-bb-start.ll97
-rw-r--r--test/DebugInfo/COFF/local-variables.ll2
-rw-r--r--test/DebugInfo/PDB/pdbdump-headers.test128
-rw-r--r--test/DebugInfo/PDB/pdbdump-merge-ids-and-types.test2
-rw-r--r--test/DebugInfo/dwarfdump-accel.test5
-rw-r--r--test/Feature/optnone-opt.ll3
-rw-r--r--test/Instrumentation/MemorySanitizer/msan_basic.ll4
-rw-r--r--test/LTO/Resolution/X86/Inputs/comdat-mixed-lto.ll23
-rw-r--r--test/LTO/Resolution/X86/comdat-mixed-lto.ll42
-rw-r--r--test/MC/AArch64/coff-basic.ll8
-rw-r--r--test/MC/AMDGPU/code-object-metadata-kernel-args.s4
-rw-r--r--test/MC/AMDGPU/code-object-metadata-kernel-attrs.s4
-rw-r--r--test/MC/AVR/out-of-range-fixups/adiw-fail.s5
-rw-r--r--test/MC/AVR/out-of-range-fixups/in-fail.s5
-rw-r--r--test/MC/AVR/out-of-range-fixups/lds-fail.s5
-rw-r--r--test/MC/AVR/out-of-range-fixups/sbi-fail.s5
-rw-r--r--test/MC/Disassembler/SystemZ/insns.txt2025
-rw-r--r--test/MC/Mips/macro-dla-bad.s21
-rw-r--r--test/MC/Mips/macro-dla-pic.s50
-rw-r--r--test/MC/Mips/micromips64r6/valid.s4
-rw-r--r--test/MC/Mips/mips3/valid.s4
-rw-r--r--test/MC/Mips/mips4/valid.s4
-rw-r--r--test/MC/Mips/mips5/valid.s4
-rw-r--r--test/MC/Mips/mips64/valid.s4
-rw-r--r--test/MC/Mips/mips64r2/valid.s4
-rw-r--r--test/MC/Mips/mips64r3/valid.s4
-rw-r--r--test/MC/Mips/mips64r5/valid.s4
-rw-r--r--test/MC/Mips/mips64r6/valid.s10
-rw-r--r--test/MC/SystemZ/insn-bad-z196.s21
-rw-r--r--test/MC/SystemZ/insn-bad-zEC12.s14
-rw-r--r--test/MC/SystemZ/insn-bad.s850
-rw-r--r--test/MC/SystemZ/insn-good-z196.s180
-rw-r--r--test/MC/SystemZ/insn-good-zEC12.s14
-rw-r--r--test/MC/SystemZ/insn-good.s1411
-rw-r--r--test/MC/SystemZ/regs-bad.s63
-rw-r--r--test/MC/SystemZ/regs-good.s51
-rw-r--r--test/MC/WebAssembly/unnamed-data.ll94
-rw-r--r--test/MC/WebAssembly/weak-alias.ll35
-rw-r--r--test/MC/WebAssembly/weak.ll36
-rw-r--r--test/MC/X86/intel-syntax-bitwise-ops.s17
-rw-r--r--test/MC/X86/signed-coff-pcrel.s12
-rw-r--r--test/Object/X86/irsymtab-asm.ll17
-rw-r--r--test/Object/X86/irsymtab-bad-alias.ll15
-rw-r--r--test/Object/X86/irsymtab.ll33
-rw-r--r--test/Object/X86/yaml-elf-x86-rel-broken.yaml29
-rw-r--r--test/ObjectYAML/wasm/weak_symbols.yaml21
-rw-r--r--test/Other/new-pm-defaults.ll1
-rw-r--r--test/Other/new-pm-thinlto-defaults.ll13
-rw-r--r--test/ThinLTO/X86/autoupgrade.ll2
-rw-r--r--test/Transforms/BBVectorize/X86/cmp-types.ll16
-rw-r--r--test/Transforms/BBVectorize/X86/loop1.ll61
-rw-r--r--test/Transforms/BBVectorize/X86/pr15289.ll95
-rw-r--r--test/Transforms/BBVectorize/X86/sh-rec.ll54
-rw-r--r--test/Transforms/BBVectorize/X86/sh-rec2.ll85
-rw-r--r--test/Transforms/BBVectorize/X86/sh-rec3.ll170
-rw-r--r--test/Transforms/BBVectorize/X86/sh-types.ll25
-rw-r--r--test/Transforms/BBVectorize/X86/simple-int.ll79
-rw-r--r--test/Transforms/BBVectorize/X86/simple-ldstr.ll29
-rw-r--r--test/Transforms/BBVectorize/X86/simple.ll120
-rw-r--r--test/Transforms/BBVectorize/X86/vs-cast.ll12
-rw-r--r--test/Transforms/BBVectorize/X86/wr-aliases.ll144
-rw-r--r--test/Transforms/BBVectorize/cycle.ll112
-rw-r--r--test/Transforms/BBVectorize/func-alias.ll244
-rw-r--r--test/Transforms/BBVectorize/ld1.ll41
-rw-r--r--test/Transforms/BBVectorize/lit.local.cfg3
-rw-r--r--test/Transforms/BBVectorize/loop1.ll93
-rw-r--r--test/Transforms/BBVectorize/mem-op-depth.ll22
-rw-r--r--test/Transforms/BBVectorize/metadata.ll49
-rw-r--r--test/Transforms/BBVectorize/no-ldstr-conn.ll23
-rw-r--r--test/Transforms/BBVectorize/req-depth.ll17
-rw-r--r--test/Transforms/BBVectorize/search-limit.ll46
-rw-r--r--test/Transforms/BBVectorize/simple-int.ll506
-rw-r--r--test/Transforms/BBVectorize/simple-ldstr-ptrs.ll134
-rw-r--r--test/Transforms/BBVectorize/simple-ldstr.ll170
-rw-r--r--test/Transforms/BBVectorize/simple-sel.ll59
-rw-r--r--test/Transforms/BBVectorize/simple-tst.ll18
-rw-r--r--test/Transforms/BBVectorize/simple.ll199
-rw-r--r--test/Transforms/BBVectorize/simple3.ll35
-rw-r--r--test/Transforms/BBVectorize/vector-sel.ll33
-rw-r--r--test/Transforms/BBVectorize/xcore/no-vector-registers.ll18
-rw-r--r--test/Transforms/CodeExtractor/BlockAddressReference.ll36
-rw-r--r--test/Transforms/CodeExtractor/BlockAddressSelfReference.ll50
-rw-r--r--test/Transforms/CodeGenPrepare/X86/memcmp.ll126
-rw-r--r--test/Transforms/CodeGenPrepare/nonintegral.ll68
-rw-r--r--test/Transforms/ConstantHoisting/ARM/gep-struct-index.ll37
-rw-r--r--test/Transforms/Inline/AArch64/inline-target-attr.ll40
-rw-r--r--test/Transforms/Inline/inline-cold-callsite-pgo.ll54
-rw-r--r--test/Transforms/Inline/inline-cold-callsite.ll79
-rw-r--r--test/Transforms/Inline/optimization-remarks-yaml.ll18
-rw-r--r--test/Transforms/Inline/pr33637.ll25
-rw-r--r--test/Transforms/InstCombine/and-or-not.ll96
-rw-r--r--test/Transforms/InstCombine/clamp-to-minmax.ll500
-rw-r--r--test/Transforms/InstCombine/extractinsert-tbaa.ll45
-rw-r--r--test/Transforms/InstCombine/ffs-1.ll12
-rw-r--r--test/Transforms/InstCombine/icmp.ll80
-rw-r--r--test/Transforms/InstCombine/logical-select.ll26
-rw-r--r--test/Transforms/InstCombine/max-of-nots.ll17
-rw-r--r--test/Transforms/InstCombine/memmove.ll48
-rw-r--r--test/Transforms/InstCombine/memset.ll17
-rw-r--r--test/Transforms/InstCombine/mul.ll28
-rw-r--r--test/Transforms/InstCombine/or-xor.ll76
-rw-r--r--test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll23
-rw-r--r--test/Transforms/InstCombine/select-with-bitwise-ops.ll36
-rw-r--r--test/Transforms/InstCombine/select.ll26
-rw-r--r--test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll25
-rw-r--r--test/Transforms/JumpThreading/range-compare.ll125
-rw-r--r--test/Transforms/LICM/dropped-tbaa.ll90
-rw-r--r--test/Transforms/LoopUnroll/AArch64/falkor-prefetch.ll169
-rw-r--r--test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll279
-rw-r--r--test/Transforms/LoopUnroll/unroll-maxcount.ll31
-rw-r--r--test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll31
-rw-r--r--test/Transforms/LoopVectorize/first-order-recurrence.ll139
-rw-r--r--test/Transforms/LoopVectorize/if-conversion.ll2
-rw-r--r--test/Transforms/LoopVectorize/minmax_reduction.ll8
-rw-r--r--test/Transforms/LoopVectorize/small-loop.ll6
-rw-r--r--test/Transforms/LowerTypeTests/export-icall.ll9
-rw-r--r--test/Transforms/Reassociate/erase_inst_made_change.ll29
-rw-r--r--test/Transforms/SLPVectorizer/X86/limit.ll70
-rw-r--r--test/Transforms/SROA/alloca-address-space.ll31
-rw-r--r--test/Transforms/SROA/preserve-nonnull.ll94
-rw-r--r--test/Transforms/SimplifyCFG/Hexagon/lit.local.cfg5
-rw-r--r--test/Transforms/SimplifyCFG/Hexagon/switch-to-lookup-table.ll62
-rw-r--r--test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll2
-rw-r--r--test/Transforms/SimplifyCFG/X86/switch-table-bug.ll2
-rw-r--r--test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll32
-rw-r--r--test/tools/llvm-cvtres/symbols.test33
-rw-r--r--test/tools/llvm-dwarfdump/X86/apple_names_verify_buckets.s192
-rw-r--r--test/tools/llvm-dwarfdump/X86/apple_names_verify_data.s64
-rw-r--r--test/tools/llvm-dwarfdump/X86/apple_names_verify_form.s58
-rw-r--r--test/tools/llvm-dwarfdump/X86/apple_names_verify_num_atoms.s59
-rw-r--r--test/tools/llvm-dwarfdump/X86/no_apple_names_verify.s33
-rw-r--r--test/tools/llvm-dwarfdump/X86/no_apple_names_verify_buckets.s174
-rw-r--r--test/tools/llvm-nm/X86/demangle.ll37
-rw-r--r--test/tools/llvm-nm/wasm/weak-symbols.yaml2
-rw-r--r--test/tools/llvm-objdump/ARM/invalid-instruction.s9
-rw-r--r--test/tools/llvm-objdump/WebAssembly/lit.local.cfg2
-rw-r--r--test/tools/llvm-objdump/WebAssembly/relocations.test8
-rw-r--r--test/tools/llvm-pdbdump/partial-type-stream.test30
-rw-r--r--test/tools/llvm-readobj/Inputs/trivial.obj.coff-arm64bin0 -> 141 bytes
-rw-r--r--test/tools/llvm-readobj/file-headers.test18
-rw-r--r--test/tools/llvm-readobj/peplus.test1
-rw-r--r--test/tools/llvm-readobj/symbols.test4
-rw-r--r--tools/bugpoint/ToolRunner.cpp2
-rw-r--r--tools/dsymutil/DwarfLinker.cpp23
-rw-r--r--tools/llc/llc.cpp10
-rw-r--r--tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp6
-rw-r--r--tools/llvm-lto2/CMakeLists.txt1
-rw-r--r--tools/llvm-lto2/LLVMBuild.txt2
-rw-r--r--tools/llvm-lto2/llvm-lto2.cpp14
-rw-r--r--tools/llvm-nm/CMakeLists.txt1
-rw-r--r--tools/llvm-nm/llvm-nm.cpp43
-rw-r--r--tools/llvm-objdump/llvm-objdump.cpp20
-rw-r--r--tools/llvm-pdbutil/DumpOutputStyle.cpp111
-rw-r--r--tools/llvm-pdbutil/DumpOutputStyle.h2
-rw-r--r--tools/llvm-pdbutil/MinimalSymbolDumper.cpp96
-rw-r--r--tools/llvm-pdbutil/MinimalSymbolDumper.h1
-rw-r--r--tools/llvm-pdbutil/MinimalTypeDumper.cpp2
-rw-r--r--tools/llvm-pdbutil/PdbYaml.cpp2
-rw-r--r--tools/llvm-pdbutil/llvm-pdbutil.cpp7
-rw-r--r--tools/llvm-pdbutil/llvm-pdbutil.h1
-rw-r--r--tools/llvm-readobj/COFFDumper.cpp2
-rw-r--r--tools/llvm-readobj/WasmDumper.cpp1
-rw-r--r--tools/obj2yaml/wasm2yaml.cpp2
-rw-r--r--tools/opt/opt.cpp10
-rw-r--r--tools/yaml2obj/yaml2wasm.cpp60
-rw-r--r--unittests/CodeGen/GlobalISel/LegalizerInfoTest.cpp19
-rw-r--r--unittests/DebugInfo/DWARF/CMakeLists.txt2
-rw-r--r--unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp304
-rw-r--r--unittests/DebugInfo/DWARF/DWARFFormValueTest.cpp8
-rw-r--r--unittests/DebugInfo/DWARF/DwarfGenerator.h1
-rw-r--r--unittests/IR/DominatorTreeTest.cpp15
-rw-r--r--unittests/ProfileData/CoverageMappingTest.cpp18
-rw-r--r--unittests/Support/CMakeLists.txt1
-rw-r--r--unittests/Support/ErrnoTest.cpp36
-rw-r--r--unittests/Support/YAMLIOTest.cpp30
-rw-r--r--utils/TableGen/CodeGenDAGPatterns.cpp4
-rw-r--r--utils/TableGen/CodeGenRegisters.cpp78
-rw-r--r--utils/TableGen/CodeGenRegisters.h17
-rw-r--r--utils/TableGen/CodeGenSchedule.cpp7
-rw-r--r--utils/TableGen/DAGISelMatcher.h70
-rw-r--r--utils/TableGen/GlobalISelEmitter.cpp194
-rw-r--r--utils/TableGen/RegisterInfoEmitter.cpp6
-rw-r--r--utils/TableGen/SubtargetEmitter.cpp1
-rw-r--r--utils/docker/README1
-rwxr-xr-xutils/docker/build_docker_image.sh121
-rw-r--r--utils/docker/debian8/build/Dockerfile35
-rw-r--r--utils/docker/debian8/release/Dockerfile21
-rw-r--r--utils/docker/example/build/Dockerfile26
-rw-r--r--utils/docker/example/release/Dockerfile24
-rw-r--r--utils/docker/nvidia-cuda/build/Dockerfile25
-rw-r--r--utils/docker/nvidia-cuda/release/Dockerfile23
-rwxr-xr-xutils/docker/scripts/build_install_llvm.sh169
-rw-r--r--utils/lit/lit/formats/__init__.py7
-rw-r--r--utils/lit/lit/formats/base.py157
-rw-r--r--utils/lit/lit/formats/googletest.py74
-rw-r--r--utils/lit/lit/formats/shtest.py40
-rw-r--r--utils/lit/lit/run.py218
-rw-r--r--utils/lit/lit/util.py126
-rwxr-xr-xutils/opt-viewer/opt-diff.py21
-rwxr-xr-xutils/opt-viewer/opt-stats.py18
-rwxr-xr-xutils/opt-viewer/opt-viewer.py80
-rw-r--r--utils/opt-viewer/optpmap.py53
-rw-r--r--utils/opt-viewer/optrecord.py23
863 files changed, 42032 insertions, 15604 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9cdc688d42b2..fc05f30e4cdb 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -206,7 +206,7 @@ endif()
include(VersionFromVCS)
option(LLVM_APPEND_VC_REV
- "Append the version control system revision id to LLVM version" OFF)
+ "Embed the version control system revision id in LLVM" ON)
if( LLVM_APPEND_VC_REV )
add_version_info_from_vcs(PACKAGE_VERSION)
diff --git a/CODE_OWNERS.TXT b/CODE_OWNERS.TXT
index 3339c039ff85..619844256ada 100644
--- a/CODE_OWNERS.TXT
+++ b/CODE_OWNERS.TXT
@@ -70,7 +70,7 @@ D: Branch weights and BlockFrequencyInfo
N: Hal Finkel
E: hfinkel@anl.gov
-D: BBVectorize, the loop reroller, alias analysis and the PowerPC target
+D: The loop reroller, alias analysis and the PowerPC target
N: Dan Gohman
E: sunfish@mozilla.com
diff --git a/CREDITS.TXT b/CREDITS.TXT
index 1773f9dd52d0..bfc3482e4099 100644
--- a/CREDITS.TXT
+++ b/CREDITS.TXT
@@ -318,11 +318,12 @@ D: Support for implicit TLS model used with MS VC runtime
D: Dumping of Win64 EH structures
N: Takumi Nakamura
+I: chapuni
E: geek4civic@gmail.com
E: chapuni@hf.rim.or.jp
-D: Cygwin and MinGW support.
-D: Win32 tweaks.
-S: Yokohama, Japan
+D: Maintaining the Git monorepo
+W: https://github.com/llvm-project/
+S: Ebina, Japan
N: Edward O'Callaghan
E: eocallaghan@auroraux.org
diff --git a/bindings/ocaml/target/target_ocaml.c b/bindings/ocaml/target/target_ocaml.c
index b63bef6d3d5b..8872f42b5b68 100644
--- a/bindings/ocaml/target/target_ocaml.c
+++ b/bindings/ocaml/target/target_ocaml.c
@@ -77,7 +77,7 @@ CAMLprim value llvm_datalayout_pointer_size(value DL) {
/* Llvm.llcontext -> DataLayout.t -> Llvm.lltype */
CAMLprim LLVMTypeRef llvm_datalayout_intptr_type(LLVMContextRef C, value DL) {
- return LLVMIntPtrTypeInContext(C, DataLayout_val(DL));;
+ return LLVMIntPtrTypeInContext(C, DataLayout_val(DL));
}
/* int -> DataLayout.t -> int */
diff --git a/docs/CMake.rst b/docs/CMake.rst
index 2deae9361874..aeebc8f6acf9 100644
--- a/docs/CMake.rst
+++ b/docs/CMake.rst
@@ -247,9 +247,10 @@ LLVM-specific variables
tests.
**LLVM_APPEND_VC_REV**:BOOL
- Append version control revision info (svn revision number or Git revision id)
- to LLVM version string (stored in the PACKAGE_VERSION macro). For this to work
- cmake must be invoked before the build. Defaults to OFF.
+ Embed version control revision info (svn revision number or Git revision id).
+ This is used among other things in the LLVM version string (stored in the
+ PACKAGE_VERSION macro). For this to work cmake must be invoked before the
+ build. Defaults to ON.
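+
+ For example (a sketch; the build-directory layout is illustrative), to opt
+ out of embedding the revision at configure time:
+
+ .. code-block:: bash
+
+    cmake -G Ninja -DLLVM_APPEND_VC_REV=OFF ../llvm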
**LLVM_ENABLE_THREADS**:BOOL
Build with threads support, if available. Defaults to ON.
diff --git a/docs/CommandGuide/llvm-nm.rst b/docs/CommandGuide/llvm-nm.rst
index 319e6e6aecf1..da7edea4743b 100644
--- a/docs/CommandGuide/llvm-nm.rst
+++ b/docs/CommandGuide/llvm-nm.rst
@@ -134,9 +134,6 @@ OPTIONS
BUGS
----
- * :program:`llvm-nm` cannot demangle C++ mangled names, like GNU :program:`nm`
- can.
-
* :program:`llvm-nm` does not support the full set of arguments that GNU
:program:`nm` does.
diff --git a/docs/Docker.rst b/docs/Docker.rst
new file mode 100644
index 000000000000..d873e1ebeeb4
--- /dev/null
+++ b/docs/Docker.rst
@@ -0,0 +1,205 @@
+=========================================
+A guide to Dockerfiles for building LLVM
+=========================================
+
+Introduction
+============
+You can find a number of sources to build docker images with LLVM components in
+``llvm/utils/docker``. They can be used by anyone who wants to build the docker
+images for their own use, or as a starting point for someone who wants to write
+their own Dockerfiles.
+
+We currently provide Dockerfiles with ``debian8`` and ``nvidia-cuda`` base images.
+We also provide an ``example`` image, which contains placeholders that one would need
+to fill out in order to produce Dockerfiles for a new docker image.
+
+Why?
+----
+Docker images provide a way to produce binary distributions of
+software inside a controlled environment. Having Dockerfiles for building
+docker images inside the LLVM repo makes them much more discoverable than
+putting them anywhere else.
+
+Docker basics
+-------------
+If you've never heard of Docker before, you might find this section helpful
+as a very basic explanation of it.
+`Docker <https://www.docker.com/>`_ is a popular solution for running programs in
+an isolated and reproducible environment, especially to maintain releases for
+software deployed to large distributed fleets.
+It uses linux kernel namespaces and cgroups to provide lightweight isolation
+inside the currently running linux kernel.
+A single active instance of a dockerized environment is called a *docker
+container*.
+A snapshot of a docker container filesystem is called a *docker image*.
+One can start a container from a prebuilt docker image.
+
+Docker images are built from a so-called *Dockerfile*, a source file written in
+a specialized language that defines instructions to be used when building
+the docker image (see the `official
+documentation <https://docs.docker.com/engine/reference/builder/>`_ for more
+details). A minimal Dockerfile typically contains a base image and a number
+of RUN commands that have to be executed to build the image. When building a new
+image, docker will first download your base image, mount its filesystem as
+read-only and then add a writable overlay on top of it to keep track of all
+filesystem modifications performed while building your image. When the build
+process is finished, a diff between your image's final filesystem state and the
+base image's filesystem is stored in the resulting image.
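+
+As a minimal sketch (the base image and package names here are illustrative,
+not taken from the LLVM sources), you could write and build such a Dockerfile
+like this:
+
+.. code-block:: bash
+
+  # A tiny Dockerfile: a base image plus RUN commands, each of which adds one
+  # filesystem layer on top of the read-only base.
+  cat > Dockerfile <<'EOF'
+  FROM debian:8
+  RUN apt-get update && apt-get install -y cmake ninja-build
+  EOF
+  # Build it: docker downloads the base image, replays the RUN commands and
+  # stores the resulting filesystem diff as the new image.
+  docker build -t llvm-build-env .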
+
+Overview
+========
+The ``llvm/utils/docker`` folder contains Dockerfiles and simple bash scripts to
+serve as a basis for anyone who wants to create their own Docker image with
+LLVM components, compiled from sources. The sources are checked out from the
+upstream svn repository when building the image.
+
+Inside each subfolder we host Dockerfiles for two images:
+
+- The ``build/`` image is used to compile LLVM; it installs a system compiler and
+  all build dependencies of LLVM. After the build process is finished, the build
+  image will have an archive with the compiled components at ``/tmp/clang.tar.gz``.
+- The ``release/`` image usually only contains the LLVM components compiled by the
+  ``build/`` image, and also libstdc++ and binutils to make the image minimally
+  useful for C++ development. The assumption is that you usually want clang to
+  be one of the provided components.
+
+To build both of those images, use the ``build_docker_image.sh`` script.
+It will check out the LLVM sources and build clang in the ``build`` container,
+copy the results of the build to the local filesystem, and then build the
+``release`` container using those. ``build_docker_image.sh`` accepts a list of
+LLVM repositories to check out and arguments for the CMake invocation.
+
+If you want to write your own docker image, start with the ``example/`` subfolder.
+It provides incomplete Dockerfiles with (very few) FIXMEs explaining the steps
+you need to take in order to make your Dockerfiles functional.
+
+Usage
+=====
+The ``llvm/utils/build_docker_image.sh`` script provides a rather high degree of
+control over how to run the build. It allows you to specify the projects to
+check out from svn and provide a list of CMake arguments to use when
+building LLVM inside the docker container.
+
+Here's a very simple example of getting a docker image with a clang binary,
+compiled by the system compiler in the debian8 image:
+
+.. code-block:: bash
+
+ ./llvm/utils/docker/build_docker_image.sh \
+ --source debian8 \
+ --docker-repository clang-debian8 --docker-tag "staging" \
+ -- \
+ -p clang -i install-clang -i install-clang-headers \
+ -- \
+ -DCMAKE_BUILD_TYPE=Release
+
+Note there are two levels of ``--`` indirection. The first one separates
+``build_docker_image.sh`` arguments from ``llvm/utils/build_install_llvm.sh``
+arguments. The second one separates CMake arguments from ``build_install_llvm.sh``
+arguments. Note that a build like that doesn't use the 2-stage build process that
+you probably want for clang. Running a 2-stage build is a little more intricate;
+this command will do that:
+
+.. code-block:: bash
+
+ # Run a 2-stage build.
+ # LLVM_TARGETS_TO_BUILD=Native is to reduce stage1 compile time.
+ # Options, starting with BOOTSTRAP_* are passed to stage2 cmake invocation.
+ ./build_docker_image.sh \
+ --source debian8 \
+ --docker-repository clang-debian8 --docker-tag "staging" \
+ -- \
+ -p clang -i stage2-install-clang -i stage2-install-clang-headers \
+ -- \
+ -DLLVM_TARGETS_TO_BUILD=Native -DCMAKE_BUILD_TYPE=Release \
+ -DBOOTSTRAP_CMAKE_BUILD_TYPE=Release \
+ -DCLANG_ENABLE_BOOTSTRAP=ON -DCLANG_BOOTSTRAP_TARGETS="install-clang;install-clang-headers"
+
+This will produce two images, a release image ``clang-debian8:staging`` and a
+build image ``clang-debian8-build:staging`` from the latest upstream revision.
+After the image is built you can run bash inside a container based on your
+image like this:
+
+.. code-block:: bash
+
+ docker run -ti clang-debian8:staging bash
+
+Now you can run bash commands as you normally would:
+
+.. code-block:: bash
+
+ root@80f351b51825:/# clang -v
+ clang version 5.0.0 (trunk 305064)
+ Target: x86_64-unknown-linux-gnu
+ Thread model: posix
+ InstalledDir: /bin
+ Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.8
+ Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.8.4
+ Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.9
+ Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.9.2
+ Selected GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.9
+ Candidate multilib: .;@m64
+ Selected multilib: .;@m64
+
+
+Which image should I choose?
+============================
+We currently provide two images: debian8-based and nvidia-cuda-based. They
+differ in the base image that they use, i.e. they have a different set of
+preinstalled binaries. Debian8 is very minimal, nvidia-cuda is larger, but has
+preinstalled CUDA libraries and allows access to a GPU installed on your
+machine.
+
+If you need a minimal linux distribution with only clang and libstdc++ included,
+you should try the debian8-based image.
+
+If you want to use CUDA libraries and have access to a GPU on your machine,
+you should choose the nvidia-cuda-based image and use `nvidia-docker
+<https://github.com/NVIDIA/nvidia-docker>`_ to run your docker containers. Note
+that you don't need nvidia-docker to build the images, but you need it in order
+to have access to the GPU from a docker container that is running the built
+image.
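+
+For example (a sketch, assuming nvidia-docker is installed and an image was
+built as shown in the Usage section), a GPU-enabled container can be started
+like this:
+
+.. code-block:: bash
+
+  # nvidia-docker mounts the host GPU devices and driver into the container.
+  nvidia-docker run -ti clang-debian8:staging bash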
+
+If you have a different use-case, you could create your own image based on the
+``example/`` folder.
+
+Any docker image can be built and run using only the docker binary, i.e. you can
+run the debian8 build on Fedora or any other Linux distribution. You don't need to
+install CMake, compilers or any other clang dependencies. It is all handled
+during the build process inside Docker's isolated environment.
+
+Stable build
+============
+If you want a somewhat recent and somewhat stable build, use the
+``branches/google/stable`` branch; the following command will produce a
+debian8-based image using the latest ``google/stable`` sources:
+
+.. code-block:: bash
+
+ ./llvm/utils/docker/build_docker_image.sh \
+    -s debian8 -d clang-debian8 -t "staging" \
+ -- \
+ --branch branches/google/stable \
+ -p clang -i install-clang -i install-clang-headers \
+ -- \
+ -DCMAKE_BUILD_TYPE=Release
+
+
+Minimizing docker image size
+============================
+Due to Docker restrictions we use two images (i.e., the build and release
+folders) so that the release image can be as small as possible. It's much
+easier to achieve that using two images, because Docker stores a filesystem
+layer for each command in the Dockerfile, i.e. if you install some packages in
+one command and then remove them in a separate command, the size of the
+resulting image will still be proportional to the size of an image with the
+packages installed. Therefore, we strive to provide a very simple release
+image which only copies the compiled clang and does not do anything else.
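+
+You can observe this layering yourself: each command in a Dockerfile
+corresponds to one layer, and layers only ever add size. For the build image
+produced by the 2-stage example above, something like the following should
+list those layers and their sizes:
+
+.. code-block:: bash
+
+    docker history clang-debian8-build:staging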
+
+Docker 1.13 added a ``--squash`` flag that allows flattening the layers of the
+image, i.e. removing the parts that were actually deleted. That would be an
+easier way to produce the smallest images possible, using just a single image.
+We do not use it because, as of today, the flag is still in the experimental
+stage and not everyone may have the latest docker version available. When the
+flag is out of the experimental stage, we should investigate replacing the
+two-image approach with a single image built using the ``--squash`` flag.
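+
+For reference, such a build would look roughly like the sketch below. It is
+not part of the provided scripts and assumes a docker daemon running with
+experimental features enabled:
+
+.. code-block:: bash
+
+    # Run from a directory containing a Dockerfile; requires an experimental
+    # docker daemon (e.g. dockerd started with --experimental).
+    docker build --squash -t my-clang-image .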
diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst
index 95025fb91c72..ddb31acfd029 100644
--- a/docs/ReleaseNotes.rst
+++ b/docs/ReleaseNotes.rst
@@ -54,8 +54,9 @@ Non-comprehensive list of changes in this release
its nature as a general purpose PDB manipulation / diagnostics tool that does
more than just dumping contents.
-
-* ... next change ...
+* The ``BBVectorize`` pass has been removed. It was fully replaced and no
+ longer used back in 2014 but we didn't get around to removing it. Now it is
+ gone. The SLP vectorizer is the suggested non-loop vectorization pass.
.. NOTE
If you would like to document a larger change, then you can add a
@@ -108,6 +109,15 @@ Changes to the OCaml bindings
During this release ...
+Changes to the C API
+--------------------
+
+* Deprecated the ``LLVMAddBBVectorizePass`` interface since the ``BBVectorize``
+ pass has been removed. It is now a no-op and will be removed in the next
+ release. Use ``LLVMAddSLPVectorizePass`` instead to get the supported SLP
+ vectorizer.
+
+
External Open Source Projects Using LLVM 5
==========================================
diff --git a/docs/XRay.rst b/docs/XRay.rst
index e43f78e5ffe5..d61e4e6d9955 100644
--- a/docs/XRay.rst
+++ b/docs/XRay.rst
@@ -150,7 +150,7 @@ variable, where we list down the options and their defaults below.
| xray_logfile_base | ``const char*`` | ``xray-log.`` | Filename base for the |
| | | | XRay logfile. |
+-------------------+-----------------+---------------+------------------------+
-| xray_fdr_log | ``bool`` | ``false`` | Whether to install the |
+| xray_fdr_log | ``bool`` | ``false`` | Whether to install the |
| | | | Flight Data Recorder |
| | | | (FDR) mode. |
+-------------------+-----------------+---------------+------------------------+
diff --git a/docs/index.rst b/docs/index.rst
index 54b608236530..7f3788f95b66 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -91,6 +91,7 @@ representation.
CompileCudaWithLLVM
ReportingGuide
Benchmarking
+ Docker
:doc:`GettingStarted`
Discusses how to get up and running quickly with the LLVM infrastructure.
@@ -161,6 +162,9 @@ representation.
A collection of tips for frontend authors on how to generate IR
which LLVM is able to effectively optimize.
+:doc:`Docker`
+ A reference for using Dockerfiles provided with LLVM.
+
Programming Documentation
=========================
diff --git a/include/llvm-c/Transforms/Vectorize.h b/include/llvm-c/Transforms/Vectorize.h
index a82ef49cb167..cf8306aee762 100644
--- a/include/llvm-c/Transforms/Vectorize.h
+++ b/include/llvm-c/Transforms/Vectorize.h
@@ -33,7 +33,7 @@ extern "C" {
* @{
*/
-/** See llvm::createBBVectorizePass function. */
+/** DEPRECATED - Use LLVMAddSLPVectorizePass */
void LLVMAddBBVectorizePass(LLVMPassManagerRef PM);
/** See llvm::createLoopVectorizePass function. */
diff --git a/include/llvm/Analysis/AliasSetTracker.h b/include/llvm/Analysis/AliasSetTracker.h
index eac97501c759..daafd2fabe78 100644
--- a/include/llvm/Analysis/AliasSetTracker.h
+++ b/include/llvm/Analysis/AliasSetTracker.h
@@ -69,10 +69,15 @@ class AliasSet : public ilist_node<AliasSet> {
if (AAInfo == DenseMapInfo<AAMDNodes>::getEmptyKey())
      // We don't have an AAInfo yet. Set it to NewAAInfo.
AAInfo = NewAAInfo;
- else if (AAInfo != NewAAInfo)
- // NewAAInfo conflicts with AAInfo.
- AAInfo = DenseMapInfo<AAMDNodes>::getTombstoneKey();
-
+ else {
+ AAMDNodes Intersection(AAInfo.intersect(NewAAInfo));
+ if (!Intersection) {
+ // NewAAInfo conflicts with AAInfo.
+ AAInfo = DenseMapInfo<AAMDNodes>::getTombstoneKey();
+ return SizeChanged;
+ }
+ AAInfo = Intersection;
+ }
return SizeChanged;
}
diff --git a/include/llvm/Analysis/CFLAliasAnalysisUtils.h b/include/llvm/Analysis/CFLAliasAnalysisUtils.h
new file mode 100644
index 000000000000..981a8ddc2289
--- /dev/null
+++ b/include/llvm/Analysis/CFLAliasAnalysisUtils.h
@@ -0,0 +1,58 @@
+//=- CFLAliasAnalysisUtils.h - Utilities for CFL Alias Analysis ----*- C++-*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// \file
+// These are the utilities/helpers used by the CFL Alias Analyses available in
+// tree, i.e. Steensgaard's and Andersen's.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_CFLALIASANALYSISUTILS_H
+#define LLVM_ANALYSIS_CFLALIASANALYSISUTILS_H
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/ValueHandle.h"
+
+namespace llvm {
+namespace cflaa {
+
+template <typename AAResult> struct FunctionHandle final : public CallbackVH {
+ FunctionHandle(Function *Fn, AAResult *Result)
+ : CallbackVH(Fn), Result(Result) {
+ assert(Fn != nullptr);
+ assert(Result != nullptr);
+ }
+
+ void deleted() override { removeSelfFromCache(); }
+ void allUsesReplacedWith(Value *) override { removeSelfFromCache(); }
+
+private:
+ AAResult *Result;
+
+ void removeSelfFromCache() {
+ assert(Result != nullptr);
+ auto *Val = getValPtr();
+ Result->evict(cast<Function>(Val));
+ setValPtr(nullptr);
+ }
+};
+
+static inline const Function *parentFunctionOfValue(const Value *Val) {
+ if (auto *Inst = dyn_cast<Instruction>(Val)) {
+ auto *Bb = Inst->getParent();
+ return Bb->getParent();
+ }
+
+ if (auto *Arg = dyn_cast<Argument>(Val))
+ return Arg->getParent();
+ return nullptr;
+}
+} // namespace cflaa
+} // namespace llvm
+
+#endif // LLVM_ANALYSIS_CFLALIASANALYSISUTILS_H
diff --git a/include/llvm/Analysis/CFLAndersAliasAnalysis.h b/include/llvm/Analysis/CFLAndersAliasAnalysis.h
index f3520aa3fe82..4146ad4d18ac 100644
--- a/include/llvm/Analysis/CFLAndersAliasAnalysis.h
+++ b/include/llvm/Analysis/CFLAndersAliasAnalysis.h
@@ -18,8 +18,8 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CFLAliasAnalysisUtils.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include <forward_list>
@@ -47,7 +47,7 @@ public:
return false;
}
/// Evict the given function from cache
- void evict(const Function &Fn);
+ void evict(const Function *Fn);
/// \brief Get the alias summary for the given function
/// Return nullptr if the summary is not found or not available
@@ -57,27 +57,6 @@ public:
AliasResult alias(const MemoryLocation &, const MemoryLocation &);
private:
- struct FunctionHandle final : public CallbackVH {
- FunctionHandle(Function *Fn, CFLAndersAAResult *Result)
- : CallbackVH(Fn), Result(Result) {
- assert(Fn != nullptr);
- assert(Result != nullptr);
- }
-
- void deleted() override { removeSelfFromCache(); }
- void allUsesReplacedWith(Value *) override { removeSelfFromCache(); }
-
- private:
- CFLAndersAAResult *Result;
-
- void removeSelfFromCache() {
- assert(Result != nullptr);
- auto *Val = getValPtr();
- Result->evict(*cast<Function>(Val));
- setValPtr(nullptr);
- }
- };
-
/// \brief Ensures that the given function is available in the cache.
/// Returns the appropriate entry from the cache.
const Optional<FunctionInfo> &ensureCached(const Function &);
@@ -97,7 +76,7 @@ private:
/// that simply has empty sets.
DenseMap<const Function *, Optional<FunctionInfo>> Cache;
- std::forward_list<FunctionHandle> Handles;
+ std::forward_list<cflaa::FunctionHandle<CFLAndersAAResult>> Handles;
};
/// Analysis pass providing a never-invalidated alias analysis result.
diff --git a/include/llvm/Analysis/CFLSteensAliasAnalysis.h b/include/llvm/Analysis/CFLSteensAliasAnalysis.h
index 3aae9a1e9b2e..fd3fa5febcdf 100644
--- a/include/llvm/Analysis/CFLSteensAliasAnalysis.h
+++ b/include/llvm/Analysis/CFLSteensAliasAnalysis.h
@@ -19,6 +19,7 @@
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CFLAliasAnalysisUtils.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueHandle.h"
@@ -85,27 +86,6 @@ public:
}
private:
- struct FunctionHandle final : public CallbackVH {
- FunctionHandle(Function *Fn, CFLSteensAAResult *Result)
- : CallbackVH(Fn), Result(Result) {
- assert(Fn != nullptr);
- assert(Result != nullptr);
- }
-
- void deleted() override { removeSelfFromCache(); }
- void allUsesReplacedWith(Value *) override { removeSelfFromCache(); }
-
- private:
- CFLSteensAAResult *Result;
-
- void removeSelfFromCache() {
- assert(Result != nullptr);
- auto *Val = getValPtr();
- Result->evict(cast<Function>(Val));
- setValPtr(nullptr);
- }
- };
-
const TargetLibraryInfo &TLI;
/// \brief Cached mapping of Functions to their StratifiedSets.
@@ -114,7 +94,7 @@ private:
/// have any kind of recursion, it is discernable from a function
/// that simply has empty sets.
DenseMap<Function *, Optional<FunctionInfo>> Cache;
- std::forward_list<FunctionHandle> Handles;
+ std::forward_list<cflaa::FunctionHandle<CFLSteensAAResult>> Handles;
FunctionInfo buildSetsFrom(Function *F);
};
diff --git a/include/llvm/Analysis/IteratedDominanceFrontier.h b/include/llvm/Analysis/IteratedDominanceFrontier.h
index af788c818f80..bd74d6bd14c3 100644
--- a/include/llvm/Analysis/IteratedDominanceFrontier.h
+++ b/include/llvm/Analysis/IteratedDominanceFrontier.h
@@ -86,7 +86,6 @@ public:
private:
DominatorTreeBase<BasicBlock> &DT;
bool useLiveIn;
- DenseMap<DomTreeNode *, unsigned> DomLevels;
const SmallPtrSetImpl<BasicBlock *> *LiveInBlocks;
const SmallPtrSetImpl<BasicBlock *> *DefBlocks;
};
diff --git a/include/llvm/Analysis/MemorySSA.h b/include/llvm/Analysis/MemorySSA.h
index 462e4594266e..5cec2bfb0cfb 100644
--- a/include/llvm/Analysis/MemorySSA.h
+++ b/include/llvm/Analysis/MemorySSA.h
@@ -139,7 +139,7 @@ public:
// Methods for support type inquiry through isa, cast, and
// dyn_cast
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
unsigned ID = V->getValueID();
return ID == MemoryUseVal || ID == MemoryPhiVal || ID == MemoryDefVal;
}
@@ -241,7 +241,7 @@ public:
/// \brief Get the access that produces the memory state used by this Use.
MemoryAccess *getDefiningAccess() const { return getOperand(0); }
- static inline bool classof(const Value *MA) {
+ static bool classof(const Value *MA) {
return MA->getValueID() == MemoryUseVal || MA->getValueID() == MemoryDefVal;
}
@@ -297,7 +297,7 @@ public:
// allocate space for exactly one operand
void *operator new(size_t s) { return User::operator new(s, 1); }
- static inline bool classof(const Value *MA) {
+ static bool classof(const Value *MA) {
return MA->getValueID() == MemoryUseVal;
}
@@ -353,7 +353,7 @@ public:
// allocate space for exactly one operand
void *operator new(size_t s) { return User::operator new(s, 1); }
- static inline bool classof(const Value *MA) {
+ static bool classof(const Value *MA) {
return MA->getValueID() == MemoryDefVal;
}
@@ -526,7 +526,7 @@ public:
return getIncomingValue(Idx);
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return V->getValueID() == MemoryPhiVal;
}
diff --git a/include/llvm/Analysis/OptimizationDiagnosticInfo.h b/include/llvm/Analysis/OptimizationDiagnosticInfo.h
index edd9140a3493..64dd0737a112 100644
--- a/include/llvm/Analysis/OptimizationDiagnosticInfo.h
+++ b/include/llvm/Analysis/OptimizationDiagnosticInfo.h
@@ -34,7 +34,7 @@ class Value;
///
/// It allows reporting when optimizations are performed and when they are not
/// along with the reasons for it. Hotness information of the corresponding
-/// code region can be included in the remark if DiagnosticHotnessRequested is
+/// code region can be included in the remark if DiagnosticsHotnessRequested is
/// enabled in the LLVM context.
class OptimizationRemarkEmitter {
public:
@@ -45,10 +45,10 @@ public:
/// analysis pass).
///
/// Note that this ctor has a very different cost depending on whether
- /// F->getContext().getDiagnosticHotnessRequested() is on or not. If it's off
+ /// F->getContext().getDiagnosticsHotnessRequested() is on or not. If it's off
/// the operation is free.
///
- /// Whereas if DiagnosticHotnessRequested is on, it is fairly expensive
+ /// Whereas if DiagnosticsHotnessRequested is on, it is fairly expensive
/// operation since BFI and all its required analyses are computed. This is
/// for example useful for CGSCC passes that can't use function analyses
/// passes in the old PM.
diff --git a/include/llvm/Analysis/RegionInfo.h b/include/llvm/Analysis/RegionInfo.h
index 16ee07fa3177..2e34928b28ad 100644
--- a/include/llvm/Analysis/RegionInfo.h
+++ b/include/llvm/Analysis/RegionInfo.h
@@ -37,18 +37,38 @@
#ifndef LLVM_ANALYSIS_REGIONINFO_H
#define LLVM_ANALYSIS_REGIONINFO_H
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/iterator_range.h"
-#include "llvm/IR/CFG.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
#include <map>
#include <memory>
#include <set>
+#include <string>
+#include <type_traits>
+#include <vector>
namespace llvm {
+class DominanceFrontier;
+class DominatorTree;
+class Loop;
+class LoopInfo;
+struct PostDominatorTree;
+class Region;
+template <class RegionTr> class RegionBase;
+class RegionInfo;
+template <class RegionTr> class RegionInfoBase;
+class RegionNode;
+
// Class to be specialized for different users of RegionInfo
// (i.e. BasicBlocks or MachineBasicBlocks). This is only to avoid needing to
// pass around an unreasonable number of template parameters.
@@ -59,37 +79,23 @@ struct RegionTraits {
// RegionT
// RegionNodeT
// RegionInfoT
- typedef typename FuncT_::UnknownRegionTypeError BrokenT;
+ using BrokenT = typename FuncT_::UnknownRegionTypeError;
};
-class DominatorTree;
-class DominanceFrontier;
-class Loop;
-class LoopInfo;
-struct PostDominatorTree;
-class raw_ostream;
-class Region;
-template <class RegionTr>
-class RegionBase;
-class RegionNode;
-class RegionInfo;
-template <class RegionTr>
-class RegionInfoBase;
-
template <>
struct RegionTraits<Function> {
- typedef Function FuncT;
- typedef BasicBlock BlockT;
- typedef Region RegionT;
- typedef RegionNode RegionNodeT;
- typedef RegionInfo RegionInfoT;
- typedef DominatorTree DomTreeT;
- typedef DomTreeNode DomTreeNodeT;
- typedef DominanceFrontier DomFrontierT;
- typedef PostDominatorTree PostDomTreeT;
- typedef Instruction InstT;
- typedef Loop LoopT;
- typedef LoopInfo LoopInfoT;
+ using FuncT = Function;
+ using BlockT = BasicBlock;
+ using RegionT = Region;
+ using RegionNodeT = RegionNode;
+ using RegionInfoT = RegionInfo;
+ using DomTreeT = DominatorTree;
+ using DomTreeNodeT = DomTreeNode;
+ using DomFrontierT = DominanceFrontier;
+ using PostDomTreeT = PostDominatorTree;
+ using InstT = Instruction;
+ using LoopT = Loop;
+ using LoopInfoT = LoopInfo;
static unsigned getNumSuccessors(BasicBlock *BB) {
return BB->getTerminator()->getNumSuccessors();
@@ -113,13 +119,10 @@ class RegionNodeBase {
friend class RegionBase<Tr>;
public:
- typedef typename Tr::BlockT BlockT;
- typedef typename Tr::RegionT RegionT;
+ using BlockT = typename Tr::BlockT;
+ using RegionT = typename Tr::RegionT;
private:
- RegionNodeBase(const RegionNodeBase &) = delete;
- const RegionNodeBase &operator=(const RegionNodeBase &) = delete;
-
/// This is the entry basic block that starts this region node. If this is a
/// BasicBlock RegionNode, then entry is just the basic block, that this
/// RegionNode represents. Otherwise it is the entry of this (Sub)RegionNode.
@@ -150,6 +153,9 @@ protected:
: entry(Entry, isSubRegion), parent(Parent) {}
public:
+ RegionNodeBase(const RegionNodeBase &) = delete;
+ RegionNodeBase &operator=(const RegionNodeBase &) = delete;
+
/// @brief Get the parent Region of this RegionNode.
///
/// The parent Region is the Region this RegionNode belongs to. If for
@@ -247,24 +253,22 @@ public:
/// tree, the second one creates a graphical representation using graphviz.
template <class Tr>
class RegionBase : public RegionNodeBase<Tr> {
- typedef typename Tr::FuncT FuncT;
- typedef typename Tr::BlockT BlockT;
- typedef typename Tr::RegionInfoT RegionInfoT;
- typedef typename Tr::RegionT RegionT;
- typedef typename Tr::RegionNodeT RegionNodeT;
- typedef typename Tr::DomTreeT DomTreeT;
- typedef typename Tr::LoopT LoopT;
- typedef typename Tr::LoopInfoT LoopInfoT;
- typedef typename Tr::InstT InstT;
-
- typedef GraphTraits<BlockT *> BlockTraits;
- typedef GraphTraits<Inverse<BlockT *>> InvBlockTraits;
- typedef typename BlockTraits::ChildIteratorType SuccIterTy;
- typedef typename InvBlockTraits::ChildIteratorType PredIterTy;
-
friend class RegionInfoBase<Tr>;
- RegionBase(const RegionBase &) = delete;
- const RegionBase &operator=(const RegionBase &) = delete;
+
+ using FuncT = typename Tr::FuncT;
+ using BlockT = typename Tr::BlockT;
+ using RegionInfoT = typename Tr::RegionInfoT;
+ using RegionT = typename Tr::RegionT;
+ using RegionNodeT = typename Tr::RegionNodeT;
+ using DomTreeT = typename Tr::DomTreeT;
+ using LoopT = typename Tr::LoopT;
+ using LoopInfoT = typename Tr::LoopInfoT;
+ using InstT = typename Tr::InstT;
+
+ using BlockTraits = GraphTraits<BlockT *>;
+ using InvBlockTraits = GraphTraits<Inverse<BlockT *>>;
+ using SuccIterTy = typename BlockTraits::ChildIteratorType;
+ using PredIterTy = typename InvBlockTraits::ChildIteratorType;
// Information necessary to manage this Region.
RegionInfoT *RI;
@@ -274,12 +278,12 @@ class RegionBase : public RegionNodeBase<Tr> {
// (The entry BasicBlock is part of RegionNode)
BlockT *exit;
- typedef std::vector<std::unique_ptr<RegionT>> RegionSet;
+ using RegionSet = std::vector<std::unique_ptr<RegionT>>;
// The subregions of this region.
RegionSet children;
- typedef std::map<BlockT *, std::unique_ptr<RegionNodeT>> BBNodeMapT;
+ using BBNodeMapT = std::map<BlockT *, std::unique_ptr<RegionNodeT>>;
// Save the BasicBlock RegionNodes that are element of this Region.
mutable BBNodeMapT BBNodeMap;
@@ -308,6 +312,9 @@ public:
RegionBase(BlockT *Entry, BlockT *Exit, RegionInfoT *RI, DomTreeT *DT,
RegionT *Parent = nullptr);
+ RegionBase(const RegionBase &) = delete;
+ RegionBase &operator=(const RegionBase &) = delete;
+
/// Delete the Region and all its subregions.
~RegionBase();
@@ -543,8 +550,8 @@ public:
///
    /// These iterators iterate over all subregions of this Region.
//@{
- typedef typename RegionSet::iterator iterator;
- typedef typename RegionSet::const_iterator const_iterator;
+ using iterator = typename RegionSet::iterator;
+ using const_iterator = typename RegionSet::const_iterator;
iterator begin() { return children.begin(); }
iterator end() { return children.end(); }
@@ -563,12 +570,13 @@ public:
class block_iterator_wrapper
: public df_iterator<
typename std::conditional<IsConst, const BlockT, BlockT>::type *> {
- typedef df_iterator<
- typename std::conditional<IsConst, const BlockT, BlockT>::type *> super;
+ using super =
+ df_iterator<
+ typename std::conditional<IsConst, const BlockT, BlockT>::type *>;
public:
- typedef block_iterator_wrapper<IsConst> Self;
- typedef typename super::value_type value_type;
+ using Self = block_iterator_wrapper<IsConst>;
+ using value_type = typename super::value_type;
// Construct the begin iterator.
block_iterator_wrapper(value_type Entry, value_type Exit)
@@ -592,8 +600,8 @@ public:
}
};
- typedef block_iterator_wrapper<false> block_iterator;
- typedef block_iterator_wrapper<true> const_block_iterator;
+ using block_iterator = block_iterator_wrapper<false>;
+ using const_block_iterator = block_iterator_wrapper<true>;
block_iterator block_begin() { return block_iterator(getEntry(), getExit()); }
@@ -604,8 +612,8 @@ public:
}
const_block_iterator block_end() const { return const_block_iterator(); }
- typedef iterator_range<block_iterator> block_range;
- typedef iterator_range<const_block_iterator> const_block_range;
+ using block_range = iterator_range<block_iterator>;
+ using const_block_range = iterator_range<const_block_iterator>;
/// @brief Returns a range view of the basic blocks in the region.
inline block_range blocks() {
@@ -626,14 +634,14 @@ public:
/// are direct children of this Region. It does not iterate over any
/// RegionNodes that are also element of a subregion of this Region.
//@{
- typedef df_iterator<RegionNodeT *, df_iterator_default_set<RegionNodeT *>,
- false, GraphTraits<RegionNodeT *>>
- element_iterator;
+ using element_iterator =
+ df_iterator<RegionNodeT *, df_iterator_default_set<RegionNodeT *>, false,
+ GraphTraits<RegionNodeT *>>;
- typedef df_iterator<const RegionNodeT *,
- df_iterator_default_set<const RegionNodeT *>, false,
- GraphTraits<const RegionNodeT *>>
- const_element_iterator;
+ using const_element_iterator =
+ df_iterator<const RegionNodeT *,
+ df_iterator_default_set<const RegionNodeT *>, false,
+ GraphTraits<const RegionNodeT *>>;
element_iterator element_begin();
element_iterator element_end();
@@ -661,29 +669,26 @@ inline raw_ostream &operator<<(raw_ostream &OS, const RegionNodeBase<Tr> &Node);
/// Tree.
template <class Tr>
class RegionInfoBase {
- typedef typename Tr::BlockT BlockT;
- typedef typename Tr::FuncT FuncT;
- typedef typename Tr::RegionT RegionT;
- typedef typename Tr::RegionInfoT RegionInfoT;
- typedef typename Tr::DomTreeT DomTreeT;
- typedef typename Tr::DomTreeNodeT DomTreeNodeT;
- typedef typename Tr::PostDomTreeT PostDomTreeT;
- typedef typename Tr::DomFrontierT DomFrontierT;
- typedef GraphTraits<BlockT *> BlockTraits;
- typedef GraphTraits<Inverse<BlockT *>> InvBlockTraits;
- typedef typename BlockTraits::ChildIteratorType SuccIterTy;
- typedef typename InvBlockTraits::ChildIteratorType PredIterTy;
-
friend class RegionInfo;
friend class MachineRegionInfo;
- typedef DenseMap<BlockT *, BlockT *> BBtoBBMap;
- typedef DenseMap<BlockT *, RegionT *> BBtoRegionMap;
- RegionInfoBase();
- virtual ~RegionInfoBase();
+ using BlockT = typename Tr::BlockT;
+ using FuncT = typename Tr::FuncT;
+ using RegionT = typename Tr::RegionT;
+ using RegionInfoT = typename Tr::RegionInfoT;
+ using DomTreeT = typename Tr::DomTreeT;
+ using DomTreeNodeT = typename Tr::DomTreeNodeT;
+ using PostDomTreeT = typename Tr::PostDomTreeT;
+ using DomFrontierT = typename Tr::DomFrontierT;
+ using BlockTraits = GraphTraits<BlockT *>;
+ using InvBlockTraits = GraphTraits<Inverse<BlockT *>>;
+ using SuccIterTy = typename BlockTraits::ChildIteratorType;
+ using PredIterTy = typename InvBlockTraits::ChildIteratorType;
+
+ using BBtoBBMap = DenseMap<BlockT *, BlockT *>;
+ using BBtoRegionMap = DenseMap<BlockT *, RegionT *>;
- RegionInfoBase(const RegionInfoBase &) = delete;
- const RegionInfoBase &operator=(const RegionInfoBase &) = delete;
+ RegionInfoBase();
RegionInfoBase(RegionInfoBase &&Arg)
: DT(std::move(Arg.DT)), PDT(std::move(Arg.PDT)), DF(std::move(Arg.DF)),
@@ -691,6 +696,7 @@ class RegionInfoBase {
BBtoRegion(std::move(Arg.BBtoRegion)) {
Arg.wipe();
}
+
RegionInfoBase &operator=(RegionInfoBase &&RHS) {
DT = std::move(RHS.DT);
PDT = std::move(RHS.PDT);
@@ -701,12 +707,14 @@ class RegionInfoBase {
return *this;
}
+ virtual ~RegionInfoBase();
+
DomTreeT *DT;
PostDomTreeT *PDT;
DomFrontierT *DF;
/// The top level region.
- RegionT *TopLevelRegion;
+ RegionT *TopLevelRegion = nullptr;
  /// Map every BB to the smallest region that contains BB.
BBtoRegionMap BBtoRegion;
@@ -785,6 +793,9 @@ private:
void calculate(FuncT &F);
public:
+ RegionInfoBase(const RegionInfoBase &) = delete;
+ RegionInfoBase &operator=(const RegionInfoBase &) = delete;
+
static bool VerifyRegionInfo;
static typename RegionT::PrintStyle printStyle;
@@ -887,21 +898,22 @@ public:
class RegionInfo : public RegionInfoBase<RegionTraits<Function>> {
public:
- typedef RegionInfoBase<RegionTraits<Function>> Base;
+ using Base = RegionInfoBase<RegionTraits<Function>>;
explicit RegionInfo();
- ~RegionInfo() override;
-
RegionInfo(RegionInfo &&Arg) : Base(std::move(static_cast<Base &>(Arg))) {
updateRegionTree(*this, TopLevelRegion);
}
+
RegionInfo &operator=(RegionInfo &&RHS) {
Base::operator=(std::move(static_cast<Base &>(RHS)));
updateRegionTree(*this, TopLevelRegion);
return *this;
}
+ ~RegionInfo() override;
+
/// Handle invalidation explicitly.
bool invalidate(Function &F, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &);
@@ -931,8 +943,8 @@ class RegionInfoPass : public FunctionPass {
public:
static char ID;
- explicit RegionInfoPass();
+ explicit RegionInfoPass();
~RegionInfoPass() override;
RegionInfo &getRegionInfo() { return RI; }
@@ -953,10 +965,11 @@ public:
/// \brief Analysis pass that exposes the \c RegionInfo for a function.
class RegionInfoAnalysis : public AnalysisInfoMixin<RegionInfoAnalysis> {
friend AnalysisInfoMixin<RegionInfoAnalysis>;
+
static AnalysisKey Key;
public:
- typedef RegionInfo Result;
+ using Result = RegionInfo;
RegionInfo run(Function &F, FunctionAnalysisManager &AM);
};
@@ -967,6 +980,7 @@ class RegionInfoPrinterPass : public PassInfoMixin<RegionInfoPrinterPass> {
public:
explicit RegionInfoPrinterPass(raw_ostream &OS);
+
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
@@ -995,8 +1009,8 @@ RegionNodeBase<RegionTraits<Function>>::getNodeAs<Region>() const {
template <class Tr>
inline raw_ostream &operator<<(raw_ostream &OS,
const RegionNodeBase<Tr> &Node) {
- typedef typename Tr::BlockT BlockT;
- typedef typename Tr::RegionT RegionT;
+ using BlockT = typename Tr::BlockT;
+ using RegionT = typename Tr::RegionT;
if (Node.isSubRegion())
return OS << Node.template getNodeAs<RegionT>()->getNameStr();
@@ -1008,5 +1022,6 @@ extern template class RegionBase<RegionTraits<Function>>;
extern template class RegionNodeBase<RegionTraits<Function>>;
extern template class RegionInfoBase<RegionTraits<Function>>;
-} // End llvm namespace
-#endif
+} // end namespace llvm
+
+#endif // LLVM_ANALYSIS_REGIONINFO_H
diff --git a/include/llvm/Analysis/RegionInfoImpl.h b/include/llvm/Analysis/RegionInfoImpl.h
index a16c534484b3..c0337b6daf37 100644
--- a/include/llvm/Analysis/RegionInfoImpl.h
+++ b/include/llvm/Analysis/RegionInfoImpl.h
@@ -12,7 +12,11 @@
#ifndef LLVM_ANALYSIS_REGIONINFOIMPL_H
#define LLVM_ANALYSIS_REGIONINFOIMPL_H
+#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/DominanceFrontier.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/PostDominators.h"
@@ -20,9 +24,15 @@
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
+#include <cassert>
#include <iterator>
+#include <memory>
#include <set>
+#include <string>
+#include <type_traits>
+#include <vector>
namespace llvm {
@@ -303,7 +313,8 @@ RegionBase<Tr>::element_end() const {
template <class Tr>
typename Tr::RegionT *RegionBase<Tr>::getSubRegionNode(BlockT *BB) const {
- typedef typename Tr::RegionT RegionT;
+ using RegionT = typename Tr::RegionT;
+
RegionT *R = RI->getRegionFor(BB);
if (!R || R == this)
@@ -330,7 +341,8 @@ typename Tr::RegionNodeT *RegionBase<Tr>::getBBNode(BlockT *BB) const {
if (at == BBNodeMap.end()) {
auto Deconst = const_cast<RegionBase<Tr> *>(this);
typename BBNodeMapT::value_type V = {
- BB, make_unique<RegionNodeT>(static_cast<RegionT *>(Deconst), BB)};
+ BB,
+ llvm::make_unique<RegionNodeT>(static_cast<RegionT *>(Deconst), BB)};
at = BBNodeMap.insert(std::move(V)).first;
}
return at->second.get();
@@ -357,10 +369,10 @@ void RegionBase<Tr>::transferChildrenTo(RegionT *To) {
template <class Tr>
void RegionBase<Tr>::addSubRegion(RegionT *SubRegion, bool moveChildren) {
assert(!SubRegion->parent && "SubRegion already has a parent!");
- assert(find_if(*this,
- [&](const std::unique_ptr<RegionT> &R) {
- return R.get() == SubRegion;
- }) == children.end() &&
+ assert(llvm::find_if(*this,
+ [&](const std::unique_ptr<RegionT> &R) {
+ return R.get() == SubRegion;
+ }) == children.end() &&
"Subregion already exists!");
SubRegion->parent = static_cast<RegionT *>(this);
@@ -402,7 +414,7 @@ typename Tr::RegionT *RegionBase<Tr>::removeSubRegion(RegionT *Child) {
assert(Child->parent == this && "Child is not a child of this region!");
Child->parent = nullptr;
typename RegionSet::iterator I =
- find_if(children, [&](const std::unique_ptr<RegionT> &R) {
+ llvm::find_if(children, [&](const std::unique_ptr<RegionT> &R) {
return R.get() == Child;
});
  assert(I != children.end() && "Region does not exist. Unable to remove.");
@@ -505,8 +517,7 @@ void RegionBase<Tr>::clearNodeCache() {
//
template <class Tr>
-RegionInfoBase<Tr>::RegionInfoBase()
- : TopLevelRegion(nullptr) {}
+RegionInfoBase<Tr>::RegionInfoBase() = default;
template <class Tr>
RegionInfoBase<Tr>::~RegionInfoBase() {
@@ -543,7 +554,8 @@ bool RegionInfoBase<Tr>::isCommonDomFrontier(BlockT *BB, BlockT *entry,
template <class Tr>
bool RegionInfoBase<Tr>::isRegion(BlockT *entry, BlockT *exit) const {
assert(entry && exit && "entry and exit must not be null!");
- typedef typename DomFrontierT::DomSetType DST;
+
+ using DST = typename DomFrontierT::DomSetType;
DST *entrySuccs = &DF->find(entry)->second;
@@ -689,7 +701,8 @@ void RegionInfoBase<Tr>::findRegionsWithEntry(BlockT *entry,
template <class Tr>
void RegionInfoBase<Tr>::scanForRegions(FuncT &F, BBtoBBMap *ShortCut) {
- typedef typename std::add_pointer<FuncT>::type FuncPtrT;
+ using FuncPtrT = typename std::add_pointer<FuncT>::type;
+
BlockT *entry = GraphTraits<FuncPtrT>::getEntryNode(&F);
DomTreeNodeT *N = DT->getNode(entry);
@@ -876,7 +889,7 @@ RegionInfoBase<Tr>::getCommonRegion(SmallVectorImpl<BlockT *> &BBs) const {
template <class Tr>
void RegionInfoBase<Tr>::calculate(FuncT &F) {
- typedef typename std::add_pointer<FuncT>::type FuncPtrT;
+ using FuncPtrT = typename std::add_pointer<FuncT>::type;
// ShortCut a function where for every BB the exit of the largest region
 // starting with BB is stored. These regions can be treated as single BBs.
@@ -892,4 +905,4 @@ void RegionInfoBase<Tr>::calculate(FuncT &F) {
} // end namespace llvm
-#endif
+#endif // LLVM_ANALYSIS_REGIONINFOIMPL_H
diff --git a/include/llvm/Analysis/RegionIterator.h b/include/llvm/Analysis/RegionIterator.h
index de2f3bf3f12b..4f823cc82210 100644
--- a/include/llvm/Analysis/RegionIterator.h
+++ b/include/llvm/Analysis/RegionIterator.h
@@ -8,17 +8,23 @@
//===----------------------------------------------------------------------===//
// This file defines the iterators to iterate over the elements of a Region.
//===----------------------------------------------------------------------===//
+
#ifndef LLVM_ANALYSIS_REGIONITERATOR_H
#define LLVM_ANALYSIS_REGIONITERATOR_H
+#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/PointerIntPair.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/IR/CFG.h"
-#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <iterator>
+#include <type_traits>
namespace llvm {
+
+class BasicBlock;
+
//===----------------------------------------------------------------------===//
/// @brief Hierarchical RegionNode successor iterator.
///
@@ -33,10 +39,9 @@ namespace llvm {
template <class NodeRef, class BlockT, class RegionT>
class RNSuccIterator
: public std::iterator<std::forward_iterator_tag, NodeRef> {
- typedef std::iterator<std::forward_iterator_tag, NodeRef> super;
-
- typedef GraphTraits<BlockT*> BlockTraits;
- typedef typename BlockTraits::ChildIteratorType SuccIterTy;
+ using super = std::iterator<std::forward_iterator_tag, NodeRef>;
+ using BlockTraits = GraphTraits<BlockT *>;
+ using SuccIterTy = typename BlockTraits::ChildIteratorType;
// The iterator works in two modes, bb mode or region mode.
enum ItMode {
@@ -92,16 +97,15 @@ class RNSuccIterator
inline bool isExit(BlockT* BB) const {
return getNode()->getParent()->getExit() == BB;
}
-public:
- typedef RNSuccIterator<NodeRef, BlockT, RegionT> Self;
- typedef typename super::value_type value_type;
+public:
+ using Self = RNSuccIterator<NodeRef, BlockT, RegionT>;
+ using value_type = typename super::value_type;
/// @brief Create begin iterator of a RegionNode.
inline RNSuccIterator(NodeRef node)
: Node(node, node->isSubRegion() ? ItRgBegin : ItBB),
BItor(BlockTraits::child_begin(node->getEntry())) {
-
// Skip the exit block
if (!isRegionMode())
while (BlockTraits::child_end(node->getEntry()) != BItor && isExit(*BItor))
@@ -153,7 +157,6 @@ public:
}
};
-
//===----------------------------------------------------------------------===//
/// @brief Flat RegionNode iterator.
///
@@ -163,16 +166,16 @@ public:
template <class NodeRef, class BlockT, class RegionT>
class RNSuccIterator<FlatIt<NodeRef>, BlockT, RegionT>
: public std::iterator<std::forward_iterator_tag, NodeRef> {
- typedef std::iterator<std::forward_iterator_tag, NodeRef> super;
- typedef GraphTraits<BlockT*> BlockTraits;
- typedef typename BlockTraits::ChildIteratorType SuccIterTy;
+ using super = std::iterator<std::forward_iterator_tag, NodeRef>;
+ using BlockTraits = GraphTraits<BlockT *>;
+ using SuccIterTy = typename BlockTraits::ChildIteratorType;
NodeRef Node;
SuccIterTy Itor;
public:
- typedef RNSuccIterator<FlatIt<NodeRef>, BlockT, RegionT> Self;
- typedef typename super::value_type value_type;
+ using Self = RNSuccIterator<FlatIt<NodeRef>, BlockT, RegionT>;
+ using value_type = typename super::value_type;
/// @brief Create the iterator from a RegionNode.
///
@@ -255,8 +258,8 @@ inline RNSuccIterator<NodeRef, BlockT, RegionT> succ_end(NodeRef Node) {
#define RegionNodeGraphTraits(NodeT, BlockT, RegionT) \
template <> struct GraphTraits<NodeT *> { \
- typedef NodeT *NodeRef; \
- typedef RNSuccIterator<NodeRef, BlockT, RegionT> ChildIteratorType; \
+ using NodeRef = NodeT *; \
+ using ChildIteratorType = RNSuccIterator<NodeRef, BlockT, RegionT>; \
static NodeRef getEntryNode(NodeRef N) { return N; } \
static inline ChildIteratorType child_begin(NodeRef N) { \
return RNSuccIterator<NodeRef, BlockT, RegionT>(N); \
@@ -266,9 +269,9 @@ inline RNSuccIterator<NodeRef, BlockT, RegionT> succ_end(NodeRef Node) {
} \
}; \
template <> struct GraphTraits<FlatIt<NodeT *>> { \
- typedef NodeT *NodeRef; \
- typedef RNSuccIterator<FlatIt<NodeRef>, BlockT, RegionT> \
- ChildIteratorType; \
+ using NodeRef = NodeT *; \
+ using ChildIteratorType = \
+ RNSuccIterator<FlatIt<NodeRef>, BlockT, RegionT>; \
static NodeRef getEntryNode(NodeRef N) { return N; } \
static inline ChildIteratorType child_begin(NodeRef N) { \
return RNSuccIterator<FlatIt<NodeRef>, BlockT, RegionT>(N); \
@@ -280,7 +283,7 @@ inline RNSuccIterator<NodeRef, BlockT, RegionT> succ_end(NodeRef Node) {
#define RegionGraphTraits(RegionT, NodeT) \
template <> struct GraphTraits<RegionT *> : public GraphTraits<NodeT *> { \
- typedef df_iterator<NodeRef> nodes_iterator; \
+ using nodes_iterator = df_iterator<NodeRef>; \
static NodeRef getEntryNode(RegionT *R) { \
return R->getNode(R->getEntry()); \
} \
@@ -294,9 +297,9 @@ inline RNSuccIterator<NodeRef, BlockT, RegionT> succ_end(NodeRef Node) {
template <> \
struct GraphTraits<FlatIt<RegionT *>> \
: public GraphTraits<FlatIt<NodeT *>> { \
- typedef df_iterator<NodeRef, df_iterator_default_set<NodeRef>, false, \
- GraphTraits<FlatIt<NodeRef>>> \
- nodes_iterator; \
+ using nodes_iterator = \
+ df_iterator<NodeRef, df_iterator_default_set<NodeRef>, false, \
+ GraphTraits<FlatIt<NodeRef>>>; \
static NodeRef getEntryNode(RegionT *R) { \
return R->getBBNode(R->getEntry()); \
} \
@@ -315,17 +318,19 @@ RegionGraphTraits(Region, RegionNode);
RegionGraphTraits(const Region, const RegionNode);
template <> struct GraphTraits<RegionInfo*>
- : public GraphTraits<FlatIt<RegionNode*> > {
- typedef df_iterator<NodeRef, df_iterator_default_set<NodeRef>, false,
- GraphTraits<FlatIt<NodeRef>>>
- nodes_iterator;
+ : public GraphTraits<FlatIt<RegionNode*>> {
+ using nodes_iterator =
+ df_iterator<NodeRef, df_iterator_default_set<NodeRef>, false,
+ GraphTraits<FlatIt<NodeRef>>>;
static NodeRef getEntryNode(RegionInfo *RI) {
- return GraphTraits<FlatIt<Region*> >::getEntryNode(RI->getTopLevelRegion());
+ return GraphTraits<FlatIt<Region*>>::getEntryNode(RI->getTopLevelRegion());
}
+
static nodes_iterator nodes_begin(RegionInfo* RI) {
return nodes_iterator::begin(getEntryNode(RI));
}
+
static nodes_iterator nodes_end(RegionInfo *RI) {
return nodes_iterator::end(getEntryNode(RI));
}
@@ -333,21 +338,23 @@ template <> struct GraphTraits<RegionInfo*>
template <> struct GraphTraits<RegionInfoPass*>
: public GraphTraits<RegionInfo *> {
- typedef df_iterator<NodeRef, df_iterator_default_set<NodeRef>, false,
- GraphTraits<FlatIt<NodeRef>>>
- nodes_iterator;
+ using nodes_iterator =
+ df_iterator<NodeRef, df_iterator_default_set<NodeRef>, false,
+ GraphTraits<FlatIt<NodeRef>>>;
static NodeRef getEntryNode(RegionInfoPass *RI) {
return GraphTraits<RegionInfo*>::getEntryNode(&RI->getRegionInfo());
}
+
static nodes_iterator nodes_begin(RegionInfoPass* RI) {
return GraphTraits<RegionInfo*>::nodes_begin(&RI->getRegionInfo());
}
+
static nodes_iterator nodes_end(RegionInfoPass *RI) {
return GraphTraits<RegionInfo*>::nodes_end(&RI->getRegionInfo());
}
};
-} // End namespace llvm
+} // end namespace llvm
-#endif
+#endif // LLVM_ANALYSIS_REGIONITERATOR_H
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index 002a3174fd19..c7accfae78b0 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -262,7 +262,7 @@ public:
const SCEVConstant *getRHS() const { return RHS; }
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SCEVPredicate *P) {
+ static bool classof(const SCEVPredicate *P) {
return P->getKind() == P_Equal;
}
};
@@ -360,7 +360,7 @@ public:
bool isAlwaysTrue() const override;
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SCEVPredicate *P) {
+ static bool classof(const SCEVPredicate *P) {
return P->getKind() == P_Wrap;
}
};
@@ -406,7 +406,7 @@ public:
unsigned getComplexity() const override { return Preds.size(); }
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SCEVPredicate *P) {
+ static bool classof(const SCEVPredicate *P) {
return P->getKind() == P_Union;
}
};
@@ -1197,20 +1197,8 @@ public:
const SCEV *getConstant(const APInt &Val);
const SCEV *getConstant(Type *Ty, uint64_t V, bool isSigned = false);
const SCEV *getTruncateExpr(const SCEV *Op, Type *Ty);
-
- typedef SmallDenseMap<std::pair<const SCEV *, Type *>, const SCEV *, 8>
- ExtendCacheTy;
- const SCEV *getZeroExtendExpr(const SCEV *Op, Type *Ty);
- const SCEV *getZeroExtendExprCached(const SCEV *Op, Type *Ty,
- ExtendCacheTy &Cache);
- const SCEV *getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
- ExtendCacheTy &Cache);
-
- const SCEV *getSignExtendExpr(const SCEV *Op, Type *Ty);
- const SCEV *getSignExtendExprCached(const SCEV *Op, Type *Ty,
- ExtendCacheTy &Cache);
- const SCEV *getSignExtendExprImpl(const SCEV *Op, Type *Ty,
- ExtendCacheTy &Cache);
+ const SCEV *getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0);
+ const SCEV *getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0);
const SCEV *getAnyExtendExpr(const SCEV *Op, Type *Ty);
const SCEV *getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap,
diff --git a/include/llvm/Analysis/ScalarEvolutionExpressions.h b/include/llvm/Analysis/ScalarEvolutionExpressions.h
index 2c693bceb24d..56ddb5028d6d 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpressions.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpressions.h
@@ -46,7 +46,7 @@ namespace llvm {
Type *getType() const { return V->getType(); }
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SCEV *S) {
+ static bool classof(const SCEV *S) {
return S->getSCEVType() == scConstant;
}
};
@@ -65,7 +65,7 @@ namespace llvm {
Type *getType() const { return Ty; }
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SCEV *S) {
+ static bool classof(const SCEV *S) {
return S->getSCEVType() == scTruncate ||
S->getSCEVType() == scZeroExtend ||
S->getSCEVType() == scSignExtend;
@@ -82,7 +82,7 @@ namespace llvm {
public:
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SCEV *S) {
+ static bool classof(const SCEV *S) {
return S->getSCEVType() == scTruncate;
}
};
@@ -97,7 +97,7 @@ namespace llvm {
public:
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SCEV *S) {
+ static bool classof(const SCEV *S) {
return S->getSCEVType() == scZeroExtend;
}
};
@@ -112,7 +112,7 @@ namespace llvm {
public:
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SCEV *S) {
+ static bool classof(const SCEV *S) {
return S->getSCEVType() == scSignExtend;
}
};
@@ -167,7 +167,7 @@ namespace llvm {
}
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SCEV *S) {
+ static bool classof(const SCEV *S) {
return S->getSCEVType() == scAddExpr ||
S->getSCEVType() == scMulExpr ||
S->getSCEVType() == scSMaxExpr ||
@@ -185,7 +185,7 @@ namespace llvm {
public:
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SCEV *S) {
+ static bool classof(const SCEV *S) {
return S->getSCEVType() == scAddExpr ||
S->getSCEVType() == scMulExpr ||
S->getSCEVType() == scSMaxExpr ||
@@ -217,7 +217,7 @@ namespace llvm {
}
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SCEV *S) {
+ static bool classof(const SCEV *S) {
return S->getSCEVType() == scAddExpr;
}
};
@@ -234,7 +234,7 @@ namespace llvm {
public:
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SCEV *S) {
+ static bool classof(const SCEV *S) {
return S->getSCEVType() == scMulExpr;
}
};
@@ -263,7 +263,7 @@ namespace llvm {
}
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SCEV *S) {
+ static bool classof(const SCEV *S) {
return S->getSCEVType() == scUDivExpr;
}
};
@@ -345,7 +345,7 @@ namespace llvm {
}
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SCEV *S) {
+ static bool classof(const SCEV *S) {
return S->getSCEVType() == scAddRecExpr;
}
};
@@ -363,7 +363,7 @@ namespace llvm {
public:
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SCEV *S) {
+ static bool classof(const SCEV *S) {
return S->getSCEVType() == scSMaxExpr;
}
};
@@ -382,7 +382,7 @@ namespace llvm {
public:
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SCEV *S) {
+ static bool classof(const SCEV *S) {
return S->getSCEVType() == scUMaxExpr;
}
};
@@ -428,7 +428,7 @@ namespace llvm {
Type *getType() const { return getValPtr()->getType(); }
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SCEV *S) {
+ static bool classof(const SCEV *S) {
return S->getSCEVType() == scUnknown;
}
};
diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h
index af2ebb7b6b44..68fbf640994c 100644
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -216,9 +216,23 @@ public:
/// other context they may not be folded. This routine can distinguish such
/// cases.
///
+  /// \p Operands is a list of operands which can be a result of
+  /// transformations of the current operands. The number of operands on the
+  /// list must be equal to the number of current operands the IR user has,
+  /// and their order on the list must match the order of the current
+  /// operands.
+ ///
/// The returned cost is defined in terms of \c TargetCostConstants, see its
/// comments for a detailed explanation of the cost values.
- int getUserCost(const User *U) const;
+ int getUserCost(const User *U, ArrayRef<const Value *> Operands) const;
+
+ /// \brief This is a helper function which calls the two-argument getUserCost
+ /// with \p Operands which are the current operands U has.
+ int getUserCost(const User *U) const {
+ SmallVector<const Value *, 4> Operands(U->value_op_begin(),
+ U->value_op_end());
+ return getUserCost(U, Operands);
+ }
/// \brief Return true if branch divergence exists.
///
@@ -366,7 +380,8 @@ public:
/// \brief Get target-customized preferences for the generic loop unrolling
/// transformation. The caller will initialize UP with the current
/// target-independent defaults.
- void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const;
+ void getUnrollingPreferences(Loop *L, ScalarEvolution &,
+ UnrollingPreferences &UP) const;
/// @}
@@ -823,13 +838,15 @@ public:
ArrayRef<const Value *> Arguments) = 0;
virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
unsigned &JTSize) = 0;
- virtual int getUserCost(const User *U) = 0;
+ virtual int
+ getUserCost(const User *U, ArrayRef<const Value *> Operands) = 0;
virtual bool hasBranchDivergence() = 0;
virtual bool isSourceOfDivergence(const Value *V) = 0;
virtual bool isAlwaysUniform(const Value *V) = 0;
virtual unsigned getFlatAddressSpace() = 0;
virtual bool isLoweredToCall(const Function *F) = 0;
- virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) = 0;
+ virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
+ UnrollingPreferences &UP) = 0;
virtual bool isLegalAddImmediate(int64_t Imm) = 0;
virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
@@ -998,7 +1015,9 @@ public:
ArrayRef<const Value *> Arguments) override {
return Impl.getIntrinsicCost(IID, RetTy, Arguments);
}
- int getUserCost(const User *U) override { return Impl.getUserCost(U); }
+ int getUserCost(const User *U, ArrayRef<const Value *> Operands) override {
+ return Impl.getUserCost(U, Operands);
+ }
bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
bool isSourceOfDivergence(const Value *V) override {
return Impl.isSourceOfDivergence(V);
@@ -1015,8 +1034,9 @@ public:
bool isLoweredToCall(const Function *F) override {
return Impl.isLoweredToCall(F);
}
- void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) override {
- return Impl.getUnrollingPreferences(L, UP);
+ void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ UnrollingPreferences &UP) override {
+ return Impl.getUnrollingPreferences(L, SE, UP);
}
bool isLegalAddImmediate(int64_t Imm) override {
return Impl.isLegalAddImmediate(Imm);
diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h
index 24ac3b1213e1..0246fc1c02cc 100644
--- a/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -217,7 +217,8 @@ public:
return true;
}
- void getUnrollingPreferences(Loop *, TTI::UnrollingPreferences &) {}
+ void getUnrollingPreferences(Loop *, ScalarEvolution &,
+ TTI::UnrollingPreferences &) {}
bool isLegalAddImmediate(int64_t Imm) { return false; }
@@ -684,14 +685,14 @@ public:
return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys);
}
- unsigned getUserCost(const User *U) {
+ unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands) {
if (isa<PHINode>(U))
return TTI::TCC_Free; // Model all PHI nodes as free.
if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
- SmallVector<Value *, 4> Indices(GEP->idx_begin(), GEP->idx_end());
- return static_cast<T *>(this)->getGEPCost(
- GEP->getSourceElementType(), GEP->getPointerOperand(), Indices);
+ return static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(),
+ GEP->getPointerOperand(),
+ Operands.drop_front());
}
if (auto CS = ImmutableCallSite(U)) {
diff --git a/include/llvm/BinaryFormat/COFF.h b/include/llvm/BinaryFormat/COFF.h
index df173a80e09b..b395db6eaa83 100644
--- a/include/llvm/BinaryFormat/COFF.h
+++ b/include/llvm/BinaryFormat/COFF.h
@@ -382,7 +382,7 @@ enum RelocationTypesARM64 {
IMAGE_REL_ARM64_ADDR32 = 0x0001,
IMAGE_REL_ARM64_ADDR32NB = 0x0002,
IMAGE_REL_ARM64_BRANCH26 = 0x0003,
- IMAGE_REL_ARM64_PAGEBASE_REL2 = 0x0004,
+ IMAGE_REL_ARM64_PAGEBASE_REL21 = 0x0004,
IMAGE_REL_ARM64_REL21 = 0x0005,
IMAGE_REL_ARM64_PAGEOFFSET_12A = 0x0006,
IMAGE_REL_ARM64_PAGEOFFSET_12L = 0x0007,
diff --git a/include/llvm/BinaryFormat/Dwarf.h b/include/llvm/BinaryFormat/Dwarf.h
index ab927565d05d..80456a0808f2 100644
--- a/include/llvm/BinaryFormat/Dwarf.h
+++ b/include/llvm/BinaryFormat/Dwarf.h
@@ -62,6 +62,9 @@ enum LLVMConstants : uint32_t {
const uint32_t DW_CIE_ID = UINT32_MAX;
const uint64_t DW64_CIE_ID = UINT64_MAX;
+// Identifier of an invalid DIE offset in the .debug_info section.
+const uint32_t DW_INVALID_OFFSET = UINT32_MAX;
+
enum Tag : uint16_t {
#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) DW_TAG_##NAME = ID,
#include "llvm/BinaryFormat/Dwarf.def"
diff --git a/include/llvm/BinaryFormat/Wasm.h b/include/llvm/BinaryFormat/Wasm.h
index 470c20ddc7d9..eef473b20dde 100644
--- a/include/llvm/BinaryFormat/Wasm.h
+++ b/include/llvm/BinaryFormat/Wasm.h
@@ -112,6 +112,11 @@ struct WasmRelocation {
int64_t Addend; // A value to add to the symbol.
};
+struct WasmLinkingData {
+ uint32_t DataSize;
+ uint32_t DataAlignment;
+};
+
enum : unsigned {
WASM_SEC_CUSTOM = 0, // Custom / User-defined section
WASM_SEC_TYPE = 1, // Function signature declarations
@@ -175,8 +180,10 @@ enum class ValType {
// Linking metadata kinds.
enum : unsigned {
- WASM_STACK_POINTER = 0x1,
- WASM_SYMBOL_INFO = 0x2,
+ WASM_STACK_POINTER = 0x1,
+ WASM_SYMBOL_INFO = 0x2,
+ WASM_DATA_SIZE = 0x3,
+ WASM_DATA_ALIGNMENT = 0x4,
};
enum : unsigned {
diff --git a/include/llvm/Bitcode/BitcodeReader.h b/include/llvm/Bitcode/BitcodeReader.h
index 0e17e9a0a278..160ddad5761f 100644
--- a/include/llvm/Bitcode/BitcodeReader.h
+++ b/include/llvm/Bitcode/BitcodeReader.h
@@ -111,9 +111,14 @@ namespace llvm {
struct BitcodeFileContents {
std::vector<BitcodeModule> Mods;
+ StringRef Symtab, StrtabForSymtab;
};
- /// Returns the contents of a bitcode file.
+ /// Returns the contents of a bitcode file. This includes the raw contents of
+ /// the symbol table embedded in the bitcode file. Clients which require a
+ /// symbol table should prefer to use irsymtab::read instead of this function
+ /// because it creates a reader for the irsymtab and handles upgrading bitcode
+ /// files without a symbol table or with an old symbol table.
Expected<BitcodeFileContents> getBitcodeFileContents(MemoryBufferRef Buffer);
/// Returns a list of modules in the specified bitcode buffer.
diff --git a/include/llvm/Bitcode/BitcodeWriter.h b/include/llvm/Bitcode/BitcodeWriter.h
index 7c3c4b2e0cbd..f8b7fb341e88 100644
--- a/include/llvm/Bitcode/BitcodeWriter.h
+++ b/include/llvm/Bitcode/BitcodeWriter.h
@@ -28,18 +28,34 @@ namespace llvm {
std::unique_ptr<BitstreamWriter> Stream;
StringTableBuilder StrtabBuilder{StringTableBuilder::RAW};
- bool WroteStrtab = false;
+
+ // Owns any strings created by the irsymtab writer until we create the
+ // string table.
+ BumpPtrAllocator Alloc;
+
+ bool WroteStrtab = false, WroteSymtab = false;
void writeBlob(unsigned Block, unsigned Record, StringRef Blob);
+ std::vector<Module *> Mods;
+
public:
/// Create a BitcodeWriter that writes to Buffer.
BitcodeWriter(SmallVectorImpl<char> &Buffer);
~BitcodeWriter();
+ /// Attempt to write a symbol table to the bitcode file. This must be called
+ /// at most once after all modules have been written.
+ ///
+ /// A reader does not require a symbol table to interpret a bitcode file;
+ /// the symbol table is needed only to improve link-time performance. So
+ /// this function may decide not to write a symbol table. It may so decide
+ /// if, for example, the target is unregistered or the IR is malformed.
+ void writeSymtab();
+
/// Write the bitcode file's string table. This must be called exactly once
- /// after all modules have been written.
+ /// after all modules and the optional symbol table have been written.
void writeStrtab();
/// Copy the string table for another module into this bitcode file. This
diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h
index 4e3e177cac8f..5435e48ff424 100644
--- a/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/include/llvm/Bitcode/LLVMBitCodes.h
@@ -22,7 +22,7 @@
namespace llvm {
namespace bitc {
-// The only top-level block types are MODULE, IDENTIFICATION and STRTAB.
+// The only top-level block types are MODULE, IDENTIFICATION, STRTAB and SYMTAB.
enum BlockIDs {
// Blocks
MODULE_BLOCK_ID = FIRST_APPLICATION_BLOCKID,
@@ -57,6 +57,8 @@ enum BlockIDs {
STRTAB_BLOCK_ID,
FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID,
+
+ SYMTAB_BLOCK_ID,
};
/// Identification block contains a string that describes the producer details,
@@ -571,6 +573,10 @@ enum StrtabCodes {
STRTAB_BLOB = 1,
};
+enum SymtabCodes {
+ SYMTAB_BLOB = 1,
+};
+
} // End bitc namespace
} // End llvm namespace
diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h
index 5eb7a0f61eec..a740df96899d 100644
--- a/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/include/llvm/CodeGen/BasicTTIImpl.h
@@ -277,7 +277,8 @@ public:
unsigned getInliningThresholdMultiplier() { return 1; }
- void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP) {
+ void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP) {
// This unrolling functionality is target independent, but to provide some
// motivation for its intended use, for x86:
diff --git a/include/llvm/CodeGen/GlobalISel/CallLowering.h b/include/llvm/CodeGen/GlobalISel/CallLowering.h
index 3e9a9d514cb8..e7ce1946889e 100644
--- a/include/llvm/CodeGen/GlobalISel/CallLowering.h
+++ b/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/GlobalISel/CallLowering.h - Call lowering --*- C++ -*-===//
+//===- llvm/CodeGen/GlobalISel/CallLowering.h - Call lowering ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,21 +15,31 @@
#ifndef LLVM_CODEGEN_GLOBALISEL_CALLLOWERING_H
#define LLVM_CODEGEN_GLOBALISEL_CALLLOWERING_H
-#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/IR/Function.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetCallingConv.h"
+#include <cstdint>
+#include <functional>
namespace llvm {
-// Forward declarations.
+
+class DataLayout;
+class Function;
class MachineIRBuilder;
class MachineOperand;
+struct MachinePointerInfo;
+class MachineRegisterInfo;
class TargetLowering;
+class Type;
class Value;
class CallLowering {
const TargetLowering *TLI;
+
public:
struct ArgInfo {
unsigned Reg;
@@ -49,6 +59,12 @@ public:
/// argument should go, exactly what happens can vary slightly. This
/// class abstracts the differences.
struct ValueHandler {
+ ValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ CCAssignFn *AssignFn)
+ : MIRBuilder(MIRBuilder), MRI(MRI), AssignFn(AssignFn) {}
+
+ virtual ~ValueHandler() = default;
+
/// Materialize a VReg containing the address of the specified
/// stack-based object. This is either based on a FrameIndex or
/// direct SP manipulation, depending on the context. \p MPO
@@ -89,12 +105,6 @@ public:
return AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State);
}
- ValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
- CCAssignFn *AssignFn)
- : MIRBuilder(MIRBuilder), MRI(MRI), AssignFn(AssignFn) {}
-
- virtual ~ValueHandler() {}
-
MachineIRBuilder &MIRBuilder;
MachineRegisterInfo &MRI;
CCAssignFn *AssignFn;
@@ -112,7 +122,6 @@ protected:
return static_cast<const XXXTargetLowering *>(TLI);
}
-
template <typename FuncInfoTy>
void setArgFlags(ArgInfo &Arg, unsigned OpNum, const DataLayout &DL,
const FuncInfoTy &FuncInfo) const;
@@ -126,7 +135,7 @@ protected:
public:
CallLowering(const TargetLowering *TLI) : TLI(TLI) {}
- virtual ~CallLowering() {}
+ virtual ~CallLowering() = default;
/// This hook must be implemented to lower outgoing return values, described
/// by \p Val, into the specified virtual register \p VReg.
@@ -200,6 +209,7 @@ public:
unsigned ResReg, ArrayRef<unsigned> ArgRegs,
std::function<unsigned()> GetCalleeReg) const;
};
-} // End namespace llvm.
-#endif
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_GLOBALISEL_CALLLOWERING_H
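With the constructor and the now-defaulted virtual destructor hoisted above the hooks, a target handler just forwards to them. A hedged skeleton (the class name is invented; the pure virtual hooks such as getStackAddress and assignValueToReg must still be overridden before the type can be instantiated):

    struct MyIncomingValueHandler : CallLowering::ValueHandler {
      MyIncomingValueHandler(MachineIRBuilder &MIRBuilder,
                             MachineRegisterInfo &MRI, CCAssignFn *AssignFn)
          : ValueHandler(MIRBuilder, MRI, AssignFn) {}
      // Overrides of the pure virtual hooks go here.
    };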
diff --git a/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/include/llvm/CodeGen/GlobalISel/IRTranslator.h
index e292e8913db0..7061c014d9b7 100644
--- a/include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ b/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/GlobalISel/IRTranslator.h - IRTranslator ---*- C++ -*-===//
+//===- llvm/CodeGen/GlobalISel/IRTranslator.h - IRTranslator ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -19,24 +19,33 @@
#ifndef LLVM_CODEGEN_GLOBALISEL_IRTRANSLATOR_H
#define LLVM_CODEGEN_GLOBALISEL_IRTRANSLATOR_H
-#include "Types.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Types.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/IR/Intrinsics.h"
+#include <memory>
+#include <utility>
namespace llvm {
-// Forward declarations.
+
+class AllocaInst;
class BasicBlock;
+class CallInst;
class CallLowering;
class Constant;
+class DataLayout;
class Instruction;
class MachineBasicBlock;
class MachineFunction;
class MachineInstr;
-class OptimizationRemarkEmitter;
class MachineRegisterInfo;
+class OptimizationRemarkEmitter;
+class PHINode;
class TargetPassConfig;
+class User;
+class Value;
// Technically the pass should run on a hypothetical MachineModule,
// since it should translate Global into some sort of MachineGlobal.
@@ -53,6 +62,7 @@ public:
private:
/// Interface used to lower everything related to calls.
const CallLowering *CLI;
+
/// Mapping of the values of the current LLVM IR function
/// to the related virtual registers.
ValueToVReg ValToVReg;
@@ -67,7 +77,7 @@ private:
// a mapping between the edges arriving at the BasicBlock to the corresponding
// created MachineBasicBlocks. Some BasicBlocks that get translated to a
// single MachineBasicBlock may also end up in this Map.
- typedef std::pair<const BasicBlock *, const BasicBlock *> CFGEdge;
+ using CFGEdge = std::pair<const BasicBlock *, const BasicBlock *>;
DenseMap<CFGEdge, SmallVector<MachineBasicBlock *, 1>> MachinePreds;
// List of stubbed PHI instructions, for values and basic blocks to be filled
@@ -165,7 +175,6 @@ private:
return translateCompare(U, MIRBuilder);
}
-
/// Add remaining operands onto phis we've translated. Executed after all
/// MachineBasicBlocks for the function have been created.
void finishPendingPhis();
@@ -356,7 +365,7 @@ private:
MachineFunction *MF;
/// MachineRegisterInfo used to create virtual registers.
- MachineRegisterInfo *MRI;
+ MachineRegisterInfo *MRI = nullptr;
const DataLayout *DL;
@@ -430,5 +439,6 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
};
-} // End namespace llvm.
-#endif
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_GLOBALISEL_IRTRANSLATOR_H
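Three C++11 modernizations recur throughout this import: alias declarations over typedefs, in-class member initializers, and defaulted special members. A standalone illustration, not lifted from any one header:

    #include <utility>
    class MachineRegisterInfo; // forward declaration suffices for a pointer

    struct Old {
      typedef std::pair<int, int> Edge; // C++03 spelling
      MachineRegisterInfo *MRI;         // must be set in every constructor
      virtual ~Old() {}                 // user-provided body
    };

    struct New {
      using Edge = std::pair<int, int>;   // reads left to right
      MachineRegisterInfo *MRI = nullptr; // in-class default initializer
      virtual ~New() = default;           // defaulted destructor
    };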
diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
index b3ef7c2dc185..ec60123e54b1 100644
--- a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
+++ b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
@@ -1,4 +1,4 @@
-//==-- llvm/CodeGen/GlobalISel/InstructionSelector.h -------------*- C++ -*-==//
+//===- llvm/CodeGen/GlobalISel/InstructionSelector.h ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,15 +16,16 @@
#ifndef LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECTOR_H
#define LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECTOR_H
-#include "llvm/ADT/Optional.h"
#include <bitset>
+#include <cstddef>
#include <cstdint>
#include <functional>
+#include <initializer_list>
namespace llvm {
+
class MachineInstr;
class MachineInstrBuilder;
-class MachineFunction;
class MachineOperand;
class MachineRegisterInfo;
class RegisterBankInfo;
@@ -60,7 +61,7 @@ public:
/// Provides the logic to select generic machine instructions.
class InstructionSelector {
public:
- virtual ~InstructionSelector() {}
+ virtual ~InstructionSelector() = default;
/// Select the (possibly generic) instruction \p I to only use target-specific
/// opcodes. It is OK to insert multiple instructions, but they cannot be
@@ -76,7 +77,7 @@ public:
virtual bool select(MachineInstr &I) const = 0;
protected:
- typedef std::function<void(MachineInstrBuilder &)> ComplexRendererFn;
+ using ComplexRendererFn = std::function<void(MachineInstrBuilder &)>;
InstructionSelector();
@@ -110,6 +111,6 @@ protected:
bool isObviouslySafeToFold(MachineInstr &MI) const;
};
-} // End namespace llvm.
+} // end namespace llvm
-#endif
+#endif // LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECTOR_H
diff --git a/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
index 21354ae20ed1..c259e93fdd36 100644
--- a/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
+++ b/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
@@ -1,4 +1,4 @@
-//==-- llvm/CodeGen/GlobalISel/LegalizerInfo.h -------------------*- C++ -*-==//
+//===- llvm/CodeGen/GlobalISel/LegalizerInfo.h ------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,33 +12,36 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_GLOBALISEL_MACHINELEGALIZER_H
-#define LLVM_CODEGEN_GLOBALISEL_MACHINELEGALIZER_H
+#ifndef LLVM_CODEGEN_GLOBALISEL_LEGALIZERINFO_H
+#define LLVM_CODEGEN_GLOBALISEL_LEGALIZERINFO_H
#include "llvm/ADT/DenseMap.h"
-#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/LowLevelTypeImpl.h"
#include "llvm/Target/TargetOpcodes.h"
-
#include <cstdint>
-#include <functional>
+#include <cassert>
+#include <tuple>
+#include <utility>
namespace llvm {
-class LLVMContext;
+
class MachineInstr;
class MachineIRBuilder;
class MachineRegisterInfo;
-class Type;
-class VectorType;
/// Legalization is decided based on an instruction's opcode, which type slot
/// we're considering, and what the existing type is. These aspects are gathered
/// together for convenience in the InstrAspect class.
struct InstrAspect {
unsigned Opcode;
- unsigned Idx;
+ unsigned Idx = 0;
LLT Type;
- InstrAspect(unsigned Opcode, LLT Type) : Opcode(Opcode), Idx(0), Type(Type) {}
+ InstrAspect(unsigned Opcode, LLT Type) : Opcode(Opcode), Type(Type) {}
InstrAspect(unsigned Opcode, unsigned Idx, LLT Type)
: Opcode(Opcode), Idx(Idx), Type(Type) {}
@@ -104,6 +107,19 @@ public:
/// before any query is made or incorrect results may be returned.
void computeTables();
+ static bool needsLegalizingToDifferentSize(const LegalizeAction Action) {
+ switch (Action) {
+ case NarrowScalar:
+ case WidenScalar:
+ case FewerElements:
+ case MoreElements:
+ case Unsupported:
+ return true;
+ default:
+ return false;
+ }
+ }
+
/// More friendly way to set an action for common types that have an LLT
/// representation.
void setAction(const InstrAspect &Aspect, LegalizeAction Action) {
@@ -125,7 +141,6 @@ public:
ScalarInVectorActions[std::make_pair(Opcode, ScalarTy)] = Action;
}
-
/// Determine what action should be taken to legalize the given generic
/// instruction opcode, type-index and type. Requires computeTables to have
/// been called.
@@ -145,8 +160,8 @@ public:
/// Iterate the given function (typically something like doubling the width)
/// on Ty until we find a legal type for this operation.
- Optional<LLT> findLegalType(const InstrAspect &Aspect,
- function_ref<LLT(LLT)> NextType) const {
+ Optional<LLT> findLegalizableSize(const InstrAspect &Aspect,
+ function_ref<LLT(LLT)> NextType) const {
LegalizeAction Action;
const TypeMap &Map = Actions[Aspect.Opcode - FirstOp][Aspect.Idx];
LLT Ty = Aspect.Type;
@@ -158,10 +173,9 @@ public:
if (DefaultIt == DefaultActions.end())
return None;
Action = DefaultIt->second;
- }
- else
+ } else
Action = ActionIt->second;
- } while(Action != Legal);
+ } while (needsLegalizingToDifferentSize(Action));
return Ty;
}
@@ -203,18 +217,17 @@ private:
static const int FirstOp = TargetOpcode::PRE_ISEL_GENERIC_OPCODE_START;
static const int LastOp = TargetOpcode::PRE_ISEL_GENERIC_OPCODE_END;
- typedef DenseMap<LLT, LegalizeAction> TypeMap;
- typedef DenseMap<std::pair<unsigned, LLT>, LegalizeAction> SIVActionMap;
+ using TypeMap = DenseMap<LLT, LegalizeAction>;
+ using SIVActionMap = DenseMap<std::pair<unsigned, LLT>, LegalizeAction>;
SmallVector<TypeMap, 1> Actions[LastOp - FirstOp + 1];
SIVActionMap ScalarInVectorActions;
DenseMap<std::pair<unsigned, LLT>, uint16_t> MaxLegalVectorElts;
DenseMap<unsigned, LegalizeAction> DefaultActions;
- bool TablesInitialized;
+ bool TablesInitialized = false;
};
+} // end namespace llvm
-} // End namespace llvm.
-
-#endif
+#endif // LLVM_CODEGEN_GLOBALISEL_LEGALIZERINFO_H
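The renamed query keeps resizing only while the looked-up action still demands a different size, and now also accepts endpoints such as Libcall or Custom, where the old loop insisted on strictly Legal. A hedged setup sketch; the opcode and widths are invented:

    LegalizerInfo LI;
    LI.setAction({TargetOpcode::G_ADD, LLT::scalar(8)},
                 LegalizerInfo::WidenScalar); // s8: keep widening
    LI.setAction({TargetOpcode::G_ADD, LLT::scalar(32)},
                 LegalizerInfo::Legal);       // s32 is the fixed point
    LI.computeTables();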
diff --git a/include/llvm/CodeGen/GlobalISel/RegBankSelect.h b/include/llvm/CodeGen/GlobalISel/RegBankSelect.h
index f610bc02b6f2..676955c33fe9 100644
--- a/include/llvm/CodeGen/GlobalISel/RegBankSelect.h
+++ b/include/llvm/CodeGen/GlobalISel/RegBankSelect.h
@@ -1,4 +1,4 @@
-//== llvm/CodeGen/GlobalISel/RegBankSelect.h - Reg Bank Selector -*- C++ -*-==//
+//=- llvm/CodeGen/GlobalISel/RegBankSelect.h - Reg Bank Selector --*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -64,20 +64,27 @@
#ifndef LLVM_CODEGEN_GLOBALISEL_REGBANKSELECT_H
#define LLVM_CODEGEN_GLOBALISEL_REGBANKSELECT_H
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include <cassert>
+#include <cstdint>
+#include <memory>
namespace llvm {
-// Forward declarations.
+
class BlockFrequency;
-class MachineBranchProbabilityInfo;
class MachineBlockFrequencyInfo;
+class MachineBranchProbabilityInfo;
+class MachineOperand;
class MachineRegisterInfo;
+class Pass;
+class raw_ostream;
class TargetPassConfig;
class TargetRegisterInfo;
-class raw_ostream;
/// This pass implements the reg bank selector pass used in the GlobalISel
/// pipeline. At the end of this pass, all register operands have been assigned
@@ -105,6 +112,7 @@ public:
protected:
/// Tell if the insert point has already been materialized.
bool WasMaterialized = false;
+
/// Materialize the insertion point.
///
/// If isSplit() is true, this involves actually splitting
@@ -128,7 +136,7 @@ public:
virtual MachineBasicBlock::iterator getPointImpl() = 0;
public:
- virtual ~InsertPoint() {}
+ virtual ~InsertPoint() = default;
/// The first call to this method will cause the splitting to
/// happen if need be, then subsequent calls just return
@@ -197,6 +205,7 @@ public:
private:
/// Insertion point.
MachineInstr &Instr;
+
/// Whether the insertion point is before or after Instr.
bool Before;
@@ -216,6 +225,7 @@ public:
public:
/// Create an insertion point before (\p Before=true) or after \p Instr.
InstrInsertPoint(MachineInstr &Instr, bool Before = true);
+
bool isSplit() const override;
uint64_t frequency(const Pass &P) const override;
@@ -228,6 +238,7 @@ public:
private:
/// Insertion point.
MachineBasicBlock &MBB;
+
/// Whether the insertion point is at the beginning or end of MBB.
bool Beginning;
@@ -252,6 +263,7 @@ public:
assert((Beginning || MBB.getFirstTerminator() == MBB.end()) &&
"Invalid end point");
}
+
bool isSplit() const override { return false; }
uint64_t frequency(const Pass &P) const override;
bool canMaterialize() const override { return true; }
@@ -262,10 +274,12 @@ public:
private:
/// Source of the edge.
MachineBasicBlock &Src;
+
/// Destination of the edge.
/// After the materialization is done, this hold the basic block
/// that resulted from the splitting.
MachineBasicBlock *DstOrSplit;
+
/// P is used to update the analysis passes as applicable.
Pass &P;
@@ -286,9 +300,11 @@ public:
public:
EdgeInsertPoint(MachineBasicBlock &Src, MachineBasicBlock &Dst, Pass &P)
: InsertPoint(), Src(Src), DstOrSplit(&Dst), P(P) {}
+
bool isSplit() const override {
return Src.succ_size() > 1 && DstOrSplit->pred_size() > 1;
}
+
uint64_t frequency(const Pass &P) const override;
bool canMaterialize() const override;
};
@@ -311,9 +327,9 @@ public:
/// \name Convenient types for a list of insertion points.
/// @{
- typedef SmallVector<std::unique_ptr<InsertPoint>, 2> InsertionPoints;
- typedef InsertionPoints::iterator insertpt_iterator;
- typedef InsertionPoints::const_iterator const_insertpt_iterator;
+ using InsertionPoints = SmallVector<std::unique_ptr<InsertPoint>, 2>;
+ using insertpt_iterator = InsertionPoints::iterator;
+ using const_insertpt_iterator = InsertionPoints::const_iterator;
/// @}
private:
@@ -324,7 +340,7 @@ public:
/// Are all the insert points materializable?
bool CanMaterialize;
/// Is there any of the insert points needing splitting?
- bool HasSplit;
+ bool HasSplit = false;
/// Insertion point for the repair code.
/// The repairing code needs to happen just before these points.
InsertionPoints InsertPoints;
@@ -407,10 +423,10 @@ private:
private:
/// Cost of the local instructions.
/// This cost is free of basic block frequency.
- uint64_t LocalCost;
+ uint64_t LocalCost = 0;
/// Cost of the non-local instructions.
/// This cost should include the frequency of the related blocks.
- uint64_t NonLocalCost;
+ uint64_t NonLocalCost = 0;
/// Frequency of the block where the local instructions live.
uint64_t LocalFreq;
@@ -468,22 +484,22 @@ private:
/// Interface to the target lowering info related
/// to register banks.
- const RegisterBankInfo *RBI;
+ const RegisterBankInfo *RBI = nullptr;
/// MRI contains all the register class/bank information that this
/// pass uses and updates.
- MachineRegisterInfo *MRI;
+ MachineRegisterInfo *MRI = nullptr;
/// Information on the register classes for the current function.
- const TargetRegisterInfo *TRI;
+ const TargetRegisterInfo *TRI = nullptr;
/// Get the frequency of blocks.
/// This is required for non-fast mode.
- MachineBlockFrequencyInfo *MBFI;
+ MachineBlockFrequencyInfo *MBFI = nullptr;
/// Get the frequency of the edges.
/// This is required for non-fast mode.
- MachineBranchProbabilityInfo *MBPI;
+ MachineBranchProbabilityInfo *MBPI = nullptr;
/// Current optimization remark emitter. Used to report failures.
std::unique_ptr<MachineOptimizationRemarkEmitter> MORE;
@@ -644,6 +660,6 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
};
-} // End namespace llvm.
+} // end namespace llvm
-#endif
+#endif // LLVM_CODEGEN_GLOBALISEL_REGBANKSELECT_H
diff --git a/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h b/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h
index e3549d8988cd..60905c7ec226 100644
--- a/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h
+++ b/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h
@@ -1,4 +1,4 @@
-//==-- llvm/CodeGen/GlobalISel/RegisterBankInfo.h ----------------*- C++ -*-==//
+//===- llvm/CodeGen/GlobalISel/RegisterBankInfo.h ---------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,26 +12,27 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_GLOBALISEL_REGBANKINFO_H
-#define LLVM_CODEGEN_GLOBALISEL_REGBANKINFO_H
+#ifndef LLVM_CODEGEN_GLOBALISEL_REGISTERBANKINFO_H
+#define LLVM_CODEGEN_GLOBALISEL_REGISTERBANKINFO_H
-#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
-#include "llvm/CodeGen/MachineValueType.h" // For SimpleValueType.
+#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/ErrorHandling.h"
-
#include <cassert>
-#include <memory> // For unique_ptr.
+#include <initializer_list>
+#include <memory>
namespace llvm {
+
class MachineInstr;
class MachineRegisterInfo;
+class raw_ostream;
+class RegisterBank;
class TargetInstrInfo;
+class TargetRegisterClass;
class TargetRegisterInfo;
-class raw_ostream;
/// Holds all the information related to register banks.
class RegisterBankInfo {
@@ -48,10 +49,12 @@ public:
/// original value. The bits are counted from less significant
/// bits to most significant bits.
unsigned StartIdx;
+
/// Length of this mapping in bits. This is how many bits this
/// partial mapping covers in the original value:
/// from StartIdx to StartIdx + Length - 1.
unsigned Length;
+
/// Register bank where the partial value lives.
const RegisterBank *RegBank;
@@ -180,13 +183,16 @@ public:
/// Identifier of the mapping.
/// This is used to communicate between the target and the optimizers
/// which mapping should be realized.
- unsigned ID;
+ unsigned ID = InvalidMappingID;
+
/// Cost of this mapping.
- unsigned Cost;
+ unsigned Cost = 0;
+
/// Mapping of all the operands.
const ValueMapping *OperandsMapping;
+
/// Number of operands.
- unsigned NumOperands;
+ unsigned NumOperands = 0;
const ValueMapping &getOperandMapping(unsigned i) {
assert(i < getNumOperands() && "Out of bound operand");
@@ -213,7 +219,7 @@ public:
/// Default constructor.
/// Use this constructor to express that the mapping is invalid.
- InstructionMapping() : ID(InvalidMappingID), Cost(0), NumOperands(0) {}
+ InstructionMapping() = default;
/// Get the cost.
unsigned getCost() const { return Cost; }
@@ -264,7 +270,7 @@ public:
/// Convenient type to represent the alternatives for mapping an
/// instruction.
/// \todo When we move to TableGen this should be an array ref.
- typedef SmallVector<const InstructionMapping *, 4> InstructionMappings;
+ using InstructionMappings = SmallVector<const InstructionMapping *, 4>;
/// Helper class used to get/create the virtual registers that will be used
/// to replace the MachineOperand when applying a mapping.
@@ -273,12 +279,16 @@ public:
/// OpIdx-th operand starts. -1 means we do not have such mapping yet.
/// Note: We use a SmallVector to avoid heap allocation for most cases.
SmallVector<int, 8> OpToNewVRegIdx;
+
/// Hold the registers that will be used to map MI with InstrMapping.
SmallVector<unsigned, 8> NewVRegs;
+
/// Current MachineRegisterInfo, used to create new virtual registers.
MachineRegisterInfo &MRI;
+
/// Instruction being remapped.
MachineInstr &MI;
+
/// New mapping of the instruction.
const InstructionMapping &InstrMapping;
@@ -373,6 +383,7 @@ public:
protected:
/// Hold the set of supported register banks.
RegisterBank **RegBanks;
+
/// Total number of register banks.
unsigned NumRegBanks;
@@ -729,6 +740,7 @@ operator<<(raw_ostream &OS, const RegisterBankInfo::OperandsMapper &OpdMapper) {
/// Hashing function for PartialMapping.
/// It is required for the hashing of ValueMapping.
hash_code hash_value(const RegisterBankInfo::PartialMapping &PartMapping);
-} // End namespace llvm.
-#endif
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_GLOBALISEL_REGISTERBANKINFO_H
diff --git a/include/llvm/CodeGen/GlobalISel/Types.h b/include/llvm/CodeGen/GlobalISel/Types.h
index 7d974878d3b9..7b22e343a7f8 100644
--- a/include/llvm/CodeGen/GlobalISel/Types.h
+++ b/include/llvm/CodeGen/GlobalISel/Types.h
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/GlobalISel/Types.h - Types used by GISel ----*- C++ -*-===//
+//===- llvm/CodeGen/GlobalISel/Types.h - Types used by GISel ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,17 +16,19 @@
#define LLVM_CODEGEN_GLOBALISEL_TYPES_H
#include "llvm/ADT/DenseMap.h"
-#include "llvm/IR/Value.h"
namespace llvm {
+class Value;
+
/// Map a value to a virtual register.
/// For now, we chose to map aggregate types onto a single virtual
/// register. This might be revisited if it turns out to be inefficient.
/// PR26161 tracks that.
/// Note: We need to expose this type to the target hooks for things like
/// ABI lowering that would be used during IRTranslation.
-typedef DenseMap<const Value *, unsigned> ValueToVReg;
+using ValueToVReg = DenseMap<const Value *, unsigned>;
+
+} // end namespace llvm
-} // End namespace llvm.
-#endif
+#endif // LLVM_CODEGEN_GLOBALISEL_TYPES_H
diff --git a/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h b/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h
index da8fdcdf5a33..6ad5de533d13 100644
--- a/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h
+++ b/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h
@@ -134,7 +134,7 @@ using MNV = DiagnosticInfoMIROptimization::MachineArgument;
///
/// It allows reporting when optimizations are performed and when they are not
/// along with the reasons for it. Hotness information of the corresponding
-/// code region can be included in the remark if DiagnosticHotnessRequested is
+/// code region can be included in the remark if DiagnosticsHotnessRequested is
/// enabled in the LLVM context.
class MachineOptimizationRemarkEmitter {
public:
diff --git a/include/llvm/CodeGen/MachinePassRegistry.h b/include/llvm/CodeGen/MachinePassRegistry.h
index db914b1f8bc7..3aba0bba7d1a 100644
--- a/include/llvm/CodeGen/MachinePassRegistry.h
+++ b/include/llvm/CodeGen/MachinePassRegistry.h
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/MachinePassRegistry.h ----------------------*- C++ -*-===//
+//===- llvm/CodeGen/MachinePassRegistry.h -----------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,13 +18,13 @@
#ifndef LLVM_CODEGEN_MACHINEPASSREGISTRY_H
#define LLVM_CODEGEN_MACHINEPASSREGISTRY_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/CommandLine.h"
namespace llvm {
-typedef void *(*MachinePassCtor)();
-
+using MachinePassCtor = void *(*)();
//===----------------------------------------------------------------------===//
///
@@ -34,36 +34,30 @@ typedef void *(*MachinePassCtor)();
//===----------------------------------------------------------------------===//
class MachinePassRegistryListener {
virtual void anchor();
+
public:
- MachinePassRegistryListener() {}
- virtual ~MachinePassRegistryListener() {}
+ MachinePassRegistryListener() = default;
+ virtual ~MachinePassRegistryListener() = default;
+
virtual void NotifyAdd(StringRef N, MachinePassCtor C, StringRef D) = 0;
virtual void NotifyRemove(StringRef N) = 0;
};
-
//===----------------------------------------------------------------------===//
///
/// MachinePassRegistryNode - Machine pass node stored in registration list.
///
//===----------------------------------------------------------------------===//
class MachinePassRegistryNode {
-
private:
-
- MachinePassRegistryNode *Next; // Next function pass in list.
+ MachinePassRegistryNode *Next = nullptr; // Next function pass in list.
StringRef Name; // Name of function pass.
StringRef Description; // Description string.
MachinePassCtor Ctor; // Function pass creator.
public:
-
MachinePassRegistryNode(const char *N, const char *D, MachinePassCtor C)
- : Next(nullptr)
- , Name(N)
- , Description(D)
- , Ctor(C)
- {}
+ : Name(N), Description(D), Ctor(C) {}
// Accessors
MachinePassRegistryNode *getNext() const { return Next; }
@@ -72,25 +66,20 @@ public:
StringRef getDescription() const { return Description; }
MachinePassCtor getCtor() const { return Ctor; }
void setNext(MachinePassRegistryNode *N) { Next = N; }
-
};
-
//===----------------------------------------------------------------------===//
///
/// MachinePassRegistry - Track the registration of machine passes.
///
//===----------------------------------------------------------------------===//
class MachinePassRegistry {
-
private:
-
MachinePassRegistryNode *List; // List of registry nodes.
MachinePassCtor Default; // Default function pass creator.
- MachinePassRegistryListener* Listener;// Listener for list adds are removes.
+ MachinePassRegistryListener *Listener; // Listener for list adds or removes.
public:
-
// NO CONSTRUCTOR - we don't want static constructor ordering to mess
// with the registry.
@@ -109,10 +98,8 @@ public:
/// Remove - Removes a function pass from the registration list.
///
void Remove(MachinePassRegistryNode *Node);
-
};
-
//===----------------------------------------------------------------------===//
///
/// RegisterPassParser class - Handle the addition of new machine passes.
@@ -142,7 +129,6 @@ public:
}
// Implement the MachinePassRegistryListener callbacks.
- //
void NotifyAdd(StringRef N, MachinePassCtor C, StringRef D) override {
this->addLiteralOption(N, (typename RegistryClass::FunctionPassCtor)C, D);
}
@@ -151,7 +137,6 @@ public:
}
};
-
} // end namespace llvm
-#endif
+#endif // LLVM_CODEGEN_MACHINEPASSREGISTRY_H
diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h
index 34cbffa78203..8590b7a348cf 100644
--- a/include/llvm/CodeGen/MachineScheduler.h
+++ b/include/llvm/CodeGen/MachineScheduler.h
@@ -32,7 +32,7 @@
//
// ScheduleDAGInstrs *<Target>PassConfig::
// createMachineScheduler(MachineSchedContext *C) {
-// return new ScheduleDAGMI(C, CustomStrategy(C));
+// return new ScheduleDAGMILive(C, CustomStrategy(C));
// }
//
// The DAG builder can also be customized in a sense by adding DAG mutations
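What an out-of-tree target's override would look like under the corrected comment; a hedged sketch where CustomStrategy stands in for any MachineSchedStrategy:

    ScheduleDAGInstrs *
    MyTargetPassConfig::createMachineScheduler(MachineSchedContext *C) const {
      return new ScheduleDAGMILive(C, llvm::make_unique<CustomStrategy>(C));
    }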
diff --git a/include/llvm/CodeGen/MachineValueType.h b/include/llvm/CodeGen/MachineValueType.h
index d991e4c216d9..0bdb38bfcbec 100644
--- a/include/llvm/CodeGen/MachineValueType.h
+++ b/include/llvm/CodeGen/MachineValueType.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include <cassert>
namespace llvm {
@@ -232,8 +233,7 @@ namespace llvm {
Any = 255
};
- SimpleValueType SimpleTy;
-
+ SimpleValueType SimpleTy = INVALID_SIMPLE_VALUE_TYPE;
// A class to represent the number of elements in a vector
//
@@ -270,7 +270,7 @@ namespace llvm {
}
};
- constexpr MVT() : SimpleTy(INVALID_SIMPLE_VALUE_TYPE) {}
+ constexpr MVT() = default;
constexpr MVT(SimpleValueType SVT) : SimpleTy(SVT) {}
bool operator>(const MVT& S) const { return SimpleTy > S.SimpleTy; }
@@ -780,7 +780,6 @@ namespace llvm {
return getSizeInBits() <= VT.getSizeInBits();
}
-
static MVT getFloatingPointVT(unsigned BitWidth) {
switch (BitWidth) {
default:
@@ -982,9 +981,12 @@ namespace llvm {
/// A simple iterator over the MVT::SimpleValueType enum.
struct mvt_iterator {
SimpleValueType VT;
+
mvt_iterator(SimpleValueType VT) : VT(VT) {}
+
MVT operator*() const { return VT; }
bool operator!=(const mvt_iterator &LHS) const { return VT != LHS.VT; }
+
mvt_iterator& operator++() {
VT = (MVT::SimpleValueType)((int)VT + 1);
assert((int)VT <= MVT::MAX_ALLOWED_VALUETYPE &&
@@ -992,8 +994,9 @@ namespace llvm {
return *this;
}
};
+
/// A range of the MVT::SimpleValueType enum.
- typedef iterator_range<mvt_iterator> mvt_range;
+ using mvt_range = iterator_range<mvt_iterator>;
public:
/// SimpleValueType Iteration
@@ -1001,32 +1004,39 @@ namespace llvm {
static mvt_range all_valuetypes() {
return mvt_range(MVT::FIRST_VALUETYPE, MVT::LAST_VALUETYPE);
}
+
static mvt_range integer_valuetypes() {
return mvt_range(MVT::FIRST_INTEGER_VALUETYPE,
(MVT::SimpleValueType)(MVT::LAST_INTEGER_VALUETYPE + 1));
}
+
static mvt_range fp_valuetypes() {
return mvt_range(MVT::FIRST_FP_VALUETYPE,
(MVT::SimpleValueType)(MVT::LAST_FP_VALUETYPE + 1));
}
+
static mvt_range vector_valuetypes() {
return mvt_range(MVT::FIRST_VECTOR_VALUETYPE,
(MVT::SimpleValueType)(MVT::LAST_VECTOR_VALUETYPE + 1));
}
+
static mvt_range integer_vector_valuetypes() {
return mvt_range(
MVT::FIRST_INTEGER_VECTOR_VALUETYPE,
(MVT::SimpleValueType)(MVT::LAST_INTEGER_VECTOR_VALUETYPE + 1));
}
+
static mvt_range fp_vector_valuetypes() {
return mvt_range(
MVT::FIRST_FP_VECTOR_VALUETYPE,
(MVT::SimpleValueType)(MVT::LAST_FP_VECTOR_VALUETYPE + 1));
}
+
static mvt_range integer_scalable_vector_valuetypes() {
return mvt_range(MVT::FIRST_INTEGER_SCALABLE_VALUETYPE,
(MVT::SimpleValueType)(MVT::LAST_INTEGER_SCALABLE_VALUETYPE + 1));
}
+
static mvt_range fp_scalable_vector_valuetypes() {
return mvt_range(MVT::FIRST_FP_SCALABLE_VALUETYPE,
(MVT::SimpleValueType)(MVT::LAST_FP_SCALABLE_VALUETYPE + 1));
@@ -1034,6 +1044,6 @@ namespace llvm {
/// @}
};
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_CODEGEN_MACHINEVALUETYPE_H
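The value-type ranges above pair naturally with the now-defaulted MVT; for example, scanning every integer type up to 64 bits (the per-target helper is hypothetical):

    for (MVT VT : MVT::integer_valuetypes())
      if (VT.getSizeInBits() <= 64)
        markTypeAsPromoted(VT); // hypothetical target hook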
diff --git a/include/llvm/CodeGen/MacroFusion.h b/include/llvm/CodeGen/MacroFusion.h
index 473784bc5841..dc105fdc68fd 100644
--- a/include/llvm/CodeGen/MacroFusion.h
+++ b/include/llvm/CodeGen/MacroFusion.h
@@ -1,4 +1,4 @@
-//===- MacroFusion.h - Macro Fusion ------------------------===//
+//===- MacroFusion.h - Macro Fusion -----------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,19 +12,26 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_CODEGEN_MACROFUSION_H
+#define LLVM_CODEGEN_MACROFUSION_H
+
#include <functional>
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/CodeGen/MachineScheduler.h"
+#include <memory>
namespace llvm {
+class MachineInstr;
+class ScheduleDAGMutation;
+class TargetInstrInfo;
+class TargetSubtargetInfo;
+
/// \brief Check if the instr pair, FirstMI and SecondMI, should be fused
/// together. Given SecondMI, when FirstMI is unspecified, check whether
/// SecondMI may be part of a fused pair at all.
-typedef std::function<bool(const TargetInstrInfo &TII,
- const TargetSubtargetInfo &TSI,
- const MachineInstr *FirstMI,
- const MachineInstr &SecondMI)> ShouldSchedulePredTy;
+using ShouldSchedulePredTy = std::function<bool(const TargetInstrInfo &TII,
+ const TargetSubtargetInfo &TSI,
+ const MachineInstr *FirstMI,
+ const MachineInstr &SecondMI)>;
/// \brief Create a DAG scheduling mutation to pair instructions back to back
/// for instructions that benefit according to the target-specific
@@ -39,3 +46,5 @@ std::unique_ptr<ScheduleDAGMutation>
createBranchMacroFusionDAGMutation(ShouldSchedulePredTy shouldScheduleAdjacent);
} // end namespace llvm
+
+#endif // LLVM_CODEGEN_MACROFUSION_H
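A hedged example of plugging a predicate into the new alias; the cmp+branch pairing rule is invented, and the registration line assumes the mutation hook declared above:

    static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
                                       const TargetSubtargetInfo &TSI,
                                       const MachineInstr *FirstMI,
                                       const MachineInstr &SecondMI) {
      if (!FirstMI)                      // "may SecondMI fuse at all?"
        return SecondMI.isBranch();
      return FirstMI->isCompare() && SecondMI.isBranch();
    }

    // Later, e.g. when building the scheduler:
    DAG->addMutation(createBranchMacroFusionDAGMutation(shouldScheduleAdjacent));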
diff --git a/include/llvm/CodeGen/PseudoSourceValue.h b/include/llvm/CodeGen/PseudoSourceValue.h
index 681ccb4b997c..f5aedb07e4d2 100644
--- a/include/llvm/CodeGen/PseudoSourceValue.h
+++ b/include/llvm/CodeGen/PseudoSourceValue.h
@@ -94,7 +94,7 @@ public:
explicit FixedStackPseudoSourceValue(int FI)
: PseudoSourceValue(FixedStack), FI(FI) {}
- static inline bool classof(const PseudoSourceValue *V) {
+ static bool classof(const PseudoSourceValue *V) {
return V->kind() == FixedStack;
}
@@ -126,7 +126,7 @@ class GlobalValuePseudoSourceValue : public CallEntryPseudoSourceValue {
public:
GlobalValuePseudoSourceValue(const GlobalValue *GV);
- static inline bool classof(const PseudoSourceValue *V) {
+ static bool classof(const PseudoSourceValue *V) {
return V->kind() == GlobalValueCallEntry;
}
@@ -140,7 +140,7 @@ class ExternalSymbolPseudoSourceValue : public CallEntryPseudoSourceValue {
public:
ExternalSymbolPseudoSourceValue(const char *ES);
- static inline bool classof(const PseudoSourceValue *V) {
+ static bool classof(const PseudoSourceValue *V) {
return V->kind() == ExternalSymbolCallEntry;
}
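Dropping the redundant inline changes nothing for callers; classof keeps feeding the usual isa/dyn_cast templates. A hedged use, with PSV an assumed const PseudoSourceValue * and the consumer invented:

    if (const auto *FS = dyn_cast<FixedStackPseudoSourceValue>(PSV))
      handleFrameIndex(FS->getFrameIndex()); // hypothetical consumer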
diff --git a/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h b/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h
index d82ab7d647e7..2107e5a31381 100644
--- a/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h
+++ b/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h
@@ -57,7 +57,7 @@ public:
int64_t &Off);
/// Parses tree in Ptr for base, index, offset addresses.
- static BaseIndexOffset match(SDValue Ptr);
+ static BaseIndexOffset match(SDValue Ptr, const SelectionDAG &DAG);
};
} // namespace llvm
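Call sites simply gain a DAG argument; a hedged one-liner with Ptr and DAG assumed in scope of a DAG combine:

    BaseIndexOffset Basis = BaseIndexOffset::match(Ptr, DAG);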
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index af418d3050e4..d9f8af0e21d1 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -1743,7 +1743,7 @@ public:
bool isConstant() const;
- static inline bool classof(const SDNode *N) {
+ static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::BUILD_VECTOR;
}
};
diff --git a/include/llvm/CodeGen/TargetPassConfig.h b/include/llvm/CodeGen/TargetPassConfig.h
index c109b7489cca..aaf0ab5d5481 100644
--- a/include/llvm/CodeGen/TargetPassConfig.h
+++ b/include/llvm/CodeGen/TargetPassConfig.h
@@ -1,4 +1,4 @@
-//===-- TargetPassConfig.h - Code Generation pass options -------*- C++ -*-===//
+//===- TargetPassConfig.h - Code Generation pass options --------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,19 +16,23 @@
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
+#include <cassert>
#include <string>
namespace llvm {
-class PassConfigImpl;
-class ScheduleDAGInstrs;
class LLVMTargetMachine;
struct MachineSchedContext;
+class PassConfigImpl;
+class ScheduleDAGInstrs;
// The old pass manager infrastructure is hidden in a legacy namespace now.
namespace legacy {
+
class PassManagerBase;
-}
+
+} // end namespace legacy
+
using legacy::PassManagerBase;
/// Discriminated union of Pass ID types.
@@ -50,10 +54,11 @@ class IdentifyingPassPtr {
AnalysisID ID;
Pass *P;
};
- bool IsInstance;
+ bool IsInstance = false;
+
public:
- IdentifyingPassPtr() : P(nullptr), IsInstance(false) {}
- IdentifyingPassPtr(AnalysisID IDPtr) : ID(IDPtr), IsInstance(false) {}
+ IdentifyingPassPtr() : P(nullptr) {}
+ IdentifyingPassPtr(AnalysisID IDPtr) : ID(IDPtr) {}
IdentifyingPassPtr(Pass *InstancePtr) : P(InstancePtr), IsInstance(true) {}
bool isValid() const { return P; }
@@ -63,6 +68,7 @@ public:
assert(!IsInstance && "Not a Pass ID");
return ID;
}
+
Pass *getInstance() const {
assert(IsInstance && "Not a Pass Instance");
return P;
@@ -93,31 +99,30 @@ public:
static char PostRAMachineLICMID;
private:
- PassManagerBase *PM;
+ PassManagerBase *PM = nullptr;
AnalysisID StartBefore = nullptr;
AnalysisID StartAfter = nullptr;
AnalysisID StopBefore = nullptr;
AnalysisID StopAfter = nullptr;
- bool Started;
- bool Stopped;
- bool AddingMachinePasses;
+ bool Started = true;
+ bool Stopped = false;
+ bool AddingMachinePasses = false;
protected:
LLVMTargetMachine *TM;
- PassConfigImpl *Impl; // Internal data structures
- bool Initialized; // Flagged after all passes are configured.
+ PassConfigImpl *Impl = nullptr; // Internal data structures
+ bool Initialized = false; // Flagged after all passes are configured.
// Target Pass Options
// Targets provide a default setting, user flags override.
- //
- bool DisableVerify;
+ bool DisableVerify = false;
/// Default setting for -enable-tail-merge on this target.
- bool EnableTailMerge;
+ bool EnableTailMerge = true;
/// Require processing of functions such that callees are generated before
/// callers.
- bool RequireCodeGenSCCOrder;
+ bool RequireCodeGenSCCOrder = false;
/// Add the actual instruction selection passes. This does not include
/// preparation passes on IR.
@@ -296,7 +301,6 @@ public:
/// printAndVerify - Add a pass to dump then verify the machine function, if
/// those steps are enabled.
- ///
void printAndVerify(const std::string &Banner);
/// Add a pass to print the machine function if printing is enabled.
@@ -430,4 +434,4 @@ protected:
} // end namespace llvm
-#endif
+#endif // LLVM_CODEGEN_TARGETPASSCONFIG_H
diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h
index b404b4ca701f..40d501edde10 100644
--- a/include/llvm/CodeGen/ValueTypes.h
+++ b/include/llvm/CodeGen/ValueTypes.h
@@ -17,7 +17,10 @@
#define LLVM_CODEGEN_VALUETYPES_H
#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/MathExtras.h"
#include <cassert>
+#include <cstdint>
#include <string>
namespace llvm {
@@ -30,13 +33,13 @@ namespace llvm {
/// can represent.
struct EVT {
private:
- MVT V;
- Type *LLVMTy;
+ MVT V = MVT::INVALID_SIMPLE_VALUE_TYPE;
+ Type *LLVMTy = nullptr;
public:
- constexpr EVT() : V(MVT::INVALID_SIMPLE_VALUE_TYPE), LLVMTy(nullptr) {}
- constexpr EVT(MVT::SimpleValueType SVT) : V(SVT), LLVMTy(nullptr) {}
- constexpr EVT(MVT S) : V(S), LLVMTy(nullptr) {}
+ constexpr EVT() = default;
+ constexpr EVT(MVT::SimpleValueType SVT) : V(SVT) {}
+ constexpr EVT(MVT S) : V(S) {}
bool operator==(EVT VT) const {
return !(*this != VT);
@@ -246,7 +249,6 @@ namespace llvm {
return getSizeInBits() <= VT.getSizeInBits();
}
-
/// Return the SimpleValueType held in the specified simple EVT.
MVT getSimpleVT() const {
assert(isSimple() && "Expected a SimpleValueType!");
@@ -430,6 +432,6 @@ namespace llvm {
unsigned getExtendedSizeInBits() const LLVM_READONLY;
};
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_CODEGEN_VALUETYPES_H
diff --git a/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h b/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h
index b2d3f5ea34a8..7c8cd121751a 100644
--- a/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h
+++ b/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h
@@ -25,7 +25,9 @@ public:
CVSymbolVisitor(SymbolVisitorCallbacks &Callbacks);
Error visitSymbolRecord(CVSymbol &Record);
+ Error visitSymbolRecord(CVSymbol &Record, uint32_t Offset);
Error visitSymbolStream(const CVSymbolArray &Symbols);
+ Error visitSymbolStream(const CVSymbolArray &Symbols, uint32_t InitialOffset);
private:
SymbolVisitorCallbacks &Callbacks;
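The new overloads let a caller thread a real stream offset through the walk; a hedged sketch with Symbols and Callbacks assumed constructed elsewhere, and 4 standing in for the bytes preceding the first record:

    CVSymbolVisitor Visitor(Callbacks);
    if (Error E = Visitor.visitSymbolStream(Symbols, /*InitialOffset=*/4))
      return E;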
diff --git a/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h b/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h
index 9fc90f13d347..78b284563afd 100644
--- a/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h
+++ b/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h
@@ -12,13 +12,19 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/BinaryStreamArray.h"
#include "llvm/Support/BinaryStreamReader.h"
-#include "llvm/Support/Endian.h"
+#include "llvm/Support/BinaryStreamRef.h"
+#include "llvm/Support/Error.h"
+#include <cstdint>
+#include <vector>
namespace llvm {
+
namespace codeview {
class DebugStringTableSubsection;
@@ -28,24 +34,22 @@ struct FileChecksumEntry {
FileChecksumKind Kind; // The type of checksum.
ArrayRef<uint8_t> Checksum; // The bytes of the checksum.
};
-}
-}
-namespace llvm {
+} // end namespace codeview
+
template <> struct VarStreamArrayExtractor<codeview::FileChecksumEntry> {
public:
- typedef void ContextType;
+ using ContextType = void;
Error operator()(BinaryStreamRef Stream, uint32_t &Len,
codeview::FileChecksumEntry &Item);
};
-}
-namespace llvm {
namespace codeview {
+
class DebugChecksumsSubsectionRef final : public DebugSubsectionRef {
- typedef VarStreamArray<codeview::FileChecksumEntry> FileChecksumArray;
- typedef FileChecksumArray::Iterator Iterator;
+ using FileChecksumArray = VarStreamArray<codeview::FileChecksumEntry>;
+ using Iterator = FileChecksumArray::Iterator;
public:
DebugChecksumsSubsectionRef()
@@ -89,10 +93,12 @@ private:
DenseMap<uint32_t, uint32_t> OffsetMap;
uint32_t SerializedSize = 0;
- llvm::BumpPtrAllocator Storage;
+ BumpPtrAllocator Storage;
std::vector<FileChecksumEntry> Checksums;
};
-}
-}
-#endif
+} // end namespace codeview
+
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_CODEVIEW_DEBUGCHECKSUMSSUBSECTION_H
diff --git a/include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h b/include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h
index f755b23422c7..2f9e9814d998 100644
--- a/include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h
+++ b/include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h
@@ -10,18 +10,21 @@
#ifndef LLVM_DEBUGINFO_CODEVIEW_DEBUGCROSSEXSUBSECTION_H
#define LLVM_DEBUGINFO_CODEVIEW_DEBUGCROSSEXSUBSECTION_H
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
#include "llvm/Support/BinaryStreamArray.h"
#include "llvm/Support/BinaryStreamReader.h"
-#include "llvm/Support/Endian.h"
-
+#include "llvm/Support/BinaryStreamRef.h"
+#include "llvm/Support/Error.h"
+#include <cstdint>
#include <map>
namespace llvm {
namespace codeview {
+
class DebugCrossModuleExportsSubsectionRef final : public DebugSubsectionRef {
- typedef FixedStreamArray<CrossModuleExport> ReferenceArray;
- typedef ReferenceArray::Iterator Iterator;
+ using ReferenceArray = FixedStreamArray<CrossModuleExport>;
+ using Iterator = ReferenceArray::Iterator;
public:
DebugCrossModuleExportsSubsectionRef()
@@ -58,7 +61,8 @@ public:
private:
std::map<uint32_t, uint32_t> Mappings;
};
-}
-}
-#endif
+} // end namespace codeview
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_CODEVIEW_DEBUGCROSSEXSUBSECTION_H
diff --git a/include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h b/include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h
index ea3a9a43d50b..8be7ef265c82 100644
--- a/include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h
+++ b/include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h
@@ -11,38 +11,43 @@
#define LLVM_DEBUGINFO_CODEVIEW_DEBUGCROSSIMPSUBSECTION_H
#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
#include "llvm/Support/BinaryStreamArray.h"
#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/BinaryStreamRef.h"
#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include <cstdint>
+#include <vector>
namespace llvm {
+
namespace codeview {
struct CrossModuleImportItem {
const CrossModuleImport *Header = nullptr;
- llvm::FixedStreamArray<support::ulittle32_t> Imports;
+ FixedStreamArray<support::ulittle32_t> Imports;
};
-}
-}
-namespace llvm {
+} // end namespace codeview
+
template <> struct VarStreamArrayExtractor<codeview::CrossModuleImportItem> {
public:
- typedef void ContextType;
+ using ContextType = void;
Error operator()(BinaryStreamRef Stream, uint32_t &Len,
codeview::CrossModuleImportItem &Item);
};
-}
-namespace llvm {
namespace codeview {
+
class DebugStringTableSubsection;
class DebugCrossModuleImportsSubsectionRef final : public DebugSubsectionRef {
- typedef VarStreamArray<CrossModuleImportItem> ReferenceArray;
- typedef ReferenceArray::Iterator Iterator;
+ using ReferenceArray = VarStreamArray<CrossModuleImportItem>;
+ using Iterator = ReferenceArray::Iterator;
public:
DebugCrossModuleImportsSubsectionRef()
@@ -82,7 +87,9 @@ private:
DebugStringTableSubsection &Strings;
StringMap<std::vector<support::ulittle32_t>> Mappings;
};
-}
-}
-#endif
+} // end namespace codeview
+
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_CODEVIEW_DEBUGCROSSIMPSUBSECTION_H
diff --git a/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h b/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h
index 7484af663105..b88c0eae1de2 100644
--- a/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h
+++ b/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h
@@ -7,19 +7,26 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEBUGINFO_CODEVIEW_BUGINLINEELINESSUBSECTION_H
-#define LLVM_DEBUGINFO_CODEVIEW_BUGINLINEELINESSUBSECTION_H
+#ifndef LLVM_DEBUGINFO_CODEVIEW_DEBUGINLINEELINESSUBSECTION_H
+#define LLVM_DEBUGINFO_CODEVIEW_DEBUGINLINEELINESSUBSECTION_H
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
#include "llvm/DebugInfo/CodeView/Line.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/Support/BinaryStreamArray.h"
#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/BinaryStreamRef.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
+#include <cstdint>
+#include <vector>
namespace llvm {
+
namespace codeview {
-class DebugInlineeLinesSubsectionRef;
class DebugChecksumsSubsection;
enum class InlineeLinesSignature : uint32_t {
@@ -40,18 +47,21 @@ struct InlineeSourceLine {
const InlineeSourceLineHeader *Header;
FixedStreamArray<support::ulittle32_t> ExtraFiles;
};
-}
+
+} // end namespace codeview
template <> struct VarStreamArrayExtractor<codeview::InlineeSourceLine> {
Error operator()(BinaryStreamRef Stream, uint32_t &Len,
codeview::InlineeSourceLine &Item);
+
bool HasExtraFiles = false;
};
namespace codeview {
+
class DebugInlineeLinesSubsectionRef final : public DebugSubsectionRef {
- typedef VarStreamArray<InlineeSourceLine> LinesArray;
- typedef LinesArray::Iterator Iterator;
+ using LinesArray = VarStreamArray<InlineeSourceLine>;
+ using Iterator = LinesArray::Iterator;
public:
DebugInlineeLinesSubsectionRef();
@@ -99,13 +109,13 @@ public:
private:
DebugChecksumsSubsection &Checksums;
-
bool HasExtraFiles = false;
uint32_t ExtraFileCount = 0;
-
std::vector<Entry> Entries;
};
-}
-}
-#endif
+} // end namespace codeview
+
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_CODEVIEW_DEBUGINLINEELINESSUBSECTION_H
diff --git a/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h b/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h
index f1feb1336cc5..53044b6c3dc8 100644
--- a/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h
+++ b/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h
@@ -1,4 +1,4 @@
-//===- DebugLinesSubsection.h --------------------------------*- C++ -*-===//
+//===- DebugLinesSubsection.h -----------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,14 +7,20 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGLINEFRAGMENT_H
-#define LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGLINEFRAGMENT_H
+#ifndef LLVM_DEBUGINFO_CODEVIEW_DEBUGLINESSUBSECTION_H
+#define LLVM_DEBUGINFO_CODEVIEW_DEBUGLINESSUBSECTION_H
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
#include "llvm/DebugInfo/CodeView/Line.h"
#include "llvm/Support/BinaryStreamArray.h"
#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/BinaryStreamRef.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
+#include <cstdint>
+#include <vector>
namespace llvm {
namespace codeview {
@@ -72,8 +78,9 @@ public:
class DebugLinesSubsectionRef final : public DebugSubsectionRef {
friend class LineColumnExtractor;
- typedef VarStreamArray<LineColumnEntry, LineColumnExtractor> LineInfoArray;
- typedef LineInfoArray::Iterator Iterator;
+
+ using LineInfoArray = VarStreamArray<LineColumnEntry, LineColumnExtractor>;
+ using Iterator = LineInfoArray::Iterator;
public:
DebugLinesSubsectionRef();
@@ -130,14 +137,14 @@ public:
private:
DebugChecksumsSubsection &Checksums;
-
uint32_t RelocOffset = 0;
uint16_t RelocSegment = 0;
uint32_t CodeSize = 0;
LineFlags Flags = LF_None;
std::vector<Block> Blocks;
};
-}
-}
-#endif
+} // end namespace codeview
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_CODEVIEW_DEBUGLINESSUBSECTION_H
diff --git a/include/llvm/DebugInfo/CodeView/DebugStringTableSubsection.h b/include/llvm/DebugInfo/CodeView/DebugStringTableSubsection.h
index be0a2344965b..7f0f10e4fbfa 100644
--- a/include/llvm/DebugInfo/CodeView/DebugStringTableSubsection.h
+++ b/include/llvm/DebugInfo/CodeView/DebugStringTableSubsection.h
@@ -12,17 +12,15 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
#include "llvm/Support/BinaryStreamRef.h"
#include "llvm/Support/Error.h"
-
-#include <stdint.h>
+#include <cstdint>
namespace llvm {
class BinaryStreamReader;
-class BinaryStreamRef;
-class BinaryStreamWriter;
namespace codeview {
@@ -83,7 +81,9 @@ private:
StringMap<uint32_t> Strings;
uint32_t StringSize = 1;
};
-}
-}
-#endif
+} // end namespace codeview
+
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_CODEVIEW_DEBUGSTRINGTABLESUBSECTION_H
diff --git a/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h b/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h
index ee17b47d8e63..fc0cf0d1d90e 100644
--- a/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h
+++ b/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h
@@ -1,4 +1,4 @@
-//===- DebugSubsection.h ------------------------------------*- C++ -*-===//
+//===- DebugSubsectionRecord.h ----------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,17 +7,22 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENTRECORD_H
-#define LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENTRECORD_H
+#ifndef LLVM_DEBUGINFO_CODEVIEW_DEBUGSUBSECTIONRECORD_H
+#define LLVM_DEBUGINFO_CODEVIEW_DEBUGSUBSECTIONRECORD_H
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/Support/BinaryStreamArray.h"
#include "llvm/Support/BinaryStreamRef.h"
-#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/MathExtras.h"
+#include <cstdint>
+#include <memory>
namespace llvm {
+
+class BinaryStreamWriter;
+
namespace codeview {
class DebugSubsection;
@@ -42,8 +47,8 @@ public:
BinaryStreamRef getRecordData() const;
private:
- CodeViewContainer Container;
- DebugSubsectionKind Kind;
+ CodeViewContainer Container = CodeViewContainer::ObjectFile;
+ DebugSubsectionKind Kind = DebugSubsectionKind::None;
BinaryStreamRef Data;
};
@@ -71,7 +76,7 @@ private:
CodeViewContainer Container;
};
-} // namespace codeview
+} // end namespace codeview
template <> struct VarStreamArrayExtractor<codeview::DebugSubsectionRecord> {
Error operator()(BinaryStreamRef Stream, uint32_t &Length,
@@ -88,8 +93,11 @@ template <> struct VarStreamArrayExtractor<codeview::DebugSubsectionRecord> {
};
namespace codeview {
-typedef VarStreamArray<DebugSubsectionRecord> DebugSubsectionArray;
-}
-} // namespace llvm
-#endif // LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENTRECORD_H
+using DebugSubsectionArray = VarStreamArray<DebugSubsectionRecord>;
+
+} // end namespace codeview
+
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_CODEVIEW_DEBUGSUBSECTIONRECORD_H
diff --git a/include/llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h b/include/llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h
index ad58a293cb09..a4c04b55eb4c 100644
--- a/include/llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h
+++ b/include/llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h
@@ -10,17 +10,23 @@
#ifndef LLVM_DEBUGINFO_CODEVIEW_DEBUGSYMBOLRVASUBSECTION_H
#define LLVM_DEBUGINFO_CODEVIEW_DEBUGSYMBOLRVASUBSECTION_H
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
#include "llvm/Support/BinaryStreamArray.h"
-#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
+#include <cstdint>
+#include <vector>
namespace llvm {
+
+class BinaryStreamReader;
+
namespace codeview {
class DebugSymbolRVASubsectionRef final : public DebugSubsectionRef {
public:
- typedef FixedStreamArray<support::ulittle32_t> ArrayType;
+ using ArrayType = FixedStreamArray<support::ulittle32_t>;
DebugSymbolRVASubsectionRef();
@@ -53,7 +59,9 @@ public:
private:
std::vector<support::ulittle32_t> RVAs;
};
-} // namespace codeview
-} // namespace llvm
-#endif
+} // end namespace codeview
+
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_CODEVIEW_DEBUGSYMBOLRVASUBSECTION_H
diff --git a/include/llvm/DebugInfo/CodeView/EnumTables.h b/include/llvm/DebugInfo/CodeView/EnumTables.h
index 5d54bb4cca84..ee0f0f7c6023 100644
--- a/include/llvm/DebugInfo/CodeView/EnumTables.h
+++ b/include/llvm/DebugInfo/CodeView/EnumTables.h
@@ -1,4 +1,4 @@
-//===- EnumTables.h Enum to string conversion tables ------------*- C++ -*-===//
+//===- EnumTables.h - Enum to string conversion tables ----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,11 +14,11 @@
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/Support/ScopedPrinter.h"
-
-#include <stdint.h>
+#include <cstdint>
namespace llvm {
namespace codeview {
+
ArrayRef<EnumEntry<SymbolKind>> getSymbolTypeNames();
ArrayRef<EnumEntry<TypeLeafKind>> getTypeLeafNames();
ArrayRef<EnumEntry<uint16_t>> getRegisterNames();
@@ -38,7 +38,8 @@ ArrayRef<EnumEntry<uint8_t>> getThunkOrdinalNames();
ArrayRef<EnumEntry<uint16_t>> getTrampolineNames();
ArrayRef<EnumEntry<COFF::SectionCharacteristics>>
getImageSectionCharacteristicNames();
-} // namespace codeview
-} // namespace llvm
+
+} // end namespace codeview
+} // end namespace llvm
#endif // LLVM_DEBUGINFO_CODEVIEW_ENUMTABLES_H
diff --git a/include/llvm/DebugInfo/CodeView/Formatters.h b/include/llvm/DebugInfo/CodeView/Formatters.h
index 1fbb0dd6f9b0..0842c1e373db 100644
--- a/include/llvm/DebugInfo/CodeView/Formatters.h
+++ b/include/llvm/DebugInfo/CodeView/Formatters.h
@@ -14,21 +14,27 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/Support/FormatAdapters.h"
-#include "llvm/Support/FormatProviders.h"
#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdint>
namespace llvm {
+
namespace codeview {
+
namespace detail {
-class GuidAdapter final : public llvm::FormatAdapter<ArrayRef<uint8_t>> {
+
+class GuidAdapter final : public FormatAdapter<ArrayRef<uint8_t>> {
ArrayRef<uint8_t> Guid;
public:
explicit GuidAdapter(ArrayRef<uint8_t> Guid);
explicit GuidAdapter(StringRef Guid);
- void format(llvm::raw_ostream &Stream, StringRef Style);
+
+ void format(raw_ostream &Stream, StringRef Style) override;
};
-}
+
+} // end namespace detail
inline detail::GuidAdapter fmt_guid(StringRef Item) {
return detail::GuidAdapter(Item);
@@ -37,11 +43,12 @@ inline detail::GuidAdapter fmt_guid(StringRef Item) {
inline detail::GuidAdapter fmt_guid(ArrayRef<uint8_t> Item) {
return detail::GuidAdapter(Item);
}
-}
+
+} // end namespace codeview
template <> struct format_provider<codeview::TypeIndex> {
public:
- static void format(const codeview::TypeIndex &V, llvm::raw_ostream &Stream,
+ static void format(const codeview::TypeIndex &V, raw_ostream &Stream,
StringRef Style) {
if (V.isNoneType())
Stream << "<no type>";
@@ -52,6 +59,7 @@ public:
}
}
};
-}
-#endif
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_CODEVIEW_FORMATTERS_H
diff --git a/include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h b/include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h
index 8b1540abf903..cc0c24301d49 100644
--- a/include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h
+++ b/include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h
@@ -1,4 +1,4 @@
-//===- LazyRandomTypeCollection.h ---------------------------- *- C++ --*-===//
+//===- LazyRandomTypeCollection.h -------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,12 +10,18 @@
#ifndef LLVM_DEBUGINFO_CODEVIEW_LAZYRANDOMTYPECOLLECTION_H
#define LLVM_DEBUGINFO_CODEVIEW_LAZYRANDOMTYPECOLLECTION_H
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/CodeView/TypeCollection.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/BinaryStreamArray.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/StringSaver.h"
+#include <cstdint>
+#include <vector>
namespace llvm {
namespace codeview {
@@ -43,7 +49,8 @@ namespace codeview {
/// into M chunks of roughly equal size, this yields a worst case lookup time
/// of O(N/M) and an amortized time of O(1).
class LazyRandomTypeCollection : public TypeCollection {
- typedef FixedStreamArray<TypeIndexOffset> PartialOffsetArray;
+ using PartialOffsetArray = FixedStreamArray<TypeIndexOffset>;
+
struct CacheEntry {
CVType Type;
uint32_t Offset;
diff --git a/include/llvm/DebugInfo/CodeView/StringsAndChecksums.h b/include/llvm/DebugInfo/CodeView/StringsAndChecksums.h
index 708b317164fc..1a8388224665 100644
--- a/include/llvm/DebugInfo/CodeView/StringsAndChecksums.h
+++ b/include/llvm/DebugInfo/CodeView/StringsAndChecksums.h
@@ -7,23 +7,18 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEBUGINFO_CODEVIEW_STRINGS_AND_CHECKSUMS_H
-#define LLVM_DEBUGINFO_CODEVIEW_STRINGS_AND_CHECKSUMS_H
+#ifndef LLVM_DEBUGINFO_CODEVIEW_STRINGSANDCHECKSUMS_H
+#define LLVM_DEBUGINFO_CODEVIEW_STRINGSANDCHECKSUMS_H
#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
-
#include <memory>
namespace llvm {
namespace codeview {
-class DebugSubsectionRecord;
-class DebugChecksumsSubsectionRef;
-class DebugStringTableSubsectionRef;
-class DebugChecksumsSubsection;
-class DebugStringTableSubsection;
-
class StringsAndChecksumsRef {
public:
// If no subsections are known about initially, we find as much as we can.
@@ -83,8 +78,9 @@ class StringsAndChecksums {
public:
using StringsPtr = std::shared_ptr<DebugStringTableSubsection>;
using ChecksumsPtr = std::shared_ptr<DebugChecksumsSubsection>;
+
// If no subsections are known about initially, we find as much as we can.
- StringsAndChecksums() {}
+ StringsAndChecksums() = default;
void setStrings(const StringsPtr &SP) { Strings = SP; }
void setChecksums(const ChecksumsPtr &CP) { Checksums = CP; }
@@ -100,7 +96,7 @@ private:
ChecksumsPtr Checksums;
};
-} // namespace codeview
-} // namespace llvm
+} // end namespace codeview
+} // end namespace llvm
-#endif
+#endif // LLVM_DEBUGINFO_CODEVIEW_STRINGSANDCHECKSUMS_H
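A hedged sketch of the builder side after this cleanup (the wiring function
is hypothetical; building the two subsections is elided):

    #include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"

    using namespace llvm::codeview;

    // Wire prebuilt subsections into the now default-constructible holder.
    StringsAndChecksums
    makeSC(std::shared_ptr<DebugStringTableSubsection> Strings,
           std::shared_ptr<DebugChecksumsSubsection> Checksums) {
      StringsAndChecksums SC; // StringsAndChecksums() = default;
      SC.setStrings(Strings);
      SC.setChecksums(Checksums);
      return SC;
    }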
diff --git a/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h b/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h
index 7080b0480757..5b6599d8c1db 100644
--- a/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h
+++ b/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h
@@ -51,6 +51,10 @@ public:
CodeViewContainer Container)
: Delegate(Delegate), Container(Container) {}
+ Error visitSymbolBegin(CVSymbol &Record, uint32_t Offset) override {
+ return visitSymbolBegin(Record);
+ }
+
Error visitSymbolBegin(CVSymbol &Record) override {
assert(!Mapping && "Already in a symbol mapping!");
Mapping = llvm::make_unique<MappingInfo>(Record.content(), Container);
diff --git a/include/llvm/DebugInfo/CodeView/SymbolRecord.h b/include/llvm/DebugInfo/CodeView/SymbolRecord.h
index 1cf77fcdecbe..7941af8be8af 100644
--- a/include/llvm/DebugInfo/CodeView/SymbolRecord.h
+++ b/include/llvm/DebugInfo/CodeView/SymbolRecord.h
@@ -21,8 +21,6 @@
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/Support/BinaryStreamArray.h"
#include "llvm/Support/Endian.h"
-#include "llvm/Support/Error.h"
-#include <cstddef>
#include <cstdint>
#include <vector>
@@ -35,6 +33,7 @@ protected:
public:
SymbolRecordKind getKind() const { return Kind; }
+
SymbolRecordKind Kind;
};
@@ -153,6 +152,7 @@ public:
: SymbolRecord(Kind), RecordOffset(RecordOffset) {}
std::vector<TypeIndex> Indices;
+
uint32_t RecordOffset;
};
@@ -165,8 +165,8 @@ struct BinaryAnnotationIterator {
int32_t S1;
};
- BinaryAnnotationIterator(ArrayRef<uint8_t> Annotations) : Data(Annotations) {}
BinaryAnnotationIterator() = default;
+ BinaryAnnotationIterator(ArrayRef<uint8_t> Annotations) : Data(Annotations) {}
BinaryAnnotationIterator(const BinaryAnnotationIterator &Other)
: Data(Other.Data) {}
@@ -342,9 +342,9 @@ public:
: SymbolRecord(SymbolRecordKind::InlineSiteSym),
RecordOffset(RecordOffset) {}
- llvm::iterator_range<BinaryAnnotationIterator> annotations() const {
- return llvm::make_range(BinaryAnnotationIterator(AnnotationData),
- BinaryAnnotationIterator());
+ iterator_range<BinaryAnnotationIterator> annotations() const {
+ return make_range(BinaryAnnotationIterator(AnnotationData),
+ BinaryAnnotationIterator());
}
uint32_t Parent;
@@ -479,6 +479,7 @@ public:
ulittle16_t Register;
ulittle16_t MayHaveNoName;
};
+
explicit DefRangeRegisterSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
DefRangeRegisterSym(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::DefRangeRegisterSym),
@@ -501,6 +502,7 @@ public:
ulittle16_t MayHaveNoName;
ulittle32_t OffsetInParent;
};
+
explicit DefRangeSubfieldRegisterSym(SymbolRecordKind Kind)
: SymbolRecord(Kind) {}
DefRangeSubfieldRegisterSym(uint32_t RecordOffset)
@@ -546,6 +548,7 @@ public:
ulittle16_t Flags;
little32_t BasePointerOffset;
};
+
explicit DefRangeRegisterRelSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
explicit DefRangeRegisterRelSym(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::DefRangeRegisterRelSym),
@@ -935,8 +938,8 @@ public:
uint32_t RecordOffset;
};
-typedef CVRecord<SymbolKind> CVSymbol;
-typedef VarStreamArray<CVSymbol> CVSymbolArray;
+using CVSymbol = CVRecord<SymbolKind>;
+using CVSymbolArray = VarStreamArray<CVSymbol>;
} // end namespace codeview
} // end namespace llvm
diff --git a/include/llvm/DebugInfo/CodeView/SymbolSerializer.h b/include/llvm/DebugInfo/CodeView/SymbolSerializer.h
index 42adbdb4e20f..b63ced5217b4 100644
--- a/include/llvm/DebugInfo/CodeView/SymbolSerializer.h
+++ b/include/llvm/DebugInfo/CodeView/SymbolSerializer.h
@@ -1,4 +1,4 @@
-//===- symbolSerializer.h ---------------------------------------*- C++ -*-===//
+//===- SymbolSerializer.h ---------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,21 +10,20 @@
#ifndef LLVM_DEBUGINFO_CODEVIEW_SYMBOLSERIALIZER_H
#define LLVM_DEBUGINFO_CODEVIEW_SYMBOLSERIALIZER_H
+#include "llvm/ADT/Optional.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/RecordSerialization.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/CodeView/SymbolRecordMapping.h"
#include "llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h"
-
-#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/BinaryByteStream.h"
#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Error.h"
+#include <cstdint>
+#include <vector>
namespace llvm {
-class BinaryStreamWriter;
namespace codeview {
class SymbolSerializer : public SymbolVisitorCallbacks {
@@ -45,6 +44,8 @@ class SymbolSerializer : public SymbolVisitorCallbacks {
}
public:
+ SymbolSerializer(BumpPtrAllocator &Storage, CodeViewContainer Container);
+
template <typename SymType>
static CVSymbol writeOneSymbol(SymType &Sym, BumpPtrAllocator &Storage,
CodeViewContainer Container) {
@@ -57,13 +58,11 @@ public:
return Result;
}
- SymbolSerializer(BumpPtrAllocator &Storage, CodeViewContainer Container);
-
- virtual Error visitSymbolBegin(CVSymbol &Record) override;
- virtual Error visitSymbolEnd(CVSymbol &Record) override;
+ Error visitSymbolBegin(CVSymbol &Record) override;
+ Error visitSymbolEnd(CVSymbol &Record) override;
#define SYMBOL_RECORD(EnumName, EnumVal, Name) \
- virtual Error visitKnownRecord(CVSymbol &CVR, Name &Record) override { \
+ Error visitKnownRecord(CVSymbol &CVR, Name &Record) override { \
return visitKnownRecordImpl(CVR, Record); \
}
#define SYMBOL_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
@@ -75,7 +74,8 @@ private:
return Mapping.visitKnownRecord(CVR, Record);
}
};
-}
-}
-#endif
+} // end namespace codeview
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_CODEVIEW_SYMBOLSERIALIZER_H
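A minimal sketch of the static one-shot helper shown above (the wrapper
function and the choice of ObjNameSym as the record type are illustrative):

    #include "llvm/DebugInfo/CodeView/SymbolSerializer.h"
    #include "llvm/Support/Allocator.h"

    using namespace llvm;
    using namespace llvm::codeview;

    // Serialize a single record; storage comes from the caller's allocator.
    CVSymbol serializeOne(BumpPtrAllocator &Alloc, ObjNameSym &Sym) {
      return SymbolSerializer::writeOneSymbol(Sym, Alloc,
                                              CodeViewContainer::Pdb);
    }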
diff --git a/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbackPipeline.h b/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbackPipeline.h
index 5f4205bd6e08..e29511a67b7f 100644
--- a/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbackPipeline.h
+++ b/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbackPipeline.h
@@ -30,6 +30,14 @@ public:
return Error::success();
}
+ Error visitSymbolBegin(CVSymbol &Record, uint32_t Offset) override {
+ for (auto Visitor : Pipeline) {
+ if (auto EC = Visitor->visitSymbolBegin(Record, Offset))
+ return EC;
+ }
+ return Error::success();
+ }
+
Error visitSymbolBegin(CVSymbol &Record) override {
for (auto Visitor : Pipeline) {
if (auto EC = Visitor->visitSymbolBegin(Record))
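A hedged sketch of composing the pipeline (names per these headers; the
driver function itself is invented): a deserializer runs first so that later
callbacks see deserialized records, and each hook fans out to every
registered visitor in order:

    #include "llvm/DebugInfo/CodeView/CVSymbolVisitor.h"
    #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
    #include "llvm/DebugInfo/CodeView/SymbolVisitorCallbackPipeline.h"

    using namespace llvm;
    using namespace llvm::codeview;

    // Chain a deserializer in front of user callbacks, then walk a stream.
    Error visitAll(const CVSymbolArray &Symbols,
                   SymbolVisitorCallbacks &UserCallbacks) {
      SymbolDeserializer Deserializer(nullptr, CodeViewContainer::Pdb);
      SymbolVisitorCallbackPipeline Pipeline;
      Pipeline.addCallbackToPipeline(Deserializer);
      Pipeline.addCallbackToPipeline(UserCallbacks);
      CVSymbolVisitor Visitor(Pipeline);
      return Visitor.visitSymbolStream(Symbols);
    }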
diff --git a/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h b/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h
index 2ef7eabdaa9d..0816f7c62656 100644
--- a/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h
+++ b/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h
@@ -29,8 +29,10 @@ public:
/// Paired begin/end actions for all symbols. Receives all record data,
/// including the fixed-length record prefix. visitSymbolBegin() should
- /// return
- /// the type of the Symbol, or an error if it cannot be determined.
+ /// return the type of the Symbol, or an error if it cannot be determined.
+ virtual Error visitSymbolBegin(CVSymbol &Record, uint32_t Offset) {
+ return Error::success();
+ }
virtual Error visitSymbolBegin(CVSymbol &Record) { return Error::success(); }
virtual Error visitSymbolEnd(CVSymbol &Record) { return Error::success(); }
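A sketch of overriding the new offset-aware hook (the OffsetLogger class is
hypothetical; the base-class defaults above make both hooks optional):

    #include "llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h"
    #include "llvm/Support/raw_ostream.h"

    // Report where each record sits in the symbol stream.
    class OffsetLogger : public llvm::codeview::SymbolVisitorCallbacks {
    public:
      llvm::Error visitSymbolBegin(llvm::codeview::CVSymbol &Record,
                                   uint32_t Offset) override {
        llvm::errs() << "symbol at offset " << Offset << '\n';
        return llvm::Error::success();
      }
    };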
diff --git a/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h b/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h
index c393b42cd27c..afe8942159e8 100644
--- a/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h
+++ b/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h
@@ -28,6 +28,8 @@ void discoverTypeIndices(ArrayRef<uint8_t> RecordData,
SmallVectorImpl<TiReference> &Refs);
void discoverTypeIndices(const CVType &Type,
SmallVectorImpl<TiReference> &Refs);
+void discoverTypeIndices(const CVType &Type,
+ SmallVectorImpl<TypeIndex> &Indices);
/// Discover type indices in symbol records. Returns false if this is an unknown
/// record.
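A short sketch of the new overload (the dump function is invented for
illustration): it yields the referenced TypeIndex values directly, where the
older overloads return raw TiReference spans:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;
    using namespace llvm::codeview;

    // Print every type index referenced by one type record.
    void dumpReferencedIndices(const CVType &Type) {
      SmallVector<TypeIndex, 8> Indices;
      discoverTypeIndices(Type, Indices);
      for (TypeIndex TI : Indices)
        outs() << TI.getIndex() << '\n';
    }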
diff --git a/include/llvm/DebugInfo/CodeView/TypeRecord.h b/include/llvm/DebugInfo/CodeView/TypeRecord.h
index 3a64a437aa4d..2efeb1b3cefd 100644
--- a/include/llvm/DebugInfo/CodeView/TypeRecord.h
+++ b/include/llvm/DebugInfo/CodeView/TypeRecord.h
@@ -15,6 +15,7 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
@@ -25,31 +26,30 @@
#include <vector>
namespace llvm {
-
-class BinaryStreamReader;
-
namespace codeview {
using support::little32_t;
using support::ulittle16_t;
using support::ulittle32_t;
-typedef CVRecord<TypeLeafKind> CVType;
-typedef RemappedRecord<TypeLeafKind> RemappedType;
+using CVType = CVRecord<TypeLeafKind>;
+using RemappedType = RemappedRecord<TypeLeafKind>;
struct CVMemberRecord {
TypeLeafKind Kind;
ArrayRef<uint8_t> Data;
};
-typedef VarStreamArray<CVType> CVTypeArray;
-typedef iterator_range<CVTypeArray::Iterator> CVTypeRange;
+using CVTypeArray = VarStreamArray<CVType>;
+using CVTypeRange = iterator_range<CVTypeArray::Iterator>;
/// Equivalent to CV_fldattr_t in cvinfo.h.
struct MemberAttributes {
uint16_t Attrs = 0;
+
enum {
MethodKindShift = 2,
};
+
MemberAttributes() = default;
explicit MemberAttributes(MemberAccess Access)
@@ -226,6 +226,7 @@ public:
TypeIndex getClassType() const { return ClassType; }
TypeIndex getFunctionType() const { return FunctionType; }
StringRef getName() const { return Name; }
+
TypeIndex ClassType;
TypeIndex FunctionType;
StringRef Name;
@@ -330,7 +331,6 @@ public:
TypeIndex ReferentType;
uint32_t Attrs;
-
Optional<MemberPointerInfo> MemberInfo;
private:
@@ -490,6 +490,7 @@ public:
UnderlyingType(UnderlyingType) {}
TypeIndex getUnderlyingType() const { return UnderlyingType; }
+
TypeIndex UnderlyingType;
};
@@ -505,6 +506,7 @@ public:
TypeIndex getType() const { return Type; }
uint8_t getBitOffset() const { return BitOffset; }
uint8_t getBitSize() const { return BitSize; }
+
TypeIndex Type;
uint8_t BitSize;
uint8_t BitOffset;
@@ -527,6 +529,7 @@ public:
}
uint32_t getEntryCount() const { return getSlots().size(); }
+
ArrayRef<VFTableSlotKind> SlotsRef;
std::vector<VFTableSlotKind> Slots;
};
@@ -541,9 +544,7 @@ public:
Name(Name) {}
StringRef getGuid() const { return Guid; }
-
uint32_t getAge() const { return Age; }
-
StringRef getName() const { return Name; }
StringRef Guid;
@@ -560,8 +561,8 @@ public:
: TypeRecord(TypeRecordKind::StringId), Id(Id), String(String) {}
TypeIndex getId() const { return Id; }
-
StringRef getString() const { return String; }
+
TypeIndex Id;
StringRef String;
};
@@ -576,9 +577,7 @@ public:
FunctionType(FunctionType), Name(Name) {}
TypeIndex getParentScope() const { return ParentScope; }
-
TypeIndex getFunctionType() const { return FunctionType; }
-
StringRef getName() const { return Name; }
TypeIndex ParentScope;
@@ -635,6 +634,7 @@ public:
ArgIndices(ArgIndices.begin(), ArgIndices.end()) {}
ArrayRef<TypeIndex> getArgs() const { return ArgIndices; }
+
SmallVector<TypeIndex, 4> ArgIndices;
};
@@ -656,6 +656,7 @@ public:
TypeIndex getOverriddenVTable() const { return OverriddenVFTable; }
uint32_t getVFPtrOffset() const { return VFPtrOffset; }
StringRef getName() const { return makeArrayRef(MethodNames).front(); }
+
ArrayRef<StringRef> getMethodNames() const {
return makeArrayRef(MethodNames).drop_front();
}
@@ -707,6 +708,7 @@ public:
: TypeRecord(TypeRecordKind::MethodOverloadList), Methods(Methods) {}
ArrayRef<OneMethodRecord> getMethods() const { return Methods; }
+
std::vector<OneMethodRecord> Methods;
};
@@ -723,6 +725,7 @@ public:
uint16_t getNumOverloads() const { return NumOverloads; }
TypeIndex getMethodList() const { return MethodList; }
StringRef getName() const { return Name; }
+
uint16_t NumOverloads;
TypeIndex MethodList;
StringRef Name;
@@ -874,7 +877,6 @@ public:
};
} // end namespace codeview
-
} // end namespace llvm
#endif // LLVM_DEBUGINFO_CODEVIEW_TYPERECORD_H
diff --git a/include/llvm/DebugInfo/CodeView/TypeSerializer.h b/include/llvm/DebugInfo/CodeView/TypeSerializer.h
index 988a2d4aa834..0e734a8170bd 100644
--- a/include/llvm/DebugInfo/CodeView/TypeSerializer.h
+++ b/include/llvm/DebugInfo/CodeView/TypeSerializer.h
@@ -10,19 +10,25 @@
#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPESERIALIZER_H
#define LLVM_DEBUGINFO_CODEVIEW_TYPESERIALIZER_H
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/RecordSerialization.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/CodeView/TypeRecordMapping.h"
#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
+#include "llvm/Support/Allocator.h"
#include "llvm/Support/BinaryByteStream.h"
#include "llvm/Support/BinaryStreamWriter.h"
-
-#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Allocator.h"
#include "llvm/Support/Error.h"
+#include <cassert>
+#include <cstdint>
+#include <memory>
+#include <vector>
namespace llvm {
-
namespace codeview {
class TypeHasher;
@@ -46,7 +52,7 @@ class TypeSerializer : public TypeVisitorCallbacks {
}
};
- typedef SmallVector<MutableArrayRef<uint8_t>, 2> MutableRecordList;
+ using MutableRecordList = SmallVector<MutableArrayRef<uint8_t>, 2>;
static constexpr uint8_t ContinuationLength = 8;
BumpPtrAllocator &RecordStorage;
@@ -82,7 +88,7 @@ class TypeSerializer : public TypeVisitorCallbacks {
public:
explicit TypeSerializer(BumpPtrAllocator &Storage, bool Hash = true);
- ~TypeSerializer();
+ ~TypeSerializer() override;
void reset();
@@ -146,7 +152,8 @@ private:
return Error::success();
}
};
-}
-}
-#endif
+} // end namespace codeview
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_CODEVIEW_TYPESERIALIZER_H
diff --git a/include/llvm/DebugInfo/CodeView/TypeServerHandler.h b/include/llvm/DebugInfo/CodeView/TypeServerHandler.h
index 35f06eaf6eb4..e96baad9ceae 100644
--- a/include/llvm/DebugInfo/CodeView/TypeServerHandler.h
+++ b/include/llvm/DebugInfo/CodeView/TypeServerHandler.h
@@ -10,16 +10,17 @@
#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPESERVERHANDLER_H
#define LLVM_DEBUGINFO_CODEVIEW_TYPESERVERHANDLER_H
-#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/Support/Error.h"
namespace llvm {
namespace codeview {
+
+class TypeServer2Record;
class TypeVisitorCallbacks;
class TypeServerHandler {
public:
- virtual ~TypeServerHandler() {}
+ virtual ~TypeServerHandler() = default;
/// Handle a TypeServer record. If the implementation returns true
/// the record will not be processed by the top-level visitor. If
@@ -30,7 +31,8 @@ public:
return false;
}
};
-}
-}
-#endif
+} // end namespace codeview
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_CODEVIEW_TYPESERVERHANDLER_H
diff --git a/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h b/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
index 8bde63efe188..eb6d0f541c1e 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
@@ -12,8 +12,8 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
-#include "llvm/Support/DataExtractor.h"
#include <cstdint>
#include <utility>
@@ -41,20 +41,31 @@ class DWARFAcceleratorTable {
struct Header Hdr;
struct HeaderData HdrData;
- DataExtractor AccelSection;
+ DWARFDataExtractor AccelSection;
DataExtractor StringSection;
- const RelocAddrMap& Relocs;
public:
- DWARFAcceleratorTable(DataExtractor AccelSection, DataExtractor StringSection,
- const RelocAddrMap &Relocs)
- : AccelSection(AccelSection), StringSection(StringSection), Relocs(Relocs) {}
+ DWARFAcceleratorTable(const DWARFDataExtractor &AccelSection,
+ DataExtractor StringSection)
+ : AccelSection(AccelSection), StringSection(StringSection) {}
bool extract();
uint32_t getNumBuckets();
uint32_t getNumHashes();
uint32_t getSizeHdr();
uint32_t getHeaderDataLength();
+ ArrayRef<std::pair<HeaderData::AtomType, HeaderData::Form>> getAtomsDesc();
+ bool validateForms();
+
+ /// Return information related to the DWARF DIE we're looking for when
+ /// performing a lookup by name.
+ ///
+ /// \param HashDataOffset an offset into the hash data table
+  /// \returns DIEOffset, the offset into the .debug_info section for the DIE
+  /// related to the input hash data offset. Currently this function returns
+  /// only the DIE offset, but it can be extended to return more data about
+  /// the DIE.
+ uint32_t readAtoms(uint32_t &HashDataOffset);
void dump(raw_ostream &OS) const;
};
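A hedged sketch of the reworked interface (the wrapper and the address size
of 4 are assumptions): the relocation map now travels inside the
DWARFDataExtractor rather than as a separate constructor argument:

    #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"

    using namespace llvm;

    // Build, validate, and dump one accelerator table.
    bool dumpAccelTable(const DWARFSection &AccelSec, StringRef StrData,
                        bool IsLittleEndian, raw_ostream &OS) {
      DWARFDataExtractor AccelData(AccelSec, IsLittleEndian, /*AddrSize=*/4);
      DataExtractor StrExtractor(StrData, IsLittleEndian, /*AddrSize=*/4);
      DWARFAcceleratorTable Table(AccelData, StrExtractor);
      if (!Table.extract() || !Table.validateForms())
        return false;
      Table.dump(OS);
      return true;
    }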
diff --git a/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h b/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h
index b4e4721e3d51..a18adf87bf8e 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h
@@ -20,8 +20,8 @@ public:
DWARFCompileUnit(DWARFContext &Context, const DWARFSection &Section,
const DWARFDebugAbbrev *DA, const DWARFSection *RS,
StringRef SS, const DWARFSection &SOS,
- const DWARFSection *AOS, StringRef LS, bool LE, bool IsDWO,
- const DWARFUnitSectionBase &UnitSection,
+ const DWARFSection *AOS, const DWARFSection &LS, bool LE,
+ bool IsDWO, const DWARFUnitSectionBase &UnitSection,
const DWARFUnitIndex::Entry *Entry)
: DWARFUnit(Context, Section, DA, RS, SS, SOS, AOS, LS, LE, IsDWO,
UnitSection, Entry) {}
diff --git a/include/llvm/DebugInfo/DWARF/DWARFContext.h b/include/llvm/DebugInfo/DWARF/DWARFContext.h
index 4bf34d52bcba..739aa1f9ee74 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFContext.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFContext.h
@@ -45,12 +45,6 @@ class DataExtractor;
class MemoryBuffer;
class raw_ostream;
-/// Reads a value from data extractor and applies a relocation to the result if
-/// one exists for the given offset.
-uint64_t getRelocatedValue(const DataExtractor &Data, uint32_t Size,
- uint32_t *Off, const RelocAddrMap *Relocs,
- uint64_t *SecNdx = nullptr);
-
/// DWARFContext
/// This data structure is the top level entity that deals with dwarf debug
/// information parsing. The actual data is supplied through pure virtual
@@ -289,6 +283,11 @@ private:
DWARFCompileUnit *getCompileUnitForAddress(uint64_t Address);
};
+/// Used as a return value for an error callback passed to the DWARF context.
+/// The callback should return Halt if the client application wants to stop
+/// object parsing, and Continue otherwise.
+enum class ErrorPolicy { Halt, Continue };
+
/// DWARFContextInMemory is the simplest possible implementation of a
/// DWARFContext. It assumes all content is available in memory and stores
/// pointers to it.
@@ -346,9 +345,14 @@ class DWARFContextInMemory : public DWARFContext {
Error maybeDecompress(const object::SectionRef &Sec, StringRef Name,
StringRef &Data);
+  /// Implements the default error-reporting policy: prints an error
+  /// message and returns Continue, so the DWARF context ignores the error.
+ static ErrorPolicy defaultErrorHandler(Error E);
+
public:
- DWARFContextInMemory(const object::ObjectFile &Obj,
- const LoadedObjectInfo *L = nullptr);
+ DWARFContextInMemory(
+ const object::ObjectFile &Obj, const LoadedObjectInfo *L = nullptr,
+ function_ref<ErrorPolicy(Error)> HandleError = defaultErrorHandler);
DWARFContextInMemory(const StringMap<std::unique_ptr<MemoryBuffer>> &Sections,
uint8_t AddrSize,
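A sketch of the new error hook (the factory function is hypothetical): a
caller can now report each error and halt parsing, instead of relying on the
default print-and-continue handler:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/DebugInfo/DWARF/DWARFContext.h"
    #include "llvm/Object/ObjectFile.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    // Report each error, then stop object parsing.
    std::unique_ptr<DWARFContextInMemory>
    makeStrictContext(const object::ObjectFile &Obj) {
      return make_unique<DWARFContextInMemory>(Obj, nullptr, [](Error E) {
        logAllUnhandledErrors(std::move(E), errs(), "DWARF error: ");
        return ErrorPolicy::Halt;
      });
    }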
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h b/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h
new file mode 100644
index 000000000000..ef4360f66621
--- /dev/null
+++ b/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h
@@ -0,0 +1,48 @@
+//===- DWARFDataExtractor.h -------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_DWARFDATAEXTRACTOR_H
+#define LLVM_DEBUGINFO_DWARFDATAEXTRACTOR_H
+
+#include "llvm/DebugInfo/DWARF/DWARFSection.h"
+#include "llvm/Support/DataExtractor.h"
+
+namespace llvm {
+
+/// A DataExtractor (typically for an in-memory copy of an object-file section)
+/// plus a relocation map for that section, if there is one.
+class DWARFDataExtractor : public DataExtractor {
+ const RelocAddrMap *RelocMap = nullptr;
+public:
+ /// Constructor for the normal case of extracting data from a DWARF section.
+ /// The DWARFSection's lifetime must be at least as long as the extractor's.
+ DWARFDataExtractor(const DWARFSection &Section, bool IsLittleEndian,
+ uint8_t AddressSize)
+ : DataExtractor(Section.Data, IsLittleEndian, AddressSize),
+ RelocMap(&Section.Relocs) {}
+
+ /// Constructor for cases when there are no relocations.
+ DWARFDataExtractor(StringRef Data, bool IsLittleEndian, uint8_t AddressSize)
+ : DataExtractor(Data, IsLittleEndian, AddressSize) {}
+
+ /// Extracts a value and applies a relocation to the result if
+ /// one exists for the given offset.
+ uint64_t getRelocatedValue(uint32_t Size, uint32_t *Off,
+ uint64_t *SectionIndex = nullptr) const;
+
+ /// Extracts an address-sized value and applies a relocation to the result if
+ /// one exists for the given offset.
+ uint64_t getRelocatedAddress(uint32_t *Off, uint64_t *SecIx = nullptr) const {
+ return getRelocatedValue(getAddressSize(), Off, SecIx);
+ }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_DWARFDATAEXTRACTOR_H
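A minimal usage sketch for the new class (the free function is invented):
read one relocated, address-sized value from the start of a section, with
the section outliving the extractor as required above:

    #include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"

    using namespace llvm;

    // Extract the first address in a section, applying any relocation.
    uint64_t firstAddress(const DWARFSection &Section, bool IsLittleEndian,
                          uint8_t AddrSize) {
      DWARFDataExtractor Data(Section, IsLittleEndian, AddrSize);
      uint32_t Offset = 0;
      return Data.getRelocatedAddress(&Offset);
    }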
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h b/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h
index 5c591b3de491..88c8f57bc33c 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h
@@ -12,6 +12,7 @@
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h"
+#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
#include <cstdint>
namespace llvm {
@@ -40,8 +41,7 @@ public:
/// High performance extraction should use this call.
bool extractFast(const DWARFUnit &U, uint32_t *OffsetPtr,
- const DataExtractor &DebugInfoData,
- uint32_t UEndOffset,
+ const DWARFDataExtractor &DebugInfoData, uint32_t UEndOffset,
uint32_t Depth);
uint32_t getOffset() const { return Offset; }
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
index 4d624812f186..0c8f98aa62f9 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
@@ -12,9 +12,9 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/DIContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
-#include "llvm/Support/DataExtractor.h"
#include <cstdint>
#include <map>
#include <string>
@@ -26,9 +26,6 @@ class raw_ostream;
class DWARFDebugLine {
public:
- DWARFDebugLine(const RelocAddrMap *LineInfoRelocMap)
- : RelocMap(LineInfoRelocMap) {}
-
struct FileNameEntry {
FileNameEntry() = default;
@@ -98,7 +95,7 @@ public:
void clear();
void dump(raw_ostream &OS) const;
- bool parse(DataExtractor DebugLineData, uint32_t *OffsetPtr);
+ bool parse(const DWARFDataExtractor &DebugLineData, uint32_t *OffsetPtr);
};
/// Standard .debug_line state machine structure.
@@ -220,8 +217,7 @@ public:
void clear();
/// Parse prologue and all rows.
- bool parse(DataExtractor DebugLineData, const RelocAddrMap *RMap,
- uint32_t *OffsetPtr);
+ bool parse(const DWARFDataExtractor &DebugLineData, uint32_t *OffsetPtr);
using RowVector = std::vector<Row>;
using RowIter = RowVector::const_iterator;
@@ -238,7 +234,7 @@ public:
};
const LineTable *getLineTable(uint32_t Offset) const;
- const LineTable *getOrParseLineTable(DataExtractor DebugLineData,
+ const LineTable *getOrParseLineTable(const DWARFDataExtractor &DebugLineData,
uint32_t Offset);
private:
@@ -261,7 +257,6 @@ private:
using LineTableIter = LineTableMapTy::iterator;
using LineTableConstIter = LineTableMapTy::const_iterator;
- const RelocAddrMap *RelocMap;
LineTableMapTy LineTableMap;
};
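A hedged sketch of the simplified parse path (the wrapper is illustrative):
with the RelocAddrMap folded into DWARFDataExtractor, line-table parsing no
longer threads a relocation map separately:

    #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"

    using namespace llvm;

    // Parse (or fetch a cached) line table at a statement-list offset.
    const DWARFDebugLine::LineTable *
    parseLineTable(DWARFDebugLine &DL, const DWARFSection &LineSection,
                   bool IsLittleEndian, uint8_t AddrSize, uint32_t Offset) {
      DWARFDataExtractor LineData(LineSection, IsLittleEndian, AddrSize);
      return DL.getOrParseLineTable(LineData, Offset);
    }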
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h b/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
index 821da8f9b536..c2b8d0cd73d8 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
@@ -11,8 +11,8 @@
#define LLVM_DEBUGINFO_DWARF_DWARFDEBUGLOC_H
#include "llvm/ADT/SmallVector.h"
+#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
-#include "llvm/Support/DataExtractor.h"
#include <cstdint>
namespace llvm {
@@ -45,18 +45,13 @@ class DWARFDebugLoc {
/// the locations in which the variable is stored.
LocationLists Locations;
- /// A map used to resolve binary relocations.
- const RelocAddrMap &RelocMap;
-
public:
- DWARFDebugLoc(const RelocAddrMap &LocRelocMap) : RelocMap(LocRelocMap) {}
-
/// Print the location lists found within the debug_loc section.
void dump(raw_ostream &OS) const;
/// Parse the debug_loc section accessible via the 'data' parameter using the
- /// specified address size to interpret the address ranges.
- void parse(DataExtractor data, unsigned AddressSize);
+ /// address size also given in 'data' to interpret the address ranges.
+ void parse(const DWARFDataExtractor &data);
};
class DWARFDebugLocDWO {
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h b/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h
index 49beec92ecc6..bcba14b1630d 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h
@@ -10,8 +10,8 @@
#ifndef LLVM_DEBUGINFO_DWARF_DWARFDEBUGRANGELIST_H
#define LLVM_DEBUGINFO_DWARF_DWARFDEBUGRANGELIST_H
+#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
-#include "llvm/Support/DataExtractor.h"
#include <cassert>
#include <cstdint>
#include <vector>
@@ -79,7 +79,7 @@ public:
void clear();
void dump(raw_ostream &OS) const;
- bool extract(DataExtractor data, uint32_t *offset_ptr, const RelocAddrMap& Relocs);
+ bool extract(const DWARFDataExtractor &data, uint32_t *offset_ptr);
const std::vector<RangeListEntry> &getEntries() { return Entries; }
/// getAbsoluteRanges - Returns absolute address ranges defined by this range
diff --git a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
index 78fa6639db08..008dba9b42ac 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
@@ -14,7 +14,7 @@
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/BinaryFormat/Dwarf.h"
-#include "llvm/Support/DataExtractor.h"
+#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
#include <cstdint>
namespace llvm {
@@ -105,14 +105,13 @@ public:
/// Extracts a value in \p Data at offset \p *OffsetPtr.
///
- /// The passed DWARFUnit is allowed to be nullptr, in which
- /// case no relocation processing will be performed and some
+ /// The passed DWARFUnit is allowed to be nullptr, in which case some
/// kinds of forms that depend on Unit information are disallowed.
- /// \param Data The DataExtractor to use.
- /// \param OffsetPtr The offset within DataExtractor where the data starts.
+ /// \param Data The DWARFDataExtractor to use.
+ /// \param OffsetPtr The offset within \p Data where the data starts.
/// \param U The optional DWARFUnit supplying information for some forms.
/// \returns whether the extraction succeeded.
- bool extractValue(const DataExtractor &Data, uint32_t *OffsetPtr,
+ bool extractValue(const DWARFDataExtractor &Data, uint32_t *OffsetPtr,
const DWARFUnit *U);
bool isInlinedCStr() const {
diff --git a/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h b/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h
index 2041d40eb53a..4a5793ecb8fa 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h
@@ -32,7 +32,7 @@ public:
DWARFTypeUnit(DWARFContext &Context, const DWARFSection &Section,
const DWARFDebugAbbrev *DA, const DWARFSection *RS,
StringRef SS, const DWARFSection &SOS, const DWARFSection *AOS,
- StringRef LS, bool LE, bool IsDWO,
+ const DWARFSection &LS, bool LE, bool IsDWO,
const DWARFUnitSectionBase &UnitSection,
const DWARFUnitIndex::Entry *Entry)
: DWARFUnit(Context, Section, DA, RS, SS, SOS, AOS, LS, LE, IsDWO,
diff --git a/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index d7ccaf82bc9a..ea36ab7ab5b6 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -58,7 +58,7 @@ protected:
virtual void parseImpl(DWARFContext &Context, const DWARFSection &Section,
const DWARFDebugAbbrev *DA, const DWARFSection *RS,
StringRef SS, const DWARFSection &SOS,
- const DWARFSection *AOS, StringRef LS,
+ const DWARFSection *AOS, const DWARFSection &LS,
bool isLittleEndian, bool isDWO) = 0;
};
@@ -91,7 +91,7 @@ private:
void parseImpl(DWARFContext &Context, const DWARFSection &Section,
const DWARFDebugAbbrev *DA, const DWARFSection *RS,
StringRef SS, const DWARFSection &SOS, const DWARFSection *AOS,
- StringRef LS, bool LE, bool IsDWO) override {
+ const DWARFSection &LS, bool LE, bool IsDWO) override {
if (Parsed)
return;
const auto &Index = getDWARFUnitIndex(Context, UnitType::Section);
@@ -118,7 +118,7 @@ class DWARFUnit {
const DWARFDebugAbbrev *Abbrev;
const DWARFSection *RangeSection;
uint32_t RangeSectionBase;
- StringRef LineSection;
+ const DWARFSection &LineSection;
StringRef StringSection;
const DWARFSection &StringOffsetSection;
uint64_t StringOffsetSectionBase = 0;
@@ -166,15 +166,16 @@ protected:
public:
DWARFUnit(DWARFContext &Context, const DWARFSection &Section,
const DWARFDebugAbbrev *DA, const DWARFSection *RS, StringRef SS,
- const DWARFSection &SOS, const DWARFSection *AOS, StringRef LS,
- bool LE, bool IsDWO, const DWARFUnitSectionBase &UnitSection,
+ const DWARFSection &SOS, const DWARFSection *AOS,
+ const DWARFSection &LS, bool LE, bool IsDWO,
+ const DWARFUnitSectionBase &UnitSection,
const DWARFUnitIndex::Entry *IndexEntry = nullptr);
virtual ~DWARFUnit();
DWARFContext& getContext() const { return Context; }
- StringRef getLineSection() const { return LineSection; }
+ const DWARFSection &getLineSection() const { return LineSection; }
StringRef getStringSection() const { return StringSection; }
const DWARFSection &getStringOffsetSection() const {
return StringOffsetSection;
@@ -194,13 +195,11 @@ public:
}
bool getAddrOffsetSectionItem(uint32_t Index, uint64_t &Result) const;
- // FIXME: Result should be uint64_t in DWARF64.
bool getStringOffsetSectionItem(uint32_t Index, uint64_t &Result) const;
- uint64_t getStringOffsetSectionRelocation(uint32_t Index) const;
- DataExtractor getDebugInfoExtractor() const {
- return DataExtractor(InfoSection.Data, isLittleEndian,
- getAddressByteSize());
+ DWARFDataExtractor getDebugInfoExtractor() const {
+ return DWARFDataExtractor(InfoSection, isLittleEndian,
+ getAddressByteSize());
}
DataExtractor getStringExtractor() const {
diff --git a/include/llvm/DebugInfo/PDB/IPDBDataStream.h b/include/llvm/DebugInfo/PDB/IPDBDataStream.h
index 9594dc1591a7..67b5a06d7c59 100644
--- a/include/llvm/DebugInfo/PDB/IPDBDataStream.h
+++ b/include/llvm/DebugInfo/PDB/IPDBDataStream.h
@@ -1,4 +1,4 @@
-//===- IPDBDataStream.h - base interface for child enumerator -*- C++ ---*-===//
+//===- IPDBDataStream.h - base interface for child enumerator ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,9 +10,10 @@
#ifndef LLVM_DEBUGINFO_PDB_IPDBDATASTREAM_H
#define LLVM_DEBUGINFO_PDB_IPDBDATASTREAM_H
-#include "PDBTypes.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
+#include <cstdint>
+#include <string>
namespace llvm {
namespace pdb {
@@ -22,18 +23,19 @@ namespace pdb {
/// stream type.
class IPDBDataStream {
public:
- typedef llvm::SmallVector<uint8_t, 32> RecordType;
+ using RecordType = SmallVector<uint8_t, 32>;
virtual ~IPDBDataStream();
virtual uint32_t getRecordCount() const = 0;
virtual std::string getName() const = 0;
- virtual llvm::Optional<RecordType> getItemAtIndex(uint32_t Index) const = 0;
+ virtual Optional<RecordType> getItemAtIndex(uint32_t Index) const = 0;
virtual bool getNext(RecordType &Record) = 0;
virtual void reset() = 0;
virtual IPDBDataStream *clone() const = 0;
};
-}
-}
-#endif
+} // end namespace pdb
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_PDB_IPDBDATASTREAM_H
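A sketch of consuming the interface (the dump helper is hypothetical);
RecordType is the SmallVector alias introduced above:

    #include "llvm/DebugInfo/PDB/IPDBDataStream.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;
    using namespace llvm::pdb;

    // Rewind a stream and report the size of each record in it.
    void dumpRecordSizes(IPDBDataStream &Stream, raw_ostream &OS) {
      Stream.reset();
      IPDBDataStream::RecordType Record;
      while (Stream.getNext(Record))
        OS << Stream.getName() << ": " << Record.size() << " bytes\n";
    }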
diff --git a/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h b/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h
index e48dc250822e..b6b7d95f6282 100644
--- a/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h
+++ b/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h
@@ -18,8 +18,8 @@ namespace pdb {
template <typename ChildType> class IPDBEnumChildren {
public:
- typedef std::unique_ptr<ChildType> ChildTypePtr;
- typedef IPDBEnumChildren<ChildType> MyType;
+ using ChildTypePtr = std::unique_ptr<ChildType>;
+ using MyType = IPDBEnumChildren<ChildType>;
virtual ~IPDBEnumChildren() = default;
diff --git a/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h b/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h
index 2885081628f6..5f6e7ab92a96 100644
--- a/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h
+++ b/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h
@@ -7,22 +7,23 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEBUGINFO_PDB_RAW_DBIMODULELIST_H
-#define LLVM_DEBUGINFO_PDB_RAW_DBIMODULELIST_H
+#ifndef LLVM_DEBUGINFO_PDB_NATIVE_DBIMODULELIST_H
+#define LLVM_DEBUGINFO_PDB_NATIVE_DBIMODULELIST_H
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator.h"
-#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
#include "llvm/Support/BinaryStreamArray.h"
#include "llvm/Support/BinaryStreamRef.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
+#include <cstddef>
#include <cstdint>
+#include <iterator>
#include <vector>
namespace llvm {
-namespace codeview {}
namespace pdb {
class DbiModuleList;
@@ -31,9 +32,9 @@ struct FileInfoSubstreamHeader;
class DbiModuleSourceFilesIterator
: public iterator_facade_base<DbiModuleSourceFilesIterator,
std::random_access_iterator_tag, StringRef> {
- typedef iterator_facade_base<DbiModuleSourceFilesIterator,
- std::random_access_iterator_tag, StringRef>
- BaseType;
+ using BaseType =
+ iterator_facade_base<DbiModuleSourceFilesIterator,
+ std::random_access_iterator_tag, StringRef>;
public:
DbiModuleSourceFilesIterator(const DbiModuleList &Modules, uint32_t Modi,
@@ -110,7 +111,8 @@ private:
BinaryStreamRef FileInfoSubstream;
BinaryStreamRef NamesBuffer;
};
-}
-}
-#endif // LLVM_DEBUGINFO_PDB_RAW_DBIMODULELIST_H
\ No newline at end of file
+} // end namespace pdb
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_PDB_NATIVE_DBIMODULELIST_H
diff --git a/include/llvm/DebugInfo/PDB/Native/Hash.h b/include/llvm/DebugInfo/PDB/Native/Hash.h
index 0340554d7b0b..1f11d43ecdd4 100644
--- a/include/llvm/DebugInfo/PDB/Native/Hash.h
+++ b/include/llvm/DebugInfo/PDB/Native/Hash.h
@@ -7,19 +7,21 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEBUGINFO_PDB_RAW_HASH_H
-#define LLVM_DEBUGINFO_PDB_RAW_HASH_H
+#ifndef LLVM_DEBUGINFO_PDB_NATIVE_HASH_H
+#define LLVM_DEBUGINFO_PDB_NATIVE_HASH_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
-#include <stdint.h>
+#include <cstdint>
namespace llvm {
namespace pdb {
+
uint32_t hashStringV1(StringRef Str);
uint32_t hashStringV2(StringRef Str);
uint32_t hashBufferV8(ArrayRef<uint8_t> Data);
-}
-}
-#endif
+} // end namespace pdb
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_PDB_NATIVE_HASH_H
diff --git a/include/llvm/DebugInfo/PDB/Native/HashTable.h b/include/llvm/DebugInfo/PDB/Native/HashTable.h
index 46eefa968e52..05c70c4f2175 100644
--- a/include/llvm/DebugInfo/PDB/Native/HashTable.h
+++ b/include/llvm/DebugInfo/PDB/Native/HashTable.h
@@ -7,36 +7,36 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEBUGINFO_PDB_RAW_HASHTABLE_H
-#define LLVM_DEBUGINFO_PDB_RAW_HASHTABLE_H
+#ifndef LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
+#define LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
-#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SparseBitVector.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator.h"
-#include "llvm/Support/BinaryStreamArray.h"
-#include "llvm/Support/BinaryStreamReader.h"
-#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
-#include "llvm/Support/MathExtras.h"
-
#include <cstdint>
+#include <iterator>
#include <utility>
+#include <vector>
namespace llvm {
+
+class BinaryStreamReader;
+class BinaryStreamWriter;
+
namespace pdb {
class HashTableIterator;
class HashTable {
friend class HashTableIterator;
+
struct Header {
support::ulittle32_t Size;
support::ulittle32_t Capacity;
};
- typedef std::vector<std::pair<uint32_t, uint32_t>> BucketList;
+ using BucketList = std::vector<std::pair<uint32_t, uint32_t>>;
public:
HashTable();
@@ -63,6 +63,7 @@ public:
protected:
bool isPresent(uint32_t K) const { return Present.test(K); }
bool isDeleted(uint32_t K) const { return Deleted.test(K); }
+
BucketList Buckets;
mutable SparseBitVector<> Present;
mutable SparseBitVector<> Deleted;
@@ -81,6 +82,7 @@ class HashTableIterator
: public iterator_facade_base<HashTableIterator, std::forward_iterator_tag,
std::pair<uint32_t, uint32_t>> {
friend class HashTable;
+
HashTableIterator(const HashTable &Map, uint32_t Index, bool IsEnd);
public:
@@ -101,6 +103,7 @@ private:
};
} // end namespace pdb
+
} // end namespace llvm
-#endif // LLVM_DEBUGINFO_PDB_RAW_HASHTABLE_H
+#endif // LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
diff --git a/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h b/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h
index 5565cd5582bc..f413fd1b336e 100644
--- a/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h
+++ b/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h
@@ -1,4 +1,4 @@
-//===- ModuleDebugStream.h - PDB Module Info Stream Access ----------------===//
+//===- ModuleDebugStream.h - PDB Module Info Stream Access ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,26 +7,26 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEBUGINFO_PDB_RAW_MODULEDEBUGSTREAM_H
-#define LLVM_DEBUGINFO_PDB_RAW_MODULEDEBUGSTREAM_H
+#ifndef LLVM_DEBUGINFO_PDB_NATIVE_MODULEDEBUGSTREAM_H
+#define LLVM_DEBUGINFO_PDB_NATIVE_MODULEDEBUGSTREAM_H
#include "llvm/ADT/iterator_range.h"
-#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
-#include "llvm/Support/BinaryStreamArray.h"
#include "llvm/Support/BinaryStreamRef.h"
#include "llvm/Support/Error.h"
+#include <cstdint>
+#include <memory>
namespace llvm {
namespace pdb {
-class PDBFile;
+
class DbiModuleDescriptor;
class ModuleDebugStreamRef {
- typedef codeview::DebugSubsectionArray::Iterator DebugSubsectionIterator;
+ using DebugSubsectionIterator = codeview::DebugSubsectionArray::Iterator;
public:
ModuleDebugStreamRef(const DbiModuleDescriptor &Module,
@@ -50,7 +50,7 @@ public:
ModuleDebugStreamRef &operator=(ModuleDebugStreamRef &&Other) = default;
- llvm::iterator_range<DebugSubsectionIterator> subsections() const;
+ iterator_range<DebugSubsectionIterator> subsections() const;
bool hasDebugSubsections() const;
@@ -75,7 +75,8 @@ private:
codeview::DebugSubsectionArray Subsections;
};
-}
-}
-#endif
+} // end namespace pdb
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_PDB_NATIVE_MODULEDEBUGSTREAM_H
diff --git a/include/llvm/DebugInfo/PDB/Native/ModuleDebugStreamBuilder.h b/include/llvm/DebugInfo/PDB/Native/ModuleDebugStreamBuilder.h
deleted file mode 100644
index e69de29bb2d1..000000000000
--- a/include/llvm/DebugInfo/PDB/Native/ModuleDebugStreamBuilder.h
+++ /dev/null
diff --git a/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h b/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h
index d4206503e7dc..25f66240a6a2 100644
--- a/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h
+++ b/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h
@@ -7,27 +7,31 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEBUGINFO_PDB_RAW_PDBNAMEDSTREAMMAP_H
-#define LLVM_DEBUGINFO_PDB_RAW_PDBNAMEDSTREAMMAP_H
+#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NAMEDSTREAMMAP_H
+#define LLVM_DEBUGINFO_PDB_NATIVE_NAMEDSTREAMMAP_H
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/DebugInfo/PDB/Native/HashTable.h"
#include "llvm/Support/Error.h"
#include <cstdint>
namespace llvm {
+
class BinaryStreamReader;
class BinaryStreamWriter;
namespace pdb {
-class NamedStreamMapBuilder;
+
class NamedStreamMap {
+ friend class NamedStreamMapBuilder;
+
struct FinalizationInfo {
uint32_t StringDataBytes = 0;
uint32_t SerializedLength = 0;
};
- friend NamedStreamMapBuilder;
public:
NamedStreamMap();
@@ -50,6 +54,7 @@ private:
};
} // end namespace pdb
+
} // end namespace llvm
-#endif // LLVM_DEBUGINFO_PDB_RAW_PDBNAMEDSTREAMMAP_H
+#endif // LLVM_DEBUGINFO_PDB_NATIVE_NAMEDSTREAMMAP_H
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h b/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h
index 5e4aaafff1a9..a24a972879d2 100644
--- a/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h
+++ b/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h
@@ -1,4 +1,4 @@
-//===- NativeRawSymbol.h - Native implementation of IPDBRawSymbol - C++ -*-===//
+//==- NativeRawSymbol.h - Native implementation of IPDBRawSymbol -*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
@@ -11,6 +11,8 @@
#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVERAWSYMBOL_H
#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
+#include <cstdint>
+#include <memory>
namespace llvm {
namespace pdb {
@@ -36,7 +38,7 @@ public:
std::unique_ptr<IPDBEnumSymbols>
findInlineFramesByRVA(uint32_t RVA) const override;
- void getDataBytes(llvm::SmallVector<uint8_t, 32> &Bytes) const override;
+ void getDataBytes(SmallVector<uint8_t, 32> &Bytes) const override;
void getFrontEndVersion(VersionInfo &Version) const override;
void getBackEndVersion(VersionInfo &Version) const override;
PDB_MemberAccess getAccess() const override;
@@ -206,7 +208,7 @@ protected:
uint32_t SymbolId;
};
-}
-}
+} // end namespace pdb
+} // end namespace llvm
-#endif
+#endif // LLVM_DEBUGINFO_PDB_NATIVE_NATIVERAWSYMBOL_H
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeSession.h b/include/llvm/DebugInfo/PDB/Native/NativeSession.h
index bbe207738e02..dd40874dc5f2 100644
--- a/include/llvm/DebugInfo/PDB/Native/NativeSession.h
+++ b/include/llvm/DebugInfo/PDB/Native/NativeSession.h
@@ -7,11 +7,13 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEBUGINFO_PDB_RAW_RAWSESSION_H
-#define LLVM_DEBUGINFO_PDB_RAW_RAWSESSION_H
+#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVESESSION_H
+#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVESESSION_H
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/PDB/IPDBSession.h"
+#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
+#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Error.h"
@@ -30,6 +32,9 @@ public:
static Error createFromExe(StringRef Path,
std::unique_ptr<IPDBSession> &Session);
+ std::unique_ptr<PDBSymbolCompiland>
+ createCompilandSymbol(DbiModuleDescriptor MI);
+
uint64_t getLoadAddress() const override;
void setLoadAddress(uint64_t Address) override;
std::unique_ptr<PDBSymbolExe> getGlobalScope() override;
@@ -71,6 +76,7 @@ public:
private:
std::unique_ptr<PDBFile> Pdb;
std::unique_ptr<BumpPtrAllocator> Allocator;
+ std::vector<std::unique_ptr<NativeRawSymbol>> SymbolCache;
};
}
}
diff --git a/include/llvm/DebugInfo/PDB/PDB.h b/include/llvm/DebugInfo/PDB/PDB.h
index 1f5a066b9a1b..9f9da39ca6cc 100644
--- a/include/llvm/DebugInfo/PDB/PDB.h
+++ b/include/llvm/DebugInfo/PDB/PDB.h
@@ -10,21 +10,23 @@
#ifndef LLVM_DEBUGINFO_PDB_PDB_H
#define LLVM_DEBUGINFO_PDB_PDB_H
-#include "PDBTypes.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
#include "llvm/Support/Error.h"
#include <memory>
-#include <system_error>
namespace llvm {
-class StringRef;
-
namespace pdb {
+class IPDBSession;
+
Error loadDataForPDB(PDB_ReaderType Type, StringRef Path,
std::unique_ptr<IPDBSession> &Session);
Error loadDataForEXE(PDB_ReaderType Type, StringRef Path,
std::unique_ptr<IPDBSession> &Session);
-}
-}
-#endif
+
+} // end namespace pdb
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_PDB_PDB_H
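A minimal sketch of the loader entry point declared above (the wrapper is
invented; errors are simply propagated):

    #include "llvm/DebugInfo/PDB/PDB.h"

    using namespace llvm;
    using namespace llvm::pdb;

    // Open a PDB through the DIA reader, filling Session on success.
    Error openPdb(StringRef Path, std::unique_ptr<IPDBSession> &Session) {
      return loadDataForPDB(PDB_ReaderType::DIA, Path, Session);
    }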
diff --git a/include/llvm/DebugInfo/PDB/PDBExtras.h b/include/llvm/DebugInfo/PDB/PDBExtras.h
index fc5787556a6d..3a38f21b94c8 100644
--- a/include/llvm/DebugInfo/PDB/PDBExtras.h
+++ b/include/llvm/DebugInfo/PDB/PDBExtras.h
@@ -1,4 +1,4 @@
-//===- PDBExtras.h - helper functions and classes for PDBs -------*- C++-*-===//
+//===- PDBExtras.h - helper functions and classes for PDBs ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,15 +10,17 @@
#ifndef LLVM_DEBUGINFO_PDB_PDBEXTRAS_H
#define LLVM_DEBUGINFO_PDB_PDBEXTRAS_H
-#include "PDBTypes.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
#include <unordered_map>
namespace llvm {
+class raw_ostream;
+
namespace pdb {
-typedef std::unordered_map<PDB_SymType, int> TagStats;
+
+using TagStats = std::unordered_map<PDB_SymType, int>;
raw_ostream &operator<<(raw_ostream &OS, const PDB_VariantType &Value);
raw_ostream &operator<<(raw_ostream &OS, const PDB_CallingConv &Conv);
@@ -37,7 +39,9 @@ raw_ostream &operator<<(raw_ostream &OS, const PDB_Machine &Machine);
raw_ostream &operator<<(raw_ostream &OS, const Variant &Value);
raw_ostream &operator<<(raw_ostream &OS, const VersionInfo &Version);
raw_ostream &operator<<(raw_ostream &OS, const TagStats &Stats);
-}
-}
-#endif
+} // end namespace pdb
+
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_PDB_PDBEXTRAS_H
diff --git a/include/llvm/DebugInfo/PDB/PDBTypes.h b/include/llvm/DebugInfo/PDB/PDBTypes.h
index dd2fc4f2c55f..79ec7ce906d5 100644
--- a/include/llvm/DebugInfo/PDB/PDBTypes.h
+++ b/include/llvm/DebugInfo/PDB/PDBTypes.h
@@ -1,4 +1,4 @@
-//===- PDBTypes.h - Defines enums for various fields contained in PDB ---*-===//
+//===- PDBTypes.h - Defines enums for various fields contained in PDB ----===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,9 +10,10 @@
#ifndef LLVM_DEBUGINFO_PDB_PDBTYPES_H
#define LLVM_DEBUGINFO_PDB_PDBTYPES_H
-#include "llvm/Config/llvm-config.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
+#include <cstddef>
#include <cstdint>
#include <cstring>
#include <functional>
@@ -20,21 +21,11 @@
namespace llvm {
namespace pdb {
-class PDBSymDumper;
-class PDBSymbol;
-
class IPDBDataStream;
-template <class T> class IPDBEnumChildren;
class IPDBLineNumber;
-class IPDBRawSymbol;
-class IPDBSession;
class IPDBSourceFile;
-
-typedef IPDBEnumChildren<PDBSymbol> IPDBEnumSymbols;
-typedef IPDBEnumChildren<IPDBSourceFile> IPDBEnumSourceFiles;
-typedef IPDBEnumChildren<IPDBDataStream> IPDBEnumDataStreams;
-typedef IPDBEnumChildren<IPDBLineNumber> IPDBEnumLineNumbers;
-
+class PDBSymDumper;
+class PDBSymbol;
class PDBSymbolExe;
class PDBSymbolCompiland;
class PDBSymbolCompilandDetails;
@@ -67,6 +58,11 @@ class PDBSymbolTypeManaged;
class PDBSymbolTypeDimension;
class PDBSymbolUnknown;
+using IPDBEnumSymbols = IPDBEnumChildren<PDBSymbol>;
+using IPDBEnumSourceFiles = IPDBEnumChildren<IPDBSourceFile>;
+using IPDBEnumDataStreams = IPDBEnumChildren<IPDBDataStream>;
+using IPDBEnumLineNumbers = IPDBEnumChildren<IPDBLineNumber>;
+
/// Specifies which PDB reader implementation is to be used. Only a value
/// of PDB_ReaderType::DIA is currently supported, but Native is in the works.
enum class PDB_ReaderType {
@@ -104,7 +100,7 @@ enum class PDB_Checksum { None = 0, MD5 = 1, SHA1 = 2 };
/// These values correspond to the CV_CPU_TYPE_e enumeration, and are documented
/// here: https://msdn.microsoft.com/en-us/library/b2fc64ek.aspx
-typedef codeview::CPUType PDB_Cpu;
+using PDB_Cpu = codeview::CPUType;
enum class PDB_Machine {
Invalid = 0xffff,
@@ -135,12 +131,11 @@ enum class PDB_Machine {
/// at the following locations:
/// https://msdn.microsoft.com/en-us/library/b2fc64ek.aspx
/// https://msdn.microsoft.com/en-us/library/windows/desktop/ms680207(v=vs.85).aspx
-///
-typedef codeview::CallingConvention PDB_CallingConv;
+using PDB_CallingConv = codeview::CallingConvention;
/// These values correspond to the CV_CFL_LANG enumeration, and are documented
/// here: https://msdn.microsoft.com/en-us/library/bw3aekw6.aspx
-typedef codeview::SourceLanguage PDB_Lang;
+using PDB_Lang = codeview::SourceLanguage;
/// These values correspond to the DataKind enumeration, and are documented
/// here: https://msdn.microsoft.com/en-us/library/b2x2t313.aspx
@@ -273,9 +268,9 @@ enum PDB_VariantType {
};
struct Variant {
- Variant() : Type(PDB_VariantType::Empty) {}
+ Variant() = default;
- Variant(const Variant &Other) : Type(PDB_VariantType::Empty) {
+ Variant(const Variant &Other) {
*this = Other;
}
@@ -284,7 +279,7 @@ struct Variant {
delete[] Value.String;
}
- PDB_VariantType Type;
+ PDB_VariantType Type = PDB_VariantType::Empty;
union {
bool Bool;
int8_t Int8;
@@ -344,18 +339,20 @@ struct Variant {
}
};
+} // end namespace pdb
} // end namespace llvm
-}
namespace std {
+
template <> struct hash<llvm::pdb::PDB_SymType> {
- typedef llvm::pdb::PDB_SymType argument_type;
- typedef std::size_t result_type;
+ using argument_type = llvm::pdb::PDB_SymType;
+ using result_type = std::size_t;
result_type operator()(const argument_type &Arg) const {
return std::hash<int>()(static_cast<int>(Arg));
}
};
+
} // end namespace std
#endif // LLVM_DEBUGINFO_PDB_PDBTYPES_H
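A short sketch of what the std::hash specialization above enables (the
counting helper is invented): PDB_SymType can key standard unordered
containers, which is what the TagStats alias in PDBExtras.h relies on:

    #include "llvm/DebugInfo/PDB/PDBTypes.h"
    #include <unordered_map>

    // Tally symbol tags; hashing goes through std::hash<PDB_SymType>.
    void countTag(std::unordered_map<llvm::pdb::PDB_SymType, int> &Stats,
                  llvm::pdb::PDB_SymType Tag) {
      ++Stats[Tag];
    }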
diff --git a/include/llvm/DebugInfo/PDB/UDTLayout.h b/include/llvm/DebugInfo/PDB/UDTLayout.h
index 6bc3660fbe51..c4234c191e21 100644
--- a/include/llvm/DebugInfo/PDB/UDTLayout.h
+++ b/include/llvm/DebugInfo/PDB/UDTLayout.h
@@ -10,30 +10,26 @@
#ifndef LLVM_DEBUGINFO_PDB_UDTLAYOUT_H
#define LLVM_DEBUGINFO_PDB_UDTLAYOUT_H
-#include "PDBSymbol.h"
-#include "PDBTypes.h"
-
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/DenseMap.h"
-
-#include <list>
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolData.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
+#include <cstdint>
#include <memory>
+#include <string>
+#include <vector>
namespace llvm {
-
-class raw_ostream;
-
namespace pdb {
-class PDBSymTypeBaseClass;
-class PDBSymbolData;
-class PDBSymbolTypeUDT;
-class PDBSymbolTypeVTable;
-
-class ClassLayout;
class BaseClassLayout;
-class LayoutItemBase;
+class ClassLayout;
class UDTLayoutBase;
class LayoutItemBase {
@@ -41,7 +37,7 @@ public:
LayoutItemBase(const UDTLayoutBase *Parent, const PDBSymbol *Symbol,
const std::string &Name, uint32_t OffsetInParent,
uint32_t Size, bool IsElided);
- virtual ~LayoutItemBase() {}
+ virtual ~LayoutItemBase() = default;
uint32_t deepPaddingSize() const;
virtual uint32_t immediatePadding() const { return 0; }
@@ -79,7 +75,8 @@ public:
VBPtrLayoutItem(const UDTLayoutBase &Parent,
std::unique_ptr<PDBSymbolTypeBuiltin> Sym, uint32_t Offset,
uint32_t Size);
- virtual bool isVBPtr() const { return true; }
+
+ bool isVBPtr() const override { return true; }
private:
std::unique_ptr<PDBSymbolTypeBuiltin> Type;
@@ -120,17 +117,12 @@ public:
bool IsElided);
uint32_t tailPadding() const override;
-
ArrayRef<LayoutItemBase *> layout_items() const { return LayoutItems; }
-
ArrayRef<BaseClassLayout *> bases() const { return AllBases; }
ArrayRef<BaseClassLayout *> regular_bases() const { return NonVirtualBases; }
ArrayRef<BaseClassLayout *> virtual_bases() const { return VirtualBases; }
-
uint32_t directVirtualBaseCount() const { return DirectVBaseCount; }
-
ArrayRef<std::unique_ptr<PDBSymbolFunc>> funcs() const { return Funcs; }
-
ArrayRef<std::unique_ptr<PDBSymbol>> other_items() const { return Other; }
protected:
@@ -183,7 +175,8 @@ private:
std::unique_ptr<PDBSymbolTypeUDT> OwnedStorage;
const PDBSymbolTypeUDT &UDT;
};
-}
-} // namespace llvm
+
+} // end namespace pdb
+} // end namespace llvm
#endif // LLVM_DEBUGINFO_PDB_UDTLAYOUT_H
diff --git a/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h b/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h
index 8e9be6b6f4fe..cf6556a33bbd 100644
--- a/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h
+++ b/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h
@@ -15,9 +15,11 @@
#define LLVM_EXECUTIONENGINE_ORC_IRTRANSFORMLAYER_H
#include "llvm/ExecutionEngine/JITSymbol.h"
+#include <memory>
#include <string>
namespace llvm {
+class Module;
namespace orc {
/// @brief IR mutating layer.
diff --git a/include/llvm/IR/Argument.h b/include/llvm/IR/Argument.h
index 3efcc637b6ed..497dca44547c 100644
--- a/include/llvm/IR/Argument.h
+++ b/include/llvm/IR/Argument.h
@@ -120,7 +120,7 @@ public:
bool hasAttribute(Attribute::AttrKind Kind) const;
/// Method for support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return V->getValueID() == ArgumentVal;
}
};
diff --git a/include/llvm/IR/BasicBlock.h b/include/llvm/IR/BasicBlock.h
index 7a35afcbafc3..6714f2c97473 100644
--- a/include/llvm/IR/BasicBlock.h
+++ b/include/llvm/IR/BasicBlock.h
@@ -326,7 +326,7 @@ public:
ValueSymbolTable *getValueSymbolTable();
/// \brief Methods for support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return V->getValueID() == Value::BasicBlockVal;
}
diff --git a/include/llvm/IR/Constant.h b/include/llvm/IR/Constant.h
index 82afd9a2691f..9daeac6ad6e7 100644
--- a/include/llvm/IR/Constant.h
+++ b/include/llvm/IR/Constant.h
@@ -116,7 +116,7 @@ public:
void destroyConstant();
//// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return V->getValueID() >= ConstantFirstVal &&
V->getValueID() <= ConstantLastVal;
}
diff --git a/include/llvm/IR/Constants.h b/include/llvm/IR/Constants.h
index 003a6d5d075d..8b3a90fa065b 100644
--- a/include/llvm/IR/Constants.h
+++ b/include/llvm/IR/Constants.h
@@ -842,7 +842,7 @@ public:
BasicBlock *getBasicBlock() const { return (BasicBlock*)Op<1>().get(); }
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return V->getValueID() == BlockAddressVal;
}
};
@@ -1217,7 +1217,7 @@ public:
Instruction *getAsInstruction();
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return V->getValueID() == ConstantExprVal;
}
diff --git a/include/llvm/IR/DerivedTypes.h b/include/llvm/IR/DerivedTypes.h
index a92321a44511..6e5e085873ab 100644
--- a/include/llvm/IR/DerivedTypes.h
+++ b/include/llvm/IR/DerivedTypes.h
@@ -89,7 +89,7 @@ public:
bool isPowerOf2ByteWidth() const;
/// Methods for support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const Type *T) {
+ static bool classof(const Type *T) {
return T->getTypeID() == IntegerTyID;
}
};
@@ -139,7 +139,7 @@ public:
unsigned getNumParams() const { return NumContainedTys - 1; }
/// Methods for support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const Type *T) {
+ static bool classof(const Type *T) {
return T->getTypeID() == FunctionTyID;
}
};
@@ -171,7 +171,7 @@ public:
bool indexValid(unsigned Idx) const;
/// Methods for support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const Type *T) {
+ static bool classof(const Type *T) {
return T->getTypeID() == ArrayTyID ||
T->getTypeID() == StructTyID ||
T->getTypeID() == VectorTyID;
@@ -317,7 +317,7 @@ public:
}
/// Methods for support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const Type *T) {
+ static bool classof(const Type *T) {
return T->getTypeID() == StructTyID;
}
};
@@ -360,7 +360,7 @@ public:
Type *getElementType() const { return ContainedType; }
/// Methods for support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const Type *T) {
+ static bool classof(const Type *T) {
return T->getTypeID() == ArrayTyID || T->getTypeID() == VectorTyID;
}
};
@@ -380,7 +380,7 @@ public:
static bool isValidElementType(Type *ElemTy);
/// Methods for support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const Type *T) {
+ static bool classof(const Type *T) {
return T->getTypeID() == ArrayTyID;
}
};
@@ -454,7 +454,7 @@ public:
}
/// Methods for support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const Type *T) {
+ static bool classof(const Type *T) {
return T->getTypeID() == VectorTyID;
}
};
@@ -495,7 +495,7 @@ public:
inline unsigned getAddressSpace() const { return getSubclassData(); }
/// Implement support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const Type *T) {
+ static bool classof(const Type *T) {
return T->getTypeID() == PointerTyID;
}
};
diff --git a/include/llvm/IR/Dominators.h b/include/llvm/IR/Dominators.h
index 9be6acc33591..e10d14c19793 100644
--- a/include/llvm/IR/Dominators.h
+++ b/include/llvm/IR/Dominators.h
@@ -36,12 +36,22 @@ class raw_ostream;
extern template class DomTreeNodeBase<BasicBlock>;
extern template class DominatorTreeBase<BasicBlock>;
+namespace DomTreeBuilder {
extern template void Calculate<Function, BasicBlock *>(
DominatorTreeBaseByGraphTraits<GraphTraits<BasicBlock *>> &DT, Function &F);
+
extern template void Calculate<Function, Inverse<BasicBlock *>>(
DominatorTreeBaseByGraphTraits<GraphTraits<Inverse<BasicBlock *>>> &DT,
Function &F);
+extern template bool Verify<BasicBlock *>(
+ const DominatorTreeBaseByGraphTraits<GraphTraits<BasicBlock *>> &DT);
+
+extern template bool Verify<Inverse<BasicBlock *>>(
+ const DominatorTreeBaseByGraphTraits<GraphTraits<Inverse<BasicBlock *>>>
+ &DT);
+} // namespace DomTreeBuilder
+
using DomTreeNode = DomTreeNodeBase<BasicBlock>;
class BasicBlockEdge {
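
The `extern template` lines gathered into the new `DomTreeBuilder` namespace are explicit instantiation declarations: each translation unit including Dominators.h promises not to instantiate `Calculate`/`Verify` itself and links against the one instantiation compiled into the library. A compact single-file illustration of the mechanism:

#include <cstdio>

template <typename T> T triple(T X) { return X + X + X; }

// Declaration: do not implicitly instantiate triple<int> at uses below;
// some translation unit provides it (here, the definition at the bottom).
extern template int triple<int>(int);

int main() { std::printf("%d\n", triple(14)); }

// Definition: the single explicit instantiation (normally in one .cpp file).
template int triple<int>(int);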
diff --git a/include/llvm/IR/Function.h b/include/llvm/IR/Function.h
index 3496806d9362..75fccc135dae 100644
--- a/include/llvm/IR/Function.h
+++ b/include/llvm/IR/Function.h
@@ -671,7 +671,7 @@ public:
void viewCFGOnly() const;
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return V->getValueID() == Value::FunctionVal;
}
diff --git a/include/llvm/IR/GlobalAlias.h b/include/llvm/IR/GlobalAlias.h
index d4bf0d7e1ed4..450583baaa3c 100644
--- a/include/llvm/IR/GlobalAlias.h
+++ b/include/llvm/IR/GlobalAlias.h
@@ -88,7 +88,7 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return V->getValueID() == Value::GlobalAliasVal;
}
};
diff --git a/include/llvm/IR/GlobalIFunc.h b/include/llvm/IR/GlobalIFunc.h
index d90c7c78ed26..ef51315a6f5d 100644
--- a/include/llvm/IR/GlobalIFunc.h
+++ b/include/llvm/IR/GlobalIFunc.h
@@ -70,7 +70,7 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return V->getValueID() == Value::GlobalIFuncVal;
}
};
diff --git a/include/llvm/IR/GlobalIndirectSymbol.h b/include/llvm/IR/GlobalIndirectSymbol.h
index 212703af7101..22c00686c549 100644
--- a/include/llvm/IR/GlobalIndirectSymbol.h
+++ b/include/llvm/IR/GlobalIndirectSymbol.h
@@ -75,7 +75,7 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return V->getValueID() == Value::GlobalAliasVal ||
V->getValueID() == Value::GlobalIFuncVal;
}
diff --git a/include/llvm/IR/GlobalObject.h b/include/llvm/IR/GlobalObject.h
index fc38f698027b..278b193567f1 100644
--- a/include/llvm/IR/GlobalObject.h
+++ b/include/llvm/IR/GlobalObject.h
@@ -155,7 +155,7 @@ protected:
public:
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return V->getValueID() == Value::FunctionVal ||
V->getValueID() == Value::GlobalVariableVal;
}
diff --git a/include/llvm/IR/GlobalVariable.h b/include/llvm/IR/GlobalVariable.h
index 8255a4f298c0..34ace6f2b4f4 100644
--- a/include/llvm/IR/GlobalVariable.h
+++ b/include/llvm/IR/GlobalVariable.h
@@ -241,7 +241,7 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return V->getValueID() == Value::GlobalVariableVal;
}
};
diff --git a/include/llvm/IR/InlineAsm.h b/include/llvm/IR/InlineAsm.h
index 7f03fcd19b65..59874b05b0ce 100644
--- a/include/llvm/IR/InlineAsm.h
+++ b/include/llvm/IR/InlineAsm.h
@@ -183,7 +183,7 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return V->getValueID() == Value::InlineAsmVal;
}
diff --git a/include/llvm/IR/InstrTypes.h b/include/llvm/IR/InstrTypes.h
index 76524b412456..d749077fd34a 100644
--- a/include/llvm/IR/InstrTypes.h
+++ b/include/llvm/IR/InstrTypes.h
@@ -73,10 +73,10 @@ public:
void setSuccessor(unsigned idx, BasicBlock *B);
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->isTerminator();
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
@@ -298,14 +298,14 @@ public:
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::Alloca ||
I->getOpcode() == Instruction::Load ||
I->getOpcode() == Instruction::VAArg ||
I->getOpcode() == Instruction::ExtractValue ||
(I->getOpcode() >= CastOpsBegin && I->getOpcode() < CastOpsEnd);
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
};
@@ -532,10 +532,10 @@ public:
bool swapOperands();
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->isBinaryOp();
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
};
@@ -833,10 +833,10 @@ public:
static bool castIsValid(Instruction::CastOps op, Value *S, Type *DstTy);
/// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->isCast();
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
};
@@ -1062,11 +1062,11 @@ public:
static bool isImpliedFalseByMatchingCmp(Predicate Pred1, Predicate Pred2);
/// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::ICmp ||
I->getOpcode() == Instruction::FCmp;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
@@ -1152,8 +1152,8 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) { return I->isFuncletPad(); }
- static inline bool classof(const Value *V) {
+ static bool classof(const Instruction *I) { return I->isFuncletPad(); }
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
};
diff --git a/include/llvm/IR/Instruction.h b/include/llvm/IR/Instruction.h
index d8db29e15886..8dc02111b866 100644
--- a/include/llvm/IR/Instruction.h
+++ b/include/llvm/IR/Instruction.h
@@ -556,7 +556,7 @@ public:
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return V->getValueID() >= Value::InstructionVal;
}
diff --git a/include/llvm/IR/Instructions.h b/include/llvm/IR/Instructions.h
index e1620b3e7df5..dc5f37450b48 100644
--- a/include/llvm/IR/Instructions.h
+++ b/include/llvm/IR/Instructions.h
@@ -145,10 +145,10 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return (I->getOpcode() == Instruction::Alloca);
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
@@ -284,10 +284,10 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::Load;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
@@ -408,10 +408,10 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::Store;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
@@ -483,10 +483,10 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::Fence;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
@@ -639,10 +639,10 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::AtomicCmpXchg;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
@@ -788,10 +788,10 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::AtomicRMW;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
@@ -1048,10 +1048,10 @@ public:
bool accumulateConstantOffset(const DataLayout &DL, APInt &Offset) const;
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return (I->getOpcode() == Instruction::GetElementPtr);
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
};
@@ -1226,10 +1226,10 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::ICmp;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
};
@@ -1334,10 +1334,10 @@ public:
}
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::FCmp;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
};
@@ -1873,10 +1873,10 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::Call;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
@@ -2011,10 +2011,10 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::Select;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
};
@@ -2057,10 +2057,10 @@ public:
static unsigned getPointerOperandIndex() { return 0U; }
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == VAArg;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
};
@@ -2114,10 +2114,10 @@ public:
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::ExtractElement;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
};
@@ -2177,10 +2177,10 @@ public:
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::InsertElement;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
};
@@ -2276,10 +2276,10 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::ShuffleVector;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
};
@@ -2376,10 +2376,10 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::ExtractValue;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
};
@@ -2507,10 +2507,10 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::InsertValue;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
};
@@ -2747,10 +2747,10 @@ public:
bool hasConstantOrUndefValue() const;
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::PHI;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
@@ -2856,10 +2856,10 @@ public:
void reserveClauses(unsigned Size) { growOperands(Size); }
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::LandingPad;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
};
@@ -2930,10 +2930,10 @@ public:
unsigned getNumSuccessors() const { return 0; }
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return (I->getOpcode() == Instruction::Ret);
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
@@ -3046,10 +3046,10 @@ public:
void swapSuccessors();
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return (I->getOpcode() == Instruction::Br);
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
};
@@ -3403,10 +3403,10 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::Switch;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
};
@@ -3501,10 +3501,10 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::IndirectBr;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
};
@@ -3977,10 +3977,10 @@ public:
unsigned getNumSuccessors() const { return 2; }
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return (I->getOpcode() == Instruction::Invoke);
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
@@ -4072,10 +4072,10 @@ public:
unsigned getNumSuccessors() const { return 0; }
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::Resume;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
@@ -4260,10 +4260,10 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::CatchSwitch;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
};
@@ -4306,10 +4306,10 @@ public:
}
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::CleanupPad;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
};
@@ -4356,10 +4356,10 @@ public:
}
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::CatchPad;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
};
@@ -4420,10 +4420,10 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return (I->getOpcode() == Instruction::CatchRet);
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
@@ -4516,10 +4516,10 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return (I->getOpcode() == Instruction::CleanupRet);
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
@@ -4577,10 +4577,10 @@ public:
unsigned getNumSuccessors() const { return 0; }
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::Unreachable;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
@@ -4627,10 +4627,10 @@ public:
);
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == Trunc;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
};
@@ -4666,10 +4666,10 @@ public:
);
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == ZExt;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
};
@@ -4705,10 +4705,10 @@ public:
);
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == SExt;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
};
@@ -4744,10 +4744,10 @@ public:
);
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == FPTrunc;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
};
@@ -4783,10 +4783,10 @@ public:
);
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == FPExt;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
};
@@ -4822,10 +4822,10 @@ public:
);
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == UIToFP;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
};
@@ -4861,10 +4861,10 @@ public:
);
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == SIToFP;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
};
@@ -4900,10 +4900,10 @@ public:
);
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == FPToUI;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
};
@@ -4939,10 +4939,10 @@ public:
);
/// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == FPToSI;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
};
@@ -4982,10 +4982,10 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == IntToPtr;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
};
@@ -5033,10 +5033,10 @@ public:
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == PtrToInt;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
};
@@ -5072,10 +5072,10 @@ public:
);
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == BitCast;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
};
@@ -5112,10 +5112,10 @@ public:
);
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == AddrSpaceCast;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
diff --git a/include/llvm/IR/IntrinsicInst.h b/include/llvm/IR/IntrinsicInst.h
index e0dd3ca7d01e..944af57a7800 100644
--- a/include/llvm/IR/IntrinsicInst.h
+++ b/include/llvm/IR/IntrinsicInst.h
@@ -53,12 +53,12 @@ namespace llvm {
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const CallInst *I) {
+ static bool classof(const CallInst *I) {
if (const Function *CF = I->getCalledFunction())
return CF->isIntrinsic();
return false;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<CallInst>(V) && classof(cast<CallInst>(V));
}
};
@@ -72,7 +72,7 @@ namespace llvm {
Value *getVariableLocation(bool AllowNullOp = true) const;
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
switch (I->getIntrinsicID()) {
case Intrinsic::dbg_declare:
case Intrinsic::dbg_value:
@@ -80,7 +80,7 @@ namespace llvm {
default: return false;
}
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
@@ -107,10 +107,10 @@ namespace llvm {
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::dbg_declare;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
@@ -144,10 +144,10 @@ namespace llvm {
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::dbg_value;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
@@ -176,7 +176,7 @@ namespace llvm {
ExceptionBehavior getExceptionBehavior() const;
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
switch (I->getIntrinsicID()) {
case Intrinsic::experimental_constrained_fadd:
case Intrinsic::experimental_constrained_fsub:
@@ -199,7 +199,7 @@ namespace llvm {
default: return false;
}
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
@@ -288,10 +288,10 @@ namespace llvm {
setArgOperand(ARG_ELEMENTSIZE, V);
}
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::memcpy_element_unordered_atomic;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
@@ -358,7 +358,7 @@ namespace llvm {
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
switch (I->getIntrinsicID()) {
case Intrinsic::memcpy:
case Intrinsic::memmove:
@@ -367,7 +367,7 @@ namespace llvm {
default: return false;
}
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
@@ -387,10 +387,10 @@ namespace llvm {
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::memset;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
@@ -419,11 +419,11 @@ namespace llvm {
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::memcpy ||
I->getIntrinsicID() == Intrinsic::memmove;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
@@ -432,10 +432,10 @@ namespace llvm {
class MemCpyInst : public MemTransferInst {
public:
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::memcpy;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
@@ -444,10 +444,10 @@ namespace llvm {
class MemMoveInst : public MemTransferInst {
public:
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::memmove;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
@@ -455,10 +455,10 @@ namespace llvm {
/// This represents the llvm.va_start intrinsic.
class VAStartInst : public IntrinsicInst {
public:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::vastart;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
@@ -468,10 +468,10 @@ namespace llvm {
/// This represents the llvm.va_end intrinsic.
class VAEndInst : public IntrinsicInst {
public:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::vaend;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
@@ -481,10 +481,10 @@ namespace llvm {
/// This represents the llvm.va_copy intrinsic.
class VACopyInst : public IntrinsicInst {
public:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::vacopy;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
@@ -495,10 +495,10 @@ namespace llvm {
/// This represents the llvm.instrprof_increment intrinsic.
class InstrProfIncrementInst : public IntrinsicInst {
public:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::instrprof_increment;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
@@ -524,10 +524,10 @@ namespace llvm {
class InstrProfIncrementInstStep : public InstrProfIncrementInst {
public:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::instrprof_increment_step;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
@@ -535,10 +535,10 @@ namespace llvm {
/// This represents the llvm.instrprof_value_profile intrinsic.
class InstrProfValueProfileInst : public IntrinsicInst {
public:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::instrprof_value_profile;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
diff --git a/include/llvm/IR/IntrinsicsWebAssembly.td b/include/llvm/IR/IntrinsicsWebAssembly.td
index 3a0957dfa39b..640ef627bc46 100644
--- a/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -19,4 +19,8 @@ let TargetPrefix = "wasm" in { // All intrinsics start with "llvm.wasm.".
def int_wasm_current_memory : Intrinsic<[llvm_anyint_ty], [], [IntrReadMem]>;
def int_wasm_grow_memory : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], []>;
+// Exception handling intrinsics
+def int_wasm_throw : Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty], [Throws]>;
+def int_wasm_rethrow : Intrinsic<[], [], [Throws]>;
+
}
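
For context, a hedged sketch of emitting a call to the new intrinsic from C++; it assumes the TableGen-generated identifier is `Intrinsic::wasm_throw` (derived from `int_wasm_throw` above), and the tag/object operands are placeholders supplied by the caller:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Emits: call void @llvm.wasm.throw(i32 %Tag, i8* %Obj)
void emitWasmThrow(Module &M, IRBuilder<> &B, Value *Tag, Value *Obj) {
  Function *ThrowFn = Intrinsic::getDeclaration(&M, Intrinsic::wasm_throw);
  B.CreateCall(ThrowFn, {Tag, Obj}); // marked [Throws], so it may unwind
}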
diff --git a/include/llvm/IR/LLVMContext.h b/include/llvm/IR/LLVMContext.h
index ad011fb72e6a..b27abad618c9 100644
--- a/include/llvm/IR/LLVMContext.h
+++ b/include/llvm/IR/LLVMContext.h
@@ -188,10 +188,19 @@ public:
/// \brief Return if a code hotness metric should be included in optimization
/// diagnostics.
- bool getDiagnosticHotnessRequested() const;
+ bool getDiagnosticsHotnessRequested() const;
/// \brief Set if a code hotness metric should be included in optimization
/// diagnostics.
- void setDiagnosticHotnessRequested(bool Requested);
+ void setDiagnosticsHotnessRequested(bool Requested);
+
+ /// \brief Return the minimum hotness value a diagnostic would need in order
+ /// to be included in optimization diagnostics. If there is no minimum, this
+ /// returns zero.
+ uint64_t getDiagnosticsHotnessThreshold() const;
+
+ /// \brief Set the minimum hotness value a diagnostic needs in order to be
+ /// included in optimization diagnostics.
+ void setDiagnosticsHotnessThreshold(uint64_t Threshold);
/// \brief Return the YAML file used by the backend to save optimization
/// diagnostics. If null, diagnostics are not saved in a file but only
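
Taken together with the rename above (`Diagnostic` to `Diagnostics`, matching the rest of the diagnostics API), a typical front-end setup using the new threshold looks like this minimal sketch:

#include "llvm/IR/LLVMContext.h"

void configureOptRemarks(llvm::LLVMContext &Ctx) {
  // Attach profile-derived hotness values to optimization remarks...
  Ctx.setDiagnosticsHotnessRequested(true);
  // ...but only emit remarks for code with hotness of at least 100.
  Ctx.setDiagnosticsHotnessThreshold(100);
}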
diff --git a/include/llvm/IR/LegacyPassNameParser.h b/include/llvm/IR/LegacyPassNameParser.h
index fd9d468b06cb..4cec08196408 100644
--- a/include/llvm/IR/LegacyPassNameParser.h
+++ b/include/llvm/IR/LegacyPassNameParser.h
@@ -81,15 +81,15 @@ public:
// default implementation to sort the table before we print...
void printOptionInfo(const cl::Option &O, size_t GlobalWidth) const override {
PassNameParser *PNP = const_cast<PassNameParser*>(this);
- array_pod_sort(PNP->Values.begin(), PNP->Values.end(), ValLessThan);
+ array_pod_sort(PNP->Values.begin(), PNP->Values.end(), ValCompare);
cl::parser<const PassInfo*>::printOptionInfo(O, GlobalWidth);
}
private:
- // ValLessThan - Provide a sorting comparator for Values elements...
- static int ValLessThan(const PassNameParser::OptionInfo *VT1,
- const PassNameParser::OptionInfo *VT2) {
- return VT1->Name < VT2->Name;
+ // ValCompare - Provide a sorting comparator for Values elements...
+ static int ValCompare(const PassNameParser::OptionInfo *VT1,
+ const PassNameParser::OptionInfo *VT2) {
+ return VT1->Name.compare(VT2->Name);
}
};
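
The rename is not cosmetic: `array_pod_sort` expects a qsort-style three-way comparator returning a negative, zero, or positive int, but `VT1->Name < VT2->Name` yields only 0 or 1 and can never report "greater than", so the old callback described an invalid ordering. A standalone illustration of the same bug with the C library's qsort:

#include <cstdio>
#include <cstdlib>
#include <cstring>

// Broken: a bool-style "<" comparator truncated to {0,1} cannot express ">".
int lessThan(const void *A, const void *B) {
  return strcmp(*(const char *const *)A, *(const char *const *)B) < 0;
}
// Correct: forward the full negative/zero/positive result.
int threeWay(const void *A, const void *B) {
  return strcmp(*(const char *const *)A, *(const char *const *)B);
}

int main() {
  const char *Names[] = {"loop-unroll", "adce", "gvn", "sroa"};
  qsort(Names, 4, sizeof(Names[0]), threeWay); // lessThan would misorder these
  for (const char *N : Names)
    std::printf("%s\n", N);
}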
diff --git a/include/llvm/IR/Metadata.h b/include/llvm/IR/Metadata.h
index 80ed44be43eb..3462cc02fd27 100644
--- a/include/llvm/IR/Metadata.h
+++ b/include/llvm/IR/Metadata.h
@@ -660,6 +660,19 @@ struct AAMDNodes {
/// \brief The tag specifying the noalias scope.
MDNode *NoAlias;
+
+ /// \brief Given two sets of AAMDNodes that apply to the same pointer,
+ /// give the best AAMDNodes that are compatible with both (i.e. a set of
+ /// nodes whose allowable aliasing conclusions are a subset of those
+ /// allowable by both of the inputs). However, for efficiency
+ /// reasons, do not create any new MDNodes.
+ AAMDNodes intersect(const AAMDNodes &Other) {
+ AAMDNodes Result;
+ Result.TBAA = Other.TBAA == TBAA ? TBAA : nullptr;
+ Result.Scope = Other.Scope == Scope ? Scope : nullptr;
+ Result.NoAlias = Other.NoAlias == NoAlias ? NoAlias : nullptr;
+ return Result;
+ }
};
// Specialize DenseMapInfo for AAMDNodes.
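
The new `intersect` computes a conservative merge: a node survives only when both inputs carry the identical node, so the result never licenses more aliasing conclusions than either operand. A short usage sketch for combining the metadata of two loads (the surrounding transform is hypothetical):

#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"

using namespace llvm;

// Conservatively merged AA metadata for an instruction replacing both loads.
AAMDNodes mergedAATags(const LoadInst &A, const LoadInst &B) {
  AAMDNodes TagsA, TagsB;
  A.getAAMetadata(TagsA); // fills TBAA/Scope/NoAlias from the instruction
  B.getAAMetadata(TagsB);
  return TagsA.intersect(TagsB); // keep a node only if both loads carry it
}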
diff --git a/include/llvm/IR/ModuleSummaryIndexYAML.h b/include/llvm/IR/ModuleSummaryIndexYAML.h
index 5d7b8b997d37..7f6cb5bee5a6 100644
--- a/include/llvm/IR/ModuleSummaryIndexYAML.h
+++ b/include/llvm/IR/ModuleSummaryIndexYAML.h
@@ -140,8 +140,6 @@ struct FunctionSummaryYaml {
} // End yaml namespace
} // End llvm namespace
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint64_t)
-
namespace llvm {
namespace yaml {
@@ -188,7 +186,6 @@ template <> struct MappingTraits<FunctionSummaryYaml> {
LLVM_YAML_IS_STRING_MAP(TypeIdSummary)
LLVM_YAML_IS_SEQUENCE_VECTOR(FunctionSummaryYaml)
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string)
namespace llvm {
namespace yaml {
diff --git a/include/llvm/IR/Operator.h b/include/llvm/IR/Operator.h
index c7f4697e93e7..9df6bfc54cd4 100644
--- a/include/llvm/IR/Operator.h
+++ b/include/llvm/IR/Operator.h
@@ -54,9 +54,9 @@ public:
return Instruction::UserOp1;
}
- static inline bool classof(const Instruction *) { return true; }
- static inline bool classof(const ConstantExpr *) { return true; }
- static inline bool classof(const Value *V) {
+ static bool classof(const Instruction *) { return true; }
+ static bool classof(const ConstantExpr *) { return true; }
+ static bool classof(const Value *V) {
return isa<Instruction>(V) || isa<ConstantExpr>(V);
}
};
@@ -97,19 +97,19 @@ public:
return (SubclassOptionalData & NoSignedWrap) != 0;
}
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::Add ||
I->getOpcode() == Instruction::Sub ||
I->getOpcode() == Instruction::Mul ||
I->getOpcode() == Instruction::Shl;
}
- static inline bool classof(const ConstantExpr *CE) {
+ static bool classof(const ConstantExpr *CE) {
return CE->getOpcode() == Instruction::Add ||
CE->getOpcode() == Instruction::Sub ||
CE->getOpcode() == Instruction::Mul ||
CE->getOpcode() == Instruction::Shl;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return (isa<Instruction>(V) && classof(cast<Instruction>(V))) ||
(isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V)));
}
@@ -144,13 +144,13 @@ public:
OpC == Instruction::LShr;
}
- static inline bool classof(const ConstantExpr *CE) {
+ static bool classof(const ConstantExpr *CE) {
return isPossiblyExactOpcode(CE->getOpcode());
}
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return isPossiblyExactOpcode(I->getOpcode());
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return (isa<Instruction>(V) && classof(cast<Instruction>(V))) ||
(isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V)));
}
@@ -324,17 +324,17 @@ public:
/// precision.
float getFPAccuracy() const;
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getType()->isFPOrFPVectorTy() ||
I->getOpcode() == Instruction::FCmp;
}
- static inline bool classof(const ConstantExpr *CE) {
+ static bool classof(const ConstantExpr *CE) {
return CE->getType()->isFPOrFPVectorTy() ||
CE->getOpcode() == Instruction::FCmp;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return (isa<Instruction>(V) && classof(cast<Instruction>(V))) ||
(isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V)));
}
@@ -344,13 +344,13 @@ public:
template<typename SuperClass, unsigned Opc>
class ConcreteOperator : public SuperClass {
public:
- static inline bool classof(const Instruction *I) {
+ static bool classof(const Instruction *I) {
return I->getOpcode() == Opc;
}
- static inline bool classof(const ConstantExpr *CE) {
+ static bool classof(const ConstantExpr *CE) {
return CE->getOpcode() == Opc;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return (isa<Instruction>(V) && classof(cast<Instruction>(V))) ||
(isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V)));
}
diff --git a/include/llvm/IR/PatternMatch.h b/include/llvm/IR/PatternMatch.h
index 9e9c8ac75d2a..5b69e7855cc7 100644
--- a/include/llvm/IR/PatternMatch.h
+++ b/include/llvm/IR/PatternMatch.h
@@ -378,7 +378,7 @@ struct bind_const_intval_ty {
template <typename ITy> bool match(ITy *V) {
if (const auto *CV = dyn_cast<ConstantInt>(V))
- if (CV->getBitWidth() <= 64) {
+ if (CV->getValue().ule(UINT64_MAX)) {
VR = CV->getZExtValue();
return true;
}
@@ -399,10 +399,7 @@ struct specific_intval {
if (const auto *C = dyn_cast<Constant>(V))
CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue());
- if (CI && CI->getBitWidth() <= 64)
- return CI->getZExtValue() == Val;
-
- return false;
+ return CI && CI->getValue() == Val;
}
};
@@ -1363,6 +1360,11 @@ m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3) {
// Helper intrinsic matching specializations.
template <typename Opnd0>
+inline typename m_Intrinsic_Ty<Opnd0>::Ty m_BitReverse(const Opnd0 &Op0) {
+ return m_Intrinsic<Intrinsic::bitreverse>(Op0);
+}
+
+template <typename Opnd0>
inline typename m_Intrinsic_Ty<Opnd0>::Ty m_BSwap(const Opnd0 &Op0) {
return m_Intrinsic<Intrinsic::bswap>(Op0);
}
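
`m_BitReverse` composes like the existing `m_BSwap` helper, and the `specific_intval`/`bind_const_intval_ty` changes above compare `APInt` values directly instead of bailing out beyond 64 bits. A small matcher sketch using the new helper:

#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"

using namespace llvm;
using namespace llvm::PatternMatch;

// Returns X when V is bitreverse(bitreverse(X)), which folds to X.
Value *matchDoubleBitReverse(Value *V) {
  Value *X = nullptr;
  if (match(V, m_BitReverse(m_BitReverse(m_Value(X)))))
    return X;
  return nullptr;
}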
diff --git a/include/llvm/IR/Statepoint.h b/include/llvm/IR/Statepoint.h
index 630f30667272..ad9537e9762e 100644
--- a/include/llvm/IR/Statepoint.h
+++ b/include/llvm/IR/Statepoint.h
@@ -329,12 +329,12 @@ public:
/// Currently, the only projections available are gc.result and gc.relocate.
class GCProjectionInst : public IntrinsicInst {
public:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::experimental_gc_relocate ||
I->getIntrinsicID() == Intrinsic::experimental_gc_result;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
@@ -373,11 +373,11 @@ public:
/// Represents calls to the gc.relocate intrinsic.
class GCRelocateInst : public GCProjectionInst {
public:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::experimental_gc_relocate;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
@@ -408,11 +408,11 @@ public:
/// Represents calls to the gc.result intrinsic.
class GCResultInst : public GCProjectionInst {
public:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::experimental_gc_result;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
diff --git a/include/llvm/IR/User.h b/include/llvm/IR/User.h
index 109a3d5e7be8..4dfa19cf241f 100644
--- a/include/llvm/IR/User.h
+++ b/include/llvm/IR/User.h
@@ -288,7 +288,7 @@ public:
void replaceUsesOfWith(Value *From, Value *To);
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<Instruction>(V) || isa<Constant>(V);
}
};
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index a52fa3b542a5..aab14070dbda 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -70,7 +70,6 @@ void initializeAlwaysInlinerLegacyPassPass(PassRegistry&);
void initializeArgPromotionPass(PassRegistry&);
void initializeAssumptionCacheTrackerPass(PassRegistry&);
void initializeAtomicExpandPass(PassRegistry&);
-void initializeBBVectorizePass(PassRegistry&);
void initializeBDCELegacyPassPass(PassRegistry&);
void initializeBarrierNoopPass(PassRegistry&);
void initializeBasicAAWrapperPassPass(PassRegistry&);
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h
index c309ddbe2f02..d07c15c1013b 100644
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -195,7 +195,6 @@ namespace {
(void) llvm::createLoopVectorizePass();
(void) llvm::createSLPVectorizerPass();
(void) llvm::createLoadStoreVectorizerPass();
- (void) llvm::createBBVectorizePass();
(void) llvm::createPartiallyInlineLibCallsPass();
(void) llvm::createScalarizerPass();
(void) llvm::createSeparateConstOffsetFromGEPPass();
diff --git a/include/llvm/MC/MCAsmBackend.h b/include/llvm/MC/MCAsmBackend.h
index a270973204f2..c9c43a22da5d 100644
--- a/include/llvm/MC/MCAsmBackend.h
+++ b/include/llvm/MC/MCAsmBackend.h
@@ -60,11 +60,12 @@ public:
/// Get information on a fixup kind.
virtual const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const;
- /// Target hook to adjust the literal value of a fixup if necessary.
- /// IsResolved signals whether the caller believes a relocation is needed; the
- /// target can modify the value. The default does nothing.
- virtual void processFixupValue(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target, bool &IsResolved) {}
+ /// Hook to check if a relocation is needed for some target specific reason.
+ virtual bool shouldForceRelocation(const MCAssembler &Asm,
+ const MCFixup &Fixup,
+ const MCValue &Target) {
+ return false;
+ }
/// Apply the \p Value for given \p Fixup into the provided data fragment, at
/// the offset specified by the fixup and following the fixup kind as
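
The replacement turns a hook that mutated caller state through `bool &IsResolved` into a pure predicate: the assembler now asks whether a relocation must be kept and retains control of the rest of the resolution logic. A generic, framework-free sketch of this refactoring pattern (hypothetical types, not the MC API):

struct Fixup { int Kind; };

// Before: the hook could silently flip caller state through a reference.
struct OldBackend {
  virtual void processFixup(const Fixup &F, bool &IsResolved) {}
  virtual ~OldBackend() = default;
};

// After: the hook answers one question; the caller owns the decision.
struct NewBackend {
  virtual bool shouldForceRelocation(const Fixup &F) { return false; }
  virtual ~NewBackend() = default;
};

bool resolveFixup(NewBackend &B, const Fixup &F, bool TriviallyResolved) {
  // The target can veto resolution, but can no longer claim extra resolution.
  return TriviallyResolved && !B.shouldForceRelocation(F);
}

int main() {
  NewBackend B;
  Fixup F{0};
  return resolveFixup(B, F, true) ? 0 : 1;
}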
diff --git a/include/llvm/MC/MCWinCOFFObjectWriter.h b/include/llvm/MC/MCWinCOFFObjectWriter.h
index 6e14cefaa0ab..198a08b5f539 100644
--- a/include/llvm/MC/MCWinCOFFObjectWriter.h
+++ b/include/llvm/MC/MCWinCOFFObjectWriter.h
@@ -13,6 +13,7 @@
namespace llvm {
class MCAsmBackend;
+class MCContext;
class MCFixup;
class MCObjectWriter;
class MCValue;
diff --git a/include/llvm/Object/Archive.h b/include/llvm/Object/Archive.h
index 6c5fb9d5c92b..e56e8e464de3 100644
--- a/include/llvm/Object/Archive.h
+++ b/include/llvm/Object/Archive.h
@@ -253,7 +253,7 @@ public:
}
// Cast methods.
- static inline bool classof(Binary const *v) {
+ static bool classof(Binary const *v) {
return v->isArchive();
}
diff --git a/include/llvm/Object/COFF.h b/include/llvm/Object/COFF.h
index bf0172822d3f..78e0b5f6ed30 100644
--- a/include/llvm/Object/COFF.h
+++ b/include/llvm/Object/COFF.h
@@ -1026,7 +1026,7 @@ public:
bool isRelocatableObject() const override;
bool is64() const { return PE32PlusHeader; }
- static inline bool classof(const Binary *v) { return v->isCOFF(); }
+ static bool classof(const Binary *v) { return v->isCOFF(); }
};
// The iterator for the import directory table.
diff --git a/include/llvm/Object/COFFImportFile.h b/include/llvm/Object/COFFImportFile.h
index 78044a2832fa..060f965233e1 100644
--- a/include/llvm/Object/COFFImportFile.h
+++ b/include/llvm/Object/COFFImportFile.h
@@ -33,7 +33,7 @@ public:
COFFImportFile(MemoryBufferRef Source)
: SymbolicFile(ID_COFFImportFile, Source) {}
- static inline bool classof(Binary const *V) { return V->isCOFFImportFile(); }
+ static bool classof(Binary const *V) { return V->isCOFFImportFile(); }
void moveSymbolNext(DataRefImpl &Symb) const override { ++Symb.p; }
diff --git a/include/llvm/Object/ELFObjectFile.h b/include/llvm/Object/ELFObjectFile.h
index 2ba3b13f49da..73011f6f9fe1 100644
--- a/include/llvm/Object/ELFObjectFile.h
+++ b/include/llvm/Object/ELFObjectFile.h
@@ -70,7 +70,7 @@ public:
elf_symbol_iterator_range symbols() const;
- static inline bool classof(const Binary *v) { return v->isELF(); }
+ static bool classof(const Binary *v) { return v->isELF(); }
SubtargetFeatures getFeatures() const override;
@@ -389,7 +389,7 @@ public:
const ELFFile<ELFT> *getELFFile() const { return &EF; }
bool isDyldType() const { return isDyldELFObject; }
- static inline bool classof(const Binary *v) {
+ static bool classof(const Binary *v) {
return v->getType() == getELFType(ELFT::TargetEndianness == support::little,
ELFT::Is64Bits);
}
diff --git a/include/llvm/Object/IRObjectFile.h b/include/llvm/Object/IRObjectFile.h
index 3bce7813ee93..9a696bffd1f0 100644
--- a/include/llvm/Object/IRObjectFile.h
+++ b/include/llvm/Object/IRObjectFile.h
@@ -46,7 +46,7 @@ public:
StringRef getTargetTriple() const;
- static inline bool classof(const Binary *v) {
+ static bool classof(const Binary *v) {
return v->isIR();
}
diff --git a/include/llvm/Object/IRSymtab.h b/include/llvm/Object/IRSymtab.h
index 502f133d307d..824a67a672fa 100644
--- a/include/llvm/Object/IRSymtab.h
+++ b/include/llvm/Object/IRSymtab.h
@@ -124,6 +124,18 @@ struct Uncommon {
};
struct Header {
+ /// Version number of the symtab format. This number should be incremented
+ /// when the format changes, but it does not need to be incremented if a
+ /// change to LLVM would cause it to create a different symbol table.
+ Word Version;
+ enum { kCurrentVersion = 0 };
+
+ /// The producer's version string (LLVM_VERSION_STRING " " LLVM_REVISION).
+ /// Consumers should rebuild the symbol table from IR if the producer's
+ /// version does not match the consumer's version due to potential differences
+ /// in symbol table format, symbol enumeration order and so on.
+ Str Producer;
+
Range<Module> Modules;
Range<Comdat> Comdats;
Range<Symbol> Symbols;
@@ -243,6 +255,8 @@ public:
/// copied into an irsymtab::Symbol object.
symbol_range symbols() const;
+ size_t getNumModules() const { return Modules.size(); }
+
/// Returns a slice of the symbol table for the I'th module in the file.
/// The symbols enumerated by this method are ephemeral, but they can be
/// copied into an irsymtab::Symbol object.
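
Recording both a format `Version` and the free-form `Producer` string gives consumers a cheap staleness check: bump `kCurrentVersion` on format changes, and compare `Producer` to catch same-format tables that a different LLVM revision might have populated differently. A simplified consumer-side sketch (hypothetical reader types, mirroring the header fields above):

#include <cstdint>
#include <string>

struct SymtabHeader {
  enum : uint32_t { kCurrentVersion = 0 };
  uint32_t Version;
  std::string Producer; // e.g. LLVM_VERSION_STRING " " LLVM_REVISION
};

// Reuse a cached symbol table only when both checks pass; otherwise
// rebuild it from the IR, as the header comment above recommends.
bool canReuseSymtab(const SymtabHeader &H, const std::string &MyProducer) {
  return H.Version == SymtabHeader::kCurrentVersion &&
         H.Producer == MyProducer;
}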
diff --git a/include/llvm/Object/MachOUniversal.h b/include/llvm/Object/MachOUniversal.h
index 8a6f0fc56971..72837d0970c4 100644
--- a/include/llvm/Object/MachOUniversal.h
+++ b/include/llvm/Object/MachOUniversal.h
@@ -154,7 +154,7 @@ public:
uint32_t getNumberOfObjects() const { return NumberOfObjects; }
// Cast methods.
- static inline bool classof(Binary const *V) {
+ static bool classof(Binary const *V) {
return V->isMachOUniversalBinary();
}
diff --git a/include/llvm/Object/ObjectFile.h b/include/llvm/Object/ObjectFile.h
index 6b5b9d95fcf3..afcad3090703 100644
--- a/include/llvm/Object/ObjectFile.h
+++ b/include/llvm/Object/ObjectFile.h
@@ -313,7 +313,7 @@ public:
return createObjectFile(Object, llvm::file_magic::unknown);
}
- static inline bool classof(const Binary *v) {
+ static bool classof(const Binary *v) {
return v->isObject();
}
diff --git a/include/llvm/Object/SymbolicFile.h b/include/llvm/Object/SymbolicFile.h
index 97eeba6611a2..5b9549bc3449 100644
--- a/include/llvm/Object/SymbolicFile.h
+++ b/include/llvm/Object/SymbolicFile.h
@@ -173,7 +173,7 @@ public:
static Expected<OwningBinary<SymbolicFile>>
createSymbolicFile(StringRef ObjectPath);
- static inline bool classof(const Binary *v) {
+ static bool classof(const Binary *v) {
return v->isSymbolic();
}
};
diff --git a/include/llvm/Object/Wasm.h b/include/llvm/Object/Wasm.h
index 9d53131234f4..5c8445f10f44 100644
--- a/include/llvm/Object/Wasm.h
+++ b/include/llvm/Object/Wasm.h
@@ -100,6 +100,7 @@ public:
const std::vector<wasm::WasmLimits>& memories() const { return Memories; }
const std::vector<wasm::WasmGlobal>& globals() const { return Globals; }
const std::vector<wasm::WasmExport>& exports() const { return Exports; }
+ const wasm::WasmLinkingData& linkingData() const { return LinkingData; }
uint32_t getNumberOfSymbols() const {
return Symbols.size();
@@ -214,6 +215,8 @@ private:
std::vector<WasmSymbol> Symbols;
ArrayRef<uint8_t> CodeSection;
uint32_t StartFunction = -1;
+ bool HasLinkingSection = false;
+ wasm::WasmLinkingData LinkingData;
StringMap<uint32_t> SymbolMap;
};
diff --git a/include/llvm/ObjectYAML/COFFYAML.h b/include/llvm/ObjectYAML/COFFYAML.h
index 719cb1acf6ef..bbceefac3d94 100644
--- a/include/llvm/ObjectYAML/COFFYAML.h
+++ b/include/llvm/ObjectYAML/COFFYAML.h
@@ -15,14 +15,18 @@
#define LLVM_OBJECTYAML_COFFYAML_H
#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/ObjectYAML/CodeViewYAMLDebugSections.h"
#include "llvm/ObjectYAML/CodeViewYAMLTypes.h"
#include "llvm/ObjectYAML/YAML.h"
+#include <cstdint>
+#include <vector>
namespace llvm {
namespace COFF {
+
inline Characteristics operator|(Characteristics a, Characteristics b) {
uint32_t Ret = static_cast<uint32_t>(a) | static_cast<uint32_t>(b);
return static_cast<Characteristics>(Ret);
@@ -39,60 +43,67 @@ inline DLLCharacteristics operator|(DLLCharacteristics a,
uint16_t Ret = static_cast<uint16_t>(a) | static_cast<uint16_t>(b);
return static_cast<DLLCharacteristics>(Ret);
}
-}
+
+} // end namespace COFF
// The structure of the yaml files is not an exact 1:1 match to COFF. In order
// to use yaml::IO, we use these structures which are closer to the source.
namespace COFFYAML {
- LLVM_YAML_STRONG_TYPEDEF(uint8_t, COMDATType)
- LLVM_YAML_STRONG_TYPEDEF(uint32_t, WeakExternalCharacteristics)
- LLVM_YAML_STRONG_TYPEDEF(uint8_t, AuxSymbolType)
-
- struct Relocation {
- uint32_t VirtualAddress;
- uint16_t Type;
- StringRef SymbolName;
- };
-
- struct Section {
- COFF::section Header;
- unsigned Alignment = 0;
- yaml::BinaryRef SectionData;
- std::vector<CodeViewYAML::YAMLDebugSubsection> DebugS;
- std::vector<CodeViewYAML::LeafRecord> DebugT;
- std::vector<Relocation> Relocations;
- StringRef Name;
- Section();
- };
-
- struct Symbol {
- COFF::symbol Header;
- COFF::SymbolBaseType SimpleType = COFF::IMAGE_SYM_TYPE_NULL;
- COFF::SymbolComplexType ComplexType = COFF::IMAGE_SYM_DTYPE_NULL;
- Optional<COFF::AuxiliaryFunctionDefinition> FunctionDefinition;
- Optional<COFF::AuxiliarybfAndefSymbol> bfAndefSymbol;
- Optional<COFF::AuxiliaryWeakExternal> WeakExternal;
- StringRef File;
- Optional<COFF::AuxiliarySectionDefinition> SectionDefinition;
- Optional<COFF::AuxiliaryCLRToken> CLRToken;
- StringRef Name;
- Symbol();
- };
-
- struct PEHeader {
- COFF::PE32Header Header;
- Optional<COFF::DataDirectory> DataDirectories[COFF::NUM_DATA_DIRECTORIES];
- };
-
- struct Object {
- Optional<PEHeader> OptionalHeader;
- COFF::header Header;
- std::vector<Section> Sections;
- std::vector<Symbol> Symbols;
- Object();
- };
-}
-}
+
+LLVM_YAML_STRONG_TYPEDEF(uint8_t, COMDATType)
+LLVM_YAML_STRONG_TYPEDEF(uint32_t, WeakExternalCharacteristics)
+LLVM_YAML_STRONG_TYPEDEF(uint8_t, AuxSymbolType)
+
+struct Relocation {
+ uint32_t VirtualAddress;
+ uint16_t Type;
+ StringRef SymbolName;
+};
+
+struct Section {
+ COFF::section Header;
+ unsigned Alignment = 0;
+ yaml::BinaryRef SectionData;
+ std::vector<CodeViewYAML::YAMLDebugSubsection> DebugS;
+ std::vector<CodeViewYAML::LeafRecord> DebugT;
+ std::vector<Relocation> Relocations;
+ StringRef Name;
+
+ Section();
+};
+
+struct Symbol {
+ COFF::symbol Header;
+ COFF::SymbolBaseType SimpleType = COFF::IMAGE_SYM_TYPE_NULL;
+ COFF::SymbolComplexType ComplexType = COFF::IMAGE_SYM_DTYPE_NULL;
+ Optional<COFF::AuxiliaryFunctionDefinition> FunctionDefinition;
+ Optional<COFF::AuxiliarybfAndefSymbol> bfAndefSymbol;
+ Optional<COFF::AuxiliaryWeakExternal> WeakExternal;
+ StringRef File;
+ Optional<COFF::AuxiliarySectionDefinition> SectionDefinition;
+ Optional<COFF::AuxiliaryCLRToken> CLRToken;
+ StringRef Name;
+
+ Symbol();
+};
+
+struct PEHeader {
+ COFF::PE32Header Header;
+ Optional<COFF::DataDirectory> DataDirectories[COFF::NUM_DATA_DIRECTORIES];
+};
+
+struct Object {
+ Optional<PEHeader> OptionalHeader;
+ COFF::header Header;
+ std::vector<Section> Sections;
+ std::vector<Symbol> Symbols;
+
+ Object();
+};
+
+} // end namespace COFFYAML
+
+} // end namespace llvm
LLVM_YAML_IS_SEQUENCE_VECTOR(COFFYAML::Section)
LLVM_YAML_IS_SEQUENCE_VECTOR(COFFYAML::Symbol)
@@ -224,4 +235,4 @@ struct MappingTraits<COFFYAML::Object> {
} // end namespace yaml
} // end namespace llvm
-#endif
+#endif // LLVM_OBJECTYAML_COFFYAML_H
diff --git a/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h b/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h
index 8180e0fc83f4..d620008e22d2 100644
--- a/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h
+++ b/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h
@@ -1,4 +1,4 @@
-//===- CodeViewYAMLDebugSections.h - CodeView YAMLIO debug sections -------===//
+//=- CodeViewYAMLDebugSections.h - CodeView YAMLIO debug sections -*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -15,27 +15,33 @@
#ifndef LLVM_OBJECTYAML_CODEVIEWYAMLDEBUGSECTIONS_H
#define LLVM_OBJECTYAML_CODEVIEWYAMLDEBUGSECTIONS_H
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
-#include "llvm/ObjectYAML/YAML.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <cstdint>
+#include <memory>
+#include <vector>
namespace llvm {
namespace codeview {
-class DebugStringTableSubsection;
-class DebugStringTableSubsectionRef;
-class DebugChecksumsSubsectionRef;
-class DebugStringTableSubsection;
-class DebugChecksumsSubsection;
+
class StringsAndChecksums;
class StringsAndChecksumsRef;
-}
+
+} // end namespace codeview
+
namespace CodeViewYAML {
namespace detail {
+
struct YAMLSubsectionBase;
-}
+
+} // end namespace detail
struct YAMLFrameData {
uint32_t RvaStart;
@@ -87,7 +93,6 @@ struct SourceLineInfo {
uint32_t RelocSegment;
codeview::LineFlags Flags;
uint32_t CodeSize;
-
std::vector<SourceLineBlock> Blocks;
};
@@ -124,11 +129,12 @@ fromDebugS(ArrayRef<uint8_t> Data, const codeview::StringsAndChecksumsRef &SC);
void initializeStringsAndChecksums(ArrayRef<YAMLDebugSubsection> Sections,
codeview::StringsAndChecksums &SC);
-} // namespace CodeViewYAML
-} // namespace llvm
+} // end namespace CodeViewYAML
+
+} // end namespace llvm
LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::YAMLDebugSubsection)
LLVM_YAML_IS_SEQUENCE_VECTOR(CodeViewYAML::YAMLDebugSubsection)
-#endif
+#endif // LLVM_OBJECTYAML_CODEVIEWYAMLDEBUGSECTIONS_H
diff --git a/include/llvm/ObjectYAML/CodeViewYAMLSymbols.h b/include/llvm/ObjectYAML/CodeViewYAMLSymbols.h
index 9b411e8b074f..791193c78f19 100644
--- a/include/llvm/ObjectYAML/CodeViewYAMLSymbols.h
+++ b/include/llvm/ObjectYAML/CodeViewYAMLSymbols.h
@@ -17,13 +17,18 @@
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
-#include "llvm/ObjectYAML/YAML.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <memory>
namespace llvm {
namespace CodeViewYAML {
+
namespace detail {
+
struct SymbolRecordBase;
-}
+
+} // end namespace detail
struct SymbolRecord {
std::shared_ptr<detail::SymbolRecordBase> Symbol;
@@ -31,13 +36,14 @@ struct SymbolRecord {
codeview::CVSymbol
toCodeViewSymbol(BumpPtrAllocator &Allocator,
codeview::CodeViewContainer Container) const;
+
static Expected<SymbolRecord> fromCodeViewSymbol(codeview::CVSymbol Symbol);
};
-} // namespace CodeViewYAML
-} // namespace llvm
+} // end namespace CodeViewYAML
+} // end namespace llvm
LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::SymbolRecord)
LLVM_YAML_IS_SEQUENCE_VECTOR(CodeViewYAML::SymbolRecord)
-#endif
+#endif // LLVM_OBJECTYAML_CODEVIEWYAMLSYMBOLS_H
diff --git a/include/llvm/ObjectYAML/CodeViewYAMLTypes.h b/include/llvm/ObjectYAML/CodeViewYAMLTypes.h
index e97d5f92bf7f..6746fd60b6cb 100644
--- a/include/llvm/ObjectYAML/CodeViewYAMLTypes.h
+++ b/include/llvm/ObjectYAML/CodeViewYAMLTypes.h
@@ -1,4 +1,4 @@
-//===- CodeViewYAMLTypes.h - CodeView YAMLIO Type Record implementation ---===//
+//==- CodeViewYAMLTypes.h - CodeView YAMLIO Type implementation --*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
@@ -15,20 +15,31 @@
#ifndef LLVM_OBJECTYAML_CODEVIEWYAMLTYPES_H
#define LLVM_OBJECTYAML_CODEVIEWYAMLTYPES_H
-#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
-#include "llvm/ObjectYAML/YAML.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <cstdint>
+#include <memory>
+#include <vector>
namespace llvm {
+
namespace codeview {
+
class TypeTableBuilder;
-}
+
+} // end namespace codeview
+
namespace CodeViewYAML {
+
namespace detail {
+
struct LeafRecordBase;
struct MemberRecordBase;
-}
+
+} // end namespace detail
struct MemberRecord {
std::shared_ptr<detail::MemberRecordBase> Member;
@@ -44,8 +55,10 @@ struct LeafRecord {
std::vector<LeafRecord> fromDebugT(ArrayRef<uint8_t> DebugT);
ArrayRef<uint8_t> toDebugT(ArrayRef<LeafRecord>, BumpPtrAllocator &Alloc);
-} // namespace CodeViewYAML
-} // namespace llvm
+
+} // end namespace CodeViewYAML
+
+} // end namespace llvm
LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::LeafRecord)
LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::MemberRecord)
@@ -53,4 +66,4 @@ LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::MemberRecord)
LLVM_YAML_IS_SEQUENCE_VECTOR(CodeViewYAML::LeafRecord)
LLVM_YAML_IS_SEQUENCE_VECTOR(CodeViewYAML::MemberRecord)
-#endif
+#endif // LLVM_OBJECTYAML_CODEVIEWYAMLTYPES_H
diff --git a/include/llvm/ObjectYAML/DWARFEmitter.h b/include/llvm/ObjectYAML/DWARFEmitter.h
index ce231cc0ce68..0d7d8b4efbdf 100644
--- a/include/llvm/ObjectYAML/DWARFEmitter.h
+++ b/include/llvm/ObjectYAML/DWARFEmitter.h
@@ -1,5 +1,4 @@
-//===--- DWARFEmitter.h - -------------------------------------------*- C++
-//-*-===//
+//===--- DWARFEmitter.h - ---------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,6 +9,7 @@
/// \file
/// \brief Common declarations for yaml2obj
//===----------------------------------------------------------------------===//
+
#ifndef LLVM_OBJECTYAML_DWARFEMITTER_H
#define LLVM_OBJECTYAML_DWARFEMITTER_H
@@ -19,30 +19,31 @@
#include "llvm/Support/Host.h"
#include "llvm/Support/MemoryBuffer.h"
#include <memory>
-#include <vector>
namespace llvm {
+
class raw_ostream;
namespace DWARFYAML {
+
struct Data;
struct PubSection;
-void EmitDebugAbbrev(llvm::raw_ostream &OS, const llvm::DWARFYAML::Data &DI);
-void EmitDebugStr(llvm::raw_ostream &OS, const llvm::DWARFYAML::Data &DI);
+void EmitDebugAbbrev(raw_ostream &OS, const Data &DI);
+void EmitDebugStr(raw_ostream &OS, const Data &DI);
-void EmitDebugAranges(llvm::raw_ostream &OS, const llvm::DWARFYAML::Data &DI);
-void EmitPubSection(llvm::raw_ostream &OS,
- const llvm::DWARFYAML::PubSection &Sect,
+void EmitDebugAranges(raw_ostream &OS, const Data &DI);
+void EmitPubSection(raw_ostream &OS, const PubSection &Sect,
bool IsLittleEndian);
-void EmitDebugInfo(llvm::raw_ostream &OS, const llvm::DWARFYAML::Data &DI);
-void EmitDebugLine(llvm::raw_ostream &OS, const llvm::DWARFYAML::Data &DI);
+void EmitDebugInfo(raw_ostream &OS, const Data &DI);
+void EmitDebugLine(raw_ostream &OS, const Data &DI);
Expected<StringMap<std::unique_ptr<MemoryBuffer>>>
EmitDebugSections(StringRef YAMLString,
bool IsLittleEndian = sys::IsLittleEndianHost);
-} // namespace DWARFYAML
-} // namespace llvm
+} // end namespace DWARFYAML
+
+} // end namespace llvm
-#endif
+#endif // LLVM_OBJECTYAML_DWARFEMITTER_H
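
With the emitters simplified to unqualified names, the usual entry point remains EmitDebugSections, which returns the emitted sections keyed by section name. A hedged usage sketch (emitFromYAML is a hypothetical driver; only the EmitDebugSections call comes from the header above):

    #include "llvm/ObjectYAML/DWARFEmitter.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    // Emit all debug sections described by a YAML string and report their
    // sizes; errors from the emitter are propagated to the caller.
    Error emitFromYAML(StringRef Yaml) {
      auto SectionsOrErr = DWARFYAML::EmitDebugSections(Yaml);
      if (!SectionsOrErr)
        return SectionsOrErr.takeError();
      for (auto &Entry : *SectionsOrErr)
        outs() << Entry.getKey() << ": "
               << Entry.getValue()->getBufferSize() << " bytes\n";
      return Error::success();
    }
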
diff --git a/include/llvm/ObjectYAML/DWARFYAML.h b/include/llvm/ObjectYAML/DWARFYAML.h
index 75e9112e121a..2162f0fef852 100644
--- a/include/llvm/ObjectYAML/DWARFYAML.h
+++ b/include/llvm/ObjectYAML/DWARFYAML.h
@@ -16,8 +16,11 @@
#ifndef LLVM_OBJECTYAML_DWARFYAML_H
#define LLVM_OBJECTYAML_DWARFYAML_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Dwarf.h"
-#include "llvm/ObjectYAML/YAML.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <cstdint>
+#include <vector>
namespace llvm {
namespace DWARFYAML {
@@ -76,13 +79,11 @@ struct PubEntry {
};
struct PubSection {
- PubSection() : IsGNUStyle(false) {}
-
InitialLength Length;
uint16_t Version;
uint32_t UnitOffset;
uint32_t UnitSize;
- bool IsGNUStyle;
+ bool IsGNUStyle = false;
std::vector<PubEntry> Entries;
};
@@ -158,12 +159,10 @@ struct Data {
bool isEmpty() const;
};
-} // namespace llvm::DWARFYAML
-} // namespace llvm
+} // end namespace DWARFYAML
+} // end namespace llvm
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint8_t)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::Hex64)
-LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::StringRef)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::Hex8)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::AttributeAbbrev)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::Abbrev)
@@ -304,7 +303,7 @@ template <> struct ScalarEnumerationTraits<dwarf::Constants> {
}
};
-} // namespace llvm::yaml
-} // namespace llvm
+} // end namespace yaml
+} // end namespace llvm
-#endif
+#endif // LLVM_OBJECTYAML_DWARFYAML_H
diff --git a/include/llvm/ObjectYAML/ELFYAML.h b/include/llvm/ObjectYAML/ELFYAML.h
index 9d62ec27ad31..ed455311696e 100644
--- a/include/llvm/ObjectYAML/ELFYAML.h
+++ b/include/llvm/ObjectYAML/ELFYAML.h
@@ -16,8 +16,12 @@
#ifndef LLVM_OBJECTYAML_ELFYAML_H
#define LLVM_OBJECTYAML_ELFYAML_H
-#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ObjectYAML/YAML.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <cstdint>
+#include <memory>
+#include <vector>
namespace llvm {
namespace ELFYAML {
@@ -66,6 +70,7 @@ struct FileHeader {
ELF_EF Flags;
llvm::yaml::Hex64 Entry;
};
+
struct Symbol {
StringRef Name;
ELF_STT Type;
@@ -74,6 +79,7 @@ struct Symbol {
llvm::yaml::Hex64 Size;
uint8_t Other;
};
+
struct LocalGlobalWeakSymbols {
std::vector<Symbol> Local;
std::vector<Symbol> Global;
@@ -100,13 +106,16 @@ struct Section {
StringRef Link;
StringRef Info;
llvm::yaml::Hex64 AddressAlign;
+
Section(SectionKind Kind) : Kind(Kind) {}
virtual ~Section();
};
struct RawContentSection : Section {
yaml::BinaryRef Content;
llvm::yaml::Hex64 Size;
+
RawContentSection() : Section(SectionKind::RawContent) {}
+
static bool classof(const Section *S) {
return S->Kind == SectionKind::RawContent;
}
@@ -114,7 +123,9 @@ struct RawContentSection : Section {
struct NoBitsSection : Section {
llvm::yaml::Hex64 Size;
+
NoBitsSection() : Section(SectionKind::NoBits) {}
+
static bool classof(const Section *S) {
return S->Kind == SectionKind::NoBits;
}
@@ -124,7 +135,9 @@ struct Group : Section {
// Members of a group contain a flag and a list of section indices
// that are part of the group.
std::vector<SectionOrType> Members;
+
Group() : Section(SectionKind::Group) {}
+
static bool classof(const Section *S) {
return S->Kind == SectionKind::Group;
}
@@ -136,9 +149,12 @@ struct Relocation {
ELF_REL Type;
StringRef Symbol;
};
+
struct RelocationSection : Section {
std::vector<Relocation> Relocations;
+
RelocationSection() : Section(SectionKind::Relocation) {}
+
static bool classof(const Section *S) {
return S->Kind == SectionKind::Relocation;
}
@@ -157,7 +173,9 @@ struct MipsABIFlags : Section {
MIPS_AFL_ASE ASEs;
MIPS_AFL_FLAGS1 Flags1;
llvm::yaml::Hex32 Flags2;
+
MipsABIFlags() : Section(SectionKind::MipsABIFlags) {}
+
static bool classof(const Section *S) {
return S->Kind == SectionKind::MipsABIFlags;
}
@@ -316,4 +334,4 @@ template <> struct MappingTraits<ELFYAML::SectionOrType> {
} // end namespace yaml
} // end namespace llvm
-#endif
+#endif // LLVM_OBJECTYAML_ELFYAML_H
diff --git a/include/llvm/ObjectYAML/MachOYAML.h b/include/llvm/ObjectYAML/MachOYAML.h
index 59aca9a1ddf2..305497b6aa6a 100644
--- a/include/llvm/ObjectYAML/MachOYAML.h
+++ b/include/llvm/ObjectYAML/MachOYAML.h
@@ -16,9 +16,13 @@
#ifndef LLVM_OBJECTYAML_MACHOYAML_H
#define LLVM_OBJECTYAML_MACHOYAML_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/ObjectYAML/DWARFYAML.h"
-#include "llvm/ObjectYAML/YAML.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <cstdint>
+#include <string>
+#include <vector>
namespace llvm {
namespace MachOYAML {
@@ -51,6 +55,7 @@ struct FileHeader {
struct LoadCommand {
virtual ~LoadCommand();
+
llvm::MachO::macho_load_command Data;
std::vector<Section> Sections;
std::vector<MachO::build_tool_version> Tools;
@@ -66,6 +71,7 @@ struct NListEntry {
uint16_t n_desc;
uint64_t n_value;
};
+
struct RebaseOpcode {
MachO::RebaseOpcode Opcode;
uint8_t Imm;
@@ -81,15 +87,12 @@ struct BindOpcode {
};
struct ExportEntry {
- ExportEntry()
- : TerminalSize(0), NodeOffset(0), Name(), Flags(0), Address(0), Other(0),
- ImportName(), Children() {}
- uint64_t TerminalSize;
- uint64_t NodeOffset;
+ uint64_t TerminalSize = 0;
+ uint64_t NodeOffset = 0;
std::string Name;
- llvm::yaml::Hex64 Flags;
- llvm::yaml::Hex64 Address;
- llvm::yaml::Hex64 Other;
+ llvm::yaml::Hex64 Flags = 0;
+ llvm::yaml::Hex64 Address = 0;
+ llvm::yaml::Hex64 Other = 0;
std::string ImportName;
std::vector<MachOYAML::ExportEntry> Children;
};
@@ -135,12 +138,11 @@ struct UniversalBinary {
std::vector<Object> Slices;
};
-} // namespace llvm::MachOYAML
-} // namespace llvm
+} // end namespace MachOYAML
+} // end namespace llvm
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MachOYAML::LoadCommand)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MachOYAML::Section)
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(int64_t)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MachOYAML::RebaseOpcode)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MachOYAML::BindOpcode)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MachOYAML::ExportEntry)
@@ -150,6 +152,9 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MachOYAML::FatArch)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MachO::build_tool_version)
namespace llvm {
+
+class raw_ostream;
+
namespace yaml {
template <> struct MappingTraits<MachOYAML::FileHeader> {
@@ -251,22 +256,20 @@ template <> struct ScalarEnumerationTraits<MachO::BindOpcode> {
};
// This trait is used for 16-byte chars in Mach structures used for strings
-typedef char char_16[16];
+using char_16 = char[16];
template <> struct ScalarTraits<char_16> {
- static void output(const char_16 &Val, void *, llvm::raw_ostream &Out);
-
+ static void output(const char_16 &Val, void *, raw_ostream &Out);
static StringRef input(StringRef Scalar, void *, char_16 &Val);
static bool mustQuote(StringRef S);
};
// This trait is used for UUIDs. It reads and writes them matching otool's
// formatting style.
-typedef uint8_t uuid_t[16];
+using uuid_t = uint8_t[16];
template <> struct ScalarTraits<uuid_t> {
- static void output(const uuid_t &Val, void *, llvm::raw_ostream &Out);
-
+ static void output(const uuid_t &Val, void *, raw_ostream &Out);
static StringRef input(StringRef Scalar, void *, uuid_t &Val);
static bool mustQuote(StringRef S);
};
@@ -297,8 +300,8 @@ template <> struct MappingTraits<MachO::section_64> {
static void mapping(IO &IO, MachO::section_64 &LoadCommand);
};
-} // namespace llvm::yaml
+} // end namespace yaml
-} // namespace llvm
+} // end namespace llvm
-#endif
+#endif // LLVM_OBJECTYAML_MACHOYAML_H
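
The char_16 and uuid_t traits above follow the standard ScalarTraits shape: output, input returning an empty StringRef on success, and mustQuote. A sketch of the same shape for a made-up type (Percentage is illustrative, not part of LLVM):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/YAMLTraits.h"
    #include "llvm/Support/raw_ostream.h"

    struct Percentage { unsigned Value; };

    namespace llvm {
    namespace yaml {

    // Serializes as "42%" and parses the same form back; a non-empty return
    // from input() is reported to the user as a parse error.
    template <> struct ScalarTraits<Percentage> {
      static void output(const Percentage &P, void *, raw_ostream &Out) {
        Out << P.Value << '%';
      }
      static StringRef input(StringRef Scalar, void *, Percentage &P) {
        if (Scalar.consume_back("%") && !Scalar.getAsInteger(10, P.Value))
          return StringRef(); // success
        return "expected '<number>%'";
      }
      static bool mustQuote(StringRef) { return false; }
    };

    } // end namespace yaml
    } // end namespace llvm
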
diff --git a/include/llvm/ObjectYAML/ObjectYAML.h b/include/llvm/ObjectYAML/ObjectYAML.h
index 36d6ed5417cf..00ce86430fca 100644
--- a/include/llvm/ObjectYAML/ObjectYAML.h
+++ b/include/llvm/ObjectYAML/ObjectYAML.h
@@ -15,10 +15,13 @@
#include "llvm/ObjectYAML/MachOYAML.h"
#include "llvm/ObjectYAML/WasmYAML.h"
#include "llvm/Support/YAMLTraits.h"
+#include <memory>
namespace llvm {
namespace yaml {
+class IO;
+
struct YamlObjectFile {
std::unique_ptr<ELFYAML::Object> Elf;
std::unique_ptr<COFFYAML::Object> Coff;
@@ -31,7 +34,7 @@ template <> struct MappingTraits<YamlObjectFile> {
static void mapping(IO &IO, YamlObjectFile &ObjectFile);
};
-} // namespace yaml
-} // namespace llvm
+} // end namespace yaml
+} // end namespace llvm
-#endif
+#endif // LLVM_OBJECTYAML_OBJECTYAML_H
diff --git a/include/llvm/ObjectYAML/WasmYAML.h b/include/llvm/ObjectYAML/WasmYAML.h
index 74f5664c43ac..6bf08d340eeb 100644
--- a/include/llvm/ObjectYAML/WasmYAML.h
+++ b/include/llvm/ObjectYAML/WasmYAML.h
@@ -16,8 +16,13 @@
#ifndef LLVM_OBJECTYAML_WASMYAML_H
#define LLVM_OBJECTYAML_WASMYAML_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/ObjectYAML/YAML.h"
+#include "llvm/Support/Casting.h"
+#include <cstdint>
+#include <memory>
+#include <vector>
namespace llvm {
namespace WasmYAML {
@@ -104,10 +109,8 @@ struct NameEntry {
};
struct Signature {
- Signature() : Form(wasm::WASM_TYPE_FUNC) {}
-
uint32_t Index;
- SignatureForm Form;
+ SignatureForm Form = wasm::WASM_TYPE_FUNC;
std::vector<ValueType> ParamTypes;
ValueType ReturnType;
};
@@ -128,6 +131,7 @@ struct Section {
struct CustomSection : Section {
explicit CustomSection(StringRef Name)
: Section(wasm::WASM_SEC_CUSTOM), Name(Name) {}
+
static bool classof(const Section *S) {
return S->Type == wasm::WASM_SEC_CUSTOM;
}
@@ -138,6 +142,7 @@ struct CustomSection : Section {
struct NameSection : CustomSection {
NameSection() : CustomSection("name") {}
+
static bool classof(const Section *S) {
auto C = dyn_cast<CustomSection>(S);
return C && C->Name == "name";
@@ -148,16 +153,20 @@ struct NameSection : CustomSection {
struct LinkingSection : CustomSection {
LinkingSection() : CustomSection("linking") {}
+
static bool classof(const Section *S) {
auto C = dyn_cast<CustomSection>(S);
return C && C->Name == "linking";
}
std::vector<SymbolInfo> SymbolInfos;
+ uint32_t DataSize;
+ uint32_t DataAlignment;
};
struct TypeSection : Section {
TypeSection() : Section(wasm::WASM_SEC_TYPE) {}
+
static bool classof(const Section *S) {
return S->Type == wasm::WASM_SEC_TYPE;
}
@@ -167,6 +176,7 @@ struct TypeSection : Section {
struct ImportSection : Section {
ImportSection() : Section(wasm::WASM_SEC_IMPORT) {}
+
static bool classof(const Section *S) {
return S->Type == wasm::WASM_SEC_IMPORT;
}
@@ -176,6 +186,7 @@ struct ImportSection : Section {
struct FunctionSection : Section {
FunctionSection() : Section(wasm::WASM_SEC_FUNCTION) {}
+
static bool classof(const Section *S) {
return S->Type == wasm::WASM_SEC_FUNCTION;
}
@@ -185,6 +196,7 @@ struct FunctionSection : Section {
struct TableSection : Section {
TableSection() : Section(wasm::WASM_SEC_TABLE) {}
+
static bool classof(const Section *S) {
return S->Type == wasm::WASM_SEC_TABLE;
}
@@ -194,6 +206,7 @@ struct TableSection : Section {
struct MemorySection : Section {
MemorySection() : Section(wasm::WASM_SEC_MEMORY) {}
+
static bool classof(const Section *S) {
return S->Type == wasm::WASM_SEC_MEMORY;
}
@@ -203,6 +216,7 @@ struct MemorySection : Section {
struct GlobalSection : Section {
GlobalSection() : Section(wasm::WASM_SEC_GLOBAL) {}
+
static bool classof(const Section *S) {
return S->Type == wasm::WASM_SEC_GLOBAL;
}
@@ -212,6 +226,7 @@ struct GlobalSection : Section {
struct ExportSection : Section {
ExportSection() : Section(wasm::WASM_SEC_EXPORT) {}
+
static bool classof(const Section *S) {
return S->Type == wasm::WASM_SEC_EXPORT;
}
@@ -221,6 +236,7 @@ struct ExportSection : Section {
struct StartSection : Section {
StartSection() : Section(wasm::WASM_SEC_START) {}
+
static bool classof(const Section *S) {
return S->Type == wasm::WASM_SEC_START;
}
@@ -230,6 +246,7 @@ struct StartSection : Section {
struct ElemSection : Section {
ElemSection() : Section(wasm::WASM_SEC_ELEM) {}
+
static bool classof(const Section *S) {
return S->Type == wasm::WASM_SEC_ELEM;
}
@@ -239,6 +256,7 @@ struct ElemSection : Section {
struct CodeSection : Section {
CodeSection() : Section(wasm::WASM_SEC_CODE) {}
+
static bool classof(const Section *S) {
return S->Type == wasm::WASM_SEC_CODE;
}
@@ -248,6 +266,7 @@ struct CodeSection : Section {
struct DataSection : Section {
DataSection() : Section(wasm::WASM_SEC_DATA) {}
+
static bool classof(const Section *S) {
return S->Type == wasm::WASM_SEC_DATA;
}
@@ -278,7 +297,6 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::LocalDecl)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::Relocation)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::NameEntry)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::SymbolInfo)
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t)
namespace llvm {
namespace yaml {
@@ -378,4 +396,4 @@ template <> struct ScalarEnumerationTraits<WasmYAML::RelocType> {
} // end namespace yaml
} // end namespace llvm
-#endif
+#endif // LLVM_OBJECTYAML_WASMYAML_H
diff --git a/include/llvm/ObjectYAML/YAML.h b/include/llvm/ObjectYAML/YAML.h
index 7f6836809b6d..29151a269df0 100644
--- a/include/llvm/ObjectYAML/YAML.h
+++ b/include/llvm/ObjectYAML/YAML.h
@@ -10,10 +10,17 @@
#ifndef LLVM_OBJECTYAML_YAML_H
#define LLVM_OBJECTYAML_YAML_H
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/YAMLTraits.h"
+#include <cstdint>
namespace llvm {
+
+class raw_ostream;
+
namespace yaml {
+
/// \brief Specialized YAMLIO scalar type for representing a binary blob.
///
/// A typical use case would be to represent the content of a section in a
@@ -56,18 +63,20 @@ namespace yaml {
/// \endcode
class BinaryRef {
friend bool operator==(const BinaryRef &LHS, const BinaryRef &RHS);
+
/// \brief Either raw binary data, or a string of hex bytes (must always
/// be an even number of characters).
ArrayRef<uint8_t> Data;
+
/// \brief Discriminator between the two states of the `Data` member.
- bool DataIsHexString;
+ bool DataIsHexString = true;
public:
+ BinaryRef() = default;
BinaryRef(ArrayRef<uint8_t> Data) : Data(Data), DataIsHexString(false) {}
BinaryRef(StringRef Data)
- : Data(reinterpret_cast<const uint8_t *>(Data.data()), Data.size()),
- DataIsHexString(true) {}
- BinaryRef() : DataIsHexString(true) {}
+ : Data(reinterpret_cast<const uint8_t *>(Data.data()), Data.size()) {}
+
/// \brief The number of bytes that are represented by this BinaryRef.
/// This is the number of bytes that writeAsBinary() will write.
ArrayRef<uint8_t>::size_type binary_size() const {
@@ -75,9 +84,11 @@ public:
return Data.size() / 2;
return Data.size();
}
+
/// \brief Write the contents (regardless of whether it is binary or a
/// hex string) as binary to the given raw_ostream.
void writeAsBinary(raw_ostream &OS) const;
+
/// \brief Write the contents (regardless of whether it is binary or a
/// hex string) as hex to the given raw_ostream.
///
@@ -94,10 +105,13 @@ inline bool operator==(const BinaryRef &LHS, const BinaryRef &RHS) {
}
template <> struct ScalarTraits<BinaryRef> {
- static void output(const BinaryRef &, void *, llvm::raw_ostream &);
+ static void output(const BinaryRef &, void *, raw_ostream &);
static StringRef input(StringRef, void *, BinaryRef &);
static bool mustQuote(StringRef S) { return needsQuotes(S); }
};
-}
-}
-#endif
+
+} // end namespace yaml
+
+} // end namespace llvm
+
+#endif // LLVM_OBJECTYAML_YAML_H
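
After this cleanup BinaryRef still has two states, raw bytes or a hex string, now discriminated by a defaulted member instead of three constructors repeating the flag. A short usage sketch (demoBinaryRef is illustrative):

    #include "llvm/ObjectYAML/YAML.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    // Both refs below describe the same four bytes; binary_size() halves the
    // length of the hex form so the two states compare consistently.
    void demoBinaryRef() {
      const uint8_t Raw[] = {0xde, 0xad, 0xbe, 0xef};
      yaml::BinaryRef FromBytes{ArrayRef<uint8_t>(Raw)}; // DataIsHexString = false
      yaml::BinaryRef FromHex{StringRef("deadbeef")};    // DataIsHexString = true
      outs() << FromBytes.binary_size() << " == " << FromHex.binary_size() << "\n";
      FromHex.writeAsBinary(outs()); // decodes the hex digits to raw bytes
    }
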
diff --git a/include/llvm/Passes/PassBuilder.h b/include/llvm/Passes/PassBuilder.h
index 12b05e4ff0c5..ff1958397331 100644
--- a/include/llvm/Passes/PassBuilder.h
+++ b/include/llvm/Passes/PassBuilder.h
@@ -31,8 +31,8 @@ class TargetMachine;
struct PGOOptions {
std::string ProfileGenFile = "";
std::string ProfileUseFile = "";
+ std::string SampleProfileFile = "";
bool RunProfileGen = false;
- bool SamplePGO = false;
};
/// \brief This class provides access to building LLVM's passes.
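
Replacing the SamplePGO flag with SampleProfileFile means the active PGO mode is now inferred from which path is non-empty. A hedged sketch of how a frontend might fill the struct (only the field names come from the hunk above; the helper is illustrative):

    #include "llvm/Passes/PassBuilder.h"
    using namespace llvm;

    // Select sample-based PGO by setting only SampleProfileFile; RunProfileGen
    // stays false because generation applies to instrumented PGO only.
    PGOOptions makeSamplePGOOptions(std::string SampleProfile) {
      PGOOptions Opts;
      Opts.SampleProfileFile = std::move(SampleProfile);
      Opts.RunProfileGen = false;
      return Opts;
    }
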
diff --git a/include/llvm/ProfileData/Coverage/CoverageMapping.h b/include/llvm/ProfileData/Coverage/CoverageMapping.h
index 0ba792e8dc43..fa9a87aed680 100644
--- a/include/llvm/ProfileData/Coverage/CoverageMapping.h
+++ b/include/llvm/ProfileData/Coverage/CoverageMapping.h
@@ -168,13 +168,21 @@ class CounterExpressionBuilder {
/// expression is added to the builder's collection of expressions.
Counter get(const CounterExpression &E);
+ /// Represents a term in a counter expression tree.
+ struct Term {
+ unsigned CounterID;
+ int Factor;
+
+ Term(unsigned CounterID, int Factor)
+ : CounterID(CounterID), Factor(Factor) {}
+ };
+
/// \brief Gather the terms of the expression tree for processing.
///
/// This collects each addition and subtraction referenced by the counter into
/// a sequence that can be sorted and combined to build a simplified counter
/// expression.
- void extractTerms(Counter C, int Sign,
- SmallVectorImpl<std::pair<unsigned, int>> &Terms);
+ void extractTerms(Counter C, int Sign, SmallVectorImpl<Term> &Terms);
/// \brief Simplifies the given expression tree
/// by getting rid of algebraically redundant operations.
@@ -443,20 +451,9 @@ public:
/// \brief Load the coverage mapping using the given readers.
static Expected<std::unique_ptr<CoverageMapping>>
- load(CoverageMappingReader &CoverageReader,
- IndexedInstrProfReader &ProfileReader);
-
- static Expected<std::unique_ptr<CoverageMapping>>
load(ArrayRef<std::unique_ptr<CoverageMappingReader>> CoverageReaders,
IndexedInstrProfReader &ProfileReader);
- /// \brief Load the coverage mapping from the given files.
- static Expected<std::unique_ptr<CoverageMapping>>
- load(StringRef ObjectFilename, StringRef ProfileFilename,
- StringRef Arch = StringRef()) {
- return load(ArrayRef<StringRef>(ObjectFilename), ProfileFilename, Arch);
- }
-
static Expected<std::unique_ptr<CoverageMapping>>
load(ArrayRef<StringRef> ObjectFilenames, StringRef ProfileFilename,
StringRef Arch = StringRef());
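
Naming the (CounterID, Factor) pair keeps the simplification code readable where pair.first and pair.second would not be. A self-contained sketch of the combining step the extractTerms comment describes (combineTerms is illustrative, not the LLVM implementation):

    #include <algorithm>
    #include <vector>

    struct Term {
      unsigned CounterID;
      int Factor; // +1 per addition, -1 per subtraction of this counter
    };

    // Sort by counter, merge factors of identical counters, and drop terms
    // that cancel to zero, so e.g. (C1 + C2) - C2 simplifies to C1.
    std::vector<Term> combineTerms(std::vector<Term> Terms) {
      std::sort(Terms.begin(), Terms.end(),
                [](const Term &L, const Term &R) {
                  return L.CounterID < R.CounterID;
                });
      std::vector<Term> Out;
      for (const Term &T : Terms) {
        if (!Out.empty() && Out.back().CounterID == T.CounterID)
          Out.back().Factor += T.Factor;
        else
          Out.push_back(T);
      }
      Out.erase(std::remove_if(Out.begin(), Out.end(),
                               [](const Term &T) { return T.Factor == 0; }),
                Out.end());
      return Out;
    }
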
diff --git a/include/llvm/ProfileData/InstrProf.h b/include/llvm/ProfileData/InstrProf.h
index 573ea90cfd00..a6b2850ccd22 100644
--- a/include/llvm/ProfileData/InstrProf.h
+++ b/include/llvm/ProfileData/InstrProf.h
@@ -598,6 +598,28 @@ struct InstrProfRecord {
InstrProfRecord() = default;
InstrProfRecord(StringRef Name, uint64_t Hash, std::vector<uint64_t> Counts)
: Name(Name), Hash(Hash), Counts(std::move(Counts)) {}
+ InstrProfRecord(InstrProfRecord &&) = default;
+ InstrProfRecord(const InstrProfRecord &RHS)
+ : Name(RHS.Name), Hash(RHS.Hash), Counts(RHS.Counts), SIPE(RHS.SIPE),
+ ValueData(RHS.ValueData
+ ? llvm::make_unique<ValueProfData>(*RHS.ValueData)
+ : nullptr) {}
+ InstrProfRecord &operator=(InstrProfRecord &&) = default;
+ InstrProfRecord &operator=(const InstrProfRecord &RHS) {
+ Name = RHS.Name;
+ Hash = RHS.Hash;
+ Counts = RHS.Counts;
+ SIPE = RHS.SIPE;
+ if (!RHS.ValueData) {
+ ValueData = nullptr;
+ return *this;
+ }
+ if (!ValueData)
+ ValueData = llvm::make_unique<ValueProfData>(*RHS.ValueData);
+ else
+ *ValueData = *RHS.ValueData;
+ return *this;
+ }
using ValueMapType = std::vector<std::pair<uint64_t, uint64_t>>;
@@ -647,12 +669,9 @@ struct InstrProfRecord {
/// Sort value profile data (per site) by count.
void sortValueData() {
- for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
- std::vector<InstrProfValueSiteRecord> &SiteRecords =
- getValueSitesForKind(Kind);
- for (auto &SR : SiteRecords)
+ for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
+ for (auto &SR : getValueSitesForKind(Kind))
SR.sortByCount();
- }
}
/// Clear value data entries and edge counters.
@@ -662,36 +681,54 @@ struct InstrProfRecord {
}
/// Clear value data entries
- void clearValueData() {
- for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
- getValueSitesForKind(Kind).clear();
- }
+ void clearValueData() { ValueData = nullptr; }
/// Get the error contained within the record's soft error counter.
Error takeError() { return SIPE.takeError(); }
private:
- std::vector<InstrProfValueSiteRecord> IndirectCallSites;
- std::vector<InstrProfValueSiteRecord> MemOPSizes;
+ struct ValueProfData {
+ std::vector<InstrProfValueSiteRecord> IndirectCallSites;
+ std::vector<InstrProfValueSiteRecord> MemOPSizes;
+ };
+ std::unique_ptr<ValueProfData> ValueData;
- const std::vector<InstrProfValueSiteRecord> &
+ MutableArrayRef<InstrProfValueSiteRecord>
+ getValueSitesForKind(uint32_t ValueKind) {
+ // Cast to /add/ const (should be an implicit_cast, ideally, if that's ever
+ // implemented in LLVM) to call the const overload of this function, then
+ // cast away the constness from the result.
+ auto AR = const_cast<const InstrProfRecord *>(this)->getValueSitesForKind(
+ ValueKind);
+ return makeMutableArrayRef(
+ const_cast<InstrProfValueSiteRecord *>(AR.data()), AR.size());
+ }
+ ArrayRef<InstrProfValueSiteRecord>
getValueSitesForKind(uint32_t ValueKind) const {
+ if (!ValueData)
+ return None;
switch (ValueKind) {
case IPVK_IndirectCallTarget:
- return IndirectCallSites;
+ return ValueData->IndirectCallSites;
case IPVK_MemOPSize:
- return MemOPSizes;
+ return ValueData->MemOPSizes;
default:
llvm_unreachable("Unknown value kind!");
}
- return IndirectCallSites;
}
std::vector<InstrProfValueSiteRecord> &
- getValueSitesForKind(uint32_t ValueKind) {
- return const_cast<std::vector<InstrProfValueSiteRecord> &>(
- const_cast<const InstrProfRecord *>(this)
- ->getValueSitesForKind(ValueKind));
+ getOrCreateValueSitesForKind(uint32_t ValueKind) {
+ if (!ValueData)
+ ValueData = llvm::make_unique<ValueProfData>();
+ switch (ValueKind) {
+ case IPVK_IndirectCallTarget:
+ return ValueData->IndirectCallSites;
+ case IPVK_MemOPSize:
+ return ValueData->MemOPSizes;
+ default:
+ llvm_unreachable("Unknown value kind!");
+ }
}
// Map indirect call target name hash to name string.
@@ -765,9 +802,9 @@ uint64_t InstrProfRecord::getValueForSite(InstrProfValueData Dest[],
}
void InstrProfRecord::reserveSites(uint32_t ValueKind, uint32_t NumValueSites) {
- std::vector<InstrProfValueSiteRecord> &ValueSites =
- getValueSitesForKind(ValueKind);
- ValueSites.reserve(NumValueSites);
+ if (!NumValueSites)
+ return;
+ getOrCreateValueSitesForKind(ValueKind).reserve(NumValueSites);
}
inline support::endianness getHostEndianness() {
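
The InstrProfRecord hunks above move the value-profile vectors behind a lazily allocated unique_ptr, which is why the copy operations must now be user-written deep copies. A reduced sketch of the pattern (Record and Payload are illustrative; the real code also reuses an existing payload on assignment instead of reallocating):

    #include <memory>
    #include <vector>

    // Records without value data pay one null pointer instead of two empty
    // vectors; copies clone the payload only when one exists.
    struct Record {
      struct Payload { std::vector<int> A, B; };
      std::unique_ptr<Payload> Data; // null until first use

      Record() = default;
      Record(Record &&) = default;
      Record &operator=(Record &&) = default;
      Record(const Record &RHS)
          : Data(RHS.Data ? std::make_unique<Payload>(*RHS.Data) : nullptr) {}
      Record &operator=(const Record &RHS) {
        Data = RHS.Data ? std::make_unique<Payload>(*RHS.Data) : nullptr;
        return *this;
      }

      Payload &getOrCreate() {
        if (!Data)
          Data = std::make_unique<Payload>();
        return *Data;
      }
    };
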
diff --git a/include/llvm/Support/CMakeLists.txt b/include/llvm/Support/CMakeLists.txt
index c58ccf216303..95752cf01856 100644
--- a/include/llvm/Support/CMakeLists.txt
+++ b/include/llvm/Support/CMakeLists.txt
@@ -9,25 +9,27 @@ function(find_first_existing_file out_var)
endfunction()
macro(find_first_existing_vc_file out_var path)
- find_program(git_executable NAMES git git.exe git.cmd)
- # Run from a subdirectory to force git to print an absolute path.
- execute_process(COMMAND ${git_executable} rev-parse --git-dir
- WORKING_DIRECTORY ${path}/cmake
- RESULT_VARIABLE git_result
- OUTPUT_VARIABLE git_dir
- ERROR_QUIET)
- if(git_result EQUAL 0)
- string(STRIP "${git_dir}" git_dir)
- set(${out_var} "${git_dir}/logs/HEAD")
- # some branchless cases (e.g. 'repo') may not yet have .git/logs/HEAD
- if (NOT EXISTS "${git_dir}/logs/HEAD")
- file(WRITE "${git_dir}/logs/HEAD" "")
+ if ( LLVM_APPEND_VC_REV )
+ find_program(git_executable NAMES git git.exe git.cmd)
+ # Run from a subdirectory to force git to print an absolute path.
+ execute_process(COMMAND ${git_executable} rev-parse --git-dir
+ WORKING_DIRECTORY ${path}/cmake
+ RESULT_VARIABLE git_result
+ OUTPUT_VARIABLE git_dir
+ ERROR_QUIET)
+ if(git_result EQUAL 0)
+ string(STRIP "${git_dir}" git_dir)
+ set(${out_var} "${git_dir}/logs/HEAD")
+ # some branchless cases (e.g. 'repo') may not yet have .git/logs/HEAD
+ if (NOT EXISTS "${git_dir}/logs/HEAD")
+ file(WRITE "${git_dir}/logs/HEAD" "")
+ endif()
+ else()
+ find_first_existing_file(${out_var}
+ "${path}/.svn/wc.db" # SVN 1.7
+ "${path}/.svn/entries" # SVN 1.6
+ )
endif()
- else()
- find_first_existing_file(${out_var}
- "${path}/.svn/wc.db" # SVN 1.7
- "${path}/.svn/entries" # SVN 1.6
- )
endif()
endmacro()
diff --git a/include/llvm/Support/Errno.h b/include/llvm/Support/Errno.h
index 4ce65e7dc83c..35dc1ea7cf84 100644
--- a/include/llvm/Support/Errno.h
+++ b/include/llvm/Support/Errno.h
@@ -16,6 +16,7 @@
#include <cerrno>
#include <string>
+#include <type_traits>
namespace llvm {
namespace sys {
@@ -29,6 +30,16 @@ std::string StrError();
/// Like the no-argument version above, but uses \p errnum instead of errno.
std::string StrError(int errnum);
+template <typename FailT, typename Fun, typename... Args>
+inline auto RetryAfterSignal(const FailT &Fail, const Fun &F,
+ const Args &... As) -> decltype(F(As...)) {
+ decltype(F(As...)) Res;
+ do
+ Res = F(As...);
+ while (Res == Fail && errno == EINTR);
+ return Res;
+}
+
} // namespace sys
} // namespace llvm
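
RetryAfterSignal packages the classic retry-on-EINTR loop for any call that signals failure through a sentinel value plus errno. A usage sketch with POSIX open(2) as the example call site (openRetrying is illustrative):

    #include "llvm/Support/Errno.h"
    #include <fcntl.h>

    // open(2) can fail spuriously with EINTR when a signal arrives; the
    // wrapper re-issues it until it succeeds or fails for a real reason.
    int openRetrying(const char *Path) {
      return llvm::sys::RetryAfterSignal(-1, ::open, Path, O_RDONLY);
    }
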
diff --git a/include/llvm/Support/GenericDomTree.h b/include/llvm/Support/GenericDomTree.h
index 601633d41cff..394a45387d8a 100644
--- a/include/llvm/Support/GenericDomTree.h
+++ b/include/llvm/Support/GenericDomTree.h
@@ -58,40 +58,6 @@ template <typename GT>
using DominatorTreeBaseByGraphTraits =
typename detail::DominatorTreeBaseTraits<GT>::type;
-/// \brief Base class that other, more interesting dominator analyses
-/// inherit from.
-template <class NodeT> class DominatorBase {
-protected:
- std::vector<NodeT *> Roots;
- bool IsPostDominators;
-
- explicit DominatorBase(bool isPostDom)
- : Roots(), IsPostDominators(isPostDom) {}
-
- DominatorBase(DominatorBase &&Arg)
- : Roots(std::move(Arg.Roots)), IsPostDominators(Arg.IsPostDominators) {
- Arg.Roots.clear();
- }
-
- DominatorBase &operator=(DominatorBase &&RHS) {
- Roots = std::move(RHS.Roots);
- IsPostDominators = RHS.IsPostDominators;
- RHS.Roots.clear();
- return *this;
- }
-
-public:
- /// getRoots - Return the root blocks of the current CFG. This may include
- /// multiple blocks if we are computing post dominators. For forward
- /// dominators, this will always be a single block (the entry node).
- ///
- const std::vector<NodeT *> &getRoots() const { return Roots; }
-
- /// isPostDominator - Returns true if analysis based of postdoms
- ///
- bool isPostDominator() const { return IsPostDominators; }
-};
-
/// \brief Base class for the actual dominator tree node.
template <class NodeT> class DomTreeNodeBase {
friend struct PostDominatorTree;
@@ -99,12 +65,14 @@ template <class NodeT> class DomTreeNodeBase {
NodeT *TheBB;
DomTreeNodeBase *IDom;
+ unsigned Level;
std::vector<DomTreeNodeBase *> Children;
mutable unsigned DFSNumIn = ~0;
mutable unsigned DFSNumOut = ~0;
public:
- DomTreeNodeBase(NodeT *BB, DomTreeNodeBase *iDom) : TheBB(BB), IDom(iDom) {}
+ DomTreeNodeBase(NodeT *BB, DomTreeNodeBase *iDom)
+ : TheBB(BB), IDom(iDom), Level(IDom ? IDom->Level + 1 : 0) {}
using iterator = typename std::vector<DomTreeNodeBase *>::iterator;
using const_iterator =
@@ -117,6 +85,7 @@ template <class NodeT> class DomTreeNodeBase {
NodeT *getBlock() const { return TheBB; }
DomTreeNodeBase *getIDom() const { return IDom; }
+ unsigned getLevel() const { return Level; }
const std::vector<DomTreeNodeBase *> &getChildren() const { return Children; }
@@ -134,6 +103,8 @@ template <class NodeT> class DomTreeNodeBase {
if (getNumChildren() != Other->getNumChildren())
return true;
+ if (Level != Other->Level) return true;
+
SmallPtrSet<const NodeT *, 4> OtherChildren;
for (const DomTreeNodeBase *I : *Other) {
const NodeT *Nd = I->getBlock();
@@ -150,18 +121,19 @@ template <class NodeT> class DomTreeNodeBase {
void setIDom(DomTreeNodeBase *NewIDom) {
assert(IDom && "No immediate dominator?");
- if (IDom != NewIDom) {
- typename std::vector<DomTreeNodeBase *>::iterator I =
- find(IDom->Children, this);
- assert(I != IDom->Children.end() &&
- "Not in immediate dominator children set!");
- // I am no longer your child...
- IDom->Children.erase(I);
+ if (IDom == NewIDom) return;
- // Switch to new dominator
- IDom = NewIDom;
- IDom->Children.push_back(this);
- }
+ auto I = find(IDom->Children, this);
+ assert(I != IDom->Children.end() &&
+ "Not in immediate dominator children set!");
+ // I am no longer your child...
+ IDom->Children.erase(I);
+
+ // Switch to new dominator
+ IDom = NewIDom;
+ IDom->Children.push_back(this);
+
+ UpdateLevel();
}
/// getDFSNumIn/getDFSNumOut - These return the DFS visitation order for nodes
@@ -177,6 +149,23 @@ private:
return this->DFSNumIn >= other->DFSNumIn &&
this->DFSNumOut <= other->DFSNumOut;
}
+
+ void UpdateLevel() {
+ assert(IDom);
+ if (Level == IDom->Level + 1) return;
+
+ SmallVector<DomTreeNodeBase *, 64> WorkStack = {this};
+
+ while (!WorkStack.empty()) {
+ DomTreeNodeBase *Current = WorkStack.pop_back_val();
+ Current->Level = Current->IDom->Level + 1;
+
+ for (DomTreeNodeBase *C : *Current) {
+ assert(C->IDom);
+ if (C->Level != C->IDom->Level + 1) WorkStack.push_back(C);
+ }
+ }
+ }
};
template <class NodeT>
@@ -186,9 +175,10 @@ raw_ostream &operator<<(raw_ostream &O, const DomTreeNodeBase<NodeT> *Node) {
else
O << " <<exit node>>";
- O << " {" << Node->getDFSNumIn() << "," << Node->getDFSNumOut() << "}";
+ O << " {" << Node->getDFSNumIn() << "," << Node->getDFSNumOut() << "} ["
+ << Node->getLevel() << "]\n";
- return O << "\n";
+ return O;
}
template <class NodeT>
@@ -201,40 +191,28 @@ void PrintDomTree(const DomTreeNodeBase<NodeT> *N, raw_ostream &O,
PrintDomTree<NodeT>(*I, O, Lev + 1);
}
+namespace DomTreeBuilder {
+template <class NodeT>
+struct SemiNCAInfo;
+
// The calculate routine is provided in a separate header but referenced here.
template <class FuncT, class N>
void Calculate(DominatorTreeBaseByGraphTraits<GraphTraits<N>> &DT, FuncT &F);
+// The verify function is provided in a separate header but referenced here.
+template <class N>
+bool Verify(const DominatorTreeBaseByGraphTraits<GraphTraits<N>> &DT);
+} // namespace DomTreeBuilder
+
/// \brief Core dominator tree base class.
///
/// This class is a generic template over graph nodes. It is instantiated for
/// various graphs in the LLVM IR or in the code generator.
-template <class NodeT> class DominatorTreeBase : public DominatorBase<NodeT> {
- bool dominatedBySlowTreeWalk(const DomTreeNodeBase<NodeT> *A,
- const DomTreeNodeBase<NodeT> *B) const {
- assert(A != B);
- assert(isReachableFromEntry(B));
- assert(isReachableFromEntry(A));
-
- const DomTreeNodeBase<NodeT> *IDom;
- while ((IDom = B->getIDom()) != nullptr && IDom != A && IDom != B)
- B = IDom; // Walk up the tree
- return IDom != nullptr;
- }
-
- /// \brief Wipe this tree's state without releasing any resources.
- ///
- /// This is essentially a post-move helper only. It leaves the object in an
- /// assignable and destroyable state, but otherwise invalid.
- void wipe() {
- DomTreeNodes.clear();
- IDoms.clear();
- Vertex.clear();
- Info.clear();
- RootNode = nullptr;
- }
+template <class NodeT> class DominatorTreeBase {
+ protected:
+ std::vector<NodeT *> Roots;
+ bool IsPostDominators;
-protected:
using DomTreeNodeMapType =
DenseMap<NodeT *, std::unique_ptr<DomTreeNodeBase<NodeT>>>;
DomTreeNodeMapType DomTreeNodes;
@@ -242,117 +220,30 @@ protected:
mutable bool DFSInfoValid = false;
mutable unsigned int SlowQueries = 0;
- // Information record used during immediate dominators computation.
- struct InfoRec {
- unsigned DFSNum = 0;
- unsigned Parent = 0;
- unsigned Semi = 0;
- NodeT *Label = nullptr;
-
- InfoRec() = default;
- };
- DenseMap<NodeT *, NodeT *> IDoms;
+ friend struct DomTreeBuilder::SemiNCAInfo<NodeT>;
+ using SNCAInfoTy = DomTreeBuilder::SemiNCAInfo<NodeT>;
- // Vertex - Map the DFS number to the NodeT*
- std::vector<NodeT *> Vertex;
-
- // Info - Collection of information used during the computation of idoms.
- DenseMap<NodeT *, InfoRec> Info;
-
- void reset() {
- DomTreeNodes.clear();
- IDoms.clear();
- this->Roots.clear();
- Vertex.clear();
- RootNode = nullptr;
- DFSInfoValid = false;
- SlowQueries = 0;
- }
-
- // NewBB is split and now it has one successor. Update dominator tree to
- // reflect this change.
- template <class N>
- void Split(typename GraphTraits<N>::NodeRef NewBB) {
- using GraphT = GraphTraits<N>;
- using NodeRef = typename GraphT::NodeRef;
- assert(std::distance(GraphT::child_begin(NewBB),
- GraphT::child_end(NewBB)) == 1 &&
- "NewBB should have a single successor!");
- NodeRef NewBBSucc = *GraphT::child_begin(NewBB);
-
- std::vector<NodeRef> PredBlocks;
- for (const auto &Pred : children<Inverse<N>>(NewBB))
- PredBlocks.push_back(Pred);
-
- assert(!PredBlocks.empty() && "No predblocks?");
-
- bool NewBBDominatesNewBBSucc = true;
- for (const auto &Pred : children<Inverse<N>>(NewBBSucc)) {
- if (Pred != NewBB && !dominates(NewBBSucc, Pred) &&
- isReachableFromEntry(Pred)) {
- NewBBDominatesNewBBSucc = false;
- break;
- }
- }
-
- // Find NewBB's immediate dominator and create new dominator tree node for
- // NewBB.
- NodeT *NewBBIDom = nullptr;
- unsigned i = 0;
- for (i = 0; i < PredBlocks.size(); ++i)
- if (isReachableFromEntry(PredBlocks[i])) {
- NewBBIDom = PredBlocks[i];
- break;
- }
-
- // It's possible that none of the predecessors of NewBB are reachable;
- // in that case, NewBB itself is unreachable, so nothing needs to be
- // changed.
- if (!NewBBIDom)
- return;
-
- for (i = i + 1; i < PredBlocks.size(); ++i) {
- if (isReachableFromEntry(PredBlocks[i]))
- NewBBIDom = findNearestCommonDominator(NewBBIDom, PredBlocks[i]);
- }
-
- // Create the new dominator tree node... and set the idom of NewBB.
- DomTreeNodeBase<NodeT> *NewBBNode = addNewBlock(NewBB, NewBBIDom);
-
- // If NewBB strictly dominates other blocks, then it is now the immediate
- // dominator of NewBBSucc. Update the dominator tree as appropriate.
- if (NewBBDominatesNewBBSucc) {
- DomTreeNodeBase<NodeT> *NewBBSuccNode = getNode(NewBBSucc);
- changeImmediateDominator(NewBBSuccNode, NewBBNode);
- }
- }
-
-public:
- explicit DominatorTreeBase(bool isPostDom)
- : DominatorBase<NodeT>(isPostDom) {}
+ public:
+ explicit DominatorTreeBase(bool isPostDom) : IsPostDominators(isPostDom) {}
DominatorTreeBase(DominatorTreeBase &&Arg)
- : DominatorBase<NodeT>(
- std::move(static_cast<DominatorBase<NodeT> &>(Arg))),
+ : Roots(std::move(Arg.Roots)),
+ IsPostDominators(Arg.IsPostDominators),
DomTreeNodes(std::move(Arg.DomTreeNodes)),
RootNode(std::move(Arg.RootNode)),
DFSInfoValid(std::move(Arg.DFSInfoValid)),
- SlowQueries(std::move(Arg.SlowQueries)), IDoms(std::move(Arg.IDoms)),
- Vertex(std::move(Arg.Vertex)), Info(std::move(Arg.Info)) {
+ SlowQueries(std::move(Arg.SlowQueries)) {
Arg.wipe();
}
DominatorTreeBase &operator=(DominatorTreeBase &&RHS) {
- DominatorBase<NodeT>::operator=(
- std::move(static_cast<DominatorBase<NodeT> &>(RHS)));
+ Roots = std::move(RHS.Roots);
+ IsPostDominators = RHS.IsPostDominators;
DomTreeNodes = std::move(RHS.DomTreeNodes);
RootNode = std::move(RHS.RootNode);
DFSInfoValid = std::move(RHS.DFSInfoValid);
SlowQueries = std::move(RHS.SlowQueries);
- IDoms = std::move(RHS.IDoms);
- Vertex = std::move(RHS.Vertex);
- Info = std::move(RHS.Info);
RHS.wipe();
return *this;
}
@@ -360,6 +251,16 @@ public:
DominatorTreeBase(const DominatorTreeBase &) = delete;
DominatorTreeBase &operator=(const DominatorTreeBase &) = delete;
+ /// getRoots - Return the root blocks of the current CFG. This may include
+ /// multiple blocks if we are computing post dominators. For forward
+ /// dominators, this will always be a single block (the entry node).
+ ///
+ const std::vector<NodeT *> &getRoots() const { return Roots; }
+
+  /// isPostDominator - Returns true if this analysis is based on postdominators
+ ///
+ bool isPostDominator() const { return IsPostDominators; }
+
/// compare - Return false if the other dominator tree base matches this
/// dominator tree base. Otherwise return true.
bool compare(const DominatorTreeBase &Other) const {
@@ -468,6 +369,13 @@ public:
if (!isReachableFromEntry(A))
return false;
+ if (B->getIDom() == A) return true;
+
+ if (A->getIDom() == B) return false;
+
+ // A can only dominate B if it is higher in the tree.
+ if (A->getLevel() >= B->getLevel()) return false;
+
// Compare the result of the tree walk and the dfs numbers, if expensive
// checks are enabled.
#ifdef EXPENSIVE_CHECKS
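
These early exits rest on the new Level invariant: a node's level is its depth in the dominator tree, so any dominator of B sits strictly above B. A self-contained sketch of the check (Node and strictlyDominates are stand-ins, not the real classes):

    // Minimal stand-in for a dominator-tree node carrying the invariant
    // Level(N) == Level(IDom(N)) + 1, with the root at level 0.
    struct Node {
      Node *IDom = nullptr;
      unsigned Level = 0;
    };

    // Every node on B's idom chain sits at a strictly smaller level than B,
    // so Level(A) >= Level(B) disproves dominance without walking the chain.
    bool strictlyDominates(const Node *A, const Node *B) {
      if (A->Level >= B->Level)
        return false; // the cheap rejection the hunk above adds
      for (const Node *N = B->IDom; N; N = N->IDom)
        if (N == A)
          return true;
      return false;
    }
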
@@ -499,7 +407,7 @@ public:
/// findNearestCommonDominator - Find nearest common dominator basic block
/// for basic block A and B. If there is no such block then return NULL.
- NodeT *findNearestCommonDominator(NodeT *A, NodeT *B) {
+ NodeT *findNearestCommonDominator(NodeT *A, NodeT *B) const {
assert(A->getParent() == B->getParent() &&
"Two blocks are not in same function");
@@ -511,54 +419,24 @@ public:
return &Entry;
}
- // If B dominates A then B is nearest common dominator.
- if (dominates(B, A))
- return B;
-
- // If A dominates B then A is nearest common dominator.
- if (dominates(A, B))
- return A;
-
DomTreeNodeBase<NodeT> *NodeA = getNode(A);
DomTreeNodeBase<NodeT> *NodeB = getNode(B);
- // If we have DFS info, then we can avoid all allocations by just querying
- // it from each IDom. Note that because we call 'dominates' twice above, we
- // expect to call through this code at most 16 times in a row without
- // building valid DFS information. This is important as below is a *very*
- // slow tree walk.
- if (DFSInfoValid) {
- DomTreeNodeBase<NodeT> *IDomA = NodeA->getIDom();
- while (IDomA) {
- if (NodeB->DominatedBy(IDomA))
- return IDomA->getBlock();
- IDomA = IDomA->getIDom();
- }
- return nullptr;
- }
-
- // Collect NodeA dominators set.
- SmallPtrSet<DomTreeNodeBase<NodeT> *, 16> NodeADoms;
- NodeADoms.insert(NodeA);
- DomTreeNodeBase<NodeT> *IDomA = NodeA->getIDom();
- while (IDomA) {
- NodeADoms.insert(IDomA);
- IDomA = IDomA->getIDom();
- }
+ if (!NodeA || !NodeB) return nullptr;
- // Walk NodeB immediate dominators chain and find common dominator node.
- DomTreeNodeBase<NodeT> *IDomB = NodeB->getIDom();
- while (IDomB) {
- if (NodeADoms.count(IDomB) != 0)
- return IDomB->getBlock();
+ // Use level information to go up the tree until the levels match. Then
+    // continue going up until we arrive at the same node.
+ while (NodeA && NodeA != NodeB) {
+ if (NodeA->getLevel() < NodeB->getLevel()) std::swap(NodeA, NodeB);
- IDomB = IDomB->getIDom();
+ NodeA = NodeA->IDom;
}
- return nullptr;
+ return NodeA ? NodeA->getBlock() : nullptr;
}
- const NodeT *findNearestCommonDominator(const NodeT *A, const NodeT *B) {
+ const NodeT *findNearestCommonDominator(const NodeT *A,
+ const NodeT *B) const {
// Cast away the const qualifiers here. This is ok since
// const is re-introduced on the return type.
return findNearestCommonDominator(const_cast<NodeT *>(A),
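
The rewritten findNearestCommonDominator is the standard two-pointer ancestor walk: always advance whichever node is deeper, so both pointers converge on the nearest common dominator. A self-contained sketch of the same loop (Node is a stand-in, as above):

    #include <utility>

    struct Node {
      Node *IDom = nullptr; // immediate dominator; null at the root
      unsigned Level = 0;   // depth in the dominator tree
    };

    // Climb the deeper of the two nodes each step (the swap keeps A as the
    // deeper one, alternating sides on ties) until the walks meet.
    Node *nearestCommonDominator(Node *A, Node *B) {
      while (A && A != B) {
        if (A->Level < B->Level)
          std::swap(A, B);
        A = A->IDom;
      }
      return A; // null only if the nodes are in disjoint trees
    }
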
@@ -597,7 +475,6 @@ public:
assert(!this->isPostDominator() &&
"Cannot change root of post-dominator tree");
DFSInfoValid = false;
- auto &Roots = DominatorBase<NodeT>::Roots;
DomTreeNodeBase<NodeT> *NewNode = (DomTreeNodes[BB] =
llvm::make_unique<DomTreeNodeBase<NodeT>>(BB, nullptr)).get();
if (Roots.empty()) {
@@ -605,8 +482,10 @@ public:
} else {
assert(Roots.size() == 1);
NodeT *OldRoot = Roots.front();
- DomTreeNodes[OldRoot] =
- NewNode->addChild(std::move(DomTreeNodes[OldRoot]));
+ auto &OldNode = DomTreeNodes[OldRoot];
+ OldNode = NewNode->addChild(std::move(DomTreeNodes[OldRoot]));
+ OldNode->IDom = NewNode;
+ OldNode->UpdateLevel();
Roots[0] = BB;
}
return RootNode = NewNode;
@@ -673,45 +552,6 @@ public:
if (getRootNode()) PrintDomTree<NodeT>(getRootNode(), O, 1);
}
-protected:
- template <class GraphT>
- friend typename GraphT::NodeRef
- Eval(DominatorTreeBaseByGraphTraits<GraphT> &DT, typename GraphT::NodeRef V,
- unsigned LastLinked);
-
- template <class GraphT>
- friend unsigned ReverseDFSPass(DominatorTreeBaseByGraphTraits<GraphT> &DT,
- typename GraphT::NodeRef V, unsigned N);
-
- template <class GraphT>
- friend unsigned DFSPass(DominatorTreeBaseByGraphTraits<GraphT> &DT,
- typename GraphT::NodeRef V, unsigned N);
-
- template <class FuncT, class N>
- friend void Calculate(DominatorTreeBaseByGraphTraits<GraphTraits<N>> &DT,
- FuncT &F);
-
- DomTreeNodeBase<NodeT> *getNodeForBlock(NodeT *BB) {
- if (DomTreeNodeBase<NodeT> *Node = getNode(BB))
- return Node;
-
- // Haven't calculated this node yet? Get or calculate the node for the
- // immediate dominator.
- NodeT *IDom = getIDom(BB);
-
- assert(IDom || DomTreeNodes[nullptr]);
- DomTreeNodeBase<NodeT> *IDomNode = getNodeForBlock(IDom);
-
- // Add a new tree node for this NodeT, and link it as a child of
- // IDomNode
- return (DomTreeNodes[BB] = IDomNode->addChild(
- llvm::make_unique<DomTreeNodeBase<NodeT>>(BB, IDomNode))).get();
- }
-
- NodeT *getIDom(NodeT *BB) const { return IDoms.lookup(BB); }
-
- void addRoot(NodeT *BB) { this->Roots.push_back(BB); }
-
public:
/// updateDFSNumbers - Assign In and Out numbers to the nodes while walking
/// dominator tree in dfs order.
@@ -767,23 +607,119 @@ public:
template <class FT> void recalculate(FT &F) {
using TraitsTy = GraphTraits<FT *>;
reset();
- Vertex.push_back(nullptr);
if (!this->IsPostDominators) {
// Initialize root
NodeT *entry = TraitsTy::getEntryNode(&F);
addRoot(entry);
- Calculate<FT, NodeT *>(*this, F);
+ DomTreeBuilder::Calculate<FT, NodeT *>(*this, F);
} else {
// Initialize the roots list
for (auto *Node : nodes(&F))
if (TraitsTy::child_begin(Node) == TraitsTy::child_end(Node))
addRoot(Node);
- Calculate<FT, Inverse<NodeT *>>(*this, F);
+ DomTreeBuilder::Calculate<FT, Inverse<NodeT *>>(*this, F);
}
}
+
+ /// verify - check parent and sibling property
+ bool verify() const {
+ return this->isPostDominator()
+ ? DomTreeBuilder::Verify<Inverse<NodeT *>>(*this)
+ : DomTreeBuilder::Verify<NodeT *>(*this);
+ }
+
+ protected:
+ void addRoot(NodeT *BB) { this->Roots.push_back(BB); }
+
+ void reset() {
+ DomTreeNodes.clear();
+ this->Roots.clear();
+ RootNode = nullptr;
+ DFSInfoValid = false;
+ SlowQueries = 0;
+ }
+
+ // NewBB is split and now it has one successor. Update dominator tree to
+ // reflect this change.
+ template <class N>
+ void Split(typename GraphTraits<N>::NodeRef NewBB) {
+ using GraphT = GraphTraits<N>;
+ using NodeRef = typename GraphT::NodeRef;
+ assert(std::distance(GraphT::child_begin(NewBB),
+ GraphT::child_end(NewBB)) == 1 &&
+ "NewBB should have a single successor!");
+ NodeRef NewBBSucc = *GraphT::child_begin(NewBB);
+
+ std::vector<NodeRef> PredBlocks;
+ for (const auto &Pred : children<Inverse<N>>(NewBB))
+ PredBlocks.push_back(Pred);
+
+ assert(!PredBlocks.empty() && "No predblocks?");
+
+ bool NewBBDominatesNewBBSucc = true;
+ for (const auto &Pred : children<Inverse<N>>(NewBBSucc)) {
+ if (Pred != NewBB && !dominates(NewBBSucc, Pred) &&
+ isReachableFromEntry(Pred)) {
+ NewBBDominatesNewBBSucc = false;
+ break;
+ }
+ }
+
+ // Find NewBB's immediate dominator and create new dominator tree node for
+ // NewBB.
+ NodeT *NewBBIDom = nullptr;
+ unsigned i = 0;
+ for (i = 0; i < PredBlocks.size(); ++i)
+ if (isReachableFromEntry(PredBlocks[i])) {
+ NewBBIDom = PredBlocks[i];
+ break;
+ }
+
+ // It's possible that none of the predecessors of NewBB are reachable;
+ // in that case, NewBB itself is unreachable, so nothing needs to be
+ // changed.
+ if (!NewBBIDom) return;
+
+ for (i = i + 1; i < PredBlocks.size(); ++i) {
+ if (isReachableFromEntry(PredBlocks[i]))
+ NewBBIDom = findNearestCommonDominator(NewBBIDom, PredBlocks[i]);
+ }
+
+ // Create the new dominator tree node... and set the idom of NewBB.
+ DomTreeNodeBase<NodeT> *NewBBNode = addNewBlock(NewBB, NewBBIDom);
+
+ // If NewBB strictly dominates other blocks, then it is now the immediate
+ // dominator of NewBBSucc. Update the dominator tree as appropriate.
+ if (NewBBDominatesNewBBSucc) {
+ DomTreeNodeBase<NodeT> *NewBBSuccNode = getNode(NewBBSucc);
+ changeImmediateDominator(NewBBSuccNode, NewBBNode);
+ }
+ }
+
+ private:
+ bool dominatedBySlowTreeWalk(const DomTreeNodeBase<NodeT> *A,
+ const DomTreeNodeBase<NodeT> *B) const {
+ assert(A != B);
+ assert(isReachableFromEntry(B));
+ assert(isReachableFromEntry(A));
+
+ const DomTreeNodeBase<NodeT> *IDom;
+ while ((IDom = B->getIDom()) != nullptr && IDom != A && IDom != B)
+ B = IDom; // Walk up the tree
+ return IDom != nullptr;
+ }
+
+ /// \brief Wipe this tree's state without releasing any resources.
+ ///
+ /// This is essentially a post-move helper only. It leaves the object in an
+ /// assignable and destroyable state, but otherwise invalid.
+ void wipe() {
+ DomTreeNodes.clear();
+ RootNode = nullptr;
+ }
};
// These two functions are declared out of line as a workaround for building
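
The new verify() entry point checks the parent and sibling properties of the computed tree and is cheap enough to assert in debug builds after rebuilding a tree. A hedged sketch against the IR-level DominatorTree (checkDomTree is illustrative):

    #include "llvm/IR/Dominators.h"
    #include <cassert>
    using namespace llvm;

    // Recompute a function's dominator tree and check its invariants; the
    // post-dominator case dispatches through Inverse<NodeT *> automatically.
    void checkDomTree(Function &F) {
      DominatorTree DT;
      DT.recalculate(F);
      assert(DT.verify() && "dominator tree invariants violated");
    }
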
diff --git a/include/llvm/Support/GenericDomTreeConstruction.h b/include/llvm/Support/GenericDomTreeConstruction.h
index 449c385bc86a..9edf03aa3621 100644
--- a/include/llvm/Support/GenericDomTreeConstruction.h
+++ b/include/llvm/Support/GenericDomTreeConstruction.h
@@ -10,10 +10,11 @@
///
/// Generic dominator tree construction - This file provides routines to
/// construct immediate dominator information for a flow-graph based on the
-/// algorithm described in this document:
+/// Semi-NCA algorithm described in this dissertation:
///
-/// A Fast Algorithm for Finding Dominators in a Flowgraph
-/// T. Lengauer & R. Tarjan, ACM TOPLAS July 1979, pgs 121-141.
+/// Linear-Time Algorithms for Dominators and Related Problems
+/// Loukas Georgiadis, Princeton University, November 2005, pp. 21-23:
+/// ftp://ftp.cs.princeton.edu/reports/2005/737.pdf
///
/// This implements the O(n*log(n)) versions of EVAL and LINK, because it turns
/// out that the theoretically slower O(n*log(n)) implementation is actually
@@ -29,256 +30,496 @@
#include "llvm/Support/GenericDomTree.h"
namespace llvm {
+namespace DomTreeBuilder {
+
+// Information record used by Semi-NCA during tree construction.
+template <typename NodeT>
+struct SemiNCAInfo {
+ using NodePtr = NodeT *;
+ using DomTreeT = DominatorTreeBase<NodeT>;
+ using TreeNodePtr = DomTreeNodeBase<NodeT> *;
+
+ struct InfoRec {
+ unsigned DFSNum = 0;
+ unsigned Parent = 0;
+ unsigned Semi = 0;
+ NodePtr Label = nullptr;
+ NodePtr IDom = nullptr;
+ };
+
+ std::vector<NodePtr> NumToNode;
+ DenseMap<NodePtr, InfoRec> NodeToInfo;
+
+ void clear() {
+ NumToNode.clear();
+ NodeToInfo.clear();
+ }
+
+ NodePtr getIDom(NodePtr BB) const {
+ auto InfoIt = NodeToInfo.find(BB);
+ if (InfoIt == NodeToInfo.end()) return nullptr;
-// External storage for depth first iterator that reuses the info lookup map
-// domtree already has. We don't have a set, but a map instead, so we are
-// converting the one argument insert calls.
-template <class NodeRef, class InfoType> struct df_iterator_dom_storage {
-public:
- using BaseSet = DenseMap<NodeRef, InfoType>;
- df_iterator_dom_storage(BaseSet &Storage) : Storage(Storage) {}
-
- using iterator = typename BaseSet::iterator;
- std::pair<iterator, bool> insert(NodeRef N) {
- return Storage.insert({N, InfoType()});
+ return InfoIt->second.IDom;
}
- void completed(NodeRef) {}
-private:
- BaseSet &Storage;
-};
+ TreeNodePtr getNodeForBlock(NodePtr BB, DomTreeT &DT) {
+ if (TreeNodePtr Node = DT.getNode(BB)) return Node;
+
+ // Haven't calculated this node yet? Get or calculate the node for the
+ // immediate dominator.
+ NodePtr IDom = getIDom(BB);
+
+ assert(IDom || DT.DomTreeNodes[nullptr]);
+ TreeNodePtr IDomNode = getNodeForBlock(IDom, DT);
-template <class GraphT>
-unsigned ReverseDFSPass(DominatorTreeBaseByGraphTraits<GraphT> &DT,
- typename GraphT::NodeRef V, unsigned N) {
- df_iterator_dom_storage<
- typename GraphT::NodeRef,
- typename DominatorTreeBaseByGraphTraits<GraphT>::InfoRec>
- DFStorage(DT.Info);
- bool IsChildOfArtificialExit = (N != 0);
- for (auto I = idf_ext_begin(V, DFStorage), E = idf_ext_end(V, DFStorage);
- I != E; ++I) {
- typename GraphT::NodeRef BB = *I;
- auto &BBInfo = DT.Info[BB];
- BBInfo.DFSNum = BBInfo.Semi = ++N;
- BBInfo.Label = BB;
- // Set the parent to the top of the visited stack. The stack includes us,
- // and is 1 based, so we subtract to account for both of these.
- if (I.getPathLength() > 1)
- BBInfo.Parent = DT.Info[I.getPath(I.getPathLength() - 2)].DFSNum;
- DT.Vertex.push_back(BB); // Vertex[n] = V;
-
- if (IsChildOfArtificialExit)
- BBInfo.Parent = 1;
-
- IsChildOfArtificialExit = false;
+ // Add a new tree node for this NodeT, and link it as a child of
+ // IDomNode
+ return (DT.DomTreeNodes[BB] = IDomNode->addChild(
+ llvm::make_unique<DomTreeNodeBase<NodeT>>(BB, IDomNode)))
+ .get();
}
- return N;
-}
-template <class GraphT>
-unsigned DFSPass(DominatorTreeBaseByGraphTraits<GraphT> &DT,
- typename GraphT::NodeRef V, unsigned N) {
- df_iterator_dom_storage<
- typename GraphT::NodeRef,
- typename DominatorTreeBaseByGraphTraits<GraphT>::InfoRec>
- DFStorage(DT.Info);
- for (auto I = df_ext_begin(V, DFStorage), E = df_ext_end(V, DFStorage);
- I != E; ++I) {
- typename GraphT::NodeRef BB = *I;
- auto &BBInfo = DT.Info[BB];
- BBInfo.DFSNum = BBInfo.Semi = ++N;
- BBInfo.Label = BB;
- // Set the parent to the top of the visited stack. The stack includes us,
- // and is 1 based, so we subtract to account for both of these.
- if (I.getPathLength() > 1)
- BBInfo.Parent = DT.Info[I.getPath(I.getPathLength() - 2)].DFSNum;
- DT.Vertex.push_back(BB); // Vertex[n] = V;
+
+ // External storage for depth first iterator that reuses the info lookup map
+ // SemiNCAInfo already has. We don't have a set, but a map instead, so we are
+ // converting the one argument insert calls.
+ struct df_iterator_dom_storage {
+ public:
+ using BaseSet = decltype(NodeToInfo);
+ df_iterator_dom_storage(BaseSet &Storage) : Storage(Storage) {}
+
+ using iterator = typename BaseSet::iterator;
+ std::pair<iterator, bool> insert(NodePtr N) {
+ return Storage.insert({N, InfoRec()});
+ }
+ void completed(NodePtr) {}
+
+ private:
+ BaseSet &Storage;
+ };
+
+ df_iterator_dom_storage getStorage() { return {NodeToInfo}; }
+
+ unsigned runReverseDFS(NodePtr V, unsigned N) {
+ auto DFStorage = getStorage();
+
+ bool IsChildOfArtificialExit = (N != 0);
+ for (auto I = idf_ext_begin(V, DFStorage), E = idf_ext_end(V, DFStorage);
+ I != E; ++I) {
+ NodePtr BB = *I;
+ auto &BBInfo = NodeToInfo[BB];
+ BBInfo.DFSNum = BBInfo.Semi = ++N;
+ BBInfo.Label = BB;
+ // Set the parent to the top of the visited stack. The stack includes us,
+ // and is 1 based, so we subtract to account for both of these.
+ if (I.getPathLength() > 1)
+ BBInfo.Parent = NodeToInfo[I.getPath(I.getPathLength() - 2)].DFSNum;
+ NumToNode.push_back(BB); // NumToNode[n] = V;
+
+ if (IsChildOfArtificialExit)
+ BBInfo.Parent = 1;
+
+ IsChildOfArtificialExit = false;
+ }
+ return N;
}
- return N;
-}
-template <class GraphT>
-typename GraphT::NodeRef Eval(DominatorTreeBaseByGraphTraits<GraphT> &DT,
- typename GraphT::NodeRef VIn,
- unsigned LastLinked) {
- using NodePtr = typename GraphT::NodeRef;
+ unsigned runForwardDFS(NodePtr V, unsigned N) {
+ auto DFStorage = getStorage();
+
+ for (auto I = df_ext_begin(V, DFStorage), E = df_ext_end(V, DFStorage);
+ I != E; ++I) {
+ NodePtr BB = *I;
+ auto &BBInfo = NodeToInfo[BB];
+ BBInfo.DFSNum = BBInfo.Semi = ++N;
+ BBInfo.Label = BB;
+ // Set the parent to the top of the visited stack. The stack includes us,
+ // and is 1 based, so we subtract to account for both of these.
+ if (I.getPathLength() > 1)
+ BBInfo.Parent = NodeToInfo[I.getPath(I.getPathLength() - 2)].DFSNum;
+ NumToNode.push_back(BB); // NumToNode[n] = V;
+ }
+ return N;
+ }
- auto &VInInfo = DT.Info[VIn];
- if (VInInfo.DFSNum < LastLinked)
- return VIn;
+ NodePtr eval(NodePtr VIn, unsigned LastLinked) {
+ auto &VInInfo = NodeToInfo[VIn];
+ if (VInInfo.DFSNum < LastLinked)
+ return VIn;
- SmallVector<NodePtr, 32> Work;
- SmallPtrSet<NodePtr, 32> Visited;
+ SmallVector<NodePtr, 32> Work;
+ SmallPtrSet<NodePtr, 32> Visited;
- if (VInInfo.Parent >= LastLinked)
- Work.push_back(VIn);
+ if (VInInfo.Parent >= LastLinked)
+ Work.push_back(VIn);
- while (!Work.empty()) {
- NodePtr V = Work.back();
- auto &VInfo = DT.Info[V];
- NodePtr VAncestor = DT.Vertex[VInfo.Parent];
+ while (!Work.empty()) {
+ NodePtr V = Work.back();
+ auto &VInfo = NodeToInfo[V];
+ NodePtr VAncestor = NumToNode[VInfo.Parent];
- // Process Ancestor first
- if (Visited.insert(VAncestor).second && VInfo.Parent >= LastLinked) {
- Work.push_back(VAncestor);
- continue;
+ // Process Ancestor first
+ if (Visited.insert(VAncestor).second && VInfo.Parent >= LastLinked) {
+ Work.push_back(VAncestor);
+ continue;
+ }
+ Work.pop_back();
+
+ // Update VInfo based on Ancestor info
+ if (VInfo.Parent < LastLinked)
+ continue;
+
+ auto &VAInfo = NodeToInfo[VAncestor];
+ NodePtr VAncestorLabel = VAInfo.Label;
+ NodePtr VLabel = VInfo.Label;
+ if (NodeToInfo[VAncestorLabel].Semi < NodeToInfo[VLabel].Semi)
+ VInfo.Label = VAncestorLabel;
+ VInfo.Parent = VAInfo.Parent;
}
- Work.pop_back();
-
- // Update VInfo based on Ancestor info
- if (VInfo.Parent < LastLinked)
- continue;
-
- auto &VAInfo = DT.Info[VAncestor];
- NodePtr VAncestorLabel = VAInfo.Label;
- NodePtr VLabel = VInfo.Label;
- if (DT.Info[VAncestorLabel].Semi < DT.Info[VLabel].Semi)
- VInfo.Label = VAncestorLabel;
- VInfo.Parent = VAInfo.Parent;
+
+ return VInInfo.Label;
}
- return VInInfo.Label;
-}
+ template <typename NodeType>
+ void runSemiNCA(DomTreeT &DT, unsigned NumBlocks) {
+ unsigned N = 0;
+ NumToNode.push_back(nullptr);
-template <class FuncT, class NodeT>
-void Calculate(DominatorTreeBaseByGraphTraits<GraphTraits<NodeT>> &DT,
- FuncT &F) {
- using GraphT = GraphTraits<NodeT>;
- using NodePtr = typename GraphT::NodeRef;
- static_assert(std::is_pointer<NodePtr>::value,
- "NodeRef should be pointer type");
- using NodeType = typename std::remove_pointer<NodePtr>::type;
+ bool MultipleRoots = (DT.Roots.size() > 1);
+ if (MultipleRoots) {
+ auto &BBInfo = NodeToInfo[nullptr];
+ BBInfo.DFSNum = BBInfo.Semi = ++N;
+ BBInfo.Label = nullptr;
- unsigned N = 0;
- bool MultipleRoots = (DT.Roots.size() > 1);
- if (MultipleRoots) {
- auto &BBInfo = DT.Info[nullptr];
- BBInfo.DFSNum = BBInfo.Semi = ++N;
- BBInfo.Label = nullptr;
+ NumToNode.push_back(nullptr); // NumToNode[n] = V;
+ }
- DT.Vertex.push_back(nullptr); // Vertex[n] = V;
- }
+ // Step #1: Number blocks in depth-first order and initialize variables used
+ // in later stages of the algorithm.
+ if (DT.isPostDominator()) {
+ for (unsigned i = 0, e = static_cast<unsigned>(DT.Roots.size());
+ i != e; ++i)
+ N = runReverseDFS(DT.Roots[i], N);
+ } else {
+ N = runForwardDFS(DT.Roots[0], N);
+ }
- // Step #1: Number blocks in depth-first order and initialize variables used
- // in later stages of the algorithm.
- if (DT.isPostDominator()){
- for (unsigned i = 0, e = static_cast<unsigned>(DT.Roots.size());
- i != e; ++i)
- N = ReverseDFSPass<GraphT>(DT, DT.Roots[i], N);
- } else {
- N = DFSPass<GraphT>(DT, DT.Roots[0], N);
- }
+ // It might be that some blocks did not get a DFS number (e.g., blocks of
+ // infinite loops). In these cases an artificial exit node is required.
+ MultipleRoots |= (DT.isPostDominator() && N != NumBlocks);
- // it might be that some blocks did not get a DFS number (e.g., blocks of
- // infinite loops). In these cases an artificial exit node is required.
- MultipleRoots |= (DT.isPostDominator() && N != GraphTraits<FuncT*>::size(&F));
+ // Initialize IDoms to spanning tree parents.
+ for (unsigned i = 1; i <= N; ++i) {
+ const NodePtr V = NumToNode[i];
+ auto &VInfo = NodeToInfo[V];
+ VInfo.IDom = NumToNode[VInfo.Parent];
+ }
- // When naively implemented, the Lengauer-Tarjan algorithm requires a separate
- // bucket for each vertex. However, this is unnecessary, because each vertex
- // is only placed into a single bucket (that of its semidominator), and each
- // vertex's bucket is processed before it is added to any bucket itself.
- //
- // Instead of using a bucket per vertex, we use a single array Buckets that
- // has two purposes. Before the vertex V with preorder number i is processed,
- // Buckets[i] stores the index of the first element in V's bucket. After V's
- // bucket is processed, Buckets[i] stores the index of the next element in the
- // bucket containing V, if any.
- SmallVector<unsigned, 32> Buckets;
- Buckets.resize(N + 1);
- for (unsigned i = 1; i <= N; ++i)
- Buckets[i] = i;
-
- for (unsigned i = N; i >= 2; --i) {
- NodePtr W = DT.Vertex[i];
- auto &WInfo = DT.Info[W];
-
- // Step #2: Implicitly define the immediate dominator of vertices
- for (unsigned j = i; Buckets[j] != i; j = Buckets[j]) {
- NodePtr V = DT.Vertex[Buckets[j]];
- NodePtr U = Eval<GraphT>(DT, V, i + 1);
- DT.IDoms[V] = DT.Info[U].Semi < i ? U : W;
+ // Step #2: Calculate the semidominators of all vertices.
+ for (unsigned i = N; i >= 2; --i) {
+ NodePtr W = NumToNode[i];
+ auto &WInfo = NodeToInfo[W];
+
+ // Initialize the semi dominator to point to the parent node.
+ WInfo.Semi = WInfo.Parent;
+ for (const auto &N : inverse_children<NodeType>(W))
+ if (NodeToInfo.count(N)) { // Only if this predecessor is reachable!
+ unsigned SemiU = NodeToInfo[eval(N, i + 1)].Semi;
+ if (SemiU < WInfo.Semi)
+ WInfo.Semi = SemiU;
+ }
}
- // Step #3: Calculate the semidominators of all vertices
+ // Step #3: Explicitly define the immediate dominator of each vertex.
+ // IDom[i] = NCA(SDom[i], SpanningTreeParent(i)).
+ // Note that the parents were stored in IDoms and later got invalidated
+ // during path compression in eval.
+ for (unsigned i = 2; i <= N; ++i) {
+ const NodePtr W = NumToNode[i];
+ auto &WInfo = NodeToInfo[W];
+ const unsigned SDomNum = NodeToInfo[NumToNode[WInfo.Semi]].DFSNum;
+ NodePtr WIDomCandidate = WInfo.IDom;
+ while (NodeToInfo[WIDomCandidate].DFSNum > SDomNum)
+ WIDomCandidate = NodeToInfo[WIDomCandidate].IDom;
+
+ WInfo.IDom = WIDomCandidate;
+ }
- // initialize the semi dominator to point to the parent node
- WInfo.Semi = WInfo.Parent;
- for (const auto &N : inverse_children<NodeT>(W))
- if (DT.Info.count(N)) { // Only if this predecessor is reachable!
- unsigned SemiU = DT.Info[Eval<GraphT>(DT, N, i + 1)].Semi;
- if (SemiU < WInfo.Semi)
- WInfo.Semi = SemiU;
- }
+ if (DT.Roots.empty()) return;
- // If V is a non-root vertex and sdom(V) = parent(V), then idom(V) is
- // necessarily parent(V). In this case, set idom(V) here and avoid placing
- // V into a bucket.
- if (WInfo.Semi == WInfo.Parent) {
- DT.IDoms[W] = DT.Vertex[WInfo.Parent];
- } else {
- Buckets[i] = Buckets[WInfo.Semi];
- Buckets[WInfo.Semi] = i;
+ // Add a node for the root. This node might be the actual root, if there is
+ // one exit block, or it may be the virtual exit (denoted by
+ // (BasicBlock *)0) which postdominates all real exits if there are multiple
+ // exit blocks, or an infinite loop.
+ NodePtr Root = !MultipleRoots ? DT.Roots[0] : nullptr;
+
+ DT.RootNode =
+ (DT.DomTreeNodes[Root] =
+ llvm::make_unique<DomTreeNodeBase<NodeT>>(Root, nullptr))
+ .get();
+
+ // Loop over all of the reachable blocks in the function...
+ for (unsigned i = 2; i <= N; ++i) {
+ NodePtr W = NumToNode[i];
+
+ // Don't replace this with 'count', the insertion side effect is important
+ if (DT.DomTreeNodes[W])
+ continue; // Already calculated this node.
+
+ NodePtr ImmDom = getIDom(W);
+
+ assert(ImmDom || DT.DomTreeNodes[nullptr]);
+
+ // Get or calculate the node for the immediate dominator
+ TreeNodePtr IDomNode = getNodeForBlock(ImmDom, DT);
+
+ // Add a new tree node for this BasicBlock, and link it as a child of
+ // IDomNode
+ DT.DomTreeNodes[W] = IDomNode->addChild(
+ llvm::make_unique<DomTreeNodeBase<NodeT>>(W, IDomNode));
}
}
- if (N >= 1) {
- NodePtr Root = DT.Vertex[1];
- for (unsigned j = 1; Buckets[j] != 1; j = Buckets[j]) {
- NodePtr V = DT.Vertex[Buckets[j]];
- DT.IDoms[V] = Root;
- }
+ void doFullDFSWalk(const DomTreeT &DT) {
+ NumToNode.push_back(nullptr);
+ unsigned Num = 0;
+ for (auto *Root : DT.Roots)
+ if (!DT.isPostDominator())
+ Num = runForwardDFS(Root, Num);
+ else
+ Num = runReverseDFS(Root, Num);
}
- // Step #4: Explicitly define the immediate dominator of each vertex
- for (unsigned i = 2; i <= N; ++i) {
- NodePtr W = DT.Vertex[i];
- NodePtr &WIDom = DT.IDoms[W];
- if (WIDom != DT.Vertex[DT.Info[W].Semi])
- WIDom = DT.IDoms[WIDom];
+ static void PrintBlockOrNullptr(raw_ostream &O, NodePtr Obj) {
+ if (!Obj)
+ O << "nullptr";
+ else
+ Obj->printAsOperand(O, false);
}
- if (DT.Roots.empty()) return;
+ // Checks if the tree contains all reachable nodes in the input graph.
+ bool verifyReachability(const DomTreeT &DT) {
+ clear();
+ doFullDFSWalk(DT);
- // Add a node for the root. This node might be the actual root, if there is
- // one exit block, or it may be the virtual exit (denoted by (BasicBlock *)0)
- // which postdominates all real exits if there are multiple exit blocks, or
- // an infinite loop.
- NodePtr Root = !MultipleRoots ? DT.Roots[0] : nullptr;
+ for (auto &NodeToTN : DT.DomTreeNodes) {
+ const TreeNodePtr TN = NodeToTN.second.get();
+ const NodePtr BB = TN->getBlock();
+ if (!BB) continue;
- DT.RootNode =
- (DT.DomTreeNodes[Root] =
- llvm::make_unique<DomTreeNodeBase<NodeType>>(Root, nullptr))
- .get();
+ if (NodeToInfo.count(BB) == 0) {
+ errs() << "DomTree node ";
+ PrintBlockOrNullptr(errs(), BB);
+ errs() << " not found by DFS walk!\n";
+ errs().flush();
- // Loop over all of the reachable blocks in the function...
- for (unsigned i = 2; i <= N; ++i) {
- NodePtr W = DT.Vertex[i];
+ return false;
+ }
+ }
- // Don't replace this with 'count', the insertion side effect is important
- if (DT.DomTreeNodes[W])
- continue; // Haven't calculated this node yet?
+ return true;
+ }
- NodePtr ImmDom = DT.getIDom(W);
+ // Check if, for every parent with level L in the tree, all of its children
+ // have level L + 1.
+ static bool VerifyLevels(const DomTreeT &DT) {
+ for (auto &NodeToTN : DT.DomTreeNodes) {
+ const TreeNodePtr TN = NodeToTN.second.get();
+ const NodePtr BB = TN->getBlock();
+ if (!BB) continue;
+
+ const TreeNodePtr IDom = TN->getIDom();
+ if (!IDom && TN->getLevel() != 0) {
+ errs() << "Node without an IDom ";
+ PrintBlockOrNullptr(errs(), BB);
+ errs() << " has a nonzero level " << TN->getLevel() << "!\n";
+ errs().flush();
+
+ return false;
+ }
- assert(ImmDom || DT.DomTreeNodes[nullptr]);
+ if (IDom && TN->getLevel() != IDom->getLevel() + 1) {
+ errs() << "Node ";
+ PrintBlockOrNullptr(errs(), BB);
+ errs() << " has level " << TN->getLevel() << " while it's IDom ";
+ PrintBlockOrNullptr(errs(), IDom->getBlock());
+ errs() << " has level " << IDom->getLevel() << "!\n";
+ errs().flush();
- // Get or calculate the node for the immediate dominator
- DomTreeNodeBase<NodeType> *IDomNode = DT.getNodeForBlock(ImmDom);
+ return false;
+ }
+ }
- // Add a new tree node for this BasicBlock, and link it as a child of
- // IDomNode
- DT.DomTreeNodes[W] = IDomNode->addChild(
- llvm::make_unique<DomTreeNodeBase<NodeType>>(W, IDomNode));
+ return true;
+ }
+
+ // Checks if, for every edge From -> To in the graph,
+ // NCD(From, To) == To or NCD(From, To) == IDom(To).
+ bool verifyNCD(const DomTreeT &DT) {
+ clear();
+ doFullDFSWalk(DT);
+
+ for (auto &BlockToInfo : NodeToInfo) {
+ auto &Info = BlockToInfo.second;
+
+ const NodePtr From = NumToNode[Info.Parent];
+ if (!From) continue;
+
+ const NodePtr To = BlockToInfo.first;
+ const TreeNodePtr ToTN = DT.getNode(To);
+ assert(ToTN);
+
+ const NodePtr NCD = DT.findNearestCommonDominator(From, To);
+ const TreeNodePtr NCDTN = NCD ? DT.getNode(NCD) : nullptr;
+ const TreeNodePtr ToIDom = ToTN->getIDom();
+ if (NCDTN != ToTN && NCDTN != ToIDom) {
+ errs() << "NearestCommonDominator verification failed:\n\tNCD(From:";
+ PrintBlockOrNullptr(errs(), From);
+ errs() << ", To:";
+ PrintBlockOrNullptr(errs(), To);
+ errs() << ") = ";
+ PrintBlockOrNullptr(errs(), NCD);
+ errs() << ",\t (should be To or IDom[To]: ";
+ PrintBlockOrNullptr(errs(), ToIDom ? ToIDom->getBlock() : nullptr);
+ errs() << ")\n";
+ errs().flush();
+
+ return false;
+ }
+ }
+
+ return true;
+ }
+
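A concrete instance of the property being checked (block names invented): in a diamond CFG with edges A -> B, A -> C, B -> D, and C -> D, the tree has IDom(B) = IDom(C) = IDom(D) = A. For the edge B -> D, NCD(B, D) = A = IDom(D), so the check passes.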
+ // The below routines verify the correctness of the dominator tree relative to
+ // the CFG it's coming from. A tree is a dominator tree iff it has two
+ // properties, called the parent property and the sibling property. Tarjan
+ // and Lengauer prove (but don't explicitly name) the properties as part of
+ // the proofs in their 1979 paper, but the proofs are mostly part of proving
+ // things about semidominators and idoms, and some of them are simply asserted
+ // based on even earlier papers (see, e.g., lemma 2). Some papers refer to
+ // these properties as "valid" and "co-valid". See, e.g., "Dominators,
+ // directed bipolar orders, and independent spanning trees" by Loukas
+ // Georgiadis and Robert E. Tarjan, as well as "Dominator Tree Verification
+ // and Vertex-Disjoint Paths" by the same authors.
+
+ // A very simple and direct explanation of these properties can be found in
+ // "An Experimental Study of Dynamic Dominators", found at
+ // https://arxiv.org/abs/1604.02711
+
+ // The easiest way to think of the parent property is that it's a requirement
+ // of being a dominator. Let's just take immediate dominators. For PARENT to
+ // be an immediate dominator of CHILD, all paths in the CFG must go through
+ // PARENT before they hit CHILD. This implies that if you were to cut PARENT
+ // out of the CFG, there should be no paths to CHILD that are reachable. If
+ // there are, then you now have a path from PARENT to CHILD that goes around
+ // PARENT and still reaches CHILD, which by definition, means PARENT can't be
+ // a dominator of CHILD (let alone an immediate one).
+
+ // The sibling property is similar. It says that for each pair of sibling
+ // nodes in the dominator tree (LEFT and RIGHT), they must not dominate each
+ // other. If sibling LEFT dominated sibling RIGHT, it means there are no
+ // paths in the CFG from sibling LEFT to sibling RIGHT that do not go through
+ // LEFT, and thus, LEFT is really an ancestor (in the dominator tree) of
+ // RIGHT, not a sibling.
+
+ // It is possible to verify the parent and sibling properties in
+ // linear time, but the algorithms are complex. Instead, we do it in a
+ // straightforward N^2 and N^3 way below, using direct path reachability.
+
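A worked example of both properties (same diamond CFG as above: A -> B, A -> C, B -> D, C -> D; B, C, and D are all tree children of A). Parent property: cutting A out of the CFG leaves B, C, and D unreachable, exactly as the tree predicts. Sibling property: with B removed, D is still reachable via A -> C -> D, and symmetrically with C removed, so no sibling dominates another.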
+
+ // Checks if the tree has the parent property: for all edges from V to W in
+ // the input graph such that V is reachable, the parent of W in the tree is
+ // an ancestor of V in the tree.
+ //
+ // This means that if a node gets disconnected from the graph, then all of
+ // the nodes it dominated previously will now become unreachable.
+ bool verifyParentProperty(const DomTreeT &DT) {
+ for (auto &NodeToTN : DT.DomTreeNodes) {
+ const TreeNodePtr TN = NodeToTN.second.get();
+ const NodePtr BB = TN->getBlock();
+ if (!BB || TN->getChildren().empty()) continue;
+
+ clear();
+ NodeToInfo.insert({BB, {}});
+ doFullDFSWalk(DT);
+
+ for (TreeNodePtr Child : TN->getChildren())
+ if (NodeToInfo.count(Child->getBlock()) != 0) {
+ errs() << "Child ";
+ PrintBlockOrNullptr(errs(), Child->getBlock());
+ errs() << " reachable after its parent ";
+ PrintBlockOrNullptr(errs(), BB);
+ errs() << " is removed!\n";
+ errs().flush();
+
+ return false;
+ }
+ }
+
+ return true;
}
- // Free temporary memory used to construct idom's
- DT.IDoms.clear();
- DT.Info.clear();
- DT.Vertex.clear();
- DT.Vertex.shrink_to_fit();
+ // Check if the tree has the sibling property: for all sibling nodes V and W
+ // in the tree, V must not dominate W.
+ //
+ // This means that if a node gets disconnected from the graph, then all of
+ // its siblings are still reachable.
+ bool verifySiblingProperty(const DomTreeT &DT) {
+ for (auto &NodeToTN : DT.DomTreeNodes) {
+ const TreeNodePtr TN = NodeToTN.second.get();
+ const NodePtr BB = TN->getBlock();
+ if (!BB || TN->getChildren().empty()) continue;
+
+ const auto &Siblings = TN->getChildren();
+ for (const TreeNodePtr N : Siblings) {
+ clear();
+ NodeToInfo.insert({N->getBlock(), {}});
+ doFullDFSWalk(DT);
+
+ for (const TreeNodePtr S : Siblings) {
+ if (S == N) continue;
+
+ if (NodeToInfo.count(S->getBlock()) == 0) {
+ errs() << "Node ";
+ PrintBlockOrNullptr(errs(), S->getBlock());
+ errs() << " not reachable when its sibling ";
+ PrintBlockOrNullptr(errs(), N->getBlock());
+ errs() << " is removed!\n";
+ errs().flush();
+
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+ }
+};
- DT.updateDFSNumbers();
+template <class FuncT, class NodeT>
+void Calculate(DominatorTreeBaseByGraphTraits<GraphTraits<NodeT>> &DT,
+ FuncT &F) {
+ using NodePtr = typename GraphTraits<NodeT>::NodeRef;
+ static_assert(std::is_pointer<NodePtr>::value,
+ "NodePtr should be a pointer type");
+ SemiNCAInfo<typename std::remove_pointer<NodePtr>::type> SNCA;
+ SNCA.template runSemiNCA<NodeT>(DT, GraphTraits<FuncT *>::size(&F));
}
+
+template <class NodeT>
+bool Verify(const DominatorTreeBaseByGraphTraits<GraphTraits<NodeT>> &DT) {
+ using NodePtr = typename GraphTraits<NodeT>::NodeRef;
+ static_assert(std::is_pointer<NodePtr>::value,
+ "NodePtr should be a pointer type");
+ SemiNCAInfo<typename std::remove_pointer<NodePtr>::type> SNCA;
+
+ return SNCA.verifyReachability(DT) && SNCA.VerifyLevels(DT) &&
+ SNCA.verifyNCD(DT) && SNCA.verifyParentProperty(DT) &&
+ SNCA.verifySiblingProperty(DT);
}
+} // namespace DomTreeBuilder
+} // namespace llvm
+
#endif
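A minimal calling sketch for the two entry points (not part of this patch; the instantiation for LLVM IR is an assumption based on DominatorTree using NodeT = BasicBlock *):

  llvm::DominatorTree DT;
  DT.recalculate(F); // F is a Function &; ends up in DomTreeBuilder::Calculate
  assert(llvm::DomTreeBuilder::Verify<llvm::BasicBlock *>(DT) &&
         "tree should pass the reachability, level, NCD, parent, and "
         "sibling checks");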
diff --git a/include/llvm/Support/TargetParser.h b/include/llvm/Support/TargetParser.h
index f29cc40ffdd5..72c28865ac57 100644
--- a/include/llvm/Support/TargetParser.h
+++ b/include/llvm/Support/TargetParser.h
@@ -17,6 +17,7 @@
// FIXME: vector is used because that's what clang uses for subtarget feature
// lists, but SmallVector would probably be better
+#include "llvm/ADT/Triple.h"
#include <vector>
namespace llvm {
@@ -140,6 +141,8 @@ unsigned parseArchEndian(StringRef Arch);
unsigned parseArchProfile(StringRef Arch);
unsigned parseArchVersion(StringRef Arch);
+StringRef computeDefaultTargetABI(const Triple &TT, StringRef CPU);
+
} // namespace ARM
// FIXME: This should be made into a class design, to avoid duplication.
diff --git a/include/llvm/Support/YAMLParser.h b/include/llvm/Support/YAMLParser.h
index c196dd6c1ddc..549da3ccad51 100644
--- a/include/llvm/Support/YAMLParser.h
+++ b/include/llvm/Support/YAMLParser.h
@@ -188,7 +188,7 @@ public:
NullNode(std::unique_ptr<Document> &D)
: Node(NK_Null, D, StringRef(), StringRef()) {}
- static inline bool classof(const Node *N) { return N->getType() == NK_Null; }
+ static bool classof(const Node *N) { return N->getType() == NK_Null; }
};
/// \brief A scalar node is an opaque datum that can be presented as a
@@ -220,7 +220,7 @@ public:
/// This happens with escaped characters and multi-line literals.
StringRef getValue(SmallVectorImpl<char> &Storage) const;
- static inline bool classof(const Node *N) {
+ static bool classof(const Node *N) {
return N->getType() == NK_Scalar;
}
@@ -254,7 +254,7 @@ public:
/// \brief Gets the value of this node as a StringRef.
StringRef getValue() const { return Value; }
- static inline bool classof(const Node *N) {
+ static bool classof(const Node *N) {
return N->getType() == NK_BlockScalar;
}
@@ -296,7 +296,7 @@ public:
Val->skip();
}
- static inline bool classof(const Node *N) {
+ static bool classof(const Node *N) {
return N->getType() == NK_KeyValue;
}
@@ -419,7 +419,7 @@ public:
void skip() override { yaml::skip(*this); }
- static inline bool classof(const Node *N) {
+ static bool classof(const Node *N) {
return N->getType() == NK_Mapping;
}
@@ -476,7 +476,7 @@ public:
void skip() override { yaml::skip(*this); }
- static inline bool classof(const Node *N) {
+ static bool classof(const Node *N) {
return N->getType() == NK_Sequence;
}
@@ -502,7 +502,7 @@ public:
StringRef getName() const { return Name; }
Node *getTarget();
- static inline bool classof(const Node *N) { return N->getType() == NK_Alias; }
+ static bool classof(const Node *N) { return N->getType() == NK_Alias; }
private:
StringRef Name;
diff --git a/include/llvm/Support/YAMLTraits.h b/include/llvm/Support/YAMLTraits.h
index 53618a56f853..15b3b11db045 100644
--- a/include/llvm/Support/YAMLTraits.h
+++ b/include/llvm/Support/YAMLTraits.h
@@ -180,17 +180,17 @@ struct BlockScalarTraits {
/// to/from a YAML sequence. For example:
///
/// template<>
-/// struct SequenceTraits< std::vector<MyType>> {
-/// static size_t size(IO &io, std::vector<MyType> &seq) {
+/// struct SequenceTraits<MyContainer> {
+/// static size_t size(IO &io, MyContainer &seq) {
/// return seq.size();
/// }
-/// static MyType& element(IO &, std::vector<MyType> &seq, size_t index) {
+/// static MyType& element(IO &, MyContainer &seq, size_t index) {
/// if ( index >= seq.size() )
/// seq.resize(index+1);
/// return seq[index];
/// }
/// };
-template<typename T>
+template<typename T, typename EnableIf = void>
struct SequenceTraits {
// Must provide:
// static size_t size(IO &io, T &seq);
@@ -201,6 +201,14 @@ struct SequenceTraits {
// static const bool flow = true;
};
+/// This class should be specialized by any type for which vectors of that
+/// type need to be converted to/from a YAML sequence.
+template<typename T, typename EnableIf = void>
+struct SequenceElementTraits {
+ // Must provide:
+ // static const bool flow;
+};
+
/// This class should be specialized by any type that needs to be converted
/// to/from a list of YAML documents.
template<typename T>
@@ -1148,7 +1156,7 @@ private:
HNode(Node *n) : _node(n) { }
virtual ~HNode() = default;
- static inline bool classof(const HNode *) { return true; }
+ static bool classof(const HNode *) { return true; }
Node *_node;
};
@@ -1159,11 +1167,9 @@ private:
public:
EmptyHNode(Node *n) : HNode(n) { }
- static inline bool classof(const HNode *n) {
- return NullNode::classof(n->_node);
- }
+ static bool classof(const HNode *n) { return NullNode::classof(n->_node); }
- static inline bool classof(const EmptyHNode *) { return true; }
+ static bool classof(const EmptyHNode *) { return true; }
};
class ScalarHNode : public HNode {
@@ -1174,12 +1180,12 @@ private:
StringRef value() const { return _value; }
- static inline bool classof(const HNode *n) {
+ static bool classof(const HNode *n) {
return ScalarNode::classof(n->_node) ||
BlockScalarNode::classof(n->_node);
}
- static inline bool classof(const ScalarHNode *) { return true; }
+ static bool classof(const ScalarHNode *) { return true; }
protected:
StringRef _value;
@@ -1191,11 +1197,11 @@ private:
public:
MapHNode(Node *n) : HNode(n) { }
- static inline bool classof(const HNode *n) {
+ static bool classof(const HNode *n) {
return MappingNode::classof(n->_node);
}
- static inline bool classof(const MapHNode *) { return true; }
+ static bool classof(const MapHNode *) { return true; }
using NameToNode = StringMap<std::unique_ptr<HNode>>;
@@ -1209,11 +1215,11 @@ private:
public:
SequenceHNode(Node *n) : HNode(n) { }
- static inline bool classof(const HNode *n) {
+ static bool classof(const HNode *n) {
return SequenceNode::classof(n->_node);
}
- static inline bool classof(const SequenceHNode *) { return true; }
+ static bool classof(const SequenceHNode *) { return true; }
std::vector<std::unique_ptr<HNode>> Entries;
};
@@ -1544,18 +1550,59 @@ operator<<(Output &yout, T &seq) {
return yout;
}
-template <typename T> struct SequenceTraitsImpl {
- using _type = typename T::value_type;
+template <bool B> struct IsFlowSequenceBase {};
+template <> struct IsFlowSequenceBase<true> { static const bool flow = true; };
+
+template <typename T, bool Flow>
+struct SequenceTraitsImpl : IsFlowSequenceBase<Flow> {
+private:
+ using type = typename T::value_type;
+public:
static size_t size(IO &io, T &seq) { return seq.size(); }
- static _type &element(IO &io, T &seq, size_t index) {
+ static type &element(IO &io, T &seq, size_t index) {
if (index >= seq.size())
seq.resize(index + 1);
return seq[index];
}
};
+// Simple helper to check an expression can be used as a bool-valued template
+// argument.
+template <bool> struct CheckIsBool { static const bool value = true; };
+
+// If T has SequenceElementTraits, then vector<T> and SmallVector<T, N> have
+// SequenceTraits that do the obvious thing.
+template <typename T>
+struct SequenceTraits<std::vector<T>,
+ typename std::enable_if<CheckIsBool<
+ SequenceElementTraits<T>::flow>::value>::type>
+ : SequenceTraitsImpl<std::vector<T>, SequenceElementTraits<T>::flow> {};
+template <typename T, unsigned N>
+struct SequenceTraits<SmallVector<T, N>,
+ typename std::enable_if<CheckIsBool<
+ SequenceElementTraits<T>::flow>::value>::type>
+ : SequenceTraitsImpl<SmallVector<T, N>, SequenceElementTraits<T>::flow> {};
+
+// Sequences of fundamental types use flow formatting.
+template <typename T>
+struct SequenceElementTraits<
+ T, typename std::enable_if<std::is_fundamental<T>::value>::type> {
+ static const bool flow = true;
+};
+
+// Sequences of strings use block formatting.
+template<> struct SequenceElementTraits<std::string> {
+ static const bool flow = false;
+};
+template<> struct SequenceElementTraits<StringRef> {
+ static const bool flow = false;
+};
+template<> struct SequenceElementTraits<std::pair<std::string, std::string>> {
+ static const bool flow = false;
+};
+
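A minimal sketch of what these defaults buy (stream setup invented): a vector of a fundamental type now serializes with no user-declared traits at all.

  #include "llvm/Support/YAMLTraits.h"
  #include "llvm/Support/raw_ostream.h"
  #include <vector>

  void emit() {
    std::vector<int> Ints = {1, 2, 3};
    llvm::yaml::Output Yout(llvm::outs());
    Yout << Ints; // flow formatting from SequenceElementTraits: [ 1, 2, 3 ]
  }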
/// Implementation of CustomMappingTraits for std::map<std::string, T>.
template <typename T> struct StdMapStringCustomMappingTraitsImpl {
using map_type = std::map<std::string, T>;
@@ -1573,42 +1620,29 @@ template <typename T> struct StdMapStringCustomMappingTraitsImpl {
} // end namespace yaml
} // end namespace llvm
-/// Utility for declaring that a std::vector of a particular type
-/// should be considered a YAML sequence.
-#define LLVM_YAML_IS_SEQUENCE_VECTOR(_type) \
+#define LLVM_YAML_IS_SEQUENCE_VECTOR_IMPL(TYPE, FLOW) \
namespace llvm { \
namespace yaml { \
- template <> \
- struct SequenceTraits<std::vector<_type>> \
- : public SequenceTraitsImpl<std::vector<_type>> {}; \
- template <unsigned N> \
- struct SequenceTraits<SmallVector<_type, N>> \
- : public SequenceTraitsImpl<SmallVector<_type, N>> {}; \
+ static_assert( \
+ !std::is_fundamental<TYPE>::value && \
+ !std::is_same<TYPE, std::string>::value && \
+ !std::is_same<TYPE, llvm::StringRef>::value, \
+ "only use LLVM_YAML_IS_SEQUENCE_VECTOR for types you control"); \
+ template <> struct SequenceElementTraits<TYPE> { \
+ static const bool flow = FLOW; \
+ }; \
} \
}
/// Utility for declaring that a std::vector of a particular type
+/// should be considered a YAML sequence.
+#define LLVM_YAML_IS_SEQUENCE_VECTOR(type) \
+ LLVM_YAML_IS_SEQUENCE_VECTOR_IMPL(type, false)
+
+/// Utility for declaring that a std::vector of a particular type
/// should be considered a YAML flow sequence.
-/// We need to do a partial specialization on the vector version, not a full.
-/// If this is a full specialization, the compiler is a bit too "smart" and
-/// decides to warn on -Wunused-const-variable. This workaround can be
-/// removed and we can do a full specialization on std::vector<T> once
-/// PR28878 is fixed.
-#define LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(_type) \
- namespace llvm { \
- namespace yaml { \
- template <unsigned N> \
- struct SequenceTraits<SmallVector<_type, N>> \
- : public SequenceTraitsImpl<SmallVector<_type, N>> { \
- static const bool flow = true; \
- }; \
- template <typename Allocator> \
- struct SequenceTraits<std::vector<_type, Allocator>> \
- : public SequenceTraitsImpl<std::vector<_type, Allocator>> { \
- static const bool flow = true; \
- }; \
- } \
- }
+#define LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(type) \
+ LLVM_YAML_IS_SEQUENCE_VECTOR_IMPL(type, true)
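A hypothetical use of the reworked macros (MyPoint is an invented type): the declaration now expands to a SequenceElementTraits specialization, and the enable_if machinery above derives SequenceTraits for both container flavors.

  struct MyPoint { int X, Y; };
  LLVM_YAML_IS_SEQUENCE_VECTOR(MyPoint)
  // Given a MappingTraits<MyPoint> specialization (not shown), both
  // std::vector<MyPoint> and SmallVector<MyPoint, 8> now map to
  // block-formatted YAML sequences.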
#define LLVM_YAML_DECLARE_MAPPING_TRAITS(Type) \
namespace llvm { \
@@ -1655,10 +1689,10 @@ template <typename T> struct StdMapStringCustomMappingTraitsImpl {
namespace yaml { \
template <unsigned N> \
struct DocumentListTraits<SmallVector<_type, N>> \
- : public SequenceTraitsImpl<SmallVector<_type, N>> {}; \
+ : public SequenceTraitsImpl<SmallVector<_type, N>, false> {}; \
template <> \
struct DocumentListTraits<std::vector<_type>> \
- : public SequenceTraitsImpl<std::vector<_type>> {}; \
+ : public SequenceTraitsImpl<std::vector<_type>, false> {}; \
} \
}
diff --git a/include/llvm/Target/GenericOpcodes.td b/include/llvm/Target/GenericOpcodes.td
index 9593d8bd7edb..e35bcb015d6a 100644
--- a/include/llvm/Target/GenericOpcodes.td
+++ b/include/llvm/Target/GenericOpcodes.td
@@ -49,6 +49,12 @@ def G_TRUNC : Instruction {
let hasSideEffects = 0;
}
+def G_IMPLICIT_DEF : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins);
+ let hasSideEffects = 0;
+}
+
def G_FRAME_INDEX : Instruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins unknown:$src2);
@@ -416,6 +422,34 @@ def G_FPOW : Instruction {
let hasSideEffects = 0;
}
+// Floating point base-e exponential of a value.
+def G_FEXP : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1);
+ let hasSideEffects = 0;
+}
+
+// Floating point base-2 exponential of a value.
+def G_FEXP2 : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1);
+ let hasSideEffects = 0;
+}
+
+// Floating point base-e logarithm of a value.
+def G_FLOG : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1);
+ let hasSideEffects = 0;
+}
+
+// Floating point base-2 logarithm of a value.
+def G_FLOG2 : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1);
+ let hasSideEffects = 0;
+}
+
//------------------------------------------------------------------------------
// Memory ops
//------------------------------------------------------------------------------
diff --git a/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index a06c67fe814c..3a3118139bcb 100644
--- a/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -62,6 +62,8 @@ def : GINodeEquiv<G_FMUL, fmul>;
def : GINodeEquiv<G_FDIV, fdiv>;
def : GINodeEquiv<G_FREM, frem>;
def : GINodeEquiv<G_FPOW, fpow>;
+def : GINodeEquiv<G_FEXP2, fexp2>;
+def : GINodeEquiv<G_FLOG2, flog2>;
def : GINodeEquiv<G_BR, br>;
// Specifies the GlobalISel equivalents for SelectionDAG's ComplexPattern.
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 22dab2e82828..964d6314b127 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -2051,7 +2051,7 @@ public:
/// this information should not be provided because it will generate more
/// loads.
virtual bool hasPairedLoad(EVT /*LoadedType*/,
- unsigned & /*RequiredAligment*/) const {
+ unsigned & /*RequiredAlignment*/) const {
return false;
}
@@ -2722,7 +2722,7 @@ public:
// This transformation may not be desirable if it disrupts a particularly
// auspicious target-specific tree (e.g. bitfield extraction in AArch64).
// By default, it returns true.
- virtual bool isDesirableToCommuteWithShift(const SDNode *N /*Op*/) const {
+ virtual bool isDesirableToCommuteWithShift(const SDNode *N) const {
return true;
}
diff --git a/include/llvm/Target/TargetOpcodes.def b/include/llvm/Target/TargetOpcodes.def
index 836b11cf89c6..cadf86058f0c 100644
--- a/include/llvm/Target/TargetOpcodes.def
+++ b/include/llvm/Target/TargetOpcodes.def
@@ -222,6 +222,8 @@ HANDLE_TARGET_OPCODE(G_OR)
HANDLE_TARGET_OPCODE(G_XOR)
+HANDLE_TARGET_OPCODE(G_IMPLICIT_DEF)
+
/// Generic instruction to materialize the address of an alloca or other
/// stack-based object.
HANDLE_TARGET_OPCODE(G_FRAME_INDEX)
@@ -369,6 +371,18 @@ HANDLE_TARGET_OPCODE(G_FREM)
/// Generic FP exponentiation.
HANDLE_TARGET_OPCODE(G_FPOW)
+/// Generic base-e exponential of a value.
+HANDLE_TARGET_OPCODE(G_FEXP)
+
+/// Generic base-2 exponential of a value.
+HANDLE_TARGET_OPCODE(G_FEXP2)
+
+/// Floating point base-e logarithm of a value.
+HANDLE_TARGET_OPCODE(G_FLOG)
+
+/// Floating point base-2 logarithm of a value.
+HANDLE_TARGET_OPCODE(G_FLOG2)
+
/// Generic FP negation.
HANDLE_TARGET_OPCODE(G_FNEG)
diff --git a/include/llvm/Transforms/IPO/PassManagerBuilder.h b/include/llvm/Transforms/IPO/PassManagerBuilder.h
index db4bfb15f51d..276306f686ff 100644
--- a/include/llvm/Transforms/IPO/PassManagerBuilder.h
+++ b/include/llvm/Transforms/IPO/PassManagerBuilder.h
@@ -145,7 +145,6 @@ public:
bool DisableTailCalls;
bool DisableUnitAtATime;
bool DisableUnrollLoops;
- bool BBVectorize;
bool SLPVectorize;
bool LoopVectorize;
bool RerollLoops;
diff --git a/include/llvm/Transforms/SampleProfile.h b/include/llvm/Transforms/SampleProfile.h
index 93fa9532cc3a..c984fe74ba93 100644
--- a/include/llvm/Transforms/SampleProfile.h
+++ b/include/llvm/Transforms/SampleProfile.h
@@ -21,6 +21,10 @@ namespace llvm {
class SampleProfileLoaderPass : public PassInfoMixin<SampleProfileLoaderPass> {
public:
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+ SampleProfileLoaderPass(std::string File = "") : ProfileFileName(File) {}
+
+private:
+ std::string ProfileFileName;
};
} // End llvm namespace
diff --git a/include/llvm/Transforms/Scalar/ConstantHoisting.h b/include/llvm/Transforms/Scalar/ConstantHoisting.h
index edc91add7a73..a2a9afc083a0 100644
--- a/include/llvm/Transforms/Scalar/ConstantHoisting.h
+++ b/include/llvm/Transforms/Scalar/ConstantHoisting.h
@@ -132,6 +132,8 @@ private:
Instruction *Inst, unsigned Idx,
ConstantInt *ConstInt);
void collectConstantCandidates(ConstCandMapType &ConstCandMap,
+ Instruction *Inst, unsigned Idx);
+ void collectConstantCandidates(ConstCandMapType &ConstCandMap,
Instruction *Inst);
void collectConstantCandidates(Function &Fn);
void findAndMakeBaseConstant(ConstCandVecType::iterator S,
diff --git a/include/llvm/Transforms/Utils/LoopUtils.h b/include/llvm/Transforms/Utils/LoopUtils.h
index 0397eb95e763..1344285917ba 100644
--- a/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/include/llvm/Transforms/Utils/LoopUtils.h
@@ -184,9 +184,14 @@ public:
/// Returns true if Phi is a first-order recurrence. A first-order recurrence
/// is a non-reduction recurrence relation in which the value of the
/// recurrence in the current loop iteration equals a value defined in the
- /// previous iteration.
- static bool isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop,
- DominatorTree *DT);
+ /// previous iteration. \p SinkAfter includes pairs of instructions where the
+ /// first will be rescheduled to appear after the second if/when the loop is
+ /// vectorized. It may be augmented with additional pairs if needed in order
+ /// to handle Phi as a first-order recurrence.
+ static bool
+ isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop,
+ DenseMap<Instruction *, Instruction *> &SinkAfter,
+ DominatorTree *DT);
RecurrenceKind getRecurrenceKind() { return Kind; }
diff --git a/include/llvm/Transforms/Utils/OrderedInstructions.h b/include/llvm/Transforms/Utils/OrderedInstructions.h
index 64c6bcb68b18..165d4bdaa6d4 100644
--- a/include/llvm/Transforms/Utils/OrderedInstructions.h
+++ b/include/llvm/Transforms/Utils/OrderedInstructions.h
@@ -46,7 +46,7 @@ public:
/// i.e. If an instruction is deleted or added to the basic block, the user
/// should call this function to invalidate the OrderedBasicBlock cache for
/// this basic block.
- void invalidateBlock(BasicBlock *BB) { OBBMap.erase(BB); }
+ void invalidateBlock(const BasicBlock *BB) { OBBMap.erase(BB); }
};
} // end namespace llvm
diff --git a/include/llvm/Transforms/Utils/PredicateInfo.h b/include/llvm/Transforms/Utils/PredicateInfo.h
index 1322c686eb90..8150f1528397 100644
--- a/include/llvm/Transforms/Utils/PredicateInfo.h
+++ b/include/llvm/Transforms/Utils/PredicateInfo.h
@@ -74,6 +74,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Transforms/Utils/OrderedInstructions.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -89,7 +90,6 @@ class Instruction;
class MemoryAccess;
class LLVMContext;
class raw_ostream;
-class OrderedBasicBlock;
enum PredicateType { PT_Branch, PT_Assume, PT_Switch };
@@ -114,8 +114,9 @@ protected:
class PredicateWithCondition : public PredicateBase {
public:
Value *Condition;
- static inline bool classof(const PredicateBase *PB) {
- return PB->Type == PT_Assume || PB->Type == PT_Branch || PB->Type == PT_Switch;
+ static bool classof(const PredicateBase *PB) {
+ return PB->Type == PT_Assume || PB->Type == PT_Branch ||
+ PB->Type == PT_Switch;
}
protected:
@@ -133,7 +134,7 @@ public:
: PredicateWithCondition(PT_Assume, Op, Condition),
AssumeInst(AssumeInst) {}
PredicateAssume() = delete;
- static inline bool classof(const PredicateBase *PB) {
+ static bool classof(const PredicateBase *PB) {
return PB->Type == PT_Assume;
}
};
@@ -146,7 +147,7 @@ public:
BasicBlock *From;
BasicBlock *To;
PredicateWithEdge() = delete;
- static inline bool classof(const PredicateBase *PB) {
+ static bool classof(const PredicateBase *PB) {
return PB->Type == PT_Branch || PB->Type == PT_Switch;
}
@@ -166,7 +167,7 @@ public:
: PredicateWithEdge(PT_Branch, Op, BranchBB, SplitBB, Condition),
TrueEdge(TakenEdge) {}
PredicateBranch() = delete;
- static inline bool classof(const PredicateBase *PB) {
+ static bool classof(const PredicateBase *PB) {
return PB->Type == PT_Branch;
}
};
@@ -182,7 +183,7 @@ public:
SI->getCondition()),
CaseValue(CaseValue), Switch(SI) {}
PredicateSwitch() = delete;
- static inline bool classof(const PredicateBase *PB) {
+ static bool classof(const PredicateBase *PB) {
return PB->Type == PT_Switch;
}
};
@@ -244,6 +245,7 @@ private:
Function &F;
DominatorTree &DT;
AssumptionCache &AC;
+ OrderedInstructions OI;
// This maps from copy operands to Predicate Info. Note that it does not own
// the Predicate Info, they belong to the ValueInfo structs in the ValueInfos
// vector.
@@ -256,8 +258,6 @@ private:
// 0 is not a valid Value Info index, you can use DenseMap::lookup and tell
// whether it returned a valid result.
DenseMap<Value *, unsigned int> ValueInfoNums;
- // OrderedBasicBlocks used during sorting uses
- DenseMap<const BasicBlock *, std::unique_ptr<OrderedBasicBlock>> OBBMap;
// The set of edges along which we can only handle phi uses, due to critical
// edges.
DenseSet<std::pair<BasicBlock *, BasicBlock *>> EdgeUsesOnly;
diff --git a/include/llvm/Transforms/Utils/ValueMapper.h b/include/llvm/Transforms/Utils/ValueMapper.h
index 0cc6b34d4593..45ef8246dcd1 100644
--- a/include/llvm/Transforms/Utils/ValueMapper.h
+++ b/include/llvm/Transforms/Utils/ValueMapper.h
@@ -116,7 +116,7 @@ static inline RemapFlags operator|(RemapFlags LHS, RemapFlags RHS) {
/// - \a scheduleMapGlobalAliasee()
/// - \a scheduleRemapFunction()
///
-/// Sometimes a callback needs a diferent mapping context. Such a context can
+/// Sometimes a callback needs a different mapping context. Such a context can
/// be registered using \a registerAlternateMappingContext(), which takes an
/// alternate \a ValueToValueMapTy and \a ValueMaterializer and returns a ID to
/// pass into the schedule*() functions.
diff --git a/include/llvm/Transforms/Vectorize.h b/include/llvm/Transforms/Vectorize.h
index f734e299c6e9..19845e471e48 100644
--- a/include/llvm/Transforms/Vectorize.h
+++ b/include/llvm/Transforms/Vectorize.h
@@ -108,13 +108,6 @@ struct VectorizeConfig {
//===----------------------------------------------------------------------===//
//
-// BBVectorize - A basic-block vectorization pass.
-//
-BasicBlockPass *
-createBBVectorizePass(const VectorizeConfig &C = VectorizeConfig());
-
-//===----------------------------------------------------------------------===//
-//
// LoopVectorize - Create a loop vectorization pass.
//
Pass *createLoopVectorizePass(bool NoUnrolling = false,
diff --git a/lib/Analysis/CFLAndersAliasAnalysis.cpp b/lib/Analysis/CFLAndersAliasAnalysis.cpp
index ddd5123d0eff..0de7ad98af46 100644
--- a/lib/Analysis/CFLAndersAliasAnalysis.cpp
+++ b/lib/Analysis/CFLAndersAliasAnalysis.cpp
@@ -68,17 +68,6 @@ CFLAndersAAResult::CFLAndersAAResult(CFLAndersAAResult &&RHS)
: AAResultBase(std::move(RHS)), TLI(RHS.TLI) {}
CFLAndersAAResult::~CFLAndersAAResult() {}
-static const Function *parentFunctionOfValue(const Value *Val) {
- if (auto *Inst = dyn_cast<Instruction>(Val)) {
- auto *Bb = Inst->getParent();
- return Bb->getParent();
- }
-
- if (auto *Arg = dyn_cast<Argument>(Val))
- return Arg->getParent();
- return nullptr;
-}
-
namespace {
enum class MatchState : uint8_t {
@@ -789,10 +778,10 @@ void CFLAndersAAResult::scan(const Function &Fn) {
// resize and invalidating the reference returned by operator[]
auto FunInfo = buildInfoFrom(Fn);
Cache[&Fn] = std::move(FunInfo);
- Handles.push_front(FunctionHandle(const_cast<Function *>(&Fn), this));
+ Handles.emplace_front(const_cast<Function *>(&Fn), this);
}
-void CFLAndersAAResult::evict(const Function &Fn) { Cache.erase(&Fn); }
+void CFLAndersAAResult::evict(const Function *Fn) { Cache.erase(Fn); }
const Optional<CFLAndersAAResult::FunctionInfo> &
CFLAndersAAResult::ensureCached(const Function &Fn) {
diff --git a/lib/Analysis/CFLSteensAliasAnalysis.cpp b/lib/Analysis/CFLSteensAliasAnalysis.cpp
index 6e4263920e58..adbdd82012a3 100644
--- a/lib/Analysis/CFLSteensAliasAnalysis.cpp
+++ b/lib/Analysis/CFLSteensAliasAnalysis.cpp
@@ -88,19 +88,6 @@ const StratifiedIndex StratifiedLink::SetSentinel =
//===----------------------------------------------------------------------===//
/// Determines whether it would be pointless to add the given Value to our sets.
-static bool canSkipAddingToSets(Value *Val);
-
-static Optional<Function *> parentFunctionOfValue(Value *Val) {
- if (auto *Inst = dyn_cast<Instruction>(Val)) {
- auto *Bb = Inst->getParent();
- return Bb->getParent();
- }
-
- if (auto *Arg = dyn_cast<Argument>(Val))
- return Arg->getParent();
- return None;
-}
-
static bool canSkipAddingToSets(Value *Val) {
// Constants can share instances, which may falsely unify multiple
// sets, e.g. in
@@ -245,7 +232,7 @@ void CFLSteensAAResult::scan(Function *Fn) {
auto FunInfo = buildSetsFrom(Fn);
Cache[Fn] = std::move(FunInfo);
- Handles.push_front(FunctionHandle(Fn, this));
+ Handles.emplace_front(Fn, this);
}
void CFLSteensAAResult::evict(Function *Fn) { Cache.erase(Fn); }
@@ -281,9 +268,9 @@ AliasResult CFLSteensAAResult::query(const MemoryLocation &LocA,
return NoAlias;
Function *Fn = nullptr;
- auto MaybeFnA = parentFunctionOfValue(ValA);
- auto MaybeFnB = parentFunctionOfValue(ValB);
- if (!MaybeFnA.hasValue() && !MaybeFnB.hasValue()) {
+ Function *MaybeFnA = const_cast<Function *>(parentFunctionOfValue(ValA));
+ Function *MaybeFnB = const_cast<Function *>(parentFunctionOfValue(ValB));
+ if (!MaybeFnA && !MaybeFnB) {
// The only times this is known to happen are when globals + InlineAsm are
// involved
DEBUG(dbgs()
@@ -291,12 +278,12 @@ AliasResult CFLSteensAAResult::query(const MemoryLocation &LocA,
return MayAlias;
}
- if (MaybeFnA.hasValue()) {
- Fn = *MaybeFnA;
- assert((!MaybeFnB.hasValue() || *MaybeFnB == *MaybeFnA) &&
+ if (MaybeFnA) {
+ Fn = MaybeFnA;
+ assert((!MaybeFnB || MaybeFnB == MaybeFnA) &&
"Interprocedural queries not supported");
} else {
- Fn = *MaybeFnB;
+ Fn = MaybeFnB;
}
assert(Fn != nullptr);
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 77ad6f1e166f..35693666aa03 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -66,6 +66,12 @@ static cl::opt<int>
cl::ZeroOrMore,
cl::desc("Threshold for hot callsites "));
+static cl::opt<int> ColdCallSiteRelFreq(
+ "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
+ cl::desc("Maxmimum block frequency, expressed as a percentage of caller's "
+ "entry frequency, for a callsite to be cold in the absence of "
+ "profile information."));
+
namespace {
class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
@@ -172,6 +178,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// Return true if size growth is allowed when inlining the callee at CS.
bool allowSizeGrowth(CallSite CS);
+ /// Return true if \p CS is a cold callsite.
+ bool isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI);
+
// Custom analysis routines.
bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);
@@ -631,6 +640,26 @@ bool CallAnalyzer::allowSizeGrowth(CallSite CS) {
return true;
}
+bool CallAnalyzer::isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI) {
+ // If a global profile summary is available, the callsite's coldness is
+ // determined from it.
+ if (PSI->hasProfileSummary())
+ return PSI->isColdCallSite(CS, CallerBFI);
+ if (!CallerBFI)
+ return false;
+
+ // In the absence of a global profile summary, determine if the callsite is cold
+ // relative to caller's entry. We could potentially cache the computation of
+ // scaled entry frequency, but the added complexity is not worth it unless
+ // this scaling shows up high in the profiles.
+ const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
+ auto CallSiteBB = CS.getInstruction()->getParent();
+ auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
+ auto CallerEntryFreq =
+ CallerBFI->getBlockFreq(&(CS.getCaller()->getEntryBlock()));
+ return CallSiteFreq < CallerEntryFreq * ColdProb;
+}
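Worked numbers for the comparison above: with the default cold-callsite-rel-freq of 2, a callsite in a caller whose entry block frequency is 1000 is treated as cold only when its own block frequency is below 1000 * 2 / 100 = 20.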
+
void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
// If no size growth is allowed for this inlining, set Threshold to 0.
if (!allowSizeGrowth(CS)) {
@@ -676,7 +705,7 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
if (PSI->isHotCallSite(CS, CallerBFI)) {
DEBUG(dbgs() << "Hot callsite.\n");
Threshold = Params.HotCallSiteThreshold.getValue();
- } else if (PSI->isColdCallSite(CS, CallerBFI)) {
+ } else if (isColdCallSite(CS, CallerBFI)) {
DEBUG(dbgs() << "Cold callsite.\n");
Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
}
@@ -1010,7 +1039,7 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
if (isa<ConstantInt>(V))
return true;
- // Assume the most general case where the swith is lowered into
+ // Assume the most general case where the switch is lowered into
// either a jump table, bit test, or a balanced binary tree consisting of
// case clusters without merging adjacent clusters with the same
// destination. We do not consider the switches that are lowered with a mix
diff --git a/lib/Analysis/IteratedDominanceFrontier.cpp b/lib/Analysis/IteratedDominanceFrontier.cpp
index 2a736ec0379c..0e02850df349 100644
--- a/lib/Analysis/IteratedDominanceFrontier.cpp
+++ b/lib/Analysis/IteratedDominanceFrontier.cpp
@@ -20,14 +20,6 @@ namespace llvm {
template <class NodeTy>
void IDFCalculator<NodeTy>::calculate(
SmallVectorImpl<BasicBlock *> &PHIBlocks) {
- // If we haven't computed dominator tree levels, do so now.
- if (DomLevels.empty()) {
- for (auto DFI = df_begin(DT.getRootNode()), DFE = df_end(DT.getRootNode());
- DFI != DFE; ++DFI) {
- DomLevels[*DFI] = DFI.getPathLength() - 1;
- }
- }
-
// Use a priority queue keyed on dominator tree level so that inserted nodes
// are handled from the bottom of the dominator tree upwards.
typedef std::pair<DomTreeNode *, unsigned> DomTreeNodePair;
@@ -37,7 +29,7 @@ void IDFCalculator<NodeTy>::calculate(
for (BasicBlock *BB : *DefBlocks) {
if (DomTreeNode *Node = DT.getNode(BB))
- PQ.push(std::make_pair(Node, DomLevels.lookup(Node)));
+ PQ.push({Node, Node->getLevel()});
}
SmallVector<DomTreeNode *, 32> Worklist;
@@ -72,7 +64,7 @@ void IDFCalculator<NodeTy>::calculate(
if (SuccNode->getIDom() == Node)
continue;
- unsigned SuccLevel = DomLevels.lookup(SuccNode);
+ const unsigned SuccLevel = SuccNode->getLevel();
if (SuccLevel > RootLevel)
continue;
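The lookup-free code above relies on dominator tree nodes caching their depth, the invariant that VerifyLevels in GenericDomTreeConstruction.h now checks; as a sketch:

  assert(!Node->getIDom() ||
         Node->getLevel() == Node->getIDom()->getLevel() + 1);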
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index 7983d62c2f7a..f88d54b21e1e 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -400,8 +400,8 @@ static APInt getSizeWithOverflow(const SizeOffsetType &Data) {
/// \brief Compute the size of the object pointed by Ptr. Returns true and the
/// object size in Size if successful, and false otherwise.
-/// If RoundToAlign is true, then Size is rounded up to the aligment of allocas,
-/// byval arguments, and global variables.
+/// If RoundToAlign is true, then Size is rounded up to the alignment of
+/// allocas, byval arguments, and global variables.
bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL,
const TargetLibraryInfo *TLI, ObjectSizeOpts Opts) {
ObjectSizeOffsetVisitor Visitor(DL, TLI, Ptr->getContext(), Opts);
diff --git a/lib/Analysis/OptimizationDiagnosticInfo.cpp b/lib/Analysis/OptimizationDiagnosticInfo.cpp
index e38e530c052d..eb259fd7a384 100644
--- a/lib/Analysis/OptimizationDiagnosticInfo.cpp
+++ b/lib/Analysis/OptimizationDiagnosticInfo.cpp
@@ -25,7 +25,7 @@ using namespace llvm;
OptimizationRemarkEmitter::OptimizationRemarkEmitter(const Function *F)
: F(F), BFI(nullptr) {
- if (!F->getContext().getDiagnosticHotnessRequested())
+ if (!F->getContext().getDiagnosticsHotnessRequested())
return;
// First create a dominator tree.
@@ -155,6 +155,13 @@ void OptimizationRemarkEmitter::emit(
DiagnosticInfoOptimizationBase &OptDiagBase) {
auto &OptDiag = cast<DiagnosticInfoIROptimization>(OptDiagBase);
computeHotness(OptDiag);
+ // If a diagnostic has a hotness value, then only emit it if its hotness
+ // meets the threshold.
+ if (OptDiag.getHotness() &&
+ *OptDiag.getHotness() <
+ F->getContext().getDiagnosticsHotnessThreshold()) {
+ return;
+ }
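For instance (threshold value invented): with a diagnostics hotness threshold of 100 set on the LLVMContext, a remark whose computed hotness is 50 is silently dropped here, while one with hotness 500 still reaches the YAML output and the diagnostic handler below.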
yaml::Output *Out = F->getContext().getDiagnosticsOutputFile();
if (Out) {
@@ -176,7 +183,7 @@ OptimizationRemarkEmitterWrapperPass::OptimizationRemarkEmitterWrapperPass()
bool OptimizationRemarkEmitterWrapperPass::runOnFunction(Function &Fn) {
BlockFrequencyInfo *BFI;
- if (Fn.getContext().getDiagnosticHotnessRequested())
+ if (Fn.getContext().getDiagnosticsHotnessRequested())
BFI = &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI();
else
BFI = nullptr;
@@ -198,7 +205,7 @@ OptimizationRemarkEmitterAnalysis::run(Function &F,
FunctionAnalysisManager &AM) {
BlockFrequencyInfo *BFI;
- if (F.getContext().getDiagnosticHotnessRequested())
+ if (F.getContext().getDiagnosticsHotnessRequested())
BFI = &AM.getResult<BlockFrequencyAnalysis>(F);
else
BFI = nullptr;
diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp
index 63ef8d28d44a..900487323005 100644
--- a/lib/Analysis/RegionInfo.cpp
+++ b/lib/Analysis/RegionInfo.cpp
@@ -10,28 +10,29 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/RegionInfo.h"
-#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/RegionInfoImpl.h"
-#include "llvm/Analysis/RegionIterator.h"
-#include "llvm/IR/PassManager.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#ifndef NDEBUG
#include "llvm/Analysis/RegionPrinter.h"
#endif
+#include "llvm/Analysis/RegionInfoImpl.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "region"
namespace llvm {
+
template class RegionBase<RegionTraits<Function>>;
template class RegionNodeBase<RegionTraits<Function>>;
template class RegionInfoBase<RegionTraits<Function>>;
-}
+
+} // end namespace llvm
STATISTIC(numRegions, "The # of regions");
STATISTIC(numSimpleRegions, "The # of simple regions");
@@ -44,7 +45,6 @@ VerifyRegionInfoX(
cl::location(RegionInfoBase<RegionTraits<Function>>::VerifyRegionInfo),
cl::desc("Verify region info (time consuming)"));
-
static cl::opt<Region::PrintStyle, true> printStyleX("print-region-style",
cl::location(RegionInfo::printStyle),
cl::Hidden,
@@ -56,7 +56,6 @@ static cl::opt<Region::PrintStyle, true> printStyleX("print-region-style",
clEnumValN(Region::PrintRN, "rn",
"print regions in detail with element_iterator")));
-
//===----------------------------------------------------------------------===//
// Region implementation
//
@@ -68,20 +67,15 @@ Region::Region(BasicBlock *Entry, BasicBlock *Exit,
}
-Region::~Region() { }
+Region::~Region() = default;
//===----------------------------------------------------------------------===//
// RegionInfo implementation
//
-RegionInfo::RegionInfo() :
- RegionInfoBase<RegionTraits<Function>>() {
-
-}
+RegionInfo::RegionInfo() = default;
-RegionInfo::~RegionInfo() {
-
-}
+RegionInfo::~RegionInfo() = default;
bool RegionInfo::invalidate(Function &F, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &) {
@@ -126,9 +120,7 @@ RegionInfoPass::RegionInfoPass() : FunctionPass(ID) {
initializeRegionInfoPassPass(*PassRegistry::getPassRegistry());
}
-RegionInfoPass::~RegionInfoPass() {
-
-}
+RegionInfoPass::~RegionInfoPass() = default;
bool RegionInfoPass::runOnFunction(Function &F) {
releaseMemory();
@@ -181,10 +173,12 @@ INITIALIZE_PASS_END(RegionInfoPass, "regions",
// the link time optimization.
namespace llvm {
+
FunctionPass *createRegionInfoPass() {
return new RegionInfoPass();
}
-}
+
+} // end namespace llvm
//===----------------------------------------------------------------------===//
// RegionInfoAnalysis implementation
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 73a95ec405c7..678ad3af5e85 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -157,6 +157,11 @@ static cl::opt<unsigned> MaxConstantEvolvingDepth(
"scalar-evolution-max-constant-evolving-depth", cl::Hidden,
cl::desc("Maximum depth of recursive constant evolving"), cl::init(32));
+static cl::opt<unsigned>
+ MaxExtDepth("scalar-evolution-max-ext-depth", cl::Hidden,
+ cl::desc("Maximum depth of recursive SExt/ZExt"),
+ cl::init(8));
+
//===----------------------------------------------------------------------===//
// SCEV class definitions
//===----------------------------------------------------------------------===//
@@ -1285,8 +1290,8 @@ static const SCEV *getUnsignedOverflowLimitForStep(const SCEV *Step,
namespace {
struct ExtendOpTraitsBase {
- typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)(
- const SCEV *, Type *, ScalarEvolution::ExtendCacheTy &Cache);
+ typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)(const SCEV *, Type *,
+ unsigned);
};
// Used to make code generic over signed and unsigned overflow.
@@ -1315,9 +1320,8 @@ struct ExtendOpTraits<SCEVSignExtendExpr> : public ExtendOpTraitsBase {
}
};
-const ExtendOpTraitsBase::GetExtendExprTy
- ExtendOpTraits<SCEVSignExtendExpr>::GetExtendExpr =
- &ScalarEvolution::getSignExtendExprCached;
+const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
+ SCEVSignExtendExpr>::GetExtendExpr = &ScalarEvolution::getSignExtendExpr;
template <>
struct ExtendOpTraits<SCEVZeroExtendExpr> : public ExtendOpTraitsBase {
@@ -1332,9 +1336,8 @@ struct ExtendOpTraits<SCEVZeroExtendExpr> : public ExtendOpTraitsBase {
}
};
-const ExtendOpTraitsBase::GetExtendExprTy
- ExtendOpTraits<SCEVZeroExtendExpr>::GetExtendExpr =
- &ScalarEvolution::getZeroExtendExprCached;
+const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
+ SCEVZeroExtendExpr>::GetExtendExpr = &ScalarEvolution::getZeroExtendExpr;
}
// The recurrence AR has been shown to have no signed/unsigned wrap or something
@@ -1346,8 +1349,7 @@ const ExtendOpTraitsBase::GetExtendExprTy
// "sext/zext(PostIncAR)"
template <typename ExtendOpTy>
static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
- ScalarEvolution *SE,
- ScalarEvolution::ExtendCacheTy &Cache) {
+ ScalarEvolution *SE, unsigned Depth) {
auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;
@@ -1394,9 +1396,9 @@ static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
const SCEV *OperandExtendedStart =
- SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy, Cache),
- (SE->*GetExtendExpr)(Step, WideTy, Cache));
- if ((SE->*GetExtendExpr)(Start, WideTy, Cache) == OperandExtendedStart) {
+ SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy, Depth),
+ (SE->*GetExtendExpr)(Step, WideTy, Depth));
+ if ((SE->*GetExtendExpr)(Start, WideTy, Depth) == OperandExtendedStart) {
if (PreAR && AR->getNoWrapFlags(WrapType)) {
// If we know `AR` == {`PreStart`+`Step`,+,`Step`} is `WrapType` (FlagNSW
// or FlagNUW) and that `PreStart` + `Step` is `WrapType` too, then
@@ -1422,16 +1424,16 @@ static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
template <typename ExtendOpTy>
static const SCEV *getExtendAddRecStart(const SCEVAddRecExpr *AR, Type *Ty,
ScalarEvolution *SE,
- ScalarEvolution::ExtendCacheTy &Cache) {
+ unsigned Depth) {
auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;
- const SCEV *PreStart = getPreStartForExtend<ExtendOpTy>(AR, Ty, SE, Cache);
+ const SCEV *PreStart = getPreStartForExtend<ExtendOpTy>(AR, Ty, SE, Depth);
if (!PreStart)
- return (SE->*GetExtendExpr)(AR->getStart(), Ty, Cache);
+ return (SE->*GetExtendExpr)(AR->getStart(), Ty, Depth);
- return SE->getAddExpr(
- (SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty, Cache),
- (SE->*GetExtendExpr)(PreStart, Ty, Cache));
+ return SE->getAddExpr((SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty,
+ Depth),
+ (SE->*GetExtendExpr)(PreStart, Ty, Depth));
}
// Try to prove away overflow by looking at "nearby" add recurrences. A
@@ -1511,31 +1513,8 @@ bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start,
return false;
}
-const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty) {
- // Use the local cache to prevent exponential behavior of
- // getZeroExtendExprImpl.
- ExtendCacheTy Cache;
- return getZeroExtendExprCached(Op, Ty, Cache);
-}
-
-/// Query \p Cache before calling getZeroExtendExprImpl. If there is no
-/// related entry in the \p Cache, call getZeroExtendExprImpl and save
-/// the result in the \p Cache.
-const SCEV *ScalarEvolution::getZeroExtendExprCached(const SCEV *Op, Type *Ty,
- ExtendCacheTy &Cache) {
- auto It = Cache.find({Op, Ty});
- if (It != Cache.end())
- return It->second;
- const SCEV *ZExt = getZeroExtendExprImpl(Op, Ty, Cache);
- auto InsertResult = Cache.insert({{Op, Ty}, ZExt});
- assert(InsertResult.second && "Expect the key was not in the cache");
- (void)InsertResult;
- return ZExt;
-}
-
-/// The real implementation of getZeroExtendExpr.
-const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
- ExtendCacheTy &Cache) {
+const SCEV *
+ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
"This is not an extending conversion!");
assert(isSCEVable(Ty) &&
@@ -1545,11 +1524,11 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
// Fold if the operand is constant.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
return getConstant(
- cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(), Ty)));
+ cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(), Ty)));
// zext(zext(x)) --> zext(x)
if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
- return getZeroExtendExprCached(SZ->getOperand(), Ty, Cache);
+ return getZeroExtendExpr(SZ->getOperand(), Ty, Depth + 1);
// Before doing any expensive analysis, check to see if we've already
// computed a SCEV for this Op and Ty.
@@ -1559,6 +1538,12 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
ID.AddPointer(Ty);
void *IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ if (Depth > MaxExtDepth) {
+ SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
+ Op, Ty);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+ }
// zext(trunc(x)) --> zext(x) or x or trunc(x)
if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
@@ -1593,8 +1578,8 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
// we don't need to do any further analysis.
if (AR->hasNoUnsignedWrap())
return getAddRecExpr(
- getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Cache),
- getZeroExtendExprCached(Step, Ty, Cache), L, AR->getNoWrapFlags());
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Depth + 1),
+ getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
// Check whether the backedge-taken count is SCEVCouldNotCompute.
// Note that this serves two purposes: It filters out loops that are
@@ -1618,22 +1603,29 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
if (MaxBECount == RecastedMaxBECount) {
Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
// Check whether Start+Step*MaxBECount has no unsigned overflow.
- const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step);
- const SCEV *ZAdd =
- getZeroExtendExprCached(getAddExpr(Start, ZMul), WideTy, Cache);
- const SCEV *WideStart = getZeroExtendExprCached(Start, WideTy, Cache);
+ const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step,
+ SCEV::FlagAnyWrap, Depth + 1);
+ const SCEV *ZAdd = getZeroExtendExpr(getAddExpr(Start, ZMul,
+ SCEV::FlagAnyWrap,
+ Depth + 1),
+ WideTy, Depth + 1);
+ const SCEV *WideStart = getZeroExtendExpr(Start, WideTy, Depth + 1);
const SCEV *WideMaxBECount =
- getZeroExtendExprCached(CastedMaxBECount, WideTy, Cache);
- const SCEV *OperandExtendedAdd = getAddExpr(
- WideStart, getMulExpr(WideMaxBECount, getZeroExtendExprCached(
- Step, WideTy, Cache)));
+ getZeroExtendExpr(CastedMaxBECount, WideTy, Depth + 1);
+ const SCEV *OperandExtendedAdd =
+ getAddExpr(WideStart,
+ getMulExpr(WideMaxBECount,
+ getZeroExtendExpr(Step, WideTy, Depth + 1),
+ SCEV::FlagAnyWrap, Depth + 1),
+ SCEV::FlagAnyWrap, Depth + 1);
if (ZAdd == OperandExtendedAdd) {
// Cache knowledge of AR NUW, which is propagated to this AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
- getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Cache),
- getZeroExtendExprCached(Step, Ty, Cache), L,
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
+ Depth + 1),
+ getZeroExtendExpr(Step, Ty, Depth + 1), L,
AR->getNoWrapFlags());
}
// Similar to above, only this time treat the step value as signed.
@@ -1641,15 +1633,19 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
OperandExtendedAdd =
getAddExpr(WideStart,
getMulExpr(WideMaxBECount,
- getSignExtendExpr(Step, WideTy)));
+ getSignExtendExpr(Step, WideTy, Depth + 1),
+ SCEV::FlagAnyWrap, Depth + 1),
+ SCEV::FlagAnyWrap, Depth + 1);
if (ZAdd == OperandExtendedAdd) {
// Cache knowledge of AR NW, which is propagated to this AddRec.
// Negative step causes unsigned wrap, but it still can't self-wrap.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
- getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Cache),
- getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
+ Depth + 1),
+ getSignExtendExpr(Step, Ty, Depth + 1), L,
+ AR->getNoWrapFlags());
}
}
}
@@ -1680,8 +1676,9 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
- getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Cache),
- getZeroExtendExprCached(Step, Ty, Cache), L,
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
+ Depth + 1),
+ getZeroExtendExpr(Step, Ty, Depth + 1), L,
AR->getNoWrapFlags());
}
} else if (isKnownNegative(Step)) {
@@ -1697,8 +1694,10 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
- getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Cache),
- getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
+ Depth + 1),
+ getSignExtendExpr(Step, Ty, Depth + 1), L,
+ AR->getNoWrapFlags());
}
}
}
@@ -1706,8 +1705,8 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
if (proveNoWrapByVaryingStart<SCEVZeroExtendExpr>(Start, Step, L)) {
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
return getAddRecExpr(
- getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Cache),
- getZeroExtendExprCached(Step, Ty, Cache), L, AR->getNoWrapFlags());
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Depth + 1),
+ getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
}
}
@@ -1718,8 +1717,8 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
// commute the zero extension with the addition operation.
SmallVector<const SCEV *, 4> Ops;
for (const auto *Op : SA->operands())
- Ops.push_back(getZeroExtendExprCached(Op, Ty, Cache));
- return getAddExpr(Ops, SCEV::FlagNUW);
+ Ops.push_back(getZeroExtendExpr(Op, Ty, Depth + 1));
+ return getAddExpr(Ops, SCEV::FlagNUW, Depth + 1);
}
}
@@ -1732,31 +1731,8 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
return S;
}
-const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty) {
- // Use the local cache to prevent exponential behavior of
- // getSignExtendExprImpl.
- ExtendCacheTy Cache;
- return getSignExtendExprCached(Op, Ty, Cache);
-}
-
-/// Query \p Cache before calling getSignExtendExprImpl. If there is no
-/// related entry in the \p Cache, call getSignExtendExprImpl and save
-/// the result in the \p Cache.
-const SCEV *ScalarEvolution::getSignExtendExprCached(const SCEV *Op, Type *Ty,
- ExtendCacheTy &Cache) {
- auto It = Cache.find({Op, Ty});
- if (It != Cache.end())
- return It->second;
- const SCEV *SExt = getSignExtendExprImpl(Op, Ty, Cache);
- auto InsertResult = Cache.insert({{Op, Ty}, SExt});
- assert(InsertResult.second && "Expect the key was not in the cache");
- (void)InsertResult;
- return SExt;
-}
-
-/// The real implementation of getSignExtendExpr.
-const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty,
- ExtendCacheTy &Cache) {
+const SCEV *
+ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
"This is not an extending conversion!");
assert(isSCEVable(Ty) &&
@@ -1766,15 +1742,15 @@ const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty,
// Fold if the operand is constant.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
return getConstant(
- cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(), Ty)));
+ cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(), Ty)));
// sext(sext(x)) --> sext(x)
if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
- return getSignExtendExprCached(SS->getOperand(), Ty, Cache);
+ return getSignExtendExpr(SS->getOperand(), Ty, Depth + 1);
// sext(zext(x)) --> zext(x)
if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
- return getZeroExtendExpr(SZ->getOperand(), Ty);
+ return getZeroExtendExpr(SZ->getOperand(), Ty, Depth + 1);
// Before doing any expensive analysis, check to see if we've already
// computed a SCEV for this Op and Ty.
@@ -1784,6 +1760,13 @@ const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty,
ID.AddPointer(Ty);
void *IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ // Limit recursion depth.
+ if (Depth > MaxExtDepth) {
+ SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
+ Op, Ty);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+ }
// sext(trunc(x)) --> sext(x) or x or trunc(x)
if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
@@ -1809,8 +1792,9 @@ const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty,
const APInt &C2 = SC2->getAPInt();
if (C1.isStrictlyPositive() && C2.isStrictlyPositive() &&
C2.ugt(C1) && C2.isPowerOf2())
- return getAddExpr(getSignExtendExprCached(SC1, Ty, Cache),
- getSignExtendExprCached(SMul, Ty, Cache));
+ return getAddExpr(getSignExtendExpr(SC1, Ty, Depth + 1),
+ getSignExtendExpr(SMul, Ty, Depth + 1),
+ SCEV::FlagAnyWrap, Depth + 1);
}
}
}
@@ -1821,8 +1805,8 @@ const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty,
// commute the sign extension with the addition operation.
SmallVector<const SCEV *, 4> Ops;
for (const auto *Op : SA->operands())
- Ops.push_back(getSignExtendExprCached(Op, Ty, Cache));
- return getAddExpr(Ops, SCEV::FlagNSW);
+ Ops.push_back(getSignExtendExpr(Op, Ty, Depth + 1));
+ return getAddExpr(Ops, SCEV::FlagNSW, Depth + 1);
}
}
// If the input value is a chrec scev, and we can prove that the value
@@ -1845,8 +1829,8 @@ const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty,
// we don't need to do any further analysis.
if (AR->hasNoSignedWrap())
return getAddRecExpr(
- getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Cache),
- getSignExtendExprCached(Step, Ty, Cache), L, SCEV::FlagNSW);
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1),
+ getSignExtendExpr(Step, Ty, Depth + 1), L, SCEV::FlagNSW);
// Check whether the backedge-taken count is SCEVCouldNotCompute.
// Note that this serves two purposes: It filters out loops that are
@@ -1870,22 +1854,29 @@ const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty,
if (MaxBECount == RecastedMaxBECount) {
Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
// Check whether Start+Step*MaxBECount has no signed overflow.
- const SCEV *SMul = getMulExpr(CastedMaxBECount, Step);
- const SCEV *SAdd =
- getSignExtendExprCached(getAddExpr(Start, SMul), WideTy, Cache);
- const SCEV *WideStart = getSignExtendExprCached(Start, WideTy, Cache);
+ const SCEV *SMul = getMulExpr(CastedMaxBECount, Step,
+ SCEV::FlagAnyWrap, Depth + 1);
+ const SCEV *SAdd = getSignExtendExpr(getAddExpr(Start, SMul,
+ SCEV::FlagAnyWrap,
+ Depth + 1),
+ WideTy, Depth + 1);
+ const SCEV *WideStart = getSignExtendExpr(Start, WideTy, Depth + 1);
const SCEV *WideMaxBECount =
- getZeroExtendExpr(CastedMaxBECount, WideTy);
- const SCEV *OperandExtendedAdd = getAddExpr(
- WideStart, getMulExpr(WideMaxBECount, getSignExtendExprCached(
- Step, WideTy, Cache)));
+ getZeroExtendExpr(CastedMaxBECount, WideTy, Depth + 1);
+ const SCEV *OperandExtendedAdd =
+ getAddExpr(WideStart,
+ getMulExpr(WideMaxBECount,
+ getSignExtendExpr(Step, WideTy, Depth + 1),
+ SCEV::FlagAnyWrap, Depth + 1),
+ SCEV::FlagAnyWrap, Depth + 1);
if (SAdd == OperandExtendedAdd) {
// Cache knowledge of AR NSW, which is propagated to this AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
- getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Cache),
- getSignExtendExprCached(Step, Ty, Cache), L,
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this,
+ Depth + 1),
+ getSignExtendExpr(Step, Ty, Depth + 1), L,
AR->getNoWrapFlags());
}
// Similar to above, only this time treat the step value as unsigned.
@@ -1893,7 +1884,9 @@ const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty,
OperandExtendedAdd =
getAddExpr(WideStart,
getMulExpr(WideMaxBECount,
- getZeroExtendExpr(Step, WideTy)));
+ getZeroExtendExpr(Step, WideTy, Depth + 1),
+ SCEV::FlagAnyWrap, Depth + 1),
+ SCEV::FlagAnyWrap, Depth + 1);
if (SAdd == OperandExtendedAdd) {
// If AR wraps around then
//
@@ -1907,8 +1900,10 @@ const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty,
// Return the expression with the addrec on the outside.
return getAddRecExpr(
- getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Cache),
- getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this,
+ Depth + 1),
+ getZeroExtendExpr(Step, Ty, Depth + 1), L,
+ AR->getNoWrapFlags());
}
}
}
@@ -1939,9 +1934,8 @@ const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty,
// Cache knowledge of AR NSW, then propagate NSW to the wide AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
return getAddRecExpr(
- getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Cache),
- getSignExtendExprCached(Step, Ty, Cache), L,
- AR->getNoWrapFlags());
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1),
+ getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
}
}
@@ -1955,25 +1949,26 @@ const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty,
const APInt &C2 = SC2->getAPInt();
if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) &&
C2.isPowerOf2()) {
- Start = getSignExtendExprCached(Start, Ty, Cache);
+ Start = getSignExtendExpr(Start, Ty, Depth + 1);
const SCEV *NewAR = getAddRecExpr(getZero(AR->getType()), Step, L,
AR->getNoWrapFlags());
- return getAddExpr(Start, getSignExtendExprCached(NewAR, Ty, Cache));
+ return getAddExpr(Start, getSignExtendExpr(NewAR, Ty, Depth + 1),
+ SCEV::FlagAnyWrap, Depth + 1);
}
}
if (proveNoWrapByVaryingStart<SCEVSignExtendExpr>(Start, Step, L)) {
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
return getAddRecExpr(
- getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Cache),
- getSignExtendExprCached(Step, Ty, Cache), L, AR->getNoWrapFlags());
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1),
+ getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
}
}
// If the input value is provably positive and we could not simplify
// away the sext build a zext instead.
if (isKnownNonNegative(Op))
- return getZeroExtendExpr(Op, Ty);
+ return getZeroExtendExpr(Op, Ty, Depth + 1);
// The cast wasn't folded; create an explicit cast node.
// Recompute the insert position, as it may have been invalidated.
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 92328f6e5efd..f938a9a52065 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -89,8 +89,9 @@ TargetTransformInfo::getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize);
}
-int TargetTransformInfo::getUserCost(const User *U) const {
- int Cost = TTIImpl->getUserCost(U);
+int TargetTransformInfo::getUserCost(const User *U,
+ ArrayRef<const Value *> Operands) const {
+ int Cost = TTIImpl->getUserCost(U, Operands);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
@@ -116,8 +117,8 @@ bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
}
void TargetTransformInfo::getUnrollingPreferences(
- Loop *L, UnrollingPreferences &UP) const {
- return TTIImpl->getUnrollingPreferences(L, UP);
+ Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const {
+ return TTIImpl->getUnrollingPreferences(L, SE, UP);
}
bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp
index cd9972ab56a6..86c528de267a 100644
--- a/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -23,10 +23,10 @@
//
// The scalar TBAA metadata format is very simple. TBAA MDNodes have up to
// three fields, e.g.:
-// !0 = metadata !{ metadata !"an example type tree" }
-// !1 = metadata !{ metadata !"int", metadata !0 }
-// !2 = metadata !{ metadata !"float", metadata !0 }
-// !3 = metadata !{ metadata !"const float", metadata !2, i64 1 }
+// !0 = !{ !"an example type tree" }
+// !1 = !{ !"int", !0 }
+// !2 = !{ !"float", !0 }
+// !3 = !{ !"const float", !2, i64 1 }
//
// The first field is an identity field. It can be any value, usually
// an MDString, which uniquely identifies the type. The most important
@@ -74,13 +74,13 @@
// instruction. The base type is !4 (struct B), the access type is !2 (scalar
// type short) and the offset is 4.
//
-// !0 = metadata !{metadata !"Simple C/C++ TBAA"}
-// !1 = metadata !{metadata !"omnipotent char", metadata !0} // Scalar type node
-// !2 = metadata !{metadata !"short", metadata !1} // Scalar type node
-// !3 = metadata !{metadata !"A", metadata !2, i64 0} // Struct type node
-// !4 = metadata !{metadata !"B", metadata !2, i64 0, metadata !3, i64 4}
+// !0 = !{!"Simple C/C++ TBAA"}
+// !1 = !{!"omnipotent char", !0} // Scalar type node
+// !2 = !{!"short", !1} // Scalar type node
+// !3 = !{!"A", !2, i64 0} // Struct type node
+// !4 = !{!"B", !2, i64 0, !3, i64 4}
// // Struct type node
-// !5 = metadata !{metadata !4, metadata !2, i64 4} // Path tag node
+// !5 = !{!4, !2, i64 4} // Path tag node
//
// The struct type nodes and the scalar type nodes form a type DAG.
// Root (!0)
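// Illustration (not part of the patch): with the new syntax, an access to
// B.f at offset 4 carries the path tag as instruction metadata:
//   %val = load i16, i16* %b.f, align 2, !tbaa !5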
diff --git a/lib/BinaryFormat/Magic.cpp b/lib/BinaryFormat/Magic.cpp
index f24f22c88a8a..b19a07a9066b 100644
--- a/lib/BinaryFormat/Magic.cpp
+++ b/lib/BinaryFormat/Magic.cpp
@@ -191,8 +191,8 @@ file_magic llvm::identify_magic(StringRef Magic) {
}
break;
- case 0x64: // x86-64 Windows.
- if (Magic[1] == char(0x86))
+ case 0x64: // x86-64 or ARM64 Windows.
+ if (Magic[1] == char(0x86) || Magic[1] == char(0xaa))
return file_magic::coff_object;
break;
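// Background (illustrative, not part of the patch): the COFF machine field
// is stored little-endian, so the first two bytes of an object file are:
//   IMAGE_FILE_MACHINE_AMD64 = 0x8664 -> bytes 0x64 0x86
//   IMAGE_FILE_MACHINE_ARM64 = 0xaa64 -> bytes 0x64 0xaa
// which is why the second-byte check now also accepts 0xaa.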
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 0629c2d326ae..1ebef3173135 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -5360,8 +5360,9 @@ const std::error_category &llvm::BitcodeErrorCategory() {
return *ErrorCategory;
}
-static Expected<StringRef> readStrtab(BitstreamCursor &Stream) {
- if (Stream.EnterSubBlock(bitc::STRTAB_BLOCK_ID))
+static Expected<StringRef> readBlobInRecord(BitstreamCursor &Stream,
+ unsigned Block, unsigned RecordID) {
+ if (Stream.EnterSubBlock(Block))
return error("Invalid record");
StringRef Strtab;
@@ -5382,7 +5383,7 @@ static Expected<StringRef> readStrtab(BitstreamCursor &Stream) {
case BitstreamEntry::Record:
StringRef Blob;
SmallVector<uint64_t, 1> Record;
- if (Stream.readRecord(Entry.ID, Record, &Blob) == bitc::STRTAB_BLOB)
+ if (Stream.readRecord(Entry.ID, Record, &Blob) == RecordID)
Strtab = Blob;
break;
}
@@ -5450,7 +5451,8 @@ llvm::getBitcodeFileContents(MemoryBufferRef Buffer) {
}
if (Entry.ID == bitc::STRTAB_BLOCK_ID) {
- Expected<StringRef> Strtab = readStrtab(Stream);
+ Expected<StringRef> Strtab =
+ readBlobInRecord(Stream, bitc::STRTAB_BLOCK_ID, bitc::STRTAB_BLOB);
if (!Strtab)
return Strtab.takeError();
// This string table is used by every preceding bitcode module that does
@@ -5462,6 +5464,28 @@ llvm::getBitcodeFileContents(MemoryBufferRef Buffer) {
break;
I->Strtab = *Strtab;
}
+ // Similarly, the string table is used by every preceding symbol table;
+ // normally there will be just one unless the bitcode file was created
+ // by binary concatenation.
+ if (!F.Symtab.empty() && F.StrtabForSymtab.empty())
+ F.StrtabForSymtab = *Strtab;
+ continue;
+ }
+
+ if (Entry.ID == bitc::SYMTAB_BLOCK_ID) {
+ Expected<StringRef> SymtabOrErr =
+ readBlobInRecord(Stream, bitc::SYMTAB_BLOCK_ID, bitc::SYMTAB_BLOB);
+ if (!SymtabOrErr)
+ return SymtabOrErr.takeError();
+
+ // We can expect the bitcode file to have multiple symbol tables if it
+ // was created by binary concatenation. In that case we silently
+ // ignore any subsequent symbol tables, which is fine because this is a
+ // low level function. The client is expected to notice that the number
+ // of modules in the symbol table does not match the number of modules
+ // in the input file and regenerate the symbol table.
+ if (F.Symtab.empty())
+ F.Symtab = *SymtabOrErr;
continue;
}
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index feeba31908ae..b2b1ea6de374 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -29,10 +29,12 @@
#include "llvm/IR/UseListOrder.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/Object/IRSymtab.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/SHA1.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <cctype>
#include <map>
@@ -3820,6 +3822,38 @@ void BitcodeWriter::writeBlob(unsigned Block, unsigned Record, StringRef Blob) {
Stream->ExitBlock();
}
+void BitcodeWriter::writeSymtab() {
+ assert(!WroteStrtab && !WroteSymtab);
+
+ // If any module has module-level inline asm, we will require a registered asm
+ // parser for the target so that we can create an accurate symbol table for
+ // the module.
+ for (Module *M : Mods) {
+ if (M->getModuleInlineAsm().empty())
+ continue;
+
+ std::string Err;
+ const Triple TT(M->getTargetTriple());
+ const Target *T = TargetRegistry::lookupTarget(TT.str(), Err);
+ if (!T || !T->hasMCAsmParser())
+ return;
+ }
+
+ WroteSymtab = true;
+ SmallVector<char, 0> Symtab;
+ // The irsymtab::build function may be unable to create a symbol table if the
+ // module is malformed (e.g. it contains an invalid alias). Writing a symbol
+ // table is not required for correctness, but we still want to be able to
+ // write malformed modules to bitcode files, so swallow the error.
+ if (Error E = irsymtab::build(Mods, Symtab, StrtabBuilder, Alloc)) {
+ consumeError(std::move(E));
+ return;
+ }
+
+ writeBlob(bitc::SYMTAB_BLOCK_ID, bitc::SYMTAB_BLOB,
+ {Symtab.data(), Symtab.size()});
+}
+
void BitcodeWriter::writeStrtab() {
assert(!WroteStrtab);
@@ -3843,6 +3877,15 @@ void BitcodeWriter::writeModule(const Module *M,
bool ShouldPreserveUseListOrder,
const ModuleSummaryIndex *Index,
bool GenerateHash, ModuleHash *ModHash) {
+ assert(!WroteStrtab);
+
+ // The Mods vector is used by irsymtab::build, which requires non-const
+ // Modules in case it needs to materialize metadata. But the bitcode writer
+ // requires that the module is materialized, so we can cast to non-const here,
+ // after checking that it is in fact materialized.
+ assert(M->isMaterialized());
+ Mods.push_back(const_cast<Module *>(M));
+
ModuleBitcodeWriter ModuleWriter(M, Buffer, StrtabBuilder, *Stream,
ShouldPreserveUseListOrder, Index,
GenerateHash, ModHash);
@@ -3875,6 +3918,7 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out,
BitcodeWriter Writer(Buffer);
Writer.writeModule(M, ShouldPreserveUseListOrder, Index, GenerateHash,
ModHash);
+ Writer.writeSymtab();
Writer.writeStrtab();
if (TT.isOSDarwin() || TT.isOSBinFormatMachO())
diff --git a/lib/Bitcode/Writer/LLVMBuild.txt b/lib/Bitcode/Writer/LLVMBuild.txt
index a07c280fa9e3..ef6dc9f901e2 100644
--- a/lib/Bitcode/Writer/LLVMBuild.txt
+++ b/lib/Bitcode/Writer/LLVMBuild.txt
@@ -19,4 +19,4 @@
type = Library
name = BitWriter
parent = Bitcode
-required_libraries = Analysis Core MC Support
+required_libraries = Analysis Core MC Object Support
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index c48fcaa7b0d1..ff427c9a0d75 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -631,7 +631,9 @@ void AsmPrinter::EmitFunctionHeader() {
const Function *F = MF->getFunction();
if (isVerbose())
- OutStreamer->GetCommentOS() << "-- Begin function " << F->getName() << '\n';
+ OutStreamer->GetCommentOS()
+ << "-- Begin function "
+ << GlobalValue::dropLLVMManglingEscape(F->getName()) << '\n';
// Print out constants referenced by the function
EmitConstantPool();
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index e94616fd5900..a81d56e9618b 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -365,7 +365,7 @@ static void addLocIfNotPresent(SmallVectorImpl<const DILocation *> &Locs,
void CodeViewDebug::maybeRecordLocation(const DebugLoc &DL,
const MachineFunction *MF) {
// Skip this instruction if it has the same location as the previous one.
- if (DL == CurFn->LastLoc)
+ if (!DL || DL == PrevInstLoc)
return;
const DIScope *Scope = DL.get()->getScope();
@@ -385,11 +385,11 @@ void CodeViewDebug::maybeRecordLocation(const DebugLoc &DL,
if (!CurFn->HaveLineInfo)
CurFn->HaveLineInfo = true;
unsigned FileId = 0;
- if (CurFn->LastLoc.get() && CurFn->LastLoc->getFile() == DL->getFile())
+ if (PrevInstLoc.get() && PrevInstLoc->getFile() == DL->getFile())
FileId = CurFn->LastFileId;
else
FileId = CurFn->LastFileId = maybeRecordFile(DL->getFile());
- CurFn->LastLoc = DL;
+ PrevInstLoc = DL;
unsigned FuncId = CurFn->FuncId;
if (const DILocation *SiteLoc = DL->getInlinedAt()) {
@@ -2150,9 +2150,23 @@ void CodeViewDebug::beginInstruction(const MachineInstr *MI) {
if (!Asm || !CurFn || MI->isDebugValue() ||
MI->getFlag(MachineInstr::FrameSetup))
return;
+
+ // If the first instruction of a new MBB has no location, find the first
+ // instruction with a location and use that.
DebugLoc DL = MI->getDebugLoc();
- if (DL == PrevInstLoc || !DL)
+ if (!DL && MI->getParent() != PrevInstBB) {
+ for (const auto &NextMI : *MI->getParent()) {
+ DL = NextMI.getDebugLoc();
+ if (DL)
+ break;
+ }
+ }
+ PrevInstBB = MI->getParent();
+
+ // If we still don't have a debug location, don't record a location.
+ if (!DL)
return;
+
maybeRecordLocation(DL, Asm->MF);
}
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index 2cd495aec6dc..fd8f60425c24 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -118,7 +118,6 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
SmallVector<LocalVariable, 1> Locals;
- DebugLoc LastLoc;
const MCSymbol *Begin = nullptr;
const MCSymbol *End = nullptr;
unsigned FuncId = 0;
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index dc39d1e6cb52..d4a90eeabe15 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -245,17 +245,6 @@ void DwarfCompileUnit::addRange(RangeSpan Range) {
CURanges.back().setEnd(Range.getEnd());
}
-DIE::value_iterator
-DwarfCompileUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
- const MCSymbol *Label, const MCSymbol *Sec) {
- if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
- return addLabel(Die, Attribute,
- DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
- : dwarf::DW_FORM_data4,
- Label);
- return addSectionDelta(Die, Attribute, Label, Sec);
-}
-
void DwarfCompileUnit::initStmtList() {
// Define start line table label for each Compile Unit.
MCSymbol *LineTableStartSym =
@@ -380,15 +369,6 @@ void DwarfCompileUnit::constructScopeDIE(
FinalChildren.push_back(std::move(ScopeDIE));
}
-DIE::value_iterator
-DwarfCompileUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
- const MCSymbol *Hi, const MCSymbol *Lo) {
- return Die.addValue(DIEValueAllocator, Attribute,
- DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
- : dwarf::DW_FORM_data4,
- new (DIEValueAllocator) DIEDelta(Hi, Lo));
-}
-
void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE,
SmallVector<RangeSpan, 2> Range) {
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 3c2fb8d99db7..e38672792867 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -127,10 +127,6 @@ public:
void addLocalLabelAddress(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Label);
- /// addSectionDelta - Add a label delta attribute data and value.
- DIE::value_iterator addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
- const MCSymbol *Hi, const MCSymbol *Lo);
-
DwarfCompileUnit &getCU() override { return *this; }
unsigned getOrCreateSourceID(StringRef FileName, StringRef DirName) override;
@@ -151,12 +147,6 @@ public:
void attachLowHighPC(DIE &D, const MCSymbol *Begin, const MCSymbol *End);
- /// addSectionLabel - Add a Dwarf section label attribute data and value.
- ///
- DIE::value_iterator addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
- const MCSymbol *Label,
- const MCSymbol *Sec);
-
/// \brief Find DIE for the given subprogram and attach appropriate
/// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global
/// variables in this scope then create and insert DIEs for these
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 708f5f7536ff..4f4ebfc56297 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -1587,6 +1587,26 @@ void DwarfTypeUnit::emitHeader(bool UseOffsets) {
sizeof(Ty->getOffset()));
}
+DIE::value_iterator
+DwarfUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Hi, const MCSymbol *Lo) {
+ return Die.addValue(DIEValueAllocator, Attribute,
+ DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
+ : dwarf::DW_FORM_data4,
+ new (DIEValueAllocator) DIEDelta(Hi, Lo));
+}
+
+DIE::value_iterator
+DwarfUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Label, const MCSymbol *Sec) {
+ if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+ return addLabel(Die, Attribute,
+ DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
+ : dwarf::DW_FORM_data4,
+ Label);
+ return addSectionDelta(Die, Attribute, Label, Sec);
+}
+
bool DwarfTypeUnit::isDwoUnit() const {
// Since there are no skeleton type units, all type units are dwo type units
// when split DWARF is being used.
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 7acad2cbd89f..4cc01b3298d4 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -291,6 +291,15 @@ public:
void constructTypeDIE(DIE &Buffer, const DICompositeType *CTy);
+ /// addSectionDelta - Add a label delta attribute data and value.
+ DIE::value_iterator addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Hi, const MCSymbol *Lo);
+
+ /// Add a Dwarf section label attribute data and value.
+ DIE::value_iterator addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Label,
+ const MCSymbol *Sec);
+
protected:
~DwarfUnit();
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index cb31c21293f4..b50e76f2e3ba 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -1662,6 +1662,7 @@ class MemCmpExpansion {
PHINode *PhiRes;
bool IsUsedForZeroCmp;
const DataLayout &DL;
+ IRBuilder<> Builder;
unsigned calculateNumBlocks(unsigned Size);
void createLoadCmpBlocks();
@@ -1671,13 +1672,14 @@ class MemCmpExpansion {
void emitLoadCompareBlock(unsigned Index, unsigned LoadSize,
unsigned GEPIndex);
Value *getCompareLoadPairs(unsigned Index, unsigned Size,
- unsigned &NumBytesProcessed, IRBuilder<> &Builder);
+ unsigned &NumBytesProcessed);
void emitLoadCompareBlockMultipleLoads(unsigned Index, unsigned Size,
unsigned &NumBytesProcessed);
void emitLoadCompareByteBlock(unsigned Index, unsigned GEPIndex);
void emitMemCmpResultBlock();
Value *getMemCmpExpansionZeroCase(unsigned Size);
Value *getMemCmpEqZeroOneBlock(unsigned Size);
+ Value *getMemCmpOneBlock(unsigned Size);
unsigned getLoadSize(unsigned Size);
unsigned getNumLoads(unsigned Size);
@@ -1702,7 +1704,7 @@ MemCmpExpansion::MemCmpExpansion(CallInst *CI, uint64_t Size,
unsigned MaxLoadSize, unsigned LoadsPerBlock,
const DataLayout &TheDataLayout)
: CI(CI), MaxLoadSize(MaxLoadSize), NumLoadsPerBlock(LoadsPerBlock),
- DL(TheDataLayout) {
+ DL(TheDataLayout), Builder(CI) {
// A memcmp with zero-comparison with only one block of load and compare does
// not need to set up any extra blocks. This case could be handled in the DAG,
@@ -1710,7 +1712,7 @@ MemCmpExpansion::MemCmpExpansion(CallInst *CI, uint64_t Size,
// we choose to handle this case too to avoid fragmented lowering.
IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
NumBlocks = calculateNumBlocks(Size);
- if (!IsUsedForZeroCmp || NumBlocks != 1) {
+ if ((!IsUsedForZeroCmp && NumLoadsPerBlock != 1) || NumBlocks != 1) {
BasicBlock *StartBlock = CI->getParent();
EndBlock = StartBlock->splitBasicBlock(CI, "endblock");
setupEndBlockPHINodes();
@@ -1731,7 +1733,6 @@ MemCmpExpansion::MemCmpExpansion(CallInst *CI, uint64_t Size,
StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]);
}
- IRBuilder<> Builder(CI->getContext());
Builder.SetCurrentDebugLocation(CI->getDebugLoc());
}
@@ -1754,8 +1755,6 @@ void MemCmpExpansion::createResultBlock() {
// final phi node for selecting the memcmp result.
void MemCmpExpansion::emitLoadCompareByteBlock(unsigned Index,
unsigned GEPIndex) {
- IRBuilder<> Builder(CI->getContext());
-
Value *Source1 = CI->getArgOperand(0);
Value *Source2 = CI->getArgOperand(1);
@@ -1811,8 +1810,7 @@ unsigned MemCmpExpansion::getLoadSize(unsigned Size) {
/// This is used in the case where the memcmp() call is compared equal or not
/// equal to zero.
Value *MemCmpExpansion::getCompareLoadPairs(unsigned Index, unsigned Size,
- unsigned &NumBytesProcessed,
- IRBuilder<> &Builder) {
+ unsigned &NumBytesProcessed) {
std::vector<Value *> XorList, OrList;
Value *Diff;
@@ -1910,8 +1908,7 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned Index, unsigned Size,
void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(
unsigned Index, unsigned Size, unsigned &NumBytesProcessed) {
- IRBuilder<> Builder(CI->getContext());
- Value *Cmp = getCompareLoadPairs(Index, Size, NumBytesProcessed, Builder);
+ Value *Cmp = getCompareLoadPairs(Index, Size, NumBytesProcessed);
BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1))
? EndBlock
@@ -1946,8 +1943,6 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, unsigned LoadSize,
return;
}
- IRBuilder<> Builder(CI->getContext());
-
Type *LoadSizeType = IntegerType::get(CI->getContext(), LoadSize * 8);
Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
assert(LoadSize <= MaxLoadSize && "Unexpected load type");
@@ -1975,9 +1970,7 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, unsigned LoadSize,
Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
if (DL.isLittleEndian()) {
- Function *F = LoadCmpBlocks[Index]->getParent();
-
- Function *Bswap = Intrinsic::getDeclaration(F->getParent(),
+ Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
Intrinsic::bswap, LoadSizeType);
LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1);
LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2);
@@ -1995,16 +1988,13 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, unsigned LoadSize,
ResBlock.PhiSrc2->addIncoming(LoadSrc2, LoadCmpBlocks[Index]);
}
- Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2);
-
- Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff,
- ConstantInt::get(Diff->getType(), 0));
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, LoadSrc1, LoadSrc2);
BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1))
? EndBlock
: LoadCmpBlocks[Index + 1];
// Early exit branch if difference found to ResultBlock. Otherwise, continue
// to next LoadCmpBlock or EndBlock.
- BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp);
+ BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp);
Builder.Insert(CmpBr);
// Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0
@@ -2020,8 +2010,6 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, unsigned LoadSize,
// memcmp result. It compares the two loaded source values and returns -1 if
// src1 < src2 and 1 if src1 > src2.
void MemCmpExpansion::emitMemCmpResultBlock() {
- IRBuilder<> Builder(CI->getContext());
-
// Special case: if memcmp result is used in a zero equality, result does not
// need to be calculated and can simply return 1.
if (IsUsedForZeroCmp) {
@@ -2070,7 +2058,6 @@ unsigned MemCmpExpansion::calculateNumBlocks(unsigned Size) {
}
void MemCmpExpansion::setupResultBlockPHINodes() {
- IRBuilder<> Builder(CI->getContext());
Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
Builder.SetInsertPoint(ResBlock.BB);
ResBlock.PhiSrc1 =
@@ -2080,8 +2067,6 @@ void MemCmpExpansion::setupResultBlockPHINodes() {
}
void MemCmpExpansion::setupEndBlockPHINodes() {
- IRBuilder<> Builder(CI->getContext());
-
Builder.SetInsertPoint(&EndBlock->front());
PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res");
}
@@ -2102,11 +2087,45 @@ Value *MemCmpExpansion::getMemCmpExpansionZeroCase(unsigned Size) {
/// in the general case.
Value *MemCmpExpansion::getMemCmpEqZeroOneBlock(unsigned Size) {
unsigned NumBytesProcessed = 0;
- IRBuilder<> Builder(CI->getContext());
- Value *Cmp = getCompareLoadPairs(0, Size, NumBytesProcessed, Builder);
+ Value *Cmp = getCompareLoadPairs(0, Size, NumBytesProcessed);
return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext()));
}
+/// A memcmp expansion that only has one block of load and compare can bypass
+/// the compare, branch, and phi IR that is required in the general case.
+Value *MemCmpExpansion::getMemCmpOneBlock(unsigned Size) {
+ assert(NumLoadsPerBlock == 1 && "Only handles one load pair per block");
+
+ Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8);
+ Value *Source1 = CI->getArgOperand(0);
+ Value *Source2 = CI->getArgOperand(1);
+
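// Usage sketch (not part of the patch): the new cutoff is tunable from the
// command line, overriding the default depth of 8, e.g.:
//   opt -analyze -scalar-evolution -scalar-evolution-max-ext-depth=4 in.ll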
+ // Cast source to LoadSizeType*.
+ if (Source1->getType() != LoadSizeType)
+ Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
+ if (Source2->getType() != LoadSizeType)
+ Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
+
+ // Load LoadSizeType from the base address.
+ Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
+ Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
+
+ if (DL.isLittleEndian() && Size != 1) {
+ Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
+ Intrinsic::bswap, LoadSizeType);
+ LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1);
+ LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2);
+ }
+
+ // TODO: Instead of comparing ULT, just subtract and return the difference?
+ Value *CmpNE = Builder.CreateICmpNE(LoadSrc1, LoadSrc2);
+ Value *CmpULT = Builder.CreateICmpULT(LoadSrc1, LoadSrc2);
+ Type *I32 = Builder.getInt32Ty();
+ Value *Sel1 = Builder.CreateSelect(CmpULT, ConstantInt::get(I32, -1),
+ ConstantInt::get(I32, 1));
+ return Builder.CreateSelect(CmpNE, Sel1, ConstantInt::get(I32, 0));
+}
+
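// Illustration (not part of the patch): after the byte swaps, both loads
// compare as big-endian unsigned integers, and the two selects compute
// memcmp's return convention without branches:
//   int r = (a != b) ? ((a < b) ? -1 : 1) : 0;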
// This function expands the memcmp call into an inline expansion and returns
// the memcmp result.
Value *MemCmpExpansion::getMemCmpExpansion(uint64_t Size) {
@@ -2114,6 +2133,10 @@ Value *MemCmpExpansion::getMemCmpExpansion(uint64_t Size) {
return NumBlocks == 1 ? getMemCmpEqZeroOneBlock(Size) :
getMemCmpExpansionZeroCase(Size);
+ // TODO: Handle more than one load pair per block in getMemCmpOneBlock().
+ if (NumBlocks == 1 && NumLoadsPerBlock == 1)
+ return getMemCmpOneBlock(Size);
+
// This loop calls emitLoadCompareBlock for comparing Size bytes of the two
// memcmp sources. It starts with loading using the maximum load size set by
// the target. It processes any remaining bytes using a load size which is the
@@ -2218,7 +2241,6 @@ Value *MemCmpExpansion::getMemCmpExpansion(uint64_t Size) {
static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
const TargetLowering *TLI, const DataLayout *DL) {
NumMemCmpCalls++;
- IRBuilder<> Builder(CI->getContext());
// TTI call to check if target would like to expand memcmp. Also, get the
// MaxLoadSize.
@@ -4378,14 +4400,16 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// If the real base value actually came from an inttoptr, then the matcher
// will look through it and provide only the integer value. In that case,
// use it here.
- if (!ResultPtr && AddrMode.BaseReg) {
- ResultPtr =
- Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), "sunkaddr");
- AddrMode.BaseReg = nullptr;
- } else if (!ResultPtr && AddrMode.Scale == 1) {
- ResultPtr =
- Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), "sunkaddr");
- AddrMode.Scale = 0;
+ if (!DL->isNonIntegralPointerType(Addr->getType())) {
+ if (!ResultPtr && AddrMode.BaseReg) {
+ ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(),
+ "sunkaddr");
+ AddrMode.BaseReg = nullptr;
+ } else if (!ResultPtr && AddrMode.Scale == 1) {
+ ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(),
+ "sunkaddr");
+ AddrMode.Scale = 0;
+ }
}
if (!ResultPtr &&
@@ -4466,6 +4490,19 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
}
} else {
+ // We'd require a ptrtoint/inttoptr down the line, which we can't do for
+ // non-integral pointers, so in that case bail out now.
+ Type *BaseTy = AddrMode.BaseReg ? AddrMode.BaseReg->getType() : nullptr;
+ Type *ScaleTy = AddrMode.Scale ? AddrMode.ScaledReg->getType() : nullptr;
+ PointerType *BasePtrTy = dyn_cast_or_null<PointerType>(BaseTy);
+ PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(ScaleTy);
+ if (DL->isNonIntegralPointerType(Addr->getType()) ||
+ (BasePtrTy && DL->isNonIntegralPointerType(BasePtrTy)) ||
+ (ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) ||
+ (AddrMode.BaseGV &&
+ DL->isNonIntegralPointerType(AddrMode.BaseGV->getType())))
+ return false;
+
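// Background (illustrative, not part of the patch): "non-integral" pointers
// come from address spaces declared with the datalayout "ni" component,
// e.g.:
//   target datalayout = "e-p:64:64-ni:1"
// For those, ptrtoint/inttoptr do not round-trip reliably, hence the early
// return above.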
DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
<< *MemoryInst << "\n");
Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
@@ -6367,7 +6404,7 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
}
// Update PHI nodes in both successors. The original BB needs to be
- // replaced in one succesor's PHI nodes, because the branch comes now from
+ // replaced in one successor's PHI nodes, because the branch comes now from
// the newly generated BB (NewBB). In the other successor we need to add one
// incoming edge to the PHI nodes, because both branch instructions target
// now the same successor. Depending on the original branch condition
diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 239bad2f5355..521037f9d206 100644
--- a/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/GlobalISel/IRTranslator.cpp - IRTranslator --*- C++ -*-==//
+//===- llvm/CodeGen/GlobalISel/IRTranslator.cpp - IRTranslator ---*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
@@ -11,34 +11,69 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
-
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LowLevelTypeImpl.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <string>
+#include <utility>
+#include <vector>
#define DEBUG_TYPE "irtranslator"
using namespace llvm;
char IRTranslator::ID = 0;
+
INITIALIZE_PASS_BEGIN(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
@@ -62,7 +97,7 @@ static void reportTranslationError(MachineFunction &MF,
ORE.emit(R);
}
-IRTranslator::IRTranslator() : MachineFunctionPass(ID), MRI(nullptr) {
+IRTranslator::IRTranslator() : MachineFunctionPass(ID) {
initializeIRTranslatorPass(*PassRegistry::getPassRegistry());
}
@@ -71,7 +106,6 @@ void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-
unsigned IRTranslator::getOrCreateVReg(const Value &Val) {
unsigned &ValReg = ValToVReg[&Val];
@@ -686,6 +720,26 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
.addUse(getOrCreateVReg(*CI.getArgOperand(0)))
.addUse(getOrCreateVReg(*CI.getArgOperand(1)));
return true;
+ case Intrinsic::exp:
+ MIRBuilder.buildInstr(TargetOpcode::G_FEXP)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
+ case Intrinsic::exp2:
+ MIRBuilder.buildInstr(TargetOpcode::G_FEXP2)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
+ case Intrinsic::log:
+ MIRBuilder.buildInstr(TargetOpcode::G_FLOG)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
+ case Intrinsic::log2:
+ MIRBuilder.buildInstr(TargetOpcode::G_FLOG2)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
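// Illustration (not part of the patch): each new case is a one-to-one
// mapping from the intrinsic call to a generic opcode, e.g.
//   %y = call float @llvm.exp2.f32(float %x)
// translates to
//   %1(s32) = G_FEXP2 %0(s32)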
case Intrinsic::fma:
MIRBuilder.buildInstr(TargetOpcode::G_FMA)
.addDef(getOrCreateVReg(CI))
@@ -834,7 +888,6 @@ bool IRTranslator::translateInvoke(const User &U,
if (!isa<LandingPadInst>(EHPadBB->front()))
return false;
-
// Emit the actual call, bracketed by EH_LABELs so that the MF knows about
// the region covered by the try.
MCSymbol *BeginSymbol = Context.createTempSymbol();
@@ -1195,7 +1248,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
MRI = &MF->getRegInfo();
DL = &F.getParent()->getDataLayout();
TPC = &getAnalysis<TargetPassConfig>();
- ORE = make_unique<OptimizationRemarkEmitter>(&F);
+ ORE = llvm::make_unique<OptimizationRemarkEmitter>(&F);
assert(PendingPHIs.empty() && "stale PHIs");
diff --git a/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index 5466efd7e90f..860fc9a4f8b6 100644
--- a/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -1,4 +1,4 @@
-//===- llvm/CodeGen/GlobalISel/InstructionSelector.cpp -----------*- C++ -*-==//
+//===- llvm/CodeGen/GlobalISel/InstructionSelector.cpp --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,19 +11,22 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
#define DEBUG_TYPE "instructionselector"
using namespace llvm;
-InstructionSelector::InstructionSelector() {}
+InstructionSelector::InstructionSelector() = default;
bool InstructionSelector::constrainOperandRegToRegClass(
MachineInstr &I, unsigned OpIdx, const TargetRegisterClass &RC,
@@ -33,8 +36,8 @@ bool InstructionSelector::constrainOperandRegToRegClass(
MachineFunction &MF = *MBB.getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
- return llvm::constrainRegToClass(MRI, TII, RBI, I,
- I.getOperand(OpIdx).getReg(), RC);
+ return
+ constrainRegToClass(MRI, TII, RBI, I, I.getOperand(OpIdx).getReg(), RC);
}
bool InstructionSelector::constrainSelectedInstRegOperands(
@@ -84,7 +87,6 @@ bool InstructionSelector::constrainSelectedInstRegOperands(
bool InstructionSelector::isOperandImmEqual(
const MachineOperand &MO, int64_t Value,
const MachineRegisterInfo &MRI) const {
-
if (MO.isReg() && MO.getReg())
if (auto VRegVal = getConstantVRegVal(MO.getReg(), MRI))
return *VRegVal == Value;
diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 1d0d3dffa4c5..84b0a0ac4157 100644
--- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -158,7 +158,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
unsigned TypeIdx,
LLT NarrowTy) {
// FIXME: Don't know how to handle secondary types yet.
- if (TypeIdx != 0)
+ if (TypeIdx != 0 && MI.getOpcode() != TargetOpcode::G_EXTRACT)
return UnableToLegalize;
MIRBuilder.setInstr(MI);
@@ -166,6 +166,20 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
switch (MI.getOpcode()) {
default:
return UnableToLegalize;
+ case TargetOpcode::G_IMPLICIT_DEF: {
+ int NumParts = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() /
+ NarrowTy.getSizeInBits();
+
+ SmallVector<unsigned, 2> DstRegs;
+ for (int i = 0; i < NumParts; ++i) {
+ unsigned Dst = MRI.createGenericVirtualRegister(NarrowTy);
+ MIRBuilder.buildUndef(Dst);
+ DstRegs.push_back(Dst);
+ }
+ MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
+ MI.eraseFromParent();
+ return Legalized;
+ }
case TargetOpcode::G_ADD: {
// Expand in terms of carry-setting/consuming G_ADDE instructions.
int NumParts = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() /
@@ -193,6 +207,58 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
MI.eraseFromParent();
return Legalized;
}
+ case TargetOpcode::G_EXTRACT: {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ int64_t NarrowSize = NarrowTy.getSizeInBits();
+ int NumParts =
+ MRI.getType(MI.getOperand(1).getReg()).getSizeInBits() / NarrowSize;
+
+ SmallVector<unsigned, 2> SrcRegs, DstRegs;
+ SmallVector<uint64_t, 2> Indexes;
+ extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
+
+ unsigned OpReg = MI.getOperand(0).getReg();
+ int64_t OpStart = MI.getOperand(2).getImm();
+ int64_t OpSize = MRI.getType(OpReg).getSizeInBits();
+ for (int i = 0; i < NumParts; ++i) {
+ unsigned SrcStart = i * NarrowSize;
+
+ if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
+ // No part of the extract uses this subregister, ignore it.
+ continue;
+ } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
+ // The entire subregister is extracted, forward the value.
+ DstRegs.push_back(SrcRegs[i]);
+ continue;
+ }
+
+      // ExtractOffset is where within this source part the destination
+      // segment starts; SegSize is how many bits of it are extracted.
+ int64_t ExtractOffset, SegSize;
+ if (OpStart < SrcStart) {
+ ExtractOffset = 0;
+ SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
+ } else {
+ ExtractOffset = OpStart - SrcStart;
+ SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
+ }
+
+ unsigned SegReg = SrcRegs[i];
+ if (ExtractOffset != 0 || SegSize != NarrowSize) {
+ // A genuine extract is needed.
+ SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
+ MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
+ }
+
+ DstRegs.push_back(SegReg);
+ }
+
+ MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
+ MI.eraseFromParent();
+ return Legalized;
+ }
case TargetOpcode::G_INSERT: {
if (TypeIdx != 0)
return UnableToLegalize;
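The per-part arithmetic in the new G_EXTRACT case is easiest to verify with concrete numbers. A minimal standalone sketch, assuming hypothetical values (a 16-bit field at bit offset 24 of a 64-bit register narrowed into two 32-bit parts):

#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
  const int64_t NarrowSize = 32, OpStart = 24, OpSize = 16, NumParts = 2;
  for (int64_t i = 0; i < NumParts; ++i) {
    int64_t SrcStart = i * NarrowSize;
    // Same overlap test as the patch: skip parts the extract never touches.
    if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize)
      continue;
    int64_t ExtractOffset, SegSize;
    if (OpStart < SrcStart) {
      ExtractOffset = 0;
      SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
    } else {
      ExtractOffset = OpStart - SrcStart;
      SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
    }
    std::printf("part %lld: extract %lld bits at offset %lld\n",
                (long long)i, (long long)SegSize, (long long)ExtractOffset);
  }
}

This prints 8 bits at offset 24 for part 0 and 8 bits at offset 0 for part 1: the field straddles the part boundary, so it is rebuilt by G_MERGE_VALUES from two genuine sub-extracts.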
diff --git a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index 595802f2228b..76917aa9660d 100644
--- a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -1,4 +1,4 @@
-//===---- lib/CodeGen/GlobalISel/LegalizerInfo.cpp - Legalizer -------==//
+//===- lib/CodeGen/GlobalISel/LegalizerInfo.cpp - Legalizer ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,16 +18,25 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
-
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/IR/Type.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LowLevelTypeImpl.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOpcodes.h"
+#include <algorithm>
+#include <cassert>
+#include <tuple>
+#include <utility>
+
using namespace llvm;
-LegalizerInfo::LegalizerInfo() : TablesInitialized(false) {
+LegalizerInfo::LegalizerInfo() {
+ DefaultActions[TargetOpcode::G_IMPLICIT_DEF] = NarrowScalar;
+
// FIXME: these two can be legalized to the fundamental load/store Jakob
// proposed. Once loads & stores are supported.
DefaultActions[TargetOpcode::G_ANYEXT] = Legal;
@@ -42,6 +51,7 @@ LegalizerInfo::LegalizerInfo() : TablesInitialized(false) {
DefaultActions[TargetOpcode::G_BRCOND] = WidenScalar;
DefaultActions[TargetOpcode::G_INSERT] = NarrowScalar;
+ DefaultActions[TargetOpcode::G_EXTRACT] = NarrowScalar;
DefaultActions[TargetOpcode::G_FNEG] = Lower;
}
@@ -75,8 +85,7 @@ LegalizerInfo::getAction(const InstrAspect &Aspect) const {
// FIXME: the long-term plan calls for expansion in terms of load/store (if
// they're not legal).
- if (Aspect.Opcode == TargetOpcode::G_EXTRACT ||
- Aspect.Opcode == TargetOpcode::G_MERGE_VALUES ||
+ if (Aspect.Opcode == TargetOpcode::G_MERGE_VALUES ||
Aspect.Opcode == TargetOpcode::G_UNMERGE_VALUES)
return std::make_pair(Legal, Aspect.Type);
@@ -172,21 +181,21 @@ Optional<LLT> LegalizerInfo::findLegalType(const InstrAspect &Aspect,
case Custom:
return Aspect.Type;
case NarrowScalar: {
- return findLegalType(Aspect,
- [](LLT Ty) -> LLT { return Ty.halfScalarSize(); });
+ return findLegalizableSize(
+ Aspect, [&](LLT Ty) -> LLT { return Ty.halfScalarSize(); });
}
case WidenScalar: {
- return findLegalType(Aspect, [](LLT Ty) -> LLT {
+ return findLegalizableSize(Aspect, [&](LLT Ty) -> LLT {
return Ty.getSizeInBits() < 8 ? LLT::scalar(8) : Ty.doubleScalarSize();
});
}
case FewerElements: {
- return findLegalType(Aspect,
- [](LLT Ty) -> LLT { return Ty.halfElements(); });
+ return findLegalizableSize(
+ Aspect, [&](LLT Ty) -> LLT { return Ty.halfElements(); });
}
case MoreElements: {
- return findLegalType(Aspect,
- [](LLT Ty) -> LLT { return Ty.doubleElements(); });
+ return findLegalizableSize(
+ Aspect, [&](LLT Ty) -> LLT { return Ty.doubleElements(); });
}
}
}
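The rename to findLegalizableSize reflects what the helper actually does: walk scalar sizes monotonically with the supplied step function until a legal size is found. A minimal standalone model of the NarrowScalar direction, assuming a hypothetical target that only accepts 32-bit scalars:

#include <cstdio>

int main() {
  unsigned Width = 128; // requested but illegal width
  auto IsLegal = [](unsigned W) { return W == 32; };
  while (Width > 1 && !IsLegal(Width))
    Width /= 2; // stands in for LLT::halfScalarSize()
  std::printf("legalized width: s%u\n", Width); // prints s32
}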
diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 3c70013ea296..47c6214c0552 100644
--- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -264,10 +264,13 @@ MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) {
}
MachineInstrBuilder MachineIRBuilder::buildBrIndirect(unsigned Tgt) {
+ assert(MRI->getType(Tgt).isPointer() && "invalid branch destination");
return buildInstr(TargetOpcode::G_BRINDIRECT).addUse(Tgt);
}
MachineInstrBuilder MachineIRBuilder::buildCopy(unsigned Res, unsigned Op) {
+ assert(MRI->getType(Res) == LLT() || MRI->getType(Op) == LLT() ||
+ MRI->getType(Res) == MRI->getType(Op));
return buildInstr(TargetOpcode::COPY).addDef(Res).addUse(Op);
}
@@ -364,27 +367,36 @@ MachineInstrBuilder MachineIRBuilder::buildZExt(unsigned Res, unsigned Op) {
MachineInstrBuilder MachineIRBuilder::buildSExtOrTrunc(unsigned Res,
unsigned Op) {
+ assert(MRI->getType(Res).isScalar() || MRI->getType(Res).isVector());
+ assert(MRI->getType(Res).isScalar() == MRI->getType(Op).isScalar());
+
unsigned Opcode = TargetOpcode::COPY;
if (MRI->getType(Res).getSizeInBits() > MRI->getType(Op).getSizeInBits())
Opcode = TargetOpcode::G_SEXT;
else if (MRI->getType(Res).getSizeInBits() < MRI->getType(Op).getSizeInBits())
Opcode = TargetOpcode::G_TRUNC;
+ else
+ assert(MRI->getType(Res) == MRI->getType(Op));
return buildInstr(Opcode).addDef(Res).addUse(Op);
}
MachineInstrBuilder MachineIRBuilder::buildZExtOrTrunc(unsigned Res,
unsigned Op) {
+ assert(MRI->getType(Res).isScalar() || MRI->getType(Res).isVector());
+ assert(MRI->getType(Res).isScalar() == MRI->getType(Op).isScalar());
+
unsigned Opcode = TargetOpcode::COPY;
if (MRI->getType(Res).getSizeInBits() > MRI->getType(Op).getSizeInBits())
Opcode = TargetOpcode::G_ZEXT;
else if (MRI->getType(Res).getSizeInBits() < MRI->getType(Op).getSizeInBits())
Opcode = TargetOpcode::G_TRUNC;
+ else
+ assert(MRI->getType(Res) == MRI->getType(Op));
return buildInstr(Opcode).addDef(Res).addUse(Op);
}
-
MachineInstrBuilder MachineIRBuilder::buildCast(unsigned Dst, unsigned Src) {
LLT SrcTy = MRI->getType(Src);
LLT DstTy = MRI->getType(Dst);
@@ -466,7 +478,7 @@ void MachineIRBuilder::buildSequence(unsigned Res, ArrayRef<unsigned> Ops,
}
MachineInstrBuilder MachineIRBuilder::buildUndef(unsigned Res) {
- return buildInstr(TargetOpcode::IMPLICIT_DEF).addDef(Res);
+ return buildInstr(TargetOpcode::G_IMPLICIT_DEF).addDef(Res);
}
MachineInstrBuilder MachineIRBuilder::buildMerge(unsigned Res,
@@ -482,6 +494,9 @@ MachineInstrBuilder MachineIRBuilder::buildMerge(unsigned Res,
"input operands do not cover output register");
#endif
+ if (Ops.size() == 1)
+ return buildCast(Res, Ops[0]);
+
MachineInstrBuilder MIB = buildInstr(TargetOpcode::G_MERGE_VALUES);
MIB.addDef(Res);
for (unsigned i = 0; i < Ops.size(); ++i)
@@ -511,8 +526,11 @@ MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<unsigned> Res,
MachineInstrBuilder MachineIRBuilder::buildInsert(unsigned Res, unsigned Src,
unsigned Op, unsigned Index) {
+ assert(Index + MRI->getType(Op).getSizeInBits() <=
+ MRI->getType(Res).getSizeInBits() &&
+ "insertion past the end of a register");
+
if (MRI->getType(Res).getSizeInBits() == MRI->getType(Op).getSizeInBits()) {
- assert(Index == 0 && "insertion past the end of a register");
return buildCast(Res, Op);
}
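The two *OrTrunc builders reduce to a three-way choice on operand size, which the new asserts pin down (vector-ness must match, and equal sizes imply equal types). A standalone sketch with plain integers standing in for LLT bit widths (hypothetical values):

#include <cstdio>

// Mirrors the opcode selection in buildSExtOrTrunc above.
const char *pickOpcode(unsigned ResBits, unsigned OpBits) {
  if (ResBits > OpBits)
    return "G_SEXT";
  if (ResBits < OpBits)
    return "G_TRUNC";
  return "COPY"; // equal sizes; the types must then be identical
}

int main() {
  std::printf("%s %s %s\n", pickOpcode(64, 32), pickOpcode(16, 32),
              pickOpcode(32, 32)); // G_SEXT G_TRUNC COPY
}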
diff --git a/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index 2eb3cdee694d..677941dbbf6d 100644
--- a/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -1,4 +1,4 @@
-//===- llvm/CodeGen/GlobalISel/RegBankSelect.cpp - RegBankSelect -*- C++ -*-==//
+//==- llvm/CodeGen/GlobalISel/RegBankSelect.cpp - RegBankSelect --*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
@@ -12,18 +12,39 @@
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOpcodes.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <utility>
#define DEBUG_TYPE "regbankselect"
@@ -37,6 +58,7 @@ static cl::opt<RegBankSelect::Mode> RegBankSelectMode(
"Use the Greedy mode (best local mapping)")));
char RegBankSelect::ID = 0;
+
INITIALIZE_PASS_BEGIN(RegBankSelect, DEBUG_TYPE,
"Assign register bank of generic virtual registers",
false, false);
@@ -48,8 +70,7 @@ INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,
false)
RegBankSelect::RegBankSelect(Mode RunningMode)
- : MachineFunctionPass(ID), RBI(nullptr), MRI(nullptr), TRI(nullptr),
- MBFI(nullptr), MBPI(nullptr), OptMode(RunningMode) {
+ : MachineFunctionPass(ID), OptMode(RunningMode) {
initializeRegBankSelectPass(*PassRegistry::getPassRegistry());
if (RegBankSelectMode.getNumOccurrences() != 0) {
OptMode = RegBankSelectMode;
@@ -72,7 +93,7 @@ void RegBankSelect::init(MachineFunction &MF) {
MBPI = nullptr;
}
MIRBuilder.setMF(MF);
- MORE = make_unique<MachineOptimizationRemarkEmitter>(MF, MBFI);
+ MORE = llvm::make_unique<MachineOptimizationRemarkEmitter>(MF, MBFI);
}
void RegBankSelect::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -133,9 +154,11 @@ bool RegBankSelect::repairReg(
TargetRegisterInfo::isPhysicalRegister(Dst)) &&
"We are about to create several defs for Dst");
- // Build the instruction used to repair, then clone it at the right places.
- MachineInstr *MI = MIRBuilder.buildCopy(Dst, Src);
- MI->removeFromParent();
+ // Build the instruction used to repair, then clone it at the right
+ // places. Avoiding buildCopy bypasses the check that Src and Dst have the
+ // same types because the type is a placeholder when this function is called.
+ MachineInstr *MI =
+ MIRBuilder.buildInstrNoInsert(TargetOpcode::COPY).addDef(Dst).addUse(Src);
DEBUG(dbgs() << "Copy: " << PrintReg(Src) << " to: " << PrintReg(Dst)
<< '\n');
// TODO:
@@ -202,11 +225,11 @@ uint64_t RegBankSelect::getRepairCost(
RBI->copyCost(*DesiredRegBrank, *CurRegBank,
RegisterBankInfo::getSizeInBits(MO.getReg(), *MRI, *TRI));
// TODO: use a dedicated constant for ImpossibleCost.
- if (Cost != UINT_MAX)
+ if (Cost != std::numeric_limits<unsigned>::max())
return Cost;
// Return the legalization cost of that repairing.
}
- return UINT_MAX;
+ return std::numeric_limits<unsigned>::max();
}
const RegisterBankInfo::InstructionMapping &RegBankSelect::findBestMapping(
@@ -352,7 +375,7 @@ void RegBankSelect::tryAvoidingSplit(
// the repairing cost because of the PHIs we have already processed,
// as stated above. The code will still be correct, though.
- assert(0 && "Repairing cost may not be accurate");
+ assert(false && "Repairing cost may not be accurate");
} else {
// We need to do non-local repairing. Basically, patch all
// the uses (i.e., phis) that we already proceeded.
@@ -450,7 +473,7 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping(
uint64_t RepairCost = getRepairCost(MO, ValMapping);
// This is an impossible to repair cost.
- if (RepairCost == UINT_MAX)
+ if (RepairCost == std::numeric_limits<unsigned>::max())
continue;
// Bias used for splitting: 5%.
@@ -535,9 +558,11 @@ bool RegBankSelect::applyMapping(
llvm_unreachable("Other kind should not happen");
}
}
+
// Second, rewrite the instruction.
DEBUG(dbgs() << "Actual mapping of the operands: " << OpdMapper << '\n');
RBI->applyMapping(OpdMapper);
+
return true;
}
@@ -638,11 +663,8 @@ RegBankSelect::RepairingPlacement::RepairingPlacement(
MachineInstr &MI, unsigned OpIdx, const TargetRegisterInfo &TRI, Pass &P,
RepairingPlacement::RepairingKind Kind)
// Default is, we are going to insert code to repair OpIdx.
- : Kind(Kind),
- OpIdx(OpIdx),
- CanMaterialize(Kind != RepairingKind::Impossible),
- HasSplit(false),
- P(P) {
+ : Kind(Kind), OpIdx(OpIdx),
+ CanMaterialize(Kind != RepairingKind::Impossible), P(P) {
const MachineOperand &MO = MI.getOperand(OpIdx);
assert(MO.isReg() && "Trying to repair a non-reg operand");
@@ -847,7 +869,7 @@ bool RegBankSelect::EdgeInsertPoint::canMaterialize() const {
}
RegBankSelect::MappingCost::MappingCost(const BlockFrequency &LocalFreq)
- : LocalCost(0), NonLocalCost(0), LocalFreq(LocalFreq.getFrequency()) {}
+ : LocalFreq(LocalFreq.getFrequency()) {}
bool RegBankSelect::MappingCost::addLocalCost(uint64_t Cost) {
// Check if this overflows.
@@ -920,7 +942,6 @@ bool RegBankSelect::MappingCost::operator<(const MappingCost &Cost) const {
OtherLocalAdjust = Cost.LocalCost - LocalCost;
else
ThisLocalAdjust = LocalCost - Cost.LocalCost;
-
} else {
ThisLocalAdjust = LocalCost;
OtherLocalAdjust = Cost.LocalCost;
diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp
index 398066bf8903..8c43c9f3f884 100644
--- a/lib/CodeGen/LiveRangeCalc.cpp
+++ b/lib/CodeGen/LiveRangeCalc.cpp
@@ -20,11 +20,14 @@ using namespace llvm;
#define DEBUG_TYPE "regalloc"
+// Reserve an address that indicates a value that is known to be "undef".
+static VNInfo UndefVNI(0xbad, SlotIndex());
+
void LiveRangeCalc::resetLiveOutMap() {
unsigned NumBlocks = MF->getNumBlockIDs();
Seen.clear();
Seen.resize(NumBlocks);
- EntryInfoMap.clear();
+ EntryInfos.clear();
Map.resize(NumBlocks);
}
@@ -283,8 +286,11 @@ bool LiveRangeCalc::isDefOnEntry(LiveRange &LR, ArrayRef<SlotIndex> Undefs,
// Determine if the exit from the block is reached by some def.
unsigned N = WorkList[i];
MachineBasicBlock &B = *MF->getBlockNumbered(N);
- if (Seen[N] && Map[&B].first != nullptr)
- return MarkDefined(B);
+ if (Seen[N]) {
+ const LiveOutPair &LOB = Map[&B];
+ if (LOB.first != nullptr && LOB.first != &UndefVNI)
+ return MarkDefined(B);
+ }
SlotIndex Begin, End;
std::tie(Begin, End) = Indexes->getMBBRange(&B);
// Treat End as not belonging to B.
@@ -365,10 +371,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
#endif
FoundUndef |= MBB->pred_empty();
- for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
- PE = MBB->pred_end(); PI != PE; ++PI) {
- MachineBasicBlock *Pred = *PI;
-
+ for (MachineBasicBlock *Pred : MBB->predecessors()) {
// Is this a known live-out block?
if (Seen.test(Pred->getNumber())) {
if (VNInfo *VNI = Map[Pred].first) {
@@ -387,7 +390,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
auto EP = LR.extendInBlock(Undefs, Start, End);
VNInfo *VNI = EP.first;
FoundUndef |= EP.second;
- setLiveOutValue(Pred, VNI);
+ setLiveOutValue(Pred, EP.second ? &UndefVNI : VNI);
if (VNI) {
if (TheVNI && TheVNI != VNI)
UniqueVNI = false;
@@ -406,7 +409,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
}
LiveIn.clear();
- FoundUndef |= (TheVNI == nullptr);
+ FoundUndef |= (TheVNI == nullptr || TheVNI == &UndefVNI);
if (Undefs.size() > 0 && FoundUndef)
UniqueVNI = false;
@@ -417,7 +420,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
// If a unique reaching def was found, blit in the live ranges immediately.
if (UniqueVNI) {
- assert(TheVNI != nullptr);
+ assert(TheVNI != nullptr && TheVNI != &UndefVNI);
LiveRangeUpdater Updater(&LR);
for (unsigned BN : WorkList) {
SlotIndex Start, End;
@@ -433,22 +436,26 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
}
// Prepare the defined/undefined bit vectors.
- auto EF = EntryInfoMap.find(&LR);
- if (EF == EntryInfoMap.end()) {
+ EntryInfoMap::iterator Entry;
+ bool DidInsert;
+ std::tie(Entry, DidInsert) = EntryInfos.insert(
+ std::make_pair(&LR, std::make_pair(BitVector(), BitVector())));
+ if (DidInsert) {
+ // Initialize newly inserted entries.
unsigned N = MF->getNumBlockIDs();
- EF = EntryInfoMap.insert({&LR, {BitVector(), BitVector()}}).first;
- EF->second.first.resize(N);
- EF->second.second.resize(N);
+ Entry->second.first.resize(N);
+ Entry->second.second.resize(N);
}
- BitVector &DefOnEntry = EF->second.first;
- BitVector &UndefOnEntry = EF->second.second;
+ BitVector &DefOnEntry = Entry->second.first;
+ BitVector &UndefOnEntry = Entry->second.second;
// Multiple values were found, so transfer the work list to the LiveIn array
// where UpdateSSA will use it as a work list.
LiveIn.reserve(WorkList.size());
for (unsigned BN : WorkList) {
MachineBasicBlock *MBB = MF->getBlockNumbered(BN);
- if (Undefs.size() > 0 && !isDefOnEntry(LR, Undefs, *MBB, DefOnEntry, UndefOnEntry))
+ if (Undefs.size() > 0 &&
+ !isDefOnEntry(LR, Undefs, *MBB, DefOnEntry, UndefOnEntry))
continue;
addLiveInBlock(LR, DomTree->getNode(MBB));
if (MBB == &UseMBB)
@@ -466,9 +473,9 @@ void LiveRangeCalc::updateSSA() {
assert(DomTree && "Missing dominator tree");
  // Iterate until convergence.
- unsigned Changes;
+ bool Changed;
do {
- Changes = 0;
+ Changed = false;
// Propagate live-out values down the dominator tree, inserting phi-defs
// when necessary.
for (LiveInBlock &I : LiveIn) {
@@ -491,15 +498,20 @@ void LiveRangeCalc::updateSSA() {
IDomValue = Map[IDom->getBlock()];
// Cache the DomTree node that defined the value.
- if (IDomValue.first && !IDomValue.second)
+ if (IDomValue.first && IDomValue.first != &UndefVNI &&
+ !IDomValue.second) {
Map[IDom->getBlock()].second = IDomValue.second =
DomTree->getNode(Indexes->getMBBFromIndex(IDomValue.first->def));
+ }
- for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
- PE = MBB->pred_end(); PI != PE; ++PI) {
- LiveOutPair &Value = Map[*PI];
+ for (MachineBasicBlock *Pred : MBB->predecessors()) {
+ LiveOutPair &Value = Map[Pred];
if (!Value.first || Value.first == IDomValue.first)
continue;
+ if (Value.first == &UndefVNI) {
+ needPHI = true;
+ break;
+ }
// Cache the DomTree node that defined the value.
if (!Value.second)
@@ -523,7 +535,7 @@ void LiveRangeCalc::updateSSA() {
// Create a phi-def if required.
if (needPHI) {
- ++Changes;
+ Changed = true;
assert(Alloc && "Need VNInfo allocator to create PHI-defs");
SlotIndex Start, End;
std::tie(Start, End) = Indexes->getMBBRange(MBB);
@@ -542,7 +554,7 @@ void LiveRangeCalc::updateSSA() {
LR.addSegment(LiveInterval::Segment(Start, End, VNI));
LOP = LiveOutPair(VNI, Node);
}
- } else if (IDomValue.first) {
+ } else if (IDomValue.first && IDomValue.first != &UndefVNI) {
// No phi-def here. Remember incoming value.
I.Value = IDomValue.first;
@@ -554,9 +566,9 @@ void LiveRangeCalc::updateSSA() {
// MBB is live-out and doesn't define its own value.
if (LOP.first == IDomValue.first)
continue;
- ++Changes;
+ Changed = true;
LOP = IDomValue;
}
}
- } while (Changes);
+ } while (Changed);
}
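UndefVNI is an instance of the sentinel-address idiom: a statically allocated object whose address cannot collide with any real VNInfo, so the live-out map can distinguish "known undef" from both "no value" (nullptr) and a genuine def. A minimal standalone sketch of the idiom, with hypothetical types:

#include <cstdio>

struct VNInfoLike { int Id; };
static VNInfoLike UndefSentinel{-1}; // the address, not the contents, matters

void classify(const VNInfoLike *V) {
  if (!V)
    std::puts("no value yet");
  else if (V == &UndefSentinel)
    std::puts("known undef");
  else
    std::puts("real def");
}

int main() {
  VNInfoLike Def{7};
  classify(nullptr);        // no value yet
  classify(&UndefSentinel); // known undef
  classify(&Def);           // real def
}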
diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h
index 1a7598f8044a..d41b782d9bdf 100644
--- a/lib/CodeGen/LiveRangeCalc.h
+++ b/lib/CodeGen/LiveRangeCalc.h
@@ -24,6 +24,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/CodeGen/LiveInterval.h"
@@ -65,7 +66,8 @@ class LiveRangeCalc {
/// registers do not overlap), but the defined/undefined information must
/// be kept separate for each individual range.
  /// By convention, EntryInfos[&LR] = { Defined, Undefined }.
- std::map<LiveRange*,std::pair<BitVector,BitVector>> EntryInfoMap;
+ typedef DenseMap<LiveRange*,std::pair<BitVector,BitVector>> EntryInfoMap;
+ EntryInfoMap EntryInfos;
/// Map each basic block where a live range is live out to the live-out value
/// and its defining block.
diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp
index f58d1f8b83ae..c58d192284dd 100644
--- a/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/lib/CodeGen/MIRParser/MIParser.cpp
@@ -579,12 +579,12 @@ bool MIParser::parseBasicBlock(MachineBasicBlock &MBB,
//
// is equivalent to
// liveins: %edi, %esi
- bool ExplicitSuccesors = false;
+ bool ExplicitSuccessors = false;
while (true) {
if (Token.is(MIToken::kw_successors)) {
if (parseBasicBlockSuccessors(MBB))
return true;
- ExplicitSuccesors = true;
+ ExplicitSuccessors = true;
} else if (Token.is(MIToken::kw_liveins)) {
if (parseBasicBlockLiveins(MBB))
return true;
@@ -636,7 +636,7 @@ bool MIParser::parseBasicBlock(MachineBasicBlock &MBB,
}
// Construct successor list by searching for basic block machine operands.
- if (!ExplicitSuccesors) {
+ if (!ExplicitSuccessors) {
SmallVector<MachineBasicBlock*,4> Successors;
bool IsFallthrough;
guessSuccessors(MBB, Successors, IsFallthrough);
diff --git a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
index 6b6b5f2814a9..73c3428a6e53 100644
--- a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
+++ b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
@@ -52,6 +52,14 @@ void MachineOptimizationRemarkEmitter::emit(
computeHotness(OptDiag);
LLVMContext &Ctx = MF.getFunction()->getContext();
+
+ // If a diagnostic has a hotness value, then only emit it if its hotness
+ // meets the threshold.
+ if (OptDiag.getHotness() &&
+ *OptDiag.getHotness() < Ctx.getDiagnosticsHotnessThreshold()) {
+ return;
+ }
+
yaml::Output *Out = Ctx.getDiagnosticsOutputFile();
if (Out) {
auto *P = &const_cast<DiagnosticInfoOptimizationBase &>(OptDiagCommon);
@@ -73,7 +81,7 @@ bool MachineOptimizationRemarkEmitterPass::runOnMachineFunction(
MachineFunction &MF) {
MachineBlockFrequencyInfo *MBFI;
- if (MF.getFunction()->getContext().getDiagnosticHotnessRequested())
+ if (MF.getFunction()->getContext().getDiagnosticsHotnessRequested())
MBFI = &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI();
else
MBFI = nullptr;
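The emit() change gates remarks on hotness: remarks carrying a hotness below the context's threshold are dropped, while remarks with no hotness at all still go through. A standalone model of that decision, using std::optional in place of the Optional type the real code uses (hypothetical values):

#include <cstdio>
#include <optional>

bool shouldEmit(std::optional<unsigned> Hotness, unsigned Threshold) {
  // Drop only when hotness is known and below the threshold.
  return !Hotness || *Hotness >= Threshold;
}

int main() {
  std::printf("%d %d %d\n",
              (int)shouldEmit(std::nullopt, 100), // 1: no hotness attached
              (int)shouldEmit(5, 100),            // 0: too cold
              (int)shouldEmit(500, 100));         // 1: hot enough
}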
diff --git a/lib/CodeGen/MacroFusion.cpp b/lib/CodeGen/MacroFusion.cpp
index 45ea0e4c39ab..5e279b065bbd 100644
--- a/lib/CodeGen/MacroFusion.cpp
+++ b/lib/CodeGen/MacroFusion.cpp
@@ -1,4 +1,4 @@
-//===- MacroFusion.cpp - Macro Fusion ----------------------===//
+//===- MacroFusion.cpp - Macro Fusion -------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,8 +13,15 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MacroFusion.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#define DEBUG_TYPE "misched"
@@ -26,8 +33,6 @@ using namespace llvm;
static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
cl::desc("Enable scheduling for macro fusion."), cl::init(true));
-namespace {
-
static void fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU,
SUnit &SecondSU) {
// Create a single weak edge between the adjacent instrs. The only effect is
@@ -66,6 +71,7 @@ static void fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU,
++NumFused;
}
+namespace {
/// \brief Post-process the DAG to create cluster edges between instrs that may
/// be fused by the processor into a single operation.
@@ -81,6 +87,8 @@ public:
void apply(ScheduleDAGInstrs *DAGInstrs) override;
};
+} // end anonymous namespace
+
void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
@@ -128,23 +136,18 @@ bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGMI &DAG, SUnit &AnchorSU) {
return false;
}
-} // end anonymous namespace
-
-
-namespace llvm {
-
std::unique_ptr<ScheduleDAGMutation>
-createMacroFusionDAGMutation(ShouldSchedulePredTy shouldScheduleAdjacent) {
+llvm::createMacroFusionDAGMutation(
+ ShouldSchedulePredTy shouldScheduleAdjacent) {
if(EnableMacroFusion)
return llvm::make_unique<MacroFusion>(shouldScheduleAdjacent, true);
return nullptr;
}
std::unique_ptr<ScheduleDAGMutation>
-createBranchMacroFusionDAGMutation(ShouldSchedulePredTy shouldScheduleAdjacent) {
+llvm::createBranchMacroFusionDAGMutation(
+ ShouldSchedulePredTy shouldScheduleAdjacent) {
if(EnableMacroFusion)
return llvm::make_unique<MacroFusion>(shouldScheduleAdjacent, false);
return nullptr;
}
-
-} // end namespace llvm
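A target typically consumes these factories from its createMachineScheduler hook. A hedged sketch, assuming the ShouldSchedulePredTy signature declared in this revision's MacroFusion.h; the predicate name and its pairing logic are hypothetical:

static bool myShouldScheduleAdjacent(const TargetInstrInfo &TII,
                                     const TargetSubtargetInfo &STI,
                                     const MachineInstr *FirstMI,
                                     const MachineInstr &SecondMI) {
  // Target-specific test, e.g. recognize a fuseable compare/branch pair.
  // FirstMI may be null when asking whether SecondMI can fuse with anything.
  return false;
}

// Inside the target's createMachineScheduler():
//   DAG->addMutation(createMacroFusionDAGMutation(myShouldScheduleAdjacent));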
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index da8fac6d3834..b13f6b68c420 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -76,6 +76,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
@@ -119,6 +120,14 @@ static cl::opt<unsigned> RewritePHILimit(
"rewrite-phi-limit", cl::Hidden, cl::init(10),
cl::desc("Limit the length of PHI chains to lookup"));
+// Limit the length of recurrence chain when evaluating the benefit of
+// commuting operands.
+static cl::opt<unsigned> MaxRecurrenceChain(
+ "recurrence-chain-limit", cl::Hidden, cl::init(3),
+ cl::desc("Maximum length of recurrence chain when evaluating the benefit "
+ "of commuting operands"));
+
STATISTIC(NumReuse, "Number of extension results reused");
STATISTIC(NumCmps, "Number of compares eliminated");
STATISTIC(NumImmFold, "Number of move immediate folded");
@@ -131,12 +140,14 @@ STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed");
namespace {
class ValueTrackerResult;
+ class RecurrenceInstr;
class PeepholeOptimizer : public MachineFunctionPass {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
MachineRegisterInfo *MRI;
MachineDominatorTree *DT; // Machine dominator tree
+ MachineLoopInfo *MLI;
public:
static char ID; // Pass identification
@@ -150,6 +161,8 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
if (Aggressive) {
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
@@ -160,6 +173,9 @@ namespace {
typedef SmallDenseMap<TargetInstrInfo::RegSubRegPair, ValueTrackerResult>
RewriteMapTy;
+    /// \brief Sequence of instructions that form a recurrence cycle.
+ typedef SmallVector<RecurrenceInstr, 4> RecurrenceCycle;
+
private:
bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
@@ -170,6 +186,7 @@ namespace {
bool optimizeCoalescableCopy(MachineInstr *MI);
bool optimizeUncoalescableCopy(MachineInstr *MI,
SmallPtrSetImpl<MachineInstr *> &LocalMIs);
+ bool optimizeRecurrence(MachineInstr &PHI);
bool findNextSource(unsigned Reg, unsigned SubReg,
RewriteMapTy &RewriteMap);
bool isMoveImmediate(MachineInstr *MI,
@@ -178,6 +195,13 @@ namespace {
bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+      /// \brief Finds recurrence cycles, but only ones formed around a def
+      /// operand and a use operand that are tied. If there is a use operand
+      /// commutable with the tied use operand, find the recurrence cycle
+      /// along that operand as well.
+ bool findTargetRecurrence(unsigned Reg,
+ const SmallSet<unsigned, 2> &TargetReg,
+ RecurrenceCycle &RC);
/// \brief If copy instruction \p MI is a virtual register copy, track it in
/// the set \p CopySrcRegs and \p CopyMIs. If this virtual register was
@@ -222,6 +246,28 @@ namespace {
}
};
+  /// \brief Helper class to hold instructions that are inside recurrence
+  /// cycles. The recurrence cycle is formed around 1) a def operand and its
+  /// tied use operand, or 2) a def operand and a use operand that is
+  /// commutable with another use operand which is tied to the def operand. In
+  /// the latter case, the indices of the tied use operand and the commutable
+  /// use operand are kept in CommutePair.
+ class RecurrenceInstr {
+ public:
+ typedef std::pair<unsigned, unsigned> IndexPair;
+
+ RecurrenceInstr(MachineInstr *MI) : MI(MI) {}
+ RecurrenceInstr(MachineInstr *MI, unsigned Idx1, unsigned Idx2)
+ : MI(MI), CommutePair(std::make_pair(Idx1, Idx2)) {}
+
+ MachineInstr *getMI() const { return MI; }
+ Optional<IndexPair> getCommutePair() const { return CommutePair; }
+
+ private:
+ MachineInstr *MI;
+ Optional<IndexPair> CommutePair;
+ };
+
/// \brief Helper class to hold a reply for ValueTracker queries. Contains the
/// returned sources for a given search and the instructions where the sources
/// were tracked from.
@@ -412,6 +458,7 @@ char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID;
INITIALIZE_PASS_BEGIN(PeepholeOptimizer, DEBUG_TYPE,
"Peephole Optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(PeepholeOptimizer, DEBUG_TYPE,
"Peephole Optimizations", false, false)
@@ -1487,6 +1534,113 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy(
return false;
}
+/// \brief Returns true if \p MO is a virtual register operand.
+static bool isVirtualRegisterOperand(MachineOperand &MO) {
+ if (!MO.isReg())
+ return false;
+ return TargetRegisterInfo::isVirtualRegister(MO.getReg());
+}
+
+bool PeepholeOptimizer::findTargetRecurrence(
+ unsigned Reg, const SmallSet<unsigned, 2> &TargetRegs,
+ RecurrenceCycle &RC) {
+ // Recurrence found if Reg is in TargetRegs.
+ if (TargetRegs.count(Reg))
+ return true;
+
+  // TODO: Currently, we only allow the last instruction of the recurrence
+  // cycle (the instruction that feeds the PHI) to have more than one use, to
+  // guarantee that commuting operands does not tie registers with
+  // overlapping live ranges. Once we have actual live range info for each
+  // register, this constraint can be relaxed.
+ if (!MRI->hasOneNonDBGUse(Reg))
+ return false;
+
+  // Give up if the recurrence chain length exceeds the limit.
+ if (RC.size() >= MaxRecurrenceChain)
+ return false;
+
+ MachineInstr &MI = *(MRI->use_instr_nodbg_begin(Reg));
+ unsigned Idx = MI.findRegisterUseOperandIdx(Reg);
+
+ // Only interested in recurrences whose instructions have only one def, which
+ // is a virtual register.
+ if (MI.getDesc().getNumDefs() != 1)
+ return false;
+
+ MachineOperand &DefOp = MI.getOperand(0);
+ if (!isVirtualRegisterOperand(DefOp))
+ return false;
+
+  // Check if the def operand of MI is tied to any use operand. We are only
+  // interested in the case where all the instructions in the recurrence
+  // chain have their def operand tied to one of their use operands.
+ unsigned TiedUseIdx;
+ if (!MI.isRegTiedToUseOperand(0, &TiedUseIdx))
+ return false;
+
+ if (Idx == TiedUseIdx) {
+ RC.push_back(RecurrenceInstr(&MI));
+ return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
+ } else {
+ // If Idx is not TiedUseIdx, check if Idx is commutable with TiedUseIdx.
+ unsigned CommIdx = TargetInstrInfo::CommuteAnyOperandIndex;
+ if (TII->findCommutedOpIndices(MI, Idx, CommIdx) && CommIdx == TiedUseIdx) {
+ RC.push_back(RecurrenceInstr(&MI, Idx, CommIdx));
+ return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
+ }
+ }
+
+ return false;
+}
+
+/// \brief Phi instructions will eventually be lowered to copy instructions.
+/// If the phi is in a loop header, a recurrence may be formed around the
+/// source and destination of the phi. In such cases, commuting operands of
+/// the instructions
+/// in the recurrence may enable coalescing of the copy instruction generated
+/// from the phi. For example, if there is a recurrence of
+///
+/// LoopHeader:
+/// %vreg1 = phi(%vreg0, %vreg100)
+/// LoopLatch:
+/// %vreg0<def, tied1> = ADD %vreg2<def, tied0>, %vreg1
+///
+/// then the fact that %vreg0 and %vreg2 are in the same tied-operand set
+/// makes the coalescing of the copy instruction generated from the phi in
+/// LoopHeader (i.e. %vreg1 = COPY %vreg0) impossible, because %vreg1 and
+/// %vreg2 have overlapping live ranges. This introduces an additional move
+/// instruction into the final assembly. However, if we commute %vreg2 and
+/// %vreg1 in the ADD instruction, the redundant move can be
+/// avoided.
+bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) {
+ SmallSet<unsigned, 2> TargetRegs;
+ for (unsigned Idx = 1; Idx < PHI.getNumOperands(); Idx += 2) {
+ MachineOperand &MO = PHI.getOperand(Idx);
+ assert(isVirtualRegisterOperand(MO) && "Invalid PHI instruction");
+ TargetRegs.insert(MO.getReg());
+ }
+
+ bool Changed = false;
+ RecurrenceCycle RC;
+ if (findTargetRecurrence(PHI.getOperand(0).getReg(), TargetRegs, RC)) {
+    // Commute operands of the instructions in RC where necessary so that
+    // the copy generated from the PHI can be coalesced.
+ DEBUG(dbgs() << "Optimize recurrence chain from " << PHI);
+ for (auto &RI : RC) {
+ DEBUG(dbgs() << "\tInst: " << *(RI.getMI()));
+ auto CP = RI.getCommutePair();
+ if (CP) {
+ Changed = true;
+ TII->commuteInstruction(*(RI.getMI()), false, (*CP).first,
+ (*CP).second);
+ DEBUG(dbgs() << "\t\tCommuted: " << *(RI.getMI()));
+ }
+ }
+ }
+
+ return Changed;
+}
+
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(*MF.getFunction()))
return false;
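The rewrite above is sound only because the instruction being commuted computes a commutative operation: swapping the two uses leaves the value unchanged and merely moves the tied-register constraint onto the operand whose live range does not overlap the PHI copy. A trivial standalone check of that premise, with hypothetical operand values:

#include <cassert>

int main() {
  int V1 = 3, V2 = 4;
  // ADD is commutative, so only the register ties change, not the result.
  assert(V2 + V1 == V1 + V2);
}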
@@ -1501,6 +1655,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr;
+ MLI = &getAnalysis<MachineLoopInfo>();
bool Changed = false;
@@ -1529,6 +1684,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
SmallSet<unsigned, 4> CopySrcRegs;
DenseMap<unsigned, MachineInstr *> CopySrcMIs;
+ bool IsLoopHeader = MLI->isLoopHeader(&MBB);
+
for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end();
MII != MIE; ) {
MachineInstr *MI = &*MII;
@@ -1540,9 +1697,16 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (MI->isDebugValue())
continue;
- if (MI->isPosition() || MI->isPHI())
+ if (MI->isPosition())
continue;
+ if (IsLoopHeader && MI->isPHI()) {
+ if (optimizeRecurrence(*MI)) {
+ Changed = true;
+ continue;
+ }
+ }
+
if (!MI->isCopy()) {
for (const auto &Op : MI->operands()) {
// Visit all operands: definitions can be implicit or explicit.
@@ -1667,7 +1831,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
MRI->markUsesInDebugValueAsUndef(FoldedReg);
FoldAsLoadDefCandidates.erase(FoldedReg);
++NumLoadFold;
-
+
// MI is replaced with FoldMI so we can continue trying to fold
Changed = true;
MI = FoldMI;
@@ -1675,7 +1839,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
}
}
}
-
+
// If we run into an instruction we can't fold across, discard
// the load candidates. Note: We might be able to fold *into* this
// instruction, so this needs to be after the folding logic.
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 50d241bff23d..9562652556ac 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -2622,7 +2622,7 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
}
// If we couldn't allocate a register from spilling, there is probably some
- // invalid inline assembly. The base class wil report it.
+ // invalid inline assembly. The base class will report it.
if (Stage >= RS_Done || !VirtReg.isSpillable())
return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters,
Depth);
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 7b3a5d5c5ff7..ff9bca092dbe 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -979,6 +979,11 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
IntB.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
for (LiveInterval::SubRange &SR : IntB.subranges())
SR.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
+
+  // If the newly created instruction reuses the address of an instruction
+  // that was deleted earlier (the allocator recycled the object), it needs to
+  // be removed from the erased list.
+ ErasedInstrs.erase(NewCopyMI);
} else {
DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from BB#"
<< MBB.getNumber() << '\t' << CopyMI);
@@ -989,6 +994,8 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
// While updating the live-ranges, we only look at slot indices and
// never go back to the instruction.
LIS->RemoveMachineInstrFromMaps(CopyMI);
+ // Mark instructions as deleted.
+ ErasedInstrs.insert(&CopyMI);
CopyMI.eraseFromParent();
// Update the liveness.
@@ -3095,7 +3102,7 @@ copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
continue;
// Skip instruction pointers that have already been erased, for example by
// dead code elimination.
- if (ErasedInstrs.erase(CurrList[i])) {
+ if (ErasedInstrs.count(CurrList[i])) {
CurrList[i] = nullptr;
continue;
}
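The switch from erase() to count() matters because erase() both tests for and removes the marker: a later instruction recycled at the same address would then slip past the check. A standalone sketch of the semantic difference, with std::set standing in for ErasedInstrs:

#include <cstdio>
#include <set>

int main() {
  std::set<const void *> Erased;
  int X = 0;
  Erased.insert(&X);
  std::printf("%zu\n", Erased.erase(&X)); // 1, but the marker is now gone
  std::printf("%zu\n", Erased.count(&X)); // 0: a second check would miss it
}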
diff --git a/lib/CodeGen/RenameIndependentSubregs.cpp b/lib/CodeGen/RenameIndependentSubregs.cpp
index d2eff950d861..bd5ecbd28f29 100644
--- a/lib/CodeGen/RenameIndependentSubregs.cpp
+++ b/lib/CodeGen/RenameIndependentSubregs.cpp
@@ -243,10 +243,14 @@ void RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes,
unsigned VReg = Intervals[ID]->reg;
MO.setReg(VReg);
- if (MO.isTied()) {
+
+ if (MO.isTied() && Reg != VReg) {
/// Undef use operands are not tracked in the equivalence class but need
    /// to be updated if they are tied.
MO.getParent()->substituteRegister(Reg, VReg, 0, TRI);
+
+ // substituteRegister breaks the iterator, so restart.
+ I = MRI->reg_nodbg_begin(Reg);
}
}
// TODO: We could attempt to recompute new register classes while visiting
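Restarting from reg_nodbg_begin is the standard response to iterator invalidation: substituteRegister edits the very use list being walked. A minimal standalone sketch of the same restart pattern, with a vector standing in for the use list:

#include <cstdio>
#include <vector>

int main() {
  std::vector<int> Uses{1, 2, 3, 4};
  for (auto I = Uses.begin(); I != Uses.end();) {
    if (*I % 2 == 0) {
      Uses.erase(I);    // invalidates I, like substituteRegister above...
      I = Uses.begin(); // ...so restart the walk from the beginning
      continue;
    }
    ++I;
  }
  for (int U : Uses)
    std::printf("%d ", U); // prints: 1 3
}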
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 7dd66d799be4..0f70b0e9ca07 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -1089,7 +1089,7 @@ static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs,
// Things that are available after the instruction are killed by it.
bool IsKill = LiveRegs.available(MRI, Reg);
MO.setIsKill(IsKill);
- if (IsKill && addToLiveRegs)
+ if (addToLiveRegs)
LiveRegs.addReg(Reg);
}
}
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d02dcb6f4439..d901af727686 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4915,7 +4915,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
return SDValue();
// Loads must share the same base address
- BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr());
+ BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr(), DAG);
int64_t ByteOffsetFromBase = 0;
if (!Base)
Base = Ptr;
@@ -8210,18 +8210,20 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
- if (const ConstantSDNode *CAmt = isConstOrConstSplat(N0.getOperand(1))) {
- uint64_t Amt = CAmt->getZExtValue();
- unsigned Size = VT.getScalarSizeInBits();
-
- if (Amt < Size) {
- SDLoc SL(N);
- EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue Amt = N0.getOperand(1);
+ KnownBits Known;
+ DAG.computeKnownBits(Amt, Known);
+ unsigned Size = VT.getScalarSizeInBits();
+ if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
+ SDLoc SL(N);
+ EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
- return DAG.getNode(ISD::SHL, SL, VT, Trunc,
- DAG.getConstant(Amt, SL, AmtVT));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
+ if (AmtVT != Amt.getValueType()) {
+ Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
+ AddToWorklist(Amt.getNode());
}
+ return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
}
}
@@ -9751,6 +9753,52 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
}
}
+ // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
+ // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
+ if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
+ (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
+ TLI.isOperationLegal(ISD::FABS, VT)) {
+ SDValue Select = N0, X = N1;
+ if (Select.getOpcode() != ISD::SELECT)
+ std::swap(Select, X);
+
+ SDValue Cond = Select.getOperand(0);
+ auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
+ auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
+
+ if (TrueOpnd && FalseOpnd &&
+ Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
+ isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
+ cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+ switch (CC) {
+ default: break;
+ case ISD::SETOLT:
+ case ISD::SETULT:
+ case ISD::SETOLE:
+ case ISD::SETULE:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ std::swap(TrueOpnd, FalseOpnd);
+ // Fall through
+ case ISD::SETOGT:
+ case ISD::SETUGT:
+ case ISD::SETOGE:
+ case ISD::SETUGE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
+ TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, DL, VT,
+ DAG.getNode(ISD::FABS, DL, VT, X));
+ if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
+ return DAG.getNode(ISD::FABS, DL, VT, X);
+
+ break;
+ }
+ }
+ }
+
// FMUL -> FMA combines:
if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
AddToWorklist(Fused.getNode());
@@ -12394,7 +12442,7 @@ void DAGCombiner::getStoreMergeCandidates(
StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) {
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
- BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr());
+ BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
EVT MemVT = St->getMemoryVT();
// We must have a base and an offset.
@@ -12414,8 +12462,8 @@ void DAGCombiner::getStoreMergeCandidates(
BaseIndexOffset LBasePtr;
// Match on loadbaseptr if relevant.
if (IsLoadSrc)
- LBasePtr =
- BaseIndexOffset::match(cast<LoadSDNode>(St->getValue())->getBasePtr());
+ LBasePtr = BaseIndexOffset::match(
+ cast<LoadSDNode>(St->getValue())->getBasePtr(), DAG);
auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
int64_t &Offset) -> bool {
@@ -12429,7 +12477,7 @@ void DAGCombiner::getStoreMergeCandidates(
if (IsLoadSrc) {
// The Load's Base Ptr must also match
if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Other->getValue())) {
- auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr());
+ auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG);
if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
return false;
} else
@@ -12443,7 +12491,7 @@ void DAGCombiner::getStoreMergeCandidates(
if (!(Other->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
Other->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR))
return false;
- Ptr = BaseIndexOffset::match(Other->getBasePtr());
+ Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG);
return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
};
  // We are looking for a root node which is an ancestor to all mergeable
@@ -12786,7 +12834,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
if (Ld->getMemoryVT() != MemVT)
break;
- BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr());
+ BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
// If this is not the first ptr that we check.
int64_t LdOffset = 0;
if (LdBasePtr.getBase().getNode()) {
@@ -12829,6 +12877,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
  // This variable refers to the size, not an index, in the array.
unsigned LastLegalVectorType = 1;
unsigned LastLegalIntegerType = 1;
+ bool isDereferenceable = true;
bool DoIntegerTruncate = false;
StartAddress = LoadNodes[0].OffsetFromBase;
SDValue FirstChain = FirstLoad->getChain();
@@ -12841,6 +12890,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
if (CurrAddress - StartAddress != (ElementSizeBytes * i))
break;
LastConsecutiveLoad = i;
+
+ if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
+ isDereferenceable = false;
+
// Find a legal type for the vector store.
EVT StoreTy = EVT::getVectorVT(Context, MemVT, i + 1);
bool IsFastSt, IsFastLd;
@@ -12926,11 +12979,16 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
AddToWorklist(NewStoreChain.getNode());
+ MachineMemOperand::Flags MMOFlags = isDereferenceable ?
+ MachineMemOperand::MODereferenceable:
+ MachineMemOperand::MONone;
+
SDValue NewLoad, NewStore;
if (UseVectorTy || !DoIntegerTruncate) {
NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
FirstLoad->getBasePtr(),
- FirstLoad->getPointerInfo(), FirstLoadAlign);
+ FirstLoad->getPointerInfo(), FirstLoadAlign,
+ MMOFlags);
NewStore = DAG.getStore(NewStoreChain, StoreDL, NewLoad,
FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), FirstStoreAlign);
@@ -12940,7 +12998,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
NewLoad =
DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, FirstLoad->getChain(),
FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
- JointMemOpVT, FirstLoadAlign);
+ JointMemOpVT, FirstLoadAlign, MMOFlags);
NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), JointMemOpVT,
@@ -15013,6 +15071,11 @@ static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
unsigned NumElts = VT.getVectorNumElements();
unsigned EltSizeInBits = VT.getScalarSizeInBits();
unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
+ unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
+
+ if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
+ return SDValue();
+ unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
// (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
// (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
@@ -15034,11 +15097,10 @@ static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
if (EltSizeInBits != ExtSrcSizeInBits)
return SDValue();
- // Attempt to match a 'truncate_vector_inreg' shuffle, we just search for
- // power-of-2 truncations as they are the most likely.
- for (unsigned Scale = 2; Scale < NumElts; Scale *= 2)
- if (isTruncate(Scale))
- return DAG.getBitcast(VT, N00);
+ // We can remove *extend_vector_inreg only if the truncation happens at
+ // the same scale as the extension.
+ if (isTruncate(ExtScale))
+ return DAG.getBitcast(VT, N00);
return SDValue();
}
@@ -16540,8 +16602,8 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
unsigned NumBytes1 = Op1->getMemoryVT().getSizeInBits() >> 3;
// Check for BaseIndexOffset matching.
- BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr());
- BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr());
+ BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr(), DAG);
+ BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr(), DAG);
int64_t PtrDiff;
if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
@@ -16751,7 +16813,7 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
- BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr());
+ BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
// We must have a base and an offset.
if (!BasePtr.getBase().getNode())
@@ -16777,7 +16839,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
break;
// Find the base pointer and offset for this memory node.
- BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr());
+ BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);
// Check that the base pointer is the same as the original one.
if (!BasePtr.equalBaseIndex(Ptr, DAG))
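The new FMUL fold rests on a simple floating-point identity that only holds under the no-NaNs and no-signed-zeros flags it checks for. A standalone sanity check of the fabs form, with hypothetical sample values:

#include <cassert>
#include <cmath>

int main() {
  for (float X : {-2.5f, -0.5f, 0.25f, 3.0f}) {
    // fmul(X, select(X > 0, 1.0, -1.0)) is exactly fabs(X) for these inputs.
    float Sel = (X > 0.0f) ? 1.0f : -1.0f;
    assert(X * Sel == std::fabs(X));
  }
}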
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 75fec7bd1d48..ac3247948169 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -1827,11 +1827,10 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ?
ISD::UADDO : ISD::USUBO,
TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
- TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT);
-
if (hasOVF) {
EVT OvfVT = getSetCCResultType(NVT);
SDVTList VTList = DAG.getVTList(NVT, OvfVT);
+ TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT);
int RevOpc;
if (N->getOpcode() == ISD::ADD) {
RevOpc = ISD::SUB;
@@ -1864,13 +1863,6 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2));
SDValue Cmp1 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0],
ISD::SETULT);
-
- if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent) {
- SDValue Carry = DAG.getZExtOrTrunc(Cmp1, dl, NVT);
- Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry);
- return;
- }
-
SDValue Carry1 = DAG.getSelect(dl, NVT, Cmp1,
DAG.getConstant(1, dl, NVT),
DAG.getConstant(0, dl, NVT));
@@ -1885,14 +1877,9 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
SDValue Cmp =
DAG.getSetCC(dl, getSetCCResultType(LoOps[0].getValueType()),
LoOps[0], LoOps[1], ISD::SETULT);
-
- SDValue Borrow;
- if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent)
- Borrow = DAG.getZExtOrTrunc(Cmp, dl, NVT);
- else
- Borrow = DAG.getSelect(dl, NVT, Cmp, DAG.getConstant(1, dl, NVT),
- DAG.getConstant(0, dl, NVT));
-
+ SDValue Borrow = DAG.getSelect(dl, NVT, Cmp,
+ DAG.getConstant(1, dl, NVT),
+ DAG.getConstant(0, dl, NVT));
Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow);
}
}
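With the BooleanContent special case removed, the expansion always materializes the carry or borrow from an unsigned compare of the low halves. A standalone model of the carry computation, splitting a 64-bit add into 32-bit halves (hypothetical inputs chosen so the low half wraps):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t A = 0xFFFFFFFFull, B = 5;
  uint32_t ALo = (uint32_t)A, AHi = (uint32_t)(A >> 32);
  uint32_t BLo = (uint32_t)B, BHi = (uint32_t)(B >> 32);
  uint32_t Lo = ALo + BLo;
  uint32_t Carry = Lo < ALo ? 1 : 0; // the ISD::SETULT compare above
  uint32_t Hi = AHi + BHi + Carry;
  assert((((uint64_t)Hi << 32) | Lo) == A + B);
}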
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index d2e0dbbf88ec..4e899ae6668e 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -11,6 +11,7 @@
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -18,28 +19,41 @@ namespace llvm {
bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other,
const SelectionDAG &DAG, int64_t &Off) {
- // Obvious equivalent
+ // Initial Offset difference.
Off = Other.Offset - Offset;
- if (Other.Base == Base && Other.Index == Index &&
- Other.IsIndexSignExt == IsIndexSignExt)
- return true;
- // Match GlobalAddresses
- if (Index == Other.Index)
- if (GlobalAddressSDNode *A = dyn_cast<GlobalAddressSDNode>(Base))
- if (GlobalAddressSDNode *B = dyn_cast<GlobalAddressSDNode>(Other.Base))
+ if ((Other.Index == Index) && (Other.IsIndexSignExt == IsIndexSignExt)) {
+ // Trivial match.
+ if (Other.Base == Base)
+ return true;
+
+ // Match GlobalAddresses
+ if (auto *A = dyn_cast<GlobalAddressSDNode>(Base))
+ if (auto *B = dyn_cast<GlobalAddressSDNode>(Other.Base))
if (A->getGlobal() == B->getGlobal()) {
Off += B->getOffset() - A->getOffset();
return true;
}
- // TODO: we should be able to add FrameIndex analysis improvements here.
+ const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ // Match non-equal FrameIndexes - a FrameIndex stemming from an
+  // alloca will not have its ObjectOffset set until post-DAG and
+  // as such we must assume the two FrameIndexes are incomparable.
+ if (auto *A = dyn_cast<FrameIndexSDNode>(Base))
+ if (auto *B = dyn_cast<FrameIndexSDNode>(Other.Base))
+ if (!MFI.getObjectAllocation(A->getIndex()) &&
+ !MFI.getObjectAllocation(B->getIndex())) {
+ Off += MFI.getObjectOffset(B->getIndex()) -
+ MFI.getObjectOffset(A->getIndex());
+ return true;
+ }
+ }
return false;
}
/// Parses tree in Ptr for base, index, offset addresses.
-BaseIndexOffset BaseIndexOffset::match(SDValue Ptr) {
+BaseIndexOffset BaseIndexOffset::match(SDValue Ptr, const SelectionDAG &DAG) {
// (((B + I*M) + c)) + c ...
SDValue Base = Ptr;
SDValue Index = SDValue();
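The equalBaseIndex rewrite above only compares offsets once index and sign-extension state match, and newly handles two FrameIndex bases whose objects already have fixed offsets; anything stemming from a not-yet-placed alloca stays incomparable. A simplified model of the comparison, with plain pointers standing in for SDValue nodes (an assumed simplification, not the LLVM types):

#include <cstdint>

struct AddrParts {
  const void *Base;  // stands in for the base SDValue
  const void *Index; // stands in for the scaled-index SDValue
  int64_t Offset;    // constant displacement

  // Returns true and the byte distance when both addresses are provably
  // relative to the same base and index.
  bool equalBaseIndex(const AddrParts &Other, int64_t &Off) const {
    if (Base != Other.Base || Index != Other.Index)
      return false; // incomparable, like the un-placed alloca case
    Off = Other.Offset - Offset;
    return true;
  }
};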
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index f9f431db55be..acf68fbbdedf 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3375,7 +3375,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
SDValue IdxN = getValue(Idx);
if (!IdxN.getValueType().isVector() && VectorWidth) {
- MVT VT = MVT::getVectorVT(IdxN.getValueType().getSimpleVT(), VectorWidth);
+ EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), VectorWidth);
IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
}
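The one-line SelectionDAGBuilder fix matters when the scalar index type is not a simple machine type: getSimpleVT() asserts on such types, while EVT::getVectorVT can carry any extended type until legalization. A small standalone illustration against the LLVM type APIs (the i128 choice is an assumed example):

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

// An extended integer type such as i128 has no simple MVT, but building
// the splat vector type as an EVT still works.
static bool demo(LLVMContext &Ctx) {
  EVT Idx = EVT::getIntegerVT(Ctx, 128);   // not a simple MVT
  EVT Vec = EVT::getVectorVT(Ctx, Idx, 4); // fine as an EVT
  return !Idx.isSimple() && Vec.isVector();
}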
diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp
index b1918b19e1df..817e58ce59e1 100644
--- a/lib/CodeGen/TargetPassConfig.cpp
+++ b/lib/CodeGen/TargetPassConfig.cpp
@@ -1,4 +1,4 @@
-//===-- TargetPassConfig.cpp - Target independent code generation passes --===//
+//===- TargetPassConfig.cpp - Target independent code generation passes ---===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,29 +13,37 @@
//===---------------------------------------------------------------------===//
#include "llvm/CodeGen/TargetPassConfig.h"
-
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CFLAndersAliasAnalysis.h"
#include "llvm/Analysis/CFLSteensAliasAnalysis.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
-#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachinePassRegistry.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
-#include "llvm/CodeGen/RegisterUsageInfo.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Threading.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/SymbolRewriter.h"
+#include <cassert>
+#include <string>
using namespace llvm;
@@ -225,6 +233,7 @@ char TargetPassConfig::EarlyTailDuplicateID = 0;
char TargetPassConfig::PostRAMachineLICMID = 0;
namespace {
+
struct InsertedPass {
AnalysisID TargetPassID;
IdentifyingPassPtr InsertedPassID;
@@ -245,9 +254,11 @@ struct InsertedPass {
return NP;
}
};
-}
+
+} // end anonymous namespace
namespace llvm {
+
class PassConfigImpl {
public:
// List of passes explicitly substituted by this target. Normally this is
@@ -263,7 +274,8 @@ public:
/// is inserted after each instance of the first one.
SmallVector<InsertedPass, 4> InsertedPasses;
};
-} // namespace llvm
+
+} // end namespace llvm
// Out of line virtual method.
TargetPassConfig::~TargetPassConfig() {
@@ -273,11 +285,7 @@ TargetPassConfig::~TargetPassConfig() {
// Out of line constructor provides default values for pass options and
// registers all common codegen passes.
TargetPassConfig::TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm)
- : ImmutablePass(ID), PM(&pm), Started(true), Stopped(false),
- AddingMachinePasses(false), TM(&TM), Impl(nullptr), Initialized(false),
- DisableVerify(false), EnableTailMerge(true),
- RequireCodeGenSCCOrder(false) {
-
+ : ImmutablePass(ID), PM(&pm), TM(&TM) {
Impl = new PassConfigImpl();
// Register all target independent codegen passes to activate their PassIDs,
@@ -325,7 +333,7 @@ TargetPassConfig *LLVMTargetMachine::createPassConfig(PassManagerBase &PM) {
}
TargetPassConfig::TargetPassConfig()
- : ImmutablePass(ID), PM(nullptr) {
+ : ImmutablePass(ID) {
report_fatal_error("Trying to construct TargetPassConfig without a target "
"machine. Scheduling a CodeGen pass without a target "
"triple set?");
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 552a89f76ca2..83c00e24d14f 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -68,6 +68,13 @@ EnableRescheduling("twoaddr-reschedule",
cl::desc("Coalesce copies by rescheduling (default=true)"),
cl::init(true), cl::Hidden);
+// Limit the number of dataflow edges to traverse when evaluating the benefit
+// of commuting operands.
+static cl::opt<unsigned> MaxDataFlowEdge(
+ "dataflow-edge-limit", cl::Hidden, cl::init(3),
+ cl::desc("Maximum number of dataflow edges to traverse when evaluating "
+ "the benefit of commuting operands"));
+
namespace {
class TwoAddressInstructionPass : public MachineFunctionPass {
MachineFunction *MF;
@@ -637,10 +644,10 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
// To more generally minimize register copies, ideally the logic of two addr
// instruction pass should be integrated with register allocation pass where
// interference graph is available.
- if (isRevCopyChain(regC, regA, 3))
+ if (isRevCopyChain(regC, regA, MaxDataFlowEdge))
return true;
- if (isRevCopyChain(regB, regA, 3))
+ if (isRevCopyChain(regB, regA, MaxDataFlowEdge))
return false;
// Since there are no intervening uses for both registers, then commute
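The TwoAddressInstructionPass change replaces a hard-coded depth of 3 with a hidden cl::opt, so the copy-chain search can be tuned without rebuilding, e.g. llc -dataflow-edge-limit=5, or clang -mllvm -dataflow-edge-limit=5. The general pattern, as a standalone sketch with illustrative names:

#include "llvm/Support/CommandLine.h"

// A hidden option reads like a plain unsigned at its use sites.
static llvm::cl::opt<unsigned>
    ExampleLimit("example-limit", llvm::cl::Hidden, llvm::cl::init(3),
                 llvm::cl::desc("Illustrative tuning threshold"));

static bool withinLimit(unsigned Depth) { return Depth <= ExampleLimit; }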
diff --git a/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp b/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp
index d058f4864975..e0c7ef58c304 100644
--- a/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp
+++ b/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp
@@ -29,10 +29,8 @@ static Error visitKnownRecord(CVSymbol &Record,
return Error::success();
}
-Error CVSymbolVisitor::visitSymbolRecord(CVSymbol &Record) {
- if (auto EC = Callbacks.visitSymbolBegin(Record))
- return EC;
-
+static Error finishVisitation(CVSymbol &Record,
+ SymbolVisitorCallbacks &Callbacks) {
switch (Record.Type) {
default:
if (auto EC = Callbacks.visitUnknownSymbol(Record))
@@ -55,6 +53,18 @@ Error CVSymbolVisitor::visitSymbolRecord(CVSymbol &Record) {
return Error::success();
}
+Error CVSymbolVisitor::visitSymbolRecord(CVSymbol &Record) {
+ if (auto EC = Callbacks.visitSymbolBegin(Record))
+ return EC;
+ return finishVisitation(Record, Callbacks);
+}
+
+Error CVSymbolVisitor::visitSymbolRecord(CVSymbol &Record, uint32_t Offset) {
+ if (auto EC = Callbacks.visitSymbolBegin(Record, Offset))
+ return EC;
+ return finishVisitation(Record, Callbacks);
+}
+
Error CVSymbolVisitor::visitSymbolStream(const CVSymbolArray &Symbols) {
for (auto I : Symbols) {
if (auto EC = visitSymbolRecord(I))
@@ -62,3 +72,13 @@ Error CVSymbolVisitor::visitSymbolStream(const CVSymbolArray &Symbols) {
}
return Error::success();
}
+
+Error CVSymbolVisitor::visitSymbolStream(const CVSymbolArray &Symbols,
+ uint32_t InitialOffset) {
+ for (auto I : Symbols) {
+ if (auto EC = visitSymbolRecord(I, InitialOffset))
+ return EC;
+ InitialOffset += I.length();
+ }
+ return Error::success();
+}
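The new offset-carrying overloads above let callbacks report each record's absolute position while walking a symbol stream. A hypothetical caller (the surrounding names and the zero InitialOffset are assumptions):

// Visit a module's symbol records, tracking each record's offset from
// the start of the stream so diagnostics can cite it.
CVSymbolVisitor Visitor(Callbacks);
if (auto EC = Visitor.visitSymbolStream(Symbols, /*InitialOffset=*/0))
  return EC;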
diff --git a/lib/DebugInfo/CodeView/DebugChecksumsSubsection.cpp b/lib/DebugInfo/CodeView/DebugChecksumsSubsection.cpp
index c31b8d1c96d5..ccc20eb74887 100644
--- a/lib/DebugInfo/CodeView/DebugChecksumsSubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugChecksumsSubsection.cpp
@@ -1,4 +1,4 @@
-//===- DebugChecksumsSubsection.cpp ----------------------*- C++ -*-===//
+//===- DebugChecksumsSubsection.cpp ---------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,10 +8,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
-
-#include "llvm/DebugInfo/CodeView/CodeViewError.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
+#include <cstdint>
+#include <cstring>
using namespace llvm;
using namespace llvm::codeview;
@@ -25,7 +32,7 @@ struct FileChecksumEntryHeader {
// Checksum bytes follow.
};
-Error llvm::VarStreamArrayExtractor<FileChecksumEntry>::
+Error VarStreamArrayExtractor<FileChecksumEntry>::
operator()(BinaryStreamRef Stream, uint32_t &Len, FileChecksumEntry &Item) {
BinaryStreamReader Reader(Stream);
@@ -48,6 +55,7 @@ Error DebugChecksumsSubsectionRef::initialize(BinaryStreamReader Reader) {
return Error::success();
}
+
Error DebugChecksumsSubsectionRef::initialize(BinaryStreamRef Section) {
BinaryStreamReader Reader(Section);
return initialize(Reader);
diff --git a/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp b/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp
index 21e2cc56075b..cef27787cfd1 100644
--- a/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp
@@ -1,4 +1,4 @@
-//===- DebugCrossExSubsection.cpp -------------------------------*- C++ -*-===//
+//===- DebugCrossExSubsection.cpp -----------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,8 +8,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/DebugCrossExSubsection.h"
-
#include "llvm/DebugInfo/CodeView/CodeViewError.h"
+#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/Error.h"
+#include <cstdint>
using namespace llvm;
using namespace llvm::codeview;
diff --git a/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp b/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp
index 2c4a0b779342..88c0076915b5 100644
--- a/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp
@@ -1,4 +1,4 @@
-//===- DebugCrossImpSubsection.cpp ------------------------------*- C++ -*-===//
+//===- DebugCrossImpSubsection.cpp ----------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,14 +8,21 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h"
-
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/DebugInfo/CodeView/CodeViewError.h"
#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include <algorithm>
+#include <cstdint>
+#include <utility>
+#include <vector>
using namespace llvm;
using namespace llvm::codeview;
-namespace llvm {
Error VarStreamArrayExtractor<CrossModuleImportItem>::
operator()(BinaryStreamRef Stream, uint32_t &Len,
codeview::CrossModuleImportItem &Item) {
@@ -34,7 +41,6 @@ operator()(BinaryStreamRef Stream, uint32_t &Len,
return EC;
return Error::success();
}
-}
Error DebugCrossModuleImportsSubsectionRef::initialize(
BinaryStreamReader Reader) {
diff --git a/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp b/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp
index e7719d05dbdc..077c103a615b 100644
--- a/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp
@@ -1,4 +1,4 @@
-//===- DebugInlineeLinesSubsection.cpp ------------------------*- C++-*-===//
+//===- DebugInlineeLinesSubsection.cpp ------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,11 +8,15 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
-
-#include "llvm/DebugInfo/CodeView/CodeViewError.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
-#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
-#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
using namespace llvm::codeview;
diff --git a/lib/DebugInfo/CodeView/DebugLinesSubsection.cpp b/lib/DebugInfo/CodeView/DebugLinesSubsection.cpp
index fbcad61d60a6..57ad40819fbc 100644
--- a/lib/DebugInfo/CodeView/DebugLinesSubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugLinesSubsection.cpp
@@ -1,4 +1,4 @@
-//===- DebugLinesSubsection.cpp -------------------------------*- C++-*-===//
+//===- DebugLinesSubsection.cpp -------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,18 +8,21 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h"
-
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/CodeViewError.h"
#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
-#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
-#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/Error.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
using namespace llvm::codeview;
Error LineColumnExtractor::operator()(BinaryStreamRef Stream, uint32_t &Len,
LineColumnEntry &Item) {
- using namespace codeview;
const LineBlockFragmentHeader *BlockHeader;
BinaryStreamReader Reader(Stream);
if (auto EC = Reader.readObject(BlockHeader))
diff --git a/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp b/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp
index de02525270c4..d723282eb715 100644
--- a/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp
@@ -1,4 +1,4 @@
-//===- DebugStringTableSubsection.cpp - CodeView String Table ---*- C++ -*-===//
+//===- DebugStringTableSubsection.cpp - CodeView String Table -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,10 +8,14 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
-
-#include "llvm/Support/BinaryStream.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/Error.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
using namespace llvm;
using namespace llvm::codeview;
@@ -23,6 +27,7 @@ Error DebugStringTableSubsectionRef::initialize(BinaryStreamRef Contents) {
Stream = Contents;
return Error::success();
}
+
Error DebugStringTableSubsectionRef::initialize(BinaryStreamReader &Reader) {
return Reader.readStreamRef(Stream);
}
diff --git a/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp b/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp
index d69eca018e0c..55f343c11e7f 100644
--- a/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp
+++ b/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp
@@ -1,4 +1,4 @@
-//===- DebugSubsectionRecord.cpp -----------------------------*- C++-*-===//
+//===- DebugSubsectionRecord.cpp ------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,16 +8,20 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
-
#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
using namespace llvm;
using namespace llvm::codeview;
-DebugSubsectionRecord::DebugSubsectionRecord()
- : Container(CodeViewContainer::ObjectFile),
- Kind(DebugSubsectionKind::None) {}
+DebugSubsectionRecord::DebugSubsectionRecord() = default;
DebugSubsectionRecord::DebugSubsectionRecord(DebugSubsectionKind Kind,
BinaryStreamRef Data,
diff --git a/lib/DebugInfo/CodeView/DebugSymbolRVASubsection.cpp b/lib/DebugInfo/CodeView/DebugSymbolRVASubsection.cpp
index 5f91b68f3ad8..60fbf9d747b2 100644
--- a/lib/DebugInfo/CodeView/DebugSymbolRVASubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugSymbolRVASubsection.cpp
@@ -1,4 +1,4 @@
-//===- DebugSymbolRVASubsection.cpp ------------------------------*- C++-*-===//
+//===- DebugSymbolRVASubsection.cpp ---------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,6 +8,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/BinaryStreamWriter.h"
+#include <cstdint>
using namespace llvm;
using namespace llvm::codeview;
diff --git a/lib/DebugInfo/CodeView/EnumTables.cpp b/lib/DebugInfo/CodeView/EnumTables.cpp
index ec00af28395e..4cfb55a31b35 100644
--- a/lib/DebugInfo/CodeView/EnumTables.cpp
+++ b/lib/DebugInfo/CodeView/EnumTables.cpp
@@ -1,4 +1,4 @@
-//===- EnumTables.cpp - Enum to string conversion tables --------*- C++ -*-===//
+//===- EnumTables.cpp - Enum to string conversion tables ------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,6 +8,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/EnumTables.h"
+#include "llvm/Support/ScopedPrinter.h"
+#include <type_traits>
using namespace llvm;
using namespace codeview;
@@ -333,6 +335,7 @@ static const EnumEntry<COFF::SectionCharacteristics>
namespace llvm {
namespace codeview {
+
ArrayRef<EnumEntry<SymbolKind>> getSymbolTypeNames() {
return makeArrayRef(SymbolTypeNames);
}
@@ -348,48 +351,63 @@ ArrayRef<EnumEntry<uint16_t>> getRegisterNames() {
ArrayRef<EnumEntry<uint32_t>> getPublicSymFlagNames() {
return makeArrayRef(PublicSymFlagNames);
}
+
ArrayRef<EnumEntry<uint8_t>> getProcSymFlagNames() {
return makeArrayRef(ProcSymFlagNames);
}
+
ArrayRef<EnumEntry<uint16_t>> getLocalFlagNames() {
return makeArrayRef(LocalFlags);
}
+
ArrayRef<EnumEntry<uint8_t>> getFrameCookieKindNames() {
return makeArrayRef(FrameCookieKinds);
}
+
ArrayRef<EnumEntry<SourceLanguage>> getSourceLanguageNames() {
return makeArrayRef(SourceLanguages);
}
+
ArrayRef<EnumEntry<uint32_t>> getCompileSym2FlagNames() {
return makeArrayRef(CompileSym2FlagNames);
}
+
ArrayRef<EnumEntry<uint32_t>> getCompileSym3FlagNames() {
return makeArrayRef(CompileSym3FlagNames);
}
+
ArrayRef<EnumEntry<uint32_t>> getFileChecksumNames() {
return makeArrayRef(FileChecksumNames);
}
+
ArrayRef<EnumEntry<unsigned>> getCPUTypeNames() {
return makeArrayRef(CPUTypeNames);
}
+
ArrayRef<EnumEntry<uint32_t>> getFrameProcSymFlagNames() {
return makeArrayRef(FrameProcSymFlagNames);
}
+
ArrayRef<EnumEntry<uint16_t>> getExportSymFlagNames() {
return makeArrayRef(ExportSymFlagNames);
}
+
ArrayRef<EnumEntry<uint32_t>> getModuleSubstreamKindNames() {
return makeArrayRef(ModuleSubstreamKindNames);
}
+
ArrayRef<EnumEntry<uint8_t>> getThunkOrdinalNames() {
return makeArrayRef(ThunkOrdinalNames);
}
+
ArrayRef<EnumEntry<uint16_t>> getTrampolineNames() {
return makeArrayRef(TrampolineNames);
}
+
ArrayRef<EnumEntry<COFF::SectionCharacteristics>>
getImageSectionCharacteristicNames() {
return makeArrayRef(ImageSectionCharacteristicNames);
}
-}
-}
+
+} // end namespace codeview
+} // end namespace llvm
diff --git a/lib/DebugInfo/CodeView/Formatters.cpp b/lib/DebugInfo/CodeView/Formatters.cpp
index ef00bd8570fa..1fa8d219d6ac 100644
--- a/lib/DebugInfo/CodeView/Formatters.cpp
+++ b/lib/DebugInfo/CodeView/Formatters.cpp
@@ -1,4 +1,4 @@
-//===- Formatters.cpp -------------------------------------------*- C++ -*-===//
+//===- Formatters.cpp -----------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,6 +8,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/Formatters.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
using namespace llvm;
using namespace llvm::codeview;
@@ -19,7 +23,7 @@ GuidAdapter::GuidAdapter(StringRef Guid)
GuidAdapter::GuidAdapter(ArrayRef<uint8_t> Guid)
: FormatAdapter(std::move(Guid)) {}
-void GuidAdapter::format(llvm::raw_ostream &Stream, StringRef Style) {
+void GuidAdapter::format(raw_ostream &Stream, StringRef Style) {
static const char *Lookup = "0123456789ABCDEF";
assert(Item.size() == 16 && "Expected 16-byte GUID");
diff --git a/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp b/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp
index 20f7e72c3af3..5aaf3f1453a8 100644
--- a/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp
+++ b/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp
@@ -1,4 +1,4 @@
-//===- LazyRandomTypeCollection.cpp ---------------------------- *- C++--*-===//
+//===- LazyRandomTypeCollection.cpp ---------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,12 +8,20 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
-
-#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/CodeView/CodeViewError.h"
#include "llvm/DebugInfo/CodeView/TypeName.h"
-#include "llvm/DebugInfo/CodeView/TypeServerHandler.h"
-#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
using namespace llvm;
using namespace llvm::codeview;
diff --git a/lib/DebugInfo/CodeView/StringsAndChecksums.cpp b/lib/DebugInfo/CodeView/StringsAndChecksums.cpp
index 928bf8c94f73..306af1d1ef6b 100644
--- a/lib/DebugInfo/CodeView/StringsAndChecksums.cpp
+++ b/lib/DebugInfo/CodeView/StringsAndChecksums.cpp
@@ -1,4 +1,4 @@
-//===- StringsAndChecksums.cpp ----------------------------------*- C++ -*-===//
+//===- StringsAndChecksums.cpp --------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,14 +8,18 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
+#include "llvm/Support/Error.h"
+#include <cassert>
using namespace llvm;
using namespace llvm::codeview;
-StringsAndChecksumsRef::StringsAndChecksumsRef() {}
+StringsAndChecksumsRef::StringsAndChecksumsRef() = default;
StringsAndChecksumsRef::StringsAndChecksumsRef(
const DebugStringTableSubsectionRef &Strings)
diff --git a/lib/DebugInfo/CodeView/SymbolSerializer.cpp b/lib/DebugInfo/CodeView/SymbolSerializer.cpp
index 9f2d619d1a1c..9a2e776feb75 100644
--- a/lib/DebugInfo/CodeView/SymbolSerializer.cpp
+++ b/lib/DebugInfo/CodeView/SymbolSerializer.cpp
@@ -1,4 +1,4 @@
-//===- SymbolSerializer.cpp -------------------------------------*- C++ -*-===//
+//===- SymbolSerializer.cpp -----------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,6 +8,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/SymbolSerializer.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include <cassert>
+#include <cstdint>
+#include <cstring>
using namespace llvm;
using namespace llvm::codeview;
@@ -15,7 +22,7 @@ using namespace llvm::codeview;
SymbolSerializer::SymbolSerializer(BumpPtrAllocator &Allocator,
CodeViewContainer Container)
: Storage(Allocator), RecordBuffer(MaxRecordLength),
- Stream(RecordBuffer, llvm::support::little), Writer(Stream),
+ Stream(RecordBuffer, support::little), Writer(Stream),
Mapping(Writer, Container) {}
Error SymbolSerializer::visitSymbolBegin(CVSymbol &Record) {
diff --git a/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp b/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
index 1226d5be3f3c..72cb9e2e3544 100644
--- a/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
+++ b/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
@@ -438,6 +438,25 @@ void llvm::codeview::discoverTypeIndices(const CVType &Type,
::discoverTypeIndices(Type.content(), Type.kind(), Refs);
}
+void llvm::codeview::discoverTypeIndices(const CVType &Type,
+ SmallVectorImpl<TypeIndex> &Indices) {
+
+ Indices.clear();
+
+ SmallVector<TiReference, 4> Refs;
+ discoverTypeIndices(Type, Refs);
+ if (Refs.empty())
+ return;
+
+ BinaryStreamReader Reader(Type.content(), support::little);
+ for (const auto &Ref : Refs) {
+ Reader.setOffset(Ref.Offset);
+ FixedStreamArray<TypeIndex> Run;
+ cantFail(Reader.readArray(Run, Ref.Count));
+ Indices.append(Run.begin(), Run.end());
+ }
+}
+
void llvm::codeview::discoverTypeIndices(ArrayRef<uint8_t> RecordData,
SmallVectorImpl<TiReference> &Refs) {
const RecordPrefix *P =
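The new discoverTypeIndices overload above decodes each TiReference run straight into concrete TypeIndex values. One hypothetical use, seeding a type dependency walk (Worklist and the isSimple filter are assumptions):

SmallVector<TypeIndex, 8> Indices;
discoverTypeIndices(Type, Indices);
for (TypeIndex TI : Indices)
  if (!TI.isSimple())        // skip built-in indices below the first user index
    Worklist.push_back(TI);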
diff --git a/lib/DebugInfo/CodeView/TypeSerializer.cpp b/lib/DebugInfo/CodeView/TypeSerializer.cpp
index 93c1198e36ce..003c13b4a20d 100644
--- a/lib/DebugInfo/CodeView/TypeSerializer.cpp
+++ b/lib/DebugInfo/CodeView/TypeSerializer.cpp
@@ -1,4 +1,4 @@
-//===- TypeSerialzier.cpp ---------------------------------------*- C++ -*-===//
+//===- TypeSerializer.cpp -------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,16 +8,27 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/TypeSerializer.h"
-
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/RecordSerialization.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/BinaryByteStream.h"
#include "llvm/Support/BinaryStreamWriter.h"
-
-#include <string.h>
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
using namespace llvm;
using namespace llvm::codeview;
namespace {
+
struct HashedType {
uint64_t Hash;
const uint8_t *Data;
@@ -30,20 +41,26 @@ struct HashedType {
struct HashedTypePtr {
HashedTypePtr() = default;
HashedTypePtr(HashedType *Ptr) : Ptr(Ptr) {}
+
HashedType *Ptr = nullptr;
};
-} // namespace
+
+} // end anonymous namespace
namespace llvm {
+
template <> struct DenseMapInfo<HashedTypePtr> {
static inline HashedTypePtr getEmptyKey() { return HashedTypePtr(nullptr); }
+
static inline HashedTypePtr getTombstoneKey() {
return HashedTypePtr(reinterpret_cast<HashedType *>(1));
}
+
static unsigned getHashValue(HashedTypePtr Val) {
assert(Val.Ptr != getEmptyKey().Ptr && Val.Ptr != getTombstoneKey().Ptr);
return Val.Ptr->Hash;
}
+
static bool isEqual(HashedTypePtr LHSP, HashedTypePtr RHSP) {
HashedType *LHS = LHSP.Ptr;
HashedType *RHS = RHSP.Ptr;
@@ -54,7 +71,8 @@ template <> struct DenseMapInfo<HashedTypePtr> {
return ::memcmp(LHS->Data, RHS->Data, LHS->Size) == 0;
}
};
-}
+
+} // end namespace llvm
/// Private implementation so that we don't leak our DenseMap instantiations to
/// users.
@@ -159,13 +177,13 @@ TypeSerializer::addPadding(MutableArrayRef<uint8_t> Record) {
TypeSerializer::TypeSerializer(BumpPtrAllocator &Storage, bool Hash)
: RecordStorage(Storage), RecordBuffer(MaxRecordLength * 2),
- Stream(RecordBuffer, llvm::support::little), Writer(Stream),
+ Stream(RecordBuffer, support::little), Writer(Stream),
Mapping(Writer) {
// RecordBuffer needs to be able to hold enough data so that if we are 1
// byte short of MaxRecordLen, and then we try to write MaxRecordLen bytes,
// we won't overflow.
if (Hash)
- Hasher = make_unique<TypeHasher>(Storage);
+ Hasher = llvm::make_unique<TypeHasher>(Storage);
}
TypeSerializer::~TypeSerializer() = default;
@@ -331,7 +349,7 @@ Error TypeSerializer::visitMemberEnd(CVMemberRecord &Record) {
uint8_t *SegmentBytes = RecordStorage.Allocate<uint8_t>(LengthWithSize);
auto SavedSegment = MutableArrayRef<uint8_t>(SegmentBytes, LengthWithSize);
- MutableBinaryByteStream CS(SavedSegment, llvm::support::little);
+ MutableBinaryByteStream CS(SavedSegment, support::little);
BinaryStreamWriter CW(CS);
if (auto EC = CW.writeBytes(CopyData))
return EC;
diff --git a/lib/DebugInfo/DWARF/CMakeLists.txt b/lib/DebugInfo/DWARF/CMakeLists.txt
index 6ca6e64bd8e6..11f94509e8fa 100644
--- a/lib/DebugInfo/DWARF/CMakeLists.txt
+++ b/lib/DebugInfo/DWARF/CMakeLists.txt
@@ -3,6 +3,7 @@ add_llvm_library(LLVMDebugInfoDWARF
DWARFAcceleratorTable.cpp
DWARFCompileUnit.cpp
DWARFContext.cpp
+ DWARFDataExtractor.cpp
DWARFDebugAbbrev.cpp
DWARFDebugArangeSet.cpp
DWARFDebugAranges.cpp
diff --git a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
index 87009bf1b6a1..9ae7c9a07f76 100644
--- a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
+++ b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
@@ -62,6 +62,45 @@ uint32_t DWARFAcceleratorTable::getHeaderDataLength() {
return Hdr.HeaderDataLength;
}
+ArrayRef<std::pair<DWARFAcceleratorTable::HeaderData::AtomType,
+ DWARFAcceleratorTable::HeaderData::Form>>
+DWARFAcceleratorTable::getAtomsDesc() {
+ return HdrData.Atoms;
+}
+
+bool DWARFAcceleratorTable::validateForms() {
+ for (auto Atom : getAtomsDesc()) {
+ DWARFFormValue FormValue(Atom.second);
+ switch (Atom.first) {
+ case dwarf::DW_ATOM_die_offset:
+ if ((!FormValue.isFormClass(DWARFFormValue::FC_Constant) &&
+ !FormValue.isFormClass(DWARFFormValue::FC_Flag)) ||
+ FormValue.getForm() == dwarf::DW_FORM_sdata)
+ return false;
+ default:
+ break;
+ }
+ }
+ return true;
+}
+
+uint32_t DWARFAcceleratorTable::readAtoms(uint32_t &HashDataOffset) {
+ uint32_t DieOffset = dwarf::DW_INVALID_OFFSET;
+
+ for (auto Atom : getAtomsDesc()) {
+ DWARFFormValue FormValue(Atom.second);
+ FormValue.extractValue(AccelSection, &HashDataOffset, NULL);
+ switch (Atom.first) {
+ case dwarf::DW_ATOM_die_offset:
+ DieOffset = *FormValue.getAsUnsignedConstant();
+ break;
+ default:
+ break;
+ }
+ }
+ return DieOffset;
+}
+
LLVM_DUMP_METHOD void DWARFAcceleratorTable::dump(raw_ostream &OS) const {
// Dump the header.
OS << "Magic = " << format("0x%08x", Hdr.Magic) << '\n'
@@ -121,8 +160,7 @@ LLVM_DUMP_METHOD void DWARFAcceleratorTable::dump(raw_ostream &OS) const {
continue;
}
while (AccelSection.isValidOffsetForDataOfSize(DataOffset, 4)) {
- unsigned StringOffset =
- getRelocatedValue(AccelSection, 4, &DataOffset, &Relocs);
+ unsigned StringOffset = AccelSection.getRelocatedValue(4, &DataOffset);
if (!StringOffset)
break;
OS << format(" Name: %08x \"%s\"\n", StringOffset,
diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp
index 381479461750..a18d4efec07a 100644
--- a/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -59,26 +59,13 @@ using DWARFLineTable = DWARFDebugLine::LineTable;
using FileLineInfoKind = DILineInfoSpecifier::FileLineInfoKind;
using FunctionNameKind = DILineInfoSpecifier::FunctionNameKind;
-uint64_t llvm::getRelocatedValue(const DataExtractor &Data, uint32_t Size,
- uint32_t *Off, const RelocAddrMap *Relocs,
- uint64_t *SectionIndex) {
- if (!Relocs)
- return Data.getUnsigned(Off, Size);
- RelocAddrMap::const_iterator AI = Relocs->find(*Off);
- if (AI == Relocs->end())
- return Data.getUnsigned(Off, Size);
- if (SectionIndex)
- *SectionIndex = AI->second.SectionIndex;
- return Data.getUnsigned(Off, Size) + AI->second.Value;
-}
-
static void dumpAccelSection(raw_ostream &OS, StringRef Name,
const DWARFSection& Section, StringRef StringSection,
bool LittleEndian) {
- DataExtractor AccelSection(Section.Data, LittleEndian, 0);
+ DWARFDataExtractor AccelSection(Section, LittleEndian, 0);
DataExtractor StrData(StringSection, LittleEndian, 0);
OS << "\n." << Name << " contents:\n";
- DWARFAcceleratorTable Accel(AccelSection, StrData, Section.Relocs);
+ DWARFAcceleratorTable Accel(AccelSection, StrData);
if (!Accel.extract())
return;
Accel.dump(OS);
@@ -88,7 +75,7 @@ static void
dumpDWARFv5StringOffsetsSection(raw_ostream &OS, StringRef SectionName,
const DWARFSection &StringOffsetsSection,
StringRef StringSection, bool LittleEndian) {
- DataExtractor StrOffsetExt(StringOffsetsSection.Data, LittleEndian, 0);
+ DWARFDataExtractor StrOffsetExt(StringOffsetsSection, LittleEndian, 0);
uint32_t Offset = 0;
uint64_t SectionSize = StringOffsetsSection.Data.size();
@@ -144,8 +131,8 @@ dumpDWARFv5StringOffsetsSection(raw_ostream &OS, StringRef SectionName,
while (Offset - ContributionBase < ContributionSize) {
OS << format("0x%8.8x: ", Offset);
// FIXME: We can only extract strings in DWARF32 format at the moment.
- uint64_t StringOffset = getRelocatedValue(
- StrOffsetExt, EntrySize, &Offset, &StringOffsetsSection.Relocs);
+ uint64_t StringOffset =
+ StrOffsetExt.getRelocatedValue(EntrySize, &Offset);
if (Format == DWARF32) {
OS << format("%8.8x ", StringOffset);
uint32_t StringOffset32 = (uint32_t)StringOffset;
@@ -287,11 +274,11 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpOptions DumpOpts) {
if (!CUDIE)
continue;
if (auto StmtOffset = toSectionOffset(CUDIE.find(DW_AT_stmt_list))) {
- DataExtractor lineData(getLineSection().Data, isLittleEndian(),
- savedAddressByteSize);
+ DWARFDataExtractor lineData(getLineSection(), isLittleEndian(),
+ savedAddressByteSize);
DWARFDebugLine::LineTable LineTable;
uint32_t Offset = *StmtOffset;
- LineTable.parse(lineData, &getLineSection().Relocs, &Offset);
+ LineTable.parse(lineData, &Offset);
LineTable.dump(OS);
}
}
@@ -310,8 +297,8 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpOptions DumpOpts) {
if (DumpType == DIDT_All || DumpType == DIDT_LineDwo) {
OS << "\n.debug_line.dwo contents:\n";
unsigned stmtOffset = 0;
- DataExtractor lineData(getLineDWOSection().Data, isLittleEndian(),
- savedAddressByteSize);
+ DWARFDataExtractor lineData(getLineDWOSection(), isLittleEndian(),
+ savedAddressByteSize);
DWARFDebugLine::LineTable LineTable;
while (LineTable.Prologue.parse(lineData, &stmtOffset)) {
LineTable.dump(OS);
@@ -348,11 +335,11 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpOptions DumpOpts) {
// sizes, but for simplicity we just use the address byte size of the last
// compile unit (there is no easy and fast way to associate address range
// list and the compile unit it describes).
- DataExtractor rangesData(getRangeSection().Data, isLittleEndian(),
- savedAddressByteSize);
+ DWARFDataExtractor rangesData(getRangeSection(), isLittleEndian(),
+ savedAddressByteSize);
offset = 0;
DWARFDebugRangeList rangeList;
- while (rangeList.extract(rangesData, &offset, getRangeSection().Relocs))
+ while (rangeList.extract(rangesData, &offset))
rangeList.dump(OS);
}
@@ -499,11 +486,13 @@ const DWARFDebugLoc *DWARFContext::getDebugLoc() {
if (Loc)
return Loc.get();
- DataExtractor LocData(getLocSection().Data, isLittleEndian(), 0);
- Loc.reset(new DWARFDebugLoc(getLocSection().Relocs));
+ Loc.reset(new DWARFDebugLoc);
// assume all compile units have the same address byte size
- if (getNumCompileUnits())
- Loc->parse(LocData, getCompileUnitAtIndex(0)->getAddressByteSize());
+ if (getNumCompileUnits()) {
+ DWARFDataExtractor LocData(getLocSection(), isLittleEndian(),
+ getCompileUnitAtIndex(0)->getAddressByteSize());
+ Loc->parse(LocData);
+ }
return Loc.get();
}
@@ -570,7 +559,7 @@ const DWARFDebugMacro *DWARFContext::getDebugMacro() {
const DWARFLineTable *
DWARFContext::getLineTableForUnit(DWARFUnit *U) {
if (!Line)
- Line.reset(new DWARFDebugLine(&getLineSection().Relocs));
+ Line.reset(new DWARFDebugLine);
auto UnitDIE = U->getUnitDIE();
if (!UnitDIE)
@@ -586,12 +575,12 @@ DWARFContext::getLineTableForUnit(DWARFUnit *U) {
return lt;
// Make sure the offset is good before we try to parse.
- if (stmtOffset >= U->getLineSection().size())
+ if (stmtOffset >= U->getLineSection().Data.size())
return nullptr;
// We have to parse it first.
- DataExtractor lineData(U->getLineSection(), isLittleEndian(),
- U->getAddressByteSize());
+ DWARFDataExtractor lineData(U->getLineSection(), isLittleEndian(),
+ U->getAddressByteSize());
return Line->getOrParseLineTable(lineData, stmtOffset);
}
@@ -870,13 +859,13 @@ static Expected<SymInfo> getSymbolInfo(const object::ObjectFile &Obj,
Expected<uint64_t> SymAddrOrErr = Sym->getAddress();
if (!SymAddrOrErr)
- return createError("error: failed to compute symbol address: ",
+ return createError("failed to compute symbol address: ",
SymAddrOrErr.takeError());
// Also remember what section this symbol is in for later
auto SectOrErr = Sym->getSection();
if (!SectOrErr)
- return createError("error: failed to get symbol section: ",
+ return createError("failed to get symbol section: ",
SectOrErr.takeError());
RSec = *SectOrErr;
@@ -937,8 +926,14 @@ Error DWARFContextInMemory::maybeDecompress(const SectionRef &Sec,
return Error::success();
}
-DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
- const LoadedObjectInfo *L)
+ErrorPolicy DWARFContextInMemory::defaultErrorHandler(Error E) {
+ errs() << "error: " + toString(std::move(E)) << '\n';
+ return ErrorPolicy::Continue;
+}
+
+DWARFContextInMemory::DWARFContextInMemory(
+ const object::ObjectFile &Obj, const LoadedObjectInfo *L,
+ function_ref<ErrorPolicy(Error)> HandleError)
: FileName(Obj.getFileName()), IsLittleEndian(Obj.isLittleEndian()),
AddressSize(Obj.getBytesInAddress()) {
for (const SectionRef &Section : Obj.sections()) {
@@ -961,9 +956,10 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
Section.getContents(data);
if (auto Err = maybeDecompress(Section, name, data)) {
- errs() << "error: failed to decompress '" + name + "', " +
- toString(std::move(Err))
- << '\n';
+ ErrorPolicy EP = HandleError(
+ createError("failed to decompress '" + name + "', ", std::move(Err)));
+ if (EP == ErrorPolicy::Halt)
+ return;
continue;
}
@@ -1055,7 +1051,8 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
Expected<SymInfo> SymInfoOrErr = getSymbolInfo(Obj, Reloc, L, AddrCache);
if (!SymInfoOrErr) {
- errs() << toString(SymInfoOrErr.takeError()) << '\n';
+ if (HandleError(SymInfoOrErr.takeError()) == ErrorPolicy::Halt)
+ return;
continue;
}
@@ -1064,7 +1061,11 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
if (V.error()) {
SmallString<32> Name;
Reloc.getTypeName(Name);
- errs() << "error: failed to compute relocation: " << Name << "\n";
+ ErrorPolicy EP = HandleError(
+ createError("failed to compute relocation: " + Name + ", ",
+ errorCodeToError(object_error::parse_failed)));
+ if (EP == ErrorPolicy::Halt)
+ return;
continue;
}
RelocAddrEntry Rel = {SymInfoOrErr->SectionIndex, Val};
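DWARFContextInMemory now takes an error-policy callback instead of unconditionally printing to errs() and pressing on; defaultErrorHandler preserves the old print-and-continue behavior. A hypothetical caller that instead halts parsing at the first problem (Obj is the usual object::ObjectFile from context):

DWARFContextInMemory DICtx(Obj, /*L=*/nullptr, [](Error E) {
  errs() << "error: " << toString(std::move(E)) << '\n';
  return ErrorPolicy::Halt;           // stop instead of continuing
});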
diff --git a/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp b/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp
new file mode 100644
index 000000000000..001097e56c71
--- /dev/null
+++ b/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp
@@ -0,0 +1,24 @@
+//===- DWARFDataExtractor.cpp ---------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
+
+using namespace llvm;
+
+uint64_t DWARFDataExtractor::getRelocatedValue(uint32_t Size, uint32_t *Off,
+ uint64_t *SecNdx) const {
+ if (!RelocMap)
+ return getUnsigned(Off, Size);
+ RelocAddrMap::const_iterator AI = RelocMap->find(*Off);
+ if (AI == RelocMap->end())
+ return getUnsigned(Off, Size);
+ if (SecNdx)
+ *SecNdx = AI->second.SectionIndex;
+ return getUnsigned(Off, Size) + AI->second.Value;
+}
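The new DWARFDataExtractor bundles a section's relocation map with the usual DataExtractor, so every read site applies relocations uniformly instead of threading a RelocAddrMap through each parser. The underlying model, as a standalone sketch (simplified types, little-endian only):

#include <cstdint>
#include <map>
#include <vector>

// In an unlinked object the section bytes hold only the addend; the
// relocation keyed by the read offset supplies the symbol value.
static uint64_t readRelocated(const std::vector<uint8_t> &Data, uint32_t Off,
                              uint32_t Size,
                              const std::map<uint32_t, uint64_t> &Relocs) {
  uint64_t V = 0;
  for (uint32_t I = 0; I < Size; ++I)
    V |= uint64_t(Data[Off + I]) << (8 * I); // little-endian read
  auto It = Relocs.find(Off);
  return It == Relocs.end() ? V : V + It->second;
}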
diff --git a/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
index 1551974b822a..976bc4651ae6 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
@@ -21,13 +21,13 @@ using namespace dwarf;
bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U,
uint32_t *OffsetPtr) {
- DataExtractor DebugInfoData = U.getDebugInfoExtractor();
+ DWARFDataExtractor DebugInfoData = U.getDebugInfoExtractor();
const uint32_t UEndOffset = U.getNextUnitOffset();
return extractFast(U, OffsetPtr, DebugInfoData, UEndOffset, 0);
}
bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U, uint32_t *OffsetPtr,
- const DataExtractor &DebugInfoData,
+ const DWARFDataExtractor &DebugInfoData,
uint32_t UEndOffset, uint32_t D) {
Offset = *OffsetPtr;
Depth = D;
diff --git a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index ad5647f3e03d..7d180564e9f7 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -94,8 +94,8 @@ void DWARFDebugLine::Prologue::dump(raw_ostream &OS) const {
// Parse v2-v4 directory and file tables.
static void
-parseV2DirFileTables(DataExtractor DebugLineData, uint32_t *OffsetPtr,
- uint64_t EndPrologueOffset,
+parseV2DirFileTables(const DWARFDataExtractor &DebugLineData,
+ uint32_t *OffsetPtr, uint64_t EndPrologueOffset,
std::vector<StringRef> &IncludeDirectories,
std::vector<DWARFDebugLine::FileNameEntry> &FileNames) {
while (*OffsetPtr < EndPrologueOffset) {
@@ -122,7 +122,7 @@ parseV2DirFileTables(DataExtractor DebugLineData, uint32_t *OffsetPtr,
// Returns the descriptors, or an empty vector if we did not find a path or
// ran off the end of the prologue.
static ContentDescriptors
-parseV5EntryFormat(DataExtractor DebugLineData, uint32_t *OffsetPtr,
+parseV5EntryFormat(const DWARFDataExtractor &DebugLineData, uint32_t *OffsetPtr,
uint64_t EndPrologueOffset) {
ContentDescriptors Descriptors;
int FormatCount = DebugLineData.getU8(OffsetPtr);
@@ -142,8 +142,8 @@ parseV5EntryFormat(DataExtractor DebugLineData, uint32_t *OffsetPtr,
}
static bool
-parseV5DirFileTables(DataExtractor DebugLineData, uint32_t *OffsetPtr,
- uint64_t EndPrologueOffset,
+parseV5DirFileTables(const DWARFDataExtractor &DebugLineData,
+ uint32_t *OffsetPtr, uint64_t EndPrologueOffset,
const DWARFFormParams &FormParams,
std::vector<StringRef> &IncludeDirectories,
std::vector<DWARFDebugLine::FileNameEntry> &FileNames) {
@@ -212,7 +212,7 @@ parseV5DirFileTables(DataExtractor DebugLineData, uint32_t *OffsetPtr,
return true;
}
-bool DWARFDebugLine::Prologue::parse(DataExtractor DebugLineData,
+bool DWARFDebugLine::Prologue::parse(const DWARFDataExtractor &DebugLineData,
uint32_t *OffsetPtr) {
const uint64_t PrologueOffset = *OffsetPtr;
@@ -381,20 +381,19 @@ DWARFDebugLine::getLineTable(uint32_t Offset) const {
}
const DWARFDebugLine::LineTable *
-DWARFDebugLine::getOrParseLineTable(DataExtractor DebugLineData,
+DWARFDebugLine::getOrParseLineTable(const DWARFDataExtractor &DebugLineData,
uint32_t Offset) {
std::pair<LineTableIter, bool> Pos =
LineTableMap.insert(LineTableMapTy::value_type(Offset, LineTable()));
LineTable *LT = &Pos.first->second;
if (Pos.second) {
- if (!LT->parse(DebugLineData, RelocMap, &Offset))
+ if (!LT->parse(DebugLineData, &Offset))
return nullptr;
}
return LT;
}
-bool DWARFDebugLine::LineTable::parse(DataExtractor DebugLineData,
- const RelocAddrMap *RMap,
+bool DWARFDebugLine::LineTable::parse(const DWARFDataExtractor &DebugLineData,
uint32_t *OffsetPtr) {
const uint32_t DebugLineOffset = *OffsetPtr;
@@ -443,8 +442,7 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor DebugLineData,
// relocatable address. All of the other statement program opcodes
// that affect the address register add a delta to it. This instruction
// stores a relocatable value into it instead.
- State.Row.Address = getRelocatedValue(
- DebugLineData, DebugLineData.getAddressSize(), OffsetPtr, RMap);
+ State.Row.Address = DebugLineData.getRelocatedAddress(OffsetPtr);
break;
case DW_LNE_define_file:
diff --git a/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp b/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
index 2178bef65d1d..c240dd7406d9 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
@@ -40,9 +40,9 @@ void DWARFDebugLoc::dump(raw_ostream &OS) const {
}
}
-void DWARFDebugLoc::parse(DataExtractor data, unsigned AddressSize) {
+void DWARFDebugLoc::parse(const DWARFDataExtractor &data) {
uint32_t Offset = 0;
- while (data.isValidOffset(Offset+AddressSize-1)) {
+ while (data.isValidOffset(Offset+data.getAddressSize()-1)) {
Locations.resize(Locations.size() + 1);
LocationList &Loc = Locations.back();
Loc.Offset = Offset;
@@ -51,8 +51,8 @@ void DWARFDebugLoc::parse(DataExtractor data, unsigned AddressSize) {
while (true) {
// A beginning and ending address offsets.
Entry E;
- E.Begin = getRelocatedValue(data, AddressSize, &Offset, &RelocMap);
- E.End = getRelocatedValue(data, AddressSize, &Offset, &RelocMap);
+ E.Begin = data.getRelocatedAddress(&Offset);
+ E.End = data.getRelocatedAddress(&Offset);
// The end of any given location list is marked by an end of list entry,
// which consists of a 0 for the beginning address offset and a 0 for the
diff --git a/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp b/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
index 43201293fe60..0b6ae86fd94b 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
@@ -23,8 +23,8 @@ void DWARFDebugRangeList::clear() {
Entries.clear();
}
-bool DWARFDebugRangeList::extract(DataExtractor data, uint32_t *offset_ptr,
- const RelocAddrMap &Relocs) {
+bool DWARFDebugRangeList::extract(const DWARFDataExtractor &data,
+ uint32_t *offset_ptr) {
clear();
if (!data.isValidOffset(*offset_ptr))
return false;
@@ -35,10 +35,9 @@ bool DWARFDebugRangeList::extract(DataExtractor data, uint32_t *offset_ptr,
while (true) {
RangeListEntry entry;
uint32_t prev_offset = *offset_ptr;
- entry.StartAddress = getRelocatedValue(data, AddressSize, offset_ptr,
- &Relocs, &entry.SectionIndex);
- entry.EndAddress =
- getRelocatedValue(data, AddressSize, offset_ptr, &Relocs);
+ entry.StartAddress =
+ data.getRelocatedAddress(offset_ptr, &entry.SectionIndex);
+ entry.EndAddress = data.getRelocatedAddress(offset_ptr);
// Check that both values were extracted correctly.
if (*offset_ptr != prev_offset + 2 * AddressSize) {
diff --git a/lib/DebugInfo/DWARF/DWARFDie.cpp b/lib/DebugInfo/DWARF/DWARFDie.cpp
index b4b682dd11b5..ef416f72ad17 100644
--- a/lib/DebugInfo/DWARF/DWARFDie.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDie.cpp
@@ -308,7 +308,7 @@ void DWARFDie::dump(raw_ostream &OS, unsigned RecurseDepth, unsigned Indent,
DIDumpOptions DumpOpts) const {
if (!isValid())
return;
- DataExtractor debug_info_data = U->getDebugInfoExtractor();
+ DWARFDataExtractor debug_info_data = U->getDebugInfoExtractor();
const uint32_t Offset = getOffset();
uint32_t offset = Offset;
diff --git a/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/lib/DebugInfo/DWARF/DWARFFormValue.cpp
index 861114bde1f2..83a7792e1244 100644
--- a/lib/DebugInfo/DWARF/DWARFFormValue.cpp
+++ b/lib/DebugInfo/DWARF/DWARFFormValue.cpp
@@ -275,7 +275,7 @@ bool DWARFFormValue::isFormClass(DWARFFormValue::FormClass FC) const {
FC == FC_SectionOffset;
}
-bool DWARFFormValue::extractValue(const DataExtractor &Data,
+bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data,
uint32_t *OffsetPtr, const DWARFUnit *CU) {
U = CU;
bool Indirect = false;
@@ -290,10 +290,9 @@ bool DWARFFormValue::extractValue(const DataExtractor &Data,
case DW_FORM_ref_addr: {
if (!U)
return false;
- uint16_t AddrSize = (Form == DW_FORM_addr) ? U->getAddressByteSize()
- : U->getRefAddrByteSize();
- Value.uval = getRelocatedValue(Data, AddrSize, OffsetPtr,
- U->getRelocMap(), &Value.SectionIndex);
+ uint16_t Size = (Form == DW_FORM_addr) ? U->getAddressByteSize()
+ : U->getRefAddrByteSize();
+ Value.uval = Data.getRelocatedValue(Size, OffsetPtr, &Value.SectionIndex);
break;
}
case DW_FORM_exprloc:
@@ -333,11 +332,9 @@ bool DWARFFormValue::extractValue(const DataExtractor &Data,
case DW_FORM_ref4:
case DW_FORM_ref_sup4:
case DW_FORM_strx4:
- case DW_FORM_addrx4: {
- const RelocAddrMap *RelocMap = U ? U->getRelocMap() : nullptr;
- Value.uval = getRelocatedValue(Data, 4, OffsetPtr, RelocMap);
+ case DW_FORM_addrx4:
+ Value.uval = Data.getRelocatedValue(4, OffsetPtr);
break;
- }
case DW_FORM_data8:
case DW_FORM_ref8:
case DW_FORM_ref_sup8:
@@ -365,8 +362,8 @@ bool DWARFFormValue::extractValue(const DataExtractor &Data,
case DW_FORM_strp_sup: {
if (!U)
return false;
- Value.uval = getRelocatedValue(Data, U->getDwarfOffsetByteSize(),
- OffsetPtr, U->getRelocMap());
+ Value.uval =
+ Data.getRelocatedValue(U->getDwarfOffsetByteSize(), OffsetPtr);
break;
}
case DW_FORM_flag_present:
@@ -576,7 +573,6 @@ Optional<const char *> DWARFFormValue::getAsCString() const {
uint64_t StrOffset;
if (!U->getStringOffsetSectionItem(Offset, StrOffset))
return None;
- StrOffset += U->getStringOffsetSectionRelocation(Offset);
Offset = StrOffset;
}
if (const char *Str = U->getStringExtractor().getCStr(&Offset)) {
diff --git a/lib/DebugInfo/DWARF/DWARFUnit.cpp b/lib/DebugInfo/DWARF/DWARFUnit.cpp
index fd9c7c2b1d46..043bdb874f43 100644
--- a/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -32,8 +32,7 @@ using namespace dwarf;
void DWARFUnitSectionBase::parse(DWARFContext &C, const DWARFSection &Section) {
parseImpl(C, Section, C.getDebugAbbrev(), &C.getRangeSection(),
C.getStringSection(), C.getStringOffsetSection(),
- &C.getAddrSection(), C.getLineSection().Data, C.isLittleEndian(),
- false);
+ &C.getAddrSection(), C.getLineSection(), C.isLittleEndian(), false);
}
void DWARFUnitSectionBase::parseDWO(DWARFContext &C,
@@ -41,15 +40,15 @@ void DWARFUnitSectionBase::parseDWO(DWARFContext &C,
DWARFUnitIndex *Index) {
parseImpl(C, DWOSection, C.getDebugAbbrevDWO(), &C.getRangeDWOSection(),
C.getStringDWOSection(), C.getStringOffsetDWOSection(),
- &C.getAddrSection(), C.getLineDWOSection().Data, C.isLittleEndian(),
+ &C.getAddrSection(), C.getLineDWOSection(), C.isLittleEndian(),
true);
}
DWARFUnit::DWARFUnit(DWARFContext &DC, const DWARFSection &Section,
const DWARFDebugAbbrev *DA, const DWARFSection *RS,
StringRef SS, const DWARFSection &SOS,
- const DWARFSection *AOS, StringRef LS, bool LE, bool IsDWO,
- const DWARFUnitSectionBase &UnitSection,
+ const DWARFSection *AOS, const DWARFSection &LS, bool LE,
+ bool IsDWO, const DWARFUnitSectionBase &UnitSection,
const DWARFUnitIndex::Entry *IndexEntry)
: Context(DC), InfoSection(Section), Abbrev(DA), RangeSection(RS),
LineSection(LS), StringSection(SS), StringOffsetSection(SOS),
@@ -65,33 +64,23 @@ bool DWARFUnit::getAddrOffsetSectionItem(uint32_t Index,
uint32_t Offset = AddrOffsetSectionBase + Index * getAddressByteSize();
if (AddrOffsetSection->Data.size() < Offset + getAddressByteSize())
return false;
- DataExtractor DA(AddrOffsetSection->Data, isLittleEndian,
- getAddressByteSize());
- Result = getRelocatedValue(DA, getAddressByteSize(), &Offset,
- &AddrOffsetSection->Relocs);
+ DWARFDataExtractor DA(*AddrOffsetSection, isLittleEndian,
+ getAddressByteSize());
+ Result = DA.getRelocatedAddress(&Offset);
return true;
}
bool DWARFUnit::getStringOffsetSectionItem(uint32_t Index,
uint64_t &Result) const {
- unsigned ItemSize = getFormat() == DWARF64 ? 8 : 4;
+ unsigned ItemSize = getDwarfOffsetByteSize();
uint32_t Offset = StringOffsetSectionBase + Index * ItemSize;
if (StringOffsetSection.Data.size() < Offset + ItemSize)
return false;
- DataExtractor DA(StringOffsetSection.Data, isLittleEndian, 0);
- Result = ItemSize == 4 ? DA.getU32(&Offset) : DA.getU64(&Offset);
+ DWARFDataExtractor DA(StringOffsetSection, isLittleEndian, 0);
+ Result = DA.getRelocatedValue(ItemSize, &Offset);
return true;
}
-uint64_t DWARFUnit::getStringOffsetSectionRelocation(uint32_t Index) const {
- unsigned ItemSize = getFormat() == DWARF64 ? 8 : 4;
- uint64_t ByteOffset = StringOffsetSectionBase + Index * ItemSize;
- RelocAddrMap::const_iterator AI = getStringOffsetsRelocMap().find(ByteOffset);
- if (AI != getStringOffsetsRelocMap().end())
- return AI->second.Value;
- return 0;
-}
-
bool DWARFUnit::extractImpl(DataExtractor debug_info, uint32_t *offset_ptr) {
Length = debug_info.getU32(offset_ptr);
// FIXME: Support DWARF64.
@@ -149,14 +138,13 @@ bool DWARFUnit::extract(DataExtractor debug_info, uint32_t *offset_ptr) {
}
bool DWARFUnit::extractRangeList(uint32_t RangeListOffset,
- DWARFDebugRangeList &RangeList) const {
+ DWARFDebugRangeList &RangeList) const {
// Require that compile unit is extracted.
assert(!DieArray.empty());
- DataExtractor RangesData(RangeSection->Data, isLittleEndian,
- getAddressByteSize());
+ DWARFDataExtractor RangesData(*RangeSection, isLittleEndian,
+ getAddressByteSize());
uint32_t ActualRangeListOffset = RangeSectionBase + RangeListOffset;
- return RangeList.extract(RangesData, &ActualRangeListOffset,
- RangeSection->Relocs);
+ return RangeList.extract(RangesData, &ActualRangeListOffset);
}
void DWARFUnit::clear() {
@@ -190,7 +178,7 @@ void DWARFUnit::extractDIEsToVector(
uint32_t DIEOffset = Offset + getHeaderSize();
uint32_t NextCUOffset = getNextUnitOffset();
DWARFDebugInfoEntry DIE;
- DataExtractor DebugInfoData = getDebugInfoExtractor();
+ DWARFDataExtractor DebugInfoData = getDebugInfoExtractor();
uint32_t Depth = 0;
bool IsCUDie = true;
diff --git a/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/lib/DebugInfo/DWARF/DWARFVerifier.cpp
index 41907e570563..0a10e6b78911 100644
--- a/lib/DebugInfo/DWARF/DWARFVerifier.cpp
+++ b/lib/DebugInfo/DWARF/DWARFVerifier.cpp
@@ -280,11 +280,10 @@ bool DWARFVerifier::handleDebugLine() {
bool DWARFVerifier::handleAppleNames() {
NumAppleNamesErrors = 0;
- DataExtractor AppleNamesSection(DCtx.getAppleNamesSection().Data,
- DCtx.isLittleEndian(), 0);
+ DWARFDataExtractor AppleNamesSection(DCtx.getAppleNamesSection(),
+ DCtx.isLittleEndian(), 0);
DataExtractor StrData(DCtx.getStringSection(), DCtx.isLittleEndian(), 0);
- DWARFAcceleratorTable AppleNames(AppleNamesSection, StrData,
- DCtx.getAppleNamesSection().Relocs);
+ DWARFAcceleratorTable AppleNames(AppleNamesSection, StrData);
if (!AppleNames.extract()) {
return true;
@@ -292,20 +291,80 @@ bool DWARFVerifier::handleAppleNames() {
OS << "Verifying .apple_names...\n";
- // Verify that all buckets have a valid hash index or are empty
+ // Verify that all buckets have a valid hash index or are empty.
uint32_t NumBuckets = AppleNames.getNumBuckets();
uint32_t NumHashes = AppleNames.getNumHashes();
uint32_t BucketsOffset =
AppleNames.getSizeHdr() + AppleNames.getHeaderDataLength();
+ uint32_t HashesBase = BucketsOffset + NumBuckets * 4;
+ uint32_t OffsetsBase = HashesBase + NumHashes * 4;
for (uint32_t BucketIdx = 0; BucketIdx < NumBuckets; ++BucketIdx) {
uint32_t HashIdx = AppleNamesSection.getU32(&BucketsOffset);
if (HashIdx >= NumHashes && HashIdx != UINT32_MAX) {
- OS << format("error: Bucket[%d] has invalid hash index: [%d]\n",
- BucketIdx, HashIdx);
+ OS << format("error: Bucket[%d] has invalid hash index: %u\n", BucketIdx,
+ HashIdx);
++NumAppleNamesErrors;
}
}
+
+ uint32_t NumAtoms = AppleNames.getAtomsDesc().size();
+ if (NumAtoms == 0) {
+ OS << "error: no atoms; failed to read HashData\n";
+ ++NumAppleNamesErrors;
+ return false;
+ }
+
+ if (!AppleNames.validateForms()) {
+ OS << "error: unsupported form; failed to read HashData\n";
+ ++NumAppleNamesErrors;
+ return false;
+ }
+
+ for (uint32_t HashIdx = 0; HashIdx < NumHashes; ++HashIdx) {
+ uint32_t HashOffset = HashesBase + 4 * HashIdx;
+ uint32_t DataOffset = OffsetsBase + 4 * HashIdx;
+ uint32_t Hash = AppleNamesSection.getU32(&HashOffset);
+ uint32_t HashDataOffset = AppleNamesSection.getU32(&DataOffset);
+ if (!AppleNamesSection.isValidOffsetForDataOfSize(HashDataOffset,
+ sizeof(uint64_t))) {
+ OS << format("error: Hash[%d] has invalid HashData offset: 0x%08x\n",
+ HashIdx, HashDataOffset);
+ ++NumAppleNamesErrors;
+ }
+
+ uint32_t StrpOffset;
+ uint32_t StringOffset;
+ uint32_t StringCount = 0;
+ uint32_t DieOffset = dwarf::DW_INVALID_OFFSET;
+
+ while ((StrpOffset = AppleNamesSection.getU32(&HashDataOffset)) != 0) {
+ const uint32_t NumHashDataObjects =
+ AppleNamesSection.getU32(&HashDataOffset);
+ for (uint32_t HashDataIdx = 0; HashDataIdx < NumHashDataObjects;
+ ++HashDataIdx) {
+ DieOffset = AppleNames.readAtoms(HashDataOffset);
+ if (!DCtx.getDIEForOffset(DieOffset)) {
+ const uint32_t BucketIdx =
+ NumBuckets ? (Hash % NumBuckets) : UINT32_MAX;
+ StringOffset = StrpOffset;
+ const char *Name = StrData.getCStr(&StringOffset);
+ if (!Name)
+ Name = "<NULL>";
+
+ OS << format(
+ "error: .apple_names Bucket[%d] Hash[%d] = 0x%08x "
+ "Str[%u] = 0x%08x "
+ "DIE[%d] = 0x%08x is not a valid DIE offset for \"%s\".\n",
+ BucketIdx, HashIdx, Hash, StringCount, StrpOffset, HashDataIdx,
+ DieOffset, Name);
+
+ ++NumAppleNamesErrors;
+ }
+ }
+ ++StringCount;
+ }
+ }
return NumAppleNamesErrors == 0;
}
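
The verification loop above leans on the fixed layout of an Apple accelerator table: after the header come three parallel 4-byte arrays (buckets, then hashes, then hash-data offsets), which is why HashesBase and OffsetsBase are plain arithmetic. A small sketch of that arithmetic with made-up header values:

#include <cstdint>
#include <cstdio>

// The three parallel arrays after the accelerator-table header, located by
// plain arithmetic; the header sizes here are example values, not real ones.
int main() {
  const uint32_t SizeHdr = 12, HeaderDataLength = 16;
  const uint32_t NumBuckets = 8, NumHashes = 20;
  const uint32_t BucketsOffset = SizeHdr + HeaderDataLength;
  const uint32_t HashesBase = BucketsOffset + NumBuckets * 4;
  const uint32_t OffsetsBase = HashesBase + NumHashes * 4;
  for (uint32_t HashIdx = 0; HashIdx < NumHashes; ++HashIdx)
    std::printf("Hash[%u]: hash at 0x%x, HashData offset at 0x%x\n", HashIdx,
                HashesBase + 4 * HashIdx, OffsetsBase + 4 * HashIdx);
}
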
diff --git a/lib/DebugInfo/PDB/Native/DbiModuleList.cpp b/lib/DebugInfo/PDB/Native/DbiModuleList.cpp
index 434f775097e0..eea70b229c67 100644
--- a/lib/DebugInfo/PDB/Native/DbiModuleList.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiModuleList.cpp
@@ -1,4 +1,4 @@
-//===- DbiModuleList.cpp - PDB module information list ----------*- C++ -*-===//
+//===- DbiModuleList.cpp - PDB module information list --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,10 +6,17 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/PDB/Native/DbiModuleList.h"
+#include "llvm/DebugInfo/PDB/Native/DbiModuleList.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
+#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/Error.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
using namespace llvm;
using namespace llvm::pdb;
diff --git a/lib/DebugInfo/PDB/Native/Hash.cpp b/lib/DebugInfo/PDB/Native/Hash.cpp
index 2ad3f55dc5c3..61188ece2dcb 100644
--- a/lib/DebugInfo/PDB/Native/Hash.cpp
+++ b/lib/DebugInfo/PDB/Native/Hash.cpp
@@ -8,10 +8,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/Native/Hash.h"
-
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/JamCRC.h"
+#include <cstdint>
using namespace llvm;
using namespace llvm::support;
diff --git a/lib/DebugInfo/PDB/Native/HashTable.cpp b/lib/DebugInfo/PDB/Native/HashTable.cpp
index ebf8c9c04db1..439217f91d04 100644
--- a/lib/DebugInfo/PDB/Native/HashTable.cpp
+++ b/lib/DebugInfo/PDB/Native/HashTable.cpp
@@ -1,4 +1,4 @@
-//===- HashTable.cpp - PDB Hash Table ---------------------------*- C++ -*-===//
+//===- HashTable.cpp - PDB Hash Table -------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,12 +8,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/Native/HashTable.h"
-
#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/SparseBitVector.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
-
-#include <assert.h>
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <utility>
using namespace llvm;
using namespace llvm::pdb;
@@ -106,9 +110,11 @@ void HashTable::clear() {
}
uint32_t HashTable::capacity() const { return Buckets.size(); }
+
uint32_t HashTable::size() const { return Present.count(); }
HashTableIterator HashTable::begin() const { return HashTableIterator(*this); }
+
HashTableIterator HashTable::end() const {
return HashTableIterator(*this, 0, true);
}
diff --git a/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp b/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
index 83c56574a16e..2e1f61c7a25d 100644
--- a/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
+++ b/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
@@ -9,11 +9,11 @@
#include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h"
#include "llvm/ADT/iterator_range.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
-#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
-#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/BinaryStreamRef.h"
#include "llvm/Support/Error.h"
@@ -97,7 +97,7 @@ ModuleDebugStreamRef::symbols(bool *HadError) const {
return make_range(SymbolArray.begin(HadError), SymbolArray.end());
}
-llvm::iterator_range<ModuleDebugStreamRef::DebugSubsectionIterator>
+iterator_range<ModuleDebugStreamRef::DebugSubsectionIterator>
ModuleDebugStreamRef::subsections() const {
return make_range(Subsections.begin(), Subsections.end());
}
diff --git a/lib/DebugInfo/PDB/Native/ModuleDebugStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/ModuleDebugStreamBuilder.cpp
deleted file mode 100644
index e69de29bb2d1..000000000000
--- a/lib/DebugInfo/PDB/Native/ModuleDebugStreamBuilder.cpp
+++ /dev/null
diff --git a/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp b/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
index 4f90cd9cd8ac..354b8c0e07ff 100644
--- a/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
+++ b/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
@@ -1,4 +1,4 @@
-//===- NamedStreamMap.cpp - PDB Named Stream Map ----------------*- C++ -*-===//
+//===- NamedStreamMap.cpp - PDB Named Stream Map --------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,17 +8,20 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/Native/NamedStreamMap.h"
-
-#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/DebugInfo/PDB/Native/HashTable.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/BinaryStreamRef.h"
+#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include <algorithm>
+#include <cassert>
#include <cstdint>
+#include <tuple>
using namespace llvm;
using namespace llvm::pdb;
diff --git a/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp b/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp
index c23120041164..a65782e2d4fc 100644
--- a/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp
@@ -32,9 +32,7 @@ std::unique_ptr<PDBSymbol>
NativeEnumModules::getChildAtIndex(uint32_t Index) const {
if (Index >= Modules.getModuleCount())
return nullptr;
- return std::unique_ptr<PDBSymbol>(new PDBSymbolCompiland(
- Session, std::unique_ptr<IPDBRawSymbol>(new NativeCompilandSymbol(
- Session, 0, Modules.getModuleDescriptor(Index)))));
+ return Session.createCompilandSymbol(Modules.getModuleDescriptor(Index));
}
std::unique_ptr<PDBSymbol> NativeEnumModules::getNext() {
diff --git a/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
index ed6db63edbab..b4f5c96ce66b 100644
--- a/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
@@ -1,4 +1,4 @@
-//===- NativeRawSymbol.cpp - Native implementation of IPDBRawSymbol -*- C++ -*-===//
+//===- NativeRawSymbol.cpp - Native implementation of IPDBRawSymbol -------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,16 +8,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
-#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
-#include "llvm/DebugInfo/PDB/PDBExtras.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
-#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h"
-#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h"
-#include "llvm/Support/ConvertUTF.h"
-#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::pdb;
@@ -49,7 +40,7 @@ NativeRawSymbol::findInlineFramesByRVA(uint32_t RVA) const {
return nullptr;
}
-void NativeRawSymbol::getDataBytes(llvm::SmallVector<uint8_t, 32> &bytes) const {
+void NativeRawSymbol::getDataBytes(SmallVector<uint8_t, 32> &bytes) const {
bytes.clear();
}
@@ -109,7 +100,7 @@ uint32_t NativeRawSymbol::getClassParentId() const {
}
std::string NativeRawSymbol::getCompilerName() const {
- return 0;
+ return {};
}
uint32_t NativeRawSymbol::getCount() const {
@@ -136,7 +127,7 @@ uint32_t NativeRawSymbol::getLexicalParentId() const {
}
std::string NativeRawSymbol::getLibraryName() const {
- return "";
+ return {};
}
uint32_t NativeRawSymbol::getLiveRangeStartAddressOffset() const {
@@ -164,7 +155,7 @@ uint32_t NativeRawSymbol::getMemorySpaceKind() const {
}
std::string NativeRawSymbol::getName() const {
- return 0;
+ return {};
}
uint32_t NativeRawSymbol::getNumberOfAcceleratorPointerTags() const {
@@ -188,7 +179,7 @@ uint32_t NativeRawSymbol::getNumberOfRows() const {
}
std::string NativeRawSymbol::getObjectFileName() const {
- return "";
+ return {};
}
uint32_t NativeRawSymbol::getOemId() const {
@@ -240,7 +231,7 @@ uint32_t NativeRawSymbol::getSlot() const {
}
std::string NativeRawSymbol::getSourceFileName() const {
- return 0;
+ return {};
}
uint32_t NativeRawSymbol::getStride() const {
@@ -251,7 +242,7 @@ uint32_t NativeRawSymbol::getSubTypeId() const {
return 0;
}
-std::string NativeRawSymbol::getSymbolsFileName() const { return ""; }
+std::string NativeRawSymbol::getSymbolsFileName() const { return {}; }
uint32_t NativeRawSymbol::getSymIndexId() const { return SymbolId; }
@@ -292,7 +283,7 @@ uint32_t NativeRawSymbol::getUavSlot() const {
}
std::string NativeRawSymbol::getUndecoratedName() const {
- return 0;
+ return {};
}
uint32_t NativeRawSymbol::getUnmodifiedTypeId() const {
@@ -701,5 +692,5 @@ bool NativeRawSymbol::wasInlined() const {
}
std::string NativeRawSymbol::getUnused() const {
- return "";
+ return {};
}
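
The repeated `return 0;` to `return {};` change above is more than style: for a function returning std::string, the literal 0 converts to a null const char*, and constructing a std::string from a null pointer is undefined behavior, while `return {};` yields a well-defined empty string. A two-line illustration:

#include <string>

// `return 0;` converts the literal to a null const char *; std::string's
// const char * constructor on a null pointer is undefined behavior.
std::string bad() { return 0; }   // compiles, but UB when called
std::string good() { return {}; } // a well-defined empty string
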
diff --git a/lib/DebugInfo/PDB/Native/NativeSession.cpp b/lib/DebugInfo/PDB/Native/NativeSession.cpp
index 3ab381e76e62..93d43d9ef341 100644
--- a/lib/DebugInfo/PDB/Native/NativeSession.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeSession.cpp
@@ -13,6 +13,7 @@
#include "llvm/DebugInfo/PDB/GenericError.h"
#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
#include "llvm/DebugInfo/PDB/IPDBSourceFile.h"
+#include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeExeSymbol.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
@@ -23,8 +24,10 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MemoryBuffer.h"
+
#include <algorithm>
#include <memory>
+#include <utility>
using namespace llvm;
using namespace llvm::msf;
@@ -66,12 +69,23 @@ Error NativeSession::createFromExe(StringRef Path,
return make_error<RawError>(raw_error_code::feature_unsupported);
}
+std::unique_ptr<PDBSymbolCompiland>
+NativeSession::createCompilandSymbol(DbiModuleDescriptor MI) {
+ const auto Id = static_cast<uint32_t>(SymbolCache.size());
+ SymbolCache.push_back(
+ llvm::make_unique<NativeCompilandSymbol>(*this, Id, MI));
+ return llvm::make_unique<PDBSymbolCompiland>(
+ *this, std::unique_ptr<IPDBRawSymbol>(SymbolCache[Id]->clone()));
+}
+
uint64_t NativeSession::getLoadAddress() const { return 0; }
void NativeSession::setLoadAddress(uint64_t Address) {}
std::unique_ptr<PDBSymbolExe> NativeSession::getGlobalScope() {
- auto RawSymbol = llvm::make_unique<NativeExeSymbol>(*this, 0);
+ const auto Id = static_cast<uint32_t>(SymbolCache.size());
+ SymbolCache.push_back(llvm::make_unique<NativeExeSymbol>(*this, Id));
+ auto RawSymbol = SymbolCache[Id]->clone();
auto PdbSymbol(PDBSymbol::create(*this, std::move(RawSymbol)));
std::unique_ptr<PDBSymbolExe> ExeSymbol(
static_cast<PDBSymbolExe *>(PdbSymbol.release()));
@@ -80,7 +94,10 @@ std::unique_ptr<PDBSymbolExe> NativeSession::getGlobalScope() {
std::unique_ptr<PDBSymbol>
NativeSession::getSymbolById(uint32_t SymbolId) const {
- return nullptr;
+ // If the caller has a SymbolId, it'd better be in our SymbolCache.
+ return SymbolId < SymbolCache.size()
+ ? PDBSymbol::create(*this, SymbolCache[SymbolId]->clone())
+ : nullptr;
}
std::unique_ptr<PDBSymbol>
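
The NativeSession change introduces an append-only SymbolCache in which a symbol's index doubles as its SymbolId, making getSymbolById a bounds-checked vector lookup. A standalone sketch of the pattern with placeholder types (not the real PDB classes):

#include <cstdint>
#include <memory>
#include <vector>

// Placeholder symbol type; the real code stores clones of IPDBRawSymbol.
struct RawSymbol {
  uint32_t Id;
  explicit RawSymbol(uint32_t Id) : Id(Id) {}
};

struct Session {
  std::vector<std::unique_ptr<RawSymbol>> SymbolCache;

  // Appending assigns the next index as the symbol's stable id.
  uint32_t createSymbol() {
    const auto Id = static_cast<uint32_t>(SymbolCache.size());
    SymbolCache.push_back(std::make_unique<RawSymbol>(Id));
    return Id;
  }

  // Lookup is a bounds check plus an index, mirroring getSymbolById().
  RawSymbol *getSymbolById(uint32_t Id) const {
    return Id < SymbolCache.size() ? SymbolCache[Id].get() : nullptr;
  }
};
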
diff --git a/lib/DebugInfo/PDB/PDB.cpp b/lib/DebugInfo/PDB/PDB.cpp
index 7e3acc1165f3..501d4f5985b7 100644
--- a/lib/DebugInfo/PDB/PDB.cpp
+++ b/lib/DebugInfo/PDB/PDB.cpp
@@ -1,4 +1,4 @@
-//===- PDB.cpp - base header file for creating a PDB reader -----*- C++ -*-===//
+//===- PDB.cpp - base header file for creating a PDB reader ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,18 +8,14 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/PDB.h"
-
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/config.h"
#include "llvm/DebugInfo/PDB/GenericError.h"
-#include "llvm/DebugInfo/PDB/IPDBSession.h"
-#include "llvm/DebugInfo/PDB/PDB.h"
#if LLVM_ENABLE_DIA_SDK
#include "llvm/DebugInfo/PDB/DIA/DIASession.h"
#endif
#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Error.h"
using namespace llvm;
using namespace llvm::pdb;
@@ -33,7 +29,7 @@ Error llvm::pdb::loadDataForPDB(PDB_ReaderType Type, StringRef Path,
#if LLVM_ENABLE_DIA_SDK
return DIASession::createFromPdb(Path, Session);
#else
- return llvm::make_error<GenericError>("DIA is not installed on the system");
+ return make_error<GenericError>("DIA is not installed on the system");
#endif
}
@@ -46,6 +42,6 @@ Error llvm::pdb::loadDataForEXE(PDB_ReaderType Type, StringRef Path,
#if LLVM_ENABLE_DIA_SDK
return DIASession::createFromExe(Path, Session);
#else
- return llvm::make_error<GenericError>("DIA is not installed on the system");
+ return make_error<GenericError>("DIA is not installed on the system");
#endif
}
diff --git a/lib/DebugInfo/PDB/PDBExtras.cpp b/lib/DebugInfo/PDB/PDBExtras.cpp
index dc22a30facab..faf1142ddf17 100644
--- a/lib/DebugInfo/PDB/PDBExtras.cpp
+++ b/lib/DebugInfo/PDB/PDBExtras.cpp
@@ -1,4 +1,4 @@
-//===- PDBExtras.cpp - helper functions and classes for PDBs -----*- C++-*-===//
+//===- PDBExtras.cpp - helper functions and classes for PDBs --------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,9 +8,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/PDBExtras.h"
-
#include "llvm/ADT/ArrayRef.h"
#include "llvm/DebugInfo/CodeView/Formatters.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::pdb;
diff --git a/lib/DebugInfo/PDB/UDTLayout.cpp b/lib/DebugInfo/PDB/UDTLayout.cpp
index da353cb6977c..5f4390bbaf12 100644
--- a/lib/DebugInfo/PDB/UDTLayout.cpp
+++ b/lib/DebugInfo/PDB/UDTLayout.cpp
@@ -1,4 +1,4 @@
-//===- UDTLayout.cpp --------------------------------------------*- C++ -*-===//
+//===- UDTLayout.cpp ------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,20 +8,25 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/UDTLayout.h"
-
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
#include "llvm/DebugInfo/PDB/IPDBSession.h"
#include "llvm/DebugInfo/PDB/PDBSymbol.h"
#include "llvm/DebugInfo/PDB/PDBSymbolData.h"
-#include "llvm/DebugInfo/PDB/PDBSymbolExe.h"
#include "llvm/DebugInfo/PDB/PDBSymbolFunc.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypePointer.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h"
-
-#include <utility>
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
+#include "llvm/Support/Casting.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <memory>
using namespace llvm;
using namespace llvm::pdb;
@@ -176,7 +181,6 @@ void UDTLayoutBase::initializeChildren(const PDBSymbol &Sym) {
else
Bases.push_back(std::move(Base));
}
-
else if (auto Data = unique_dyn_cast<PDBSymbolData>(Child)) {
if (Data->getDataKind() == PDB_DataKind::Member)
Members.push_back(std::move(Data));
@@ -296,4 +300,4 @@ void UDTLayoutBase::addChildToLayout(std::unique_ptr<LayoutItemBase> Child) {
}
ChildStorage.push_back(std::move(Child));
-}
\ No newline at end of file
+}
diff --git a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
index b20690c7caaf..690276232a6f 100644
--- a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
+++ b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
@@ -193,11 +193,11 @@ public:
}
auto *MPtr = M.release();
ShouldDelete[MPtr] = true;
- auto Deleter =
- [this](Module *Mod) {
- if (ShouldDelete[Mod])
- delete Mod;
- };
+ auto Deleter = [this](Module *Mod) {
+ auto I = ShouldDelete.find(Mod);
+ if (I != ShouldDelete.end() && I->second)
+ delete Mod;
+ };
LocalModules.push_back(std::shared_ptr<Module>(MPtr, std::move(Deleter)));
LazyEmitLayer.addModule(LocalModules.back(), &MemMgr, &Resolver);
}
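
The OrcMCJITReplacement fix makes the shared_ptr deleter consult the ownership map with find() instead of operator[], so the lookup can never insert a new entry while the deleter runs. A self-contained sketch of that pattern:

#include <map>
#include <memory>

struct Module { int Id; };

int main() {
  std::map<Module *, bool> ShouldDelete;
  auto *M = new Module{1};
  ShouldDelete[M] = true;
  // find() is a pure lookup; operator[] would default-insert an entry and
  // mutate the map from inside the deleter.
  auto Deleter = [&ShouldDelete](Module *Mod) {
    auto I = ShouldDelete.find(Mod);
    if (I != ShouldDelete.end() && I->second)
      delete Mod;
  };
  std::shared_ptr<Module> Shared(M, Deleter); // deleter runs at scope exit
}
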
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index 3d12eadea4dd..8b6f9bef66df 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -78,11 +78,11 @@ public:
void updateSymbolAddress(const SymbolRef &SymRef, uint64_t Addr);
// Methods for type inquiry through isa, cast and dyn_cast
- static inline bool classof(const Binary *v) {
+ static bool classof(const Binary *v) {
return (isa<ELFObjectFile<ELFT>>(v) &&
classof(cast<ELFObjectFile<ELFT>>(v)));
}
- static inline bool classof(const ELFObjectFile<ELFT> *v) {
+ static bool classof(const ELFObjectFile<ELFT> *v) {
return v->isDyldType();
}
};
diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp
index 27150a89d9b2..d387a6f0ecb9 100644
--- a/lib/IR/Constants.cpp
+++ b/lib/IR/Constants.cpp
@@ -716,7 +716,7 @@ bool ConstantFP::isExactlyValue(const APFloat &V) const {
/// Remove the constant from the constant table.
void ConstantFP::destroyConstantImpl() {
- llvm_unreachable("You can't ConstantInt->destroyConstantImpl()!");
+ llvm_unreachable("You can't ConstantFP->destroyConstantImpl()!");
}
//===----------------------------------------------------------------------===//
diff --git a/lib/IR/Dominators.cpp b/lib/IR/Dominators.cpp
index 37e735251fdf..9bd0e297f4ef 100644
--- a/lib/IR/Dominators.cpp
+++ b/lib/IR/Dominators.cpp
@@ -63,15 +63,22 @@ bool BasicBlockEdge::isSingleEdge() const {
template class llvm::DomTreeNodeBase<BasicBlock>;
template class llvm::DominatorTreeBase<BasicBlock>;
-template void llvm::Calculate<Function, BasicBlock *>(
+template void llvm::DomTreeBuilder::Calculate<Function, BasicBlock *>(
DominatorTreeBase<
typename std::remove_pointer<GraphTraits<BasicBlock *>::NodeRef>::type>
&DT,
Function &F);
-template void llvm::Calculate<Function, Inverse<BasicBlock *>>(
+template void llvm::DomTreeBuilder::Calculate<Function, Inverse<BasicBlock *>>(
DominatorTreeBase<typename std::remove_pointer<
GraphTraits<Inverse<BasicBlock *>>::NodeRef>::type> &DT,
Function &F);
+template bool llvm::DomTreeBuilder::Verify<BasicBlock *>(
+ const DominatorTreeBase<
+ typename std::remove_pointer<GraphTraits<BasicBlock *>::NodeRef>::type>
+ &DT);
+template bool llvm::DomTreeBuilder::Verify<Inverse<BasicBlock *>>(
+ const DominatorTreeBase<typename std::remove_pointer<
+ GraphTraits<Inverse<BasicBlock *>>::NodeRef>::type> &DT);
bool DominatorTree::invalidate(Function &F, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &) {
@@ -285,6 +292,13 @@ bool DominatorTree::isReachableFromEntry(const Use &U) const {
}
void DominatorTree::verifyDomTree() const {
+ // Perform the expensive checks only when VerifyDomInfo is set.
+ if (VerifyDomInfo && !verify()) {
+ errs() << "\n~~~~~~~~~~~\n\t\tDomTree verification failed!\n~~~~~~~~~~~\n";
+ print(errs());
+ abort();
+ }
+
Function &F = *getRoot()->getParent();
DominatorTree OtherDT;
diff --git a/lib/IR/LLVMContext.cpp b/lib/IR/LLVMContext.cpp
index ad0d4470c111..2e13f362344d 100644
--- a/lib/IR/LLVMContext.cpp
+++ b/lib/IR/LLVMContext.cpp
@@ -125,11 +125,18 @@ void LLVMContext::setDiagnosticHandler(DiagnosticHandlerTy DiagnosticHandler,
pImpl->RespectDiagnosticFilters = RespectFilters;
}
-void LLVMContext::setDiagnosticHotnessRequested(bool Requested) {
- pImpl->DiagnosticHotnessRequested = Requested;
+void LLVMContext::setDiagnosticsHotnessRequested(bool Requested) {
+ pImpl->DiagnosticsHotnessRequested = Requested;
}
-bool LLVMContext::getDiagnosticHotnessRequested() const {
- return pImpl->DiagnosticHotnessRequested;
+bool LLVMContext::getDiagnosticsHotnessRequested() const {
+ return pImpl->DiagnosticsHotnessRequested;
+}
+
+void LLVMContext::setDiagnosticsHotnessThreshold(uint64_t Threshold) {
+ pImpl->DiagnosticsHotnessThreshold = Threshold;
+}
+uint64_t LLVMContext::getDiagnosticsHotnessThreshold() const {
+ return pImpl->DiagnosticsHotnessThreshold;
}
yaml::Output *LLVMContext::getDiagnosticsOutputFile() {
diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h
index 4147f71ad9d2..395beb57fe37 100644
--- a/lib/IR/LLVMContextImpl.h
+++ b/lib/IR/LLVMContextImpl.h
@@ -1169,7 +1169,8 @@ public:
LLVMContext::DiagnosticHandlerTy DiagnosticHandler = nullptr;
void *DiagnosticContext = nullptr;
bool RespectDiagnosticFilters = false;
- bool DiagnosticHotnessRequested = false;
+ bool DiagnosticsHotnessRequested = false;
+ uint64_t DiagnosticsHotnessThreshold = 0;
std::unique_ptr<yaml::Output> DiagnosticsOutputFile;
LLVMContext::YieldCallbackTy YieldCallback = nullptr;
diff --git a/lib/LTO/LTO.cpp b/lib/LTO/LTO.cpp
index 35032fdd33e1..68b8c9fcb939 100644
--- a/lib/LTO/LTO.cpp
+++ b/lib/LTO/LTO.cpp
@@ -472,6 +472,36 @@ Error LTO::addModule(InputFile &Input, unsigned ModI,
return Error::success();
}
+// Checks whether the given global value is in a non-prevailing comdat
+// (comdat containing values the linker indicated were not prevailing,
+// which we then dropped to available_externally), and if so, removes
+// it from the comdat. This is called for all global values to ensure the
+// comdat is empty rather than leaving an incomplete comdat. It is needed for
+// regular LTO modules, in case we are doing a mixed-mode LTO compilation
+// (both regular and thin LTO modules). Since the regular LTO module will be
+// linked first in the final native link, we want to make sure the linker
+// doesn't select any of these incomplete comdats that would be left
+// in the regular LTO module without this cleanup.
+static void
+handleNonPrevailingComdat(GlobalValue &GV,
+ std::set<const Comdat *> &NonPrevailingComdats) {
+ Comdat *C = GV.getComdat();
+ if (!C)
+ return;
+
+ if (!NonPrevailingComdats.count(C))
+ return;
+
+ // We additionally need to drop any externally visible global values in the
+ // comdat to available_externally, so that the linker does not report them
+ // as multiply defined symbols.
+ if (!GV.hasLocalLinkage())
+ GV.setLinkage(GlobalValue::AvailableExternallyLinkage);
+
+ if (auto GO = dyn_cast<GlobalObject>(&GV))
+ GO->setComdat(nullptr);
+}
+
// Add a regular LTO object to the link.
// The resulting module needs to be linked into the combined LTO module with
// linkRegularLTO.
@@ -523,6 +553,7 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
};
Skip();
+ std::set<const Comdat *> NonPrevailingComdats;
for (const InputFile::Symbol &Sym : Syms) {
assert(ResI != ResE);
SymbolResolution Res = *ResI++;
@@ -557,6 +588,8 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
// module (in linkRegularLTO), based on whether it is undefined.
Mod.Keep.push_back(GV);
GV->setLinkage(GlobalValue::AvailableExternallyLinkage);
+ if (GV->hasComdat())
+ NonPrevailingComdats.insert(GV->getComdat());
cast<GlobalObject>(GV)->setComdat(nullptr);
}
}
@@ -574,6 +607,9 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
// FIXME: use proposed local attribute for FinalDefinitionInLinkageUnit.
}
+ if (!M.getComdatSymbolTable().empty())
+ for (GlobalValue &GV : M.global_values())
+ handleNonPrevailingComdat(GV, NonPrevailingComdats);
assert(MsymI == MsymE);
return std::move(Mod);
}
@@ -1087,7 +1123,7 @@ lto::setupOptimizationRemarks(LLVMContext &Context,
Context.setDiagnosticsOutputFile(
llvm::make_unique<yaml::Output>(DiagnosticFile->os()));
if (LTOPassRemarksWithHotness)
- Context.setDiagnosticHotnessRequested(true);
+ Context.setDiagnosticsHotnessRequested(true);
DiagnosticFile->keep();
return std::move(DiagnosticFile);
}
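
Reduced to its core, the comdat cleanup above says: once any member of a comdat has been dropped to available_externally, every remaining member must be demoted and detached too, so the linker never selects a half-empty group. A minimal model with placeholder types (not LLVM IR classes):

#include <set>
#include <string>

// Placeholder types; LLVM's GlobalValue/Comdat carry much more state.
struct Comdat { std::string Name; };
struct GlobalValue {
  Comdat *C = nullptr;
  bool HasLocalLinkage = false;
  bool AvailableExternally = false;
};

void handleNonPrevailingComdat(GlobalValue &GV,
                               const std::set<const Comdat *> &NonPrevailing) {
  if (!GV.C || !NonPrevailing.count(GV.C))
    return;
  if (!GV.HasLocalLinkage)
    GV.AvailableExternally = true; // demote the strong definition
  GV.C = nullptr;                  // detach so the comdat is fully emptied
}
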
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 92c5da0e9fef..0318d916aa49 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -261,9 +261,9 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout,
Value -= Offset;
}
- // Let the backend adjust the fixup value if necessary, including whether
- // we need a relocation.
- Backend.processFixupValue(*this, Fixup, Target, IsResolved);
+ // Let the backend force a relocation if needed.
+ if (IsResolved && Backend.shouldForceRelocation(*this, Fixup, Target))
+ IsResolved = false;
return IsResolved;
}
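
The MCAssembler change narrows a mutating hook into a pure predicate: shouldForceRelocation may only veto a resolved fixup, never rewrite its value. The resulting control flow, sketched with placeholder types:

// Placeholder types standing in for MCFixup/MCAsmBackend.
struct Fixup {};
struct Backend {
  bool shouldForceRelocation(const Fixup &) const { return false; }
};

bool evaluateFixup(const Backend &B, const Fixup &F, bool IsResolved) {
  // The backend can only flip resolved -> unresolved, never the reverse,
  // and it can no longer adjust the fixup value.
  if (IsResolved && B.shouldForceRelocation(F))
    IsResolved = false;
  return IsResolved;
}
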
diff --git a/lib/MC/WasmObjectWriter.cpp b/lib/MC/WasmObjectWriter.cpp
index 45534ba18212..82352cb50c70 100644
--- a/lib/MC/WasmObjectWriter.cpp
+++ b/lib/MC/WasmObjectWriter.cpp
@@ -265,7 +265,8 @@ private:
uint32_t NumFuncImports);
void writeCodeRelocSection();
void writeDataRelocSection(uint64_t DataSectionHeaderSize);
- void writeLinkingMetaDataSection(ArrayRef<StringRef> WeakSymbols,
+ void writeLinkingMetaDataSection(uint32_t DataSize, uint32_t DataAlignment,
+ ArrayRef<StringRef> WeakSymbols,
bool HasStackPointer,
uint32_t StackPointerGlobal);
@@ -877,11 +878,8 @@ void WasmObjectWriter::writeDataRelocSection(uint64_t DataSectionHeaderSize) {
}
void WasmObjectWriter::writeLinkingMetaDataSection(
- ArrayRef<StringRef> WeakSymbols, bool HasStackPointer,
- uint32_t StackPointerGlobal) {
- if (!HasStackPointer && WeakSymbols.empty())
- return;
-
+ uint32_t DataSize, uint32_t DataAlignment, ArrayRef<StringRef> WeakSymbols,
+ bool HasStackPointer, uint32_t StackPointerGlobal) {
SectionBookkeeping Section;
startSection(Section, wasm::WASM_SEC_CUSTOM, "linking");
SectionBookkeeping SubSection;
@@ -902,6 +900,16 @@ void WasmObjectWriter::writeLinkingMetaDataSection(
endSection(SubSection);
}
+ if (DataSize > 0) {
+ startSection(SubSection, wasm::WASM_DATA_SIZE);
+ encodeULEB128(DataSize, getStream());
+ endSection(SubSection);
+
+ startSection(SubSection, wasm::WASM_DATA_ALIGNMENT);
+ encodeULEB128(DataAlignment, getStream());
+ endSection(SubSection);
+ }
+
endSection(Section);
}
@@ -923,6 +931,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
unsigned NumFuncImports = 0;
unsigned NumGlobalImports = 0;
SmallVector<char, 0> DataBytes;
+ uint32_t DataAlignment = 1;
uint32_t StackPointerGlobal = 0;
bool HasStackPointer = false;
@@ -1157,6 +1166,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
report_fatal_error("data sections must contain at most one variable");
DataBytes.resize(alignTo(DataBytes.size(), DataSection.getAlignment()));
+ DataAlignment = std::max(DataAlignment, DataSection.getAlignment());
DataSection.setSectionOffset(DataBytes.size());
@@ -1272,7 +1282,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
writeNameSection(Functions, Imports, NumFuncImports);
writeCodeRelocSection();
writeDataRelocSection(DataSectionHeaderSize);
- writeLinkingMetaDataSection(WeakSymbols, HasStackPointer, StackPointerGlobal);
+ writeLinkingMetaDataSection(DataBytes.size(), DataAlignment, WeakSymbols,
+ HasStackPointer, StackPointerGlobal);
// TODO: Translate the .comment section to the output.
// TODO: Translate debug sections to the output.
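
Both new subsections are ULEB128 payloads. For reference, the encoding emits seven bits per byte, low bits first, with the continuation bit set on every byte except the last; a sketch behaviorally equivalent to what encodeULEB128 does here:

#include <cstdint>
#include <vector>

// ULEB128 encoding as used for the DATA_SIZE and DATA_ALIGNMENT payloads.
void encodeULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = Value & 0x7f; // low seven bits
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80; // continuation bit: more bytes follow
    Out.push_back(Byte);
  } while (Value != 0);
}
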
diff --git a/lib/Object/CMakeLists.txt b/lib/Object/CMakeLists.txt
index 1d08a9efd8b3..fd5e7707c541 100644
--- a/lib/Object/CMakeLists.txt
+++ b/lib/Object/CMakeLists.txt
@@ -27,4 +27,5 @@ add_llvm_library(LLVMObject
DEPENDS
intrinsics_gen
+ llvm_vcsrevision_h
)
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index 9a760d86e7e2..1e9b0c5b0454 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -883,7 +883,7 @@ base_reloc_iterator COFFObjectFile::base_reloc_end() const {
}
uint8_t COFFObjectFile::getBytesInAddress() const {
- return getArch() == Triple::x86_64 ? 8 : 4;
+ return getArch() == Triple::x86_64 || getArch() == Triple::aarch64 ? 8 : 4;
}
StringRef COFFObjectFile::getFileFormatName() const {
@@ -1216,6 +1216,29 @@ void COFFObjectFile::getRelocationTypeName(
Res = "Unknown";
}
break;
+ case COFF::IMAGE_FILE_MACHINE_ARM64:
+ switch (Reloc->Type) {
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_ABSOLUTE);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_ADDR32);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_ADDR32NB);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_BRANCH26);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_PAGEBASE_REL21);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_REL21);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_PAGEOFFSET_12A);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_PAGEOFFSET_12L);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_SECREL);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_SECREL_LOW12A);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_SECREL_HIGH12A);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_SECREL_LOW12L);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_TOKEN);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_SECTION);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_ADDR64);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_BRANCH19);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_BRANCH14);
+ default:
+ Res = "Unknown";
+ }
+ break;
case COFF::IMAGE_FILE_MACHINE_I386:
switch (Reloc->Type) {
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_ABSOLUTE);
diff --git a/lib/Object/IRSymtab.cpp b/lib/Object/IRSymtab.cpp
index 7bca032a7be1..7a6424a76a98 100644
--- a/lib/Object/IRSymtab.cpp
+++ b/lib/Object/IRSymtab.cpp
@@ -32,6 +32,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/StringSaver.h"
+#include "llvm/Support/VCSRevision.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <string>
@@ -43,6 +44,21 @@ using namespace irsymtab;
namespace {
+const char *getExpectedProducerName() {
+ static char DefaultName[] = LLVM_VERSION_STRING
+#ifdef LLVM_REVISION
+ " " LLVM_REVISION
+#endif
+ ;
+ // Allows for testing of the irsymtab writer and upgrade mechanism. This
+ // environment variable should not be set by users.
+ if (char *OverrideName = getenv("LLVM_OVERRIDE_PRODUCER"))
+ return OverrideName;
+ return DefaultName;
+}
+
+const char *kExpectedProducerName = getExpectedProducerName();
+
/// Stores the temporary state that is required to build an IR symbol table.
struct Builder {
SmallVector<char, 0> &Symtab;
@@ -231,6 +247,8 @@ Error Builder::build(ArrayRef<Module *> IRMods) {
storage::Header Hdr;
assert(!IRMods.empty());
+ Hdr.Version = storage::Header::kCurrentVersion;
+ setStr(Hdr.Producer, kExpectedProducerName);
setStr(Hdr.TargetTriple, IRMods[0]->getTargetTriple());
setStr(Hdr.SourceFileName, IRMods[0]->getSourceFileName());
TT = Triple(IRMods[0]->getTargetTriple());
@@ -300,7 +318,31 @@ Expected<FileContents> irsymtab::readBitcode(const BitcodeFileContents &BFC) {
return make_error<StringError>("Bitcode file does not contain any modules",
inconvertibleErrorCode());
- // Right now we have no on-disk representation of symbol tables, so we always
- // upgrade.
- return upgrade(BFC.Mods);
+ if (BFC.StrtabForSymtab.empty() ||
+ BFC.Symtab.size() < sizeof(storage::Header))
+ return upgrade(BFC.Mods);
+
+ // We cannot use the regular reader to read the version and producer, because
+ // it will expect the header to be in the current format. The only thing we
+ // can rely on is that the version and producer will be present as the first
+ // struct elements.
+ auto *Hdr = reinterpret_cast<const storage::Header *>(BFC.Symtab.data());
+ unsigned Version = Hdr->Version;
+ StringRef Producer = Hdr->Producer.get(BFC.StrtabForSymtab);
+ if (Version != storage::Header::kCurrentVersion ||
+ Producer != kExpectedProducerName)
+ return upgrade(BFC.Mods);
+
+ FileContents FC;
+ FC.TheReader = {{BFC.Symtab.data(), BFC.Symtab.size()},
+ {BFC.StrtabForSymtab.data(), BFC.StrtabForSymtab.size()}};
+
+ // Finally, make sure that the number of modules in the symbol table matches
+ // the number of modules in the bitcode file. If they differ, it may mean that
+ // the bitcode file was created by binary concatenation, so we need to create
+ // a new symbol table from scratch.
+ if (FC.TheReader.getNumModules() != BFC.Mods.size())
+ return upgrade(std::move(BFC.Mods));
+
+ return std::move(FC);
}
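
The reuse-or-upgrade decision above boils down to a gate: trust the cached symbol table only when the format version, the exact producer string, and the module count all match; anything else falls back to rebuilding. A compressed sketch (field names illustrative, not the real storage::Header layout):

#include <cstddef>
#include <string>

// Illustrative header fields; the real storage::Header is a packed struct
// read straight out of the bitcode symtab blob.
struct Header {
  unsigned Version;
  std::string Producer;
};

bool canReuseSymtab(const Header &Hdr, unsigned CurrentVersion,
                    const std::string &ExpectedProducer,
                    std::size_t NumSymtabModules,
                    std::size_t NumBitcodeModules) {
  if (Hdr.Version != CurrentVersion || Hdr.Producer != ExpectedProducer)
    return false; // stale format or a different compiler revision
  // Concatenated bitcode can carry a symtab covering fewer modules.
  return NumSymtabModules == NumBitcodeModules;
}
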
diff --git a/lib/Object/WasmObjectFile.cpp b/lib/Object/WasmObjectFile.cpp
index d15860674aeb..fff497ba5564 100644
--- a/lib/Object/WasmObjectFile.cpp
+++ b/lib/Object/WasmObjectFile.cpp
@@ -193,6 +193,9 @@ static Error readSection(WasmSection &Section, const uint8_t *&Ptr,
WasmObjectFile::WasmObjectFile(MemoryBufferRef Buffer, Error &Err)
: ObjectFile(Binary::ID_Wasm, Buffer) {
+ LinkingData.DataAlignment = 0;
+ LinkingData.DataSize = 0;
+
ErrorAsOutParameter ErrAsOutParam(&Err);
Header.Magic = getData().substr(0, 4);
if (Header.Magic != StringRef("\0asm", 4)) {
@@ -291,6 +294,7 @@ Error WasmObjectFile::parseNameSection(const uint8_t *Ptr, const uint8_t *End) {
Error WasmObjectFile::parseLinkingSection(const uint8_t *Ptr,
const uint8_t *End) {
+ HasLinkingSection = true;
while (Ptr < End) {
uint8_t Type = readVarint7(Ptr);
uint32_t Size = readVaruint32(Ptr);
@@ -305,7 +309,7 @@ Error WasmObjectFile::parseLinkingSection(const uint8_t *Ptr,
auto iter = SymbolMap.find(Symbol);
if (iter == SymbolMap.end()) {
return make_error<GenericBinaryError>(
- "Invalid symbol name in linking section",
+ "Invalid symbol name in linking section: " + Symbol,
object_error::parse_failed);
}
uint32_t SymIndex = iter->second;
@@ -318,6 +322,12 @@ Error WasmObjectFile::parseLinkingSection(const uint8_t *Ptr,
}
break;
}
+ case wasm::WASM_DATA_SIZE:
+ LinkingData.DataSize = readVaruint32(Ptr);
+ break;
+ case wasm::WASM_DATA_ALIGNMENT:
+ LinkingData.DataAlignment = readVaruint32(Ptr);
+ break;
case wasm::WASM_STACK_POINTER:
default:
Ptr += Size;
@@ -941,7 +951,9 @@ SubtargetFeatures WasmObjectFile::getFeatures() const {
return SubtargetFeatures();
}
-bool WasmObjectFile::isRelocatableObject() const { return false; }
+bool WasmObjectFile::isRelocatableObject() const {
+ return HasLinkingSection;
+}
const WasmSection &WasmObjectFile::getWasmSection(DataRefImpl Ref) const {
assert(Ref.d.a < Sections.size());
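
The new WASM_DATA_SIZE and WASM_DATA_ALIGNMENT cases read varuint32 fields, the decoder counterpart of the ULEB128 writer shown earlier. A sketch of that decoding (bounds and overflow checks omitted for brevity):

#include <cstdint>

// LEB128 decoding: accumulate seven bits per byte until a byte arrives
// without the continuation bit set.
uint32_t readVaruint32(const uint8_t *&Ptr) {
  uint32_t Result = 0;
  unsigned Shift = 0;
  uint8_t Byte;
  do {
    Byte = *Ptr++;
    Result |= uint32_t(Byte & 0x7f) << Shift;
    Shift += 7;
  } while (Byte & 0x80);
  return Result;
}
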
diff --git a/lib/Object/WindowsResource.cpp b/lib/Object/WindowsResource.cpp
index ff9b9ca35eb5..1371eacdf8f2 100644
--- a/lib/Object/WindowsResource.cpp
+++ b/lib/Object/WindowsResource.cpp
@@ -563,7 +563,7 @@ void WindowsResourceCOFFWriter::writeSymbolTable() {
Symbol = reinterpret_cast<coff_symbol16 *>(BufferStart + CurrentOffset);
strncpy(Symbol->Name.ShortName, RelocationName, (size_t)COFF::NameSize);
Symbol->Value = DataOffsets[i];
- Symbol->SectionNumber = 1;
+ Symbol->SectionNumber = 2;
Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL;
Symbol->StorageClass = COFF::IMAGE_SYM_CLASS_STATIC;
Symbol->NumberOfAuxSymbols = 0;
diff --git a/lib/ObjectYAML/COFFYAML.cpp b/lib/ObjectYAML/COFFYAML.cpp
index c8cbea1490f6..1103159fc98d 100644
--- a/lib/ObjectYAML/COFFYAML.cpp
+++ b/lib/ObjectYAML/COFFYAML.cpp
@@ -12,17 +12,25 @@
//===----------------------------------------------------------------------===//
#include "llvm/ObjectYAML/COFFYAML.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <cstdint>
+#include <cstring>
#define ECase(X) IO.enumCase(Value, #X, COFF::X);
+
namespace llvm {
namespace COFFYAML {
+
Section::Section() { memset(&Header, 0, sizeof(COFF::section)); }
Symbol::Symbol() { memset(&Header, 0, sizeof(COFF::symbol)); }
Object::Object() { memset(&Header, 0, sizeof(COFF::header)); }
-}
+
+} // end namespace COFFYAML
namespace yaml {
+
void ScalarEnumerationTraits<COFFYAML::COMDATType>::enumeration(
IO &IO, COFFYAML::COMDATType &Value) {
IO.enumCase(Value, "0", 0);
@@ -172,20 +180,20 @@ void ScalarEnumerationTraits<COFF::RelocationTypeAMD64>::enumeration(
void ScalarEnumerationTraits<COFF::WindowsSubsystem>::enumeration(
IO &IO, COFF::WindowsSubsystem &Value) {
- ECase(IMAGE_SUBSYSTEM_UNKNOWN);
- ECase(IMAGE_SUBSYSTEM_NATIVE);
- ECase(IMAGE_SUBSYSTEM_WINDOWS_GUI);
- ECase(IMAGE_SUBSYSTEM_WINDOWS_CUI);
- ECase(IMAGE_SUBSYSTEM_OS2_CUI);
- ECase(IMAGE_SUBSYSTEM_POSIX_CUI);
- ECase(IMAGE_SUBSYSTEM_NATIVE_WINDOWS);
- ECase(IMAGE_SUBSYSTEM_WINDOWS_CE_GUI);
- ECase(IMAGE_SUBSYSTEM_EFI_APPLICATION);
- ECase(IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER);
- ECase(IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER);
- ECase(IMAGE_SUBSYSTEM_EFI_ROM);
- ECase(IMAGE_SUBSYSTEM_XBOX);
- ECase(IMAGE_SUBSYSTEM_WINDOWS_BOOT_APPLICATION);
+ ECase(IMAGE_SUBSYSTEM_UNKNOWN);
+ ECase(IMAGE_SUBSYSTEM_NATIVE);
+ ECase(IMAGE_SUBSYSTEM_WINDOWS_GUI);
+ ECase(IMAGE_SUBSYSTEM_WINDOWS_CUI);
+ ECase(IMAGE_SUBSYSTEM_OS2_CUI);
+ ECase(IMAGE_SUBSYSTEM_POSIX_CUI);
+ ECase(IMAGE_SUBSYSTEM_NATIVE_WINDOWS);
+ ECase(IMAGE_SUBSYSTEM_WINDOWS_CE_GUI);
+ ECase(IMAGE_SUBSYSTEM_EFI_APPLICATION);
+ ECase(IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER);
+ ECase(IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER);
+ ECase(IMAGE_SUBSYSTEM_EFI_ROM);
+ ECase(IMAGE_SUBSYSTEM_XBOX);
+ ECase(IMAGE_SUBSYSTEM_WINDOWS_BOOT_APPLICATION);
}
#undef ECase
@@ -252,12 +260,15 @@ void ScalarBitSetTraits<COFF::DLLCharacteristics>::bitset(
#undef BCase
namespace {
+
struct NSectionSelectionType {
NSectionSelectionType(IO &)
: SelectionType(COFFYAML::COMDATType(0)) {}
NSectionSelectionType(IO &, uint8_t C)
: SelectionType(COFFYAML::COMDATType(C)) {}
+
uint8_t denormalize(IO &) { return SelectionType; }
+
COFFYAML::COMDATType SelectionType;
};
@@ -266,7 +277,9 @@ struct NWeakExternalCharacteristics {
: Characteristics(COFFYAML::WeakExternalCharacteristics(0)) {}
NWeakExternalCharacteristics(IO &, uint32_t C)
: Characteristics(COFFYAML::WeakExternalCharacteristics(C)) {}
+
uint32_t denormalize(IO &) { return Characteristics; }
+
COFFYAML::WeakExternalCharacteristics Characteristics;
};
@@ -275,7 +288,9 @@ struct NSectionCharacteristics {
: Characteristics(COFF::SectionCharacteristics(0)) {}
NSectionCharacteristics(IO &, uint32_t C)
: Characteristics(COFF::SectionCharacteristics(C)) {}
+
uint32_t denormalize(IO &) { return Characteristics; }
+
COFF::SectionCharacteristics Characteristics;
};
@@ -284,13 +299,16 @@ struct NAuxTokenType {
: AuxType(COFFYAML::AuxSymbolType(0)) {}
NAuxTokenType(IO &, uint8_t C)
: AuxType(COFFYAML::AuxSymbolType(C)) {}
+
uint32_t denormalize(IO &) { return AuxType; }
+
COFFYAML::AuxSymbolType AuxType;
};
struct NStorageClass {
NStorageClass(IO &) : StorageClass(COFF::SymbolStorageClass(0)) {}
NStorageClass(IO &, uint8_t S) : StorageClass(COFF::SymbolStorageClass(S)) {}
+
uint8_t denormalize(IO &) { return StorageClass; }
COFF::SymbolStorageClass StorageClass;
@@ -299,7 +317,9 @@ struct NStorageClass {
struct NMachine {
NMachine(IO &) : Machine(COFF::MachineTypes(0)) {}
NMachine(IO &, uint16_t M) : Machine(COFF::MachineTypes(M)) {}
+
uint16_t denormalize(IO &) { return Machine; }
+
COFF::MachineTypes Machine;
};
@@ -307,6 +327,7 @@ struct NHeaderCharacteristics {
NHeaderCharacteristics(IO &) : Characteristics(COFF::Characteristics(0)) {}
NHeaderCharacteristics(IO &, uint16_t C)
: Characteristics(COFF::Characteristics(C)) {}
+
uint16_t denormalize(IO &) { return Characteristics; }
COFF::Characteristics Characteristics;
@@ -316,13 +337,16 @@ template <typename RelocType>
struct NType {
NType(IO &) : Type(RelocType(0)) {}
NType(IO &, uint16_t T) : Type(RelocType(T)) {}
+
uint16_t denormalize(IO &) { return Type; }
+
RelocType Type;
};
struct NWindowsSubsystem {
NWindowsSubsystem(IO &) : Subsystem(COFF::WindowsSubsystem(0)) {}
NWindowsSubsystem(IO &, uint16_t C) : Subsystem(COFF::WindowsSubsystem(C)) {}
+
uint16_t denormalize(IO &) { return Subsystem; }
COFF::WindowsSubsystem Subsystem;
@@ -332,12 +356,13 @@ struct NDLLCharacteristics {
NDLLCharacteristics(IO &) : Characteristics(COFF::DLLCharacteristics(0)) {}
NDLLCharacteristics(IO &, uint16_t C)
: Characteristics(COFF::DLLCharacteristics(C)) {}
+
uint16_t denormalize(IO &) { return Characteristics; }
COFF::DLLCharacteristics Characteristics;
};
-}
+} // end anonymous namespace
void MappingTraits<COFFYAML::Relocation>::mapping(IO &IO,
COFFYAML::Relocation &Rel) {
@@ -509,5 +534,6 @@ void MappingTraits<COFFYAML::Object>::mapping(IO &IO, COFFYAML::Object &Obj) {
IO.mapRequired("symbols", Obj.Symbols);
}
-}
-}
+} // end namespace yaml
+
+} // end namespace llvm
diff --git a/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp b/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp
index d194420d5ef4..60b0ea28030a 100644
--- a/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp
+++ b/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp
@@ -13,9 +13,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/ObjectYAML/CodeViewYAMLDebugSections.h"
-
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/CodeViewError.h"
#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugCrossExSubsection.h"
@@ -24,15 +26,29 @@
#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h"
#include "llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h"
#include "llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h"
-#include "llvm/DebugInfo/CodeView/EnumTables.h"
+#include "llvm/DebugInfo/CodeView/Line.h"
#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
-#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
-#include "llvm/DebugInfo/CodeView/SymbolSerializer.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/ObjectYAML/CodeViewYAMLSymbols.h"
-#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <tuple>
+#include <vector>
+
using namespace llvm;
using namespace llvm::codeview;
using namespace llvm::CodeViewYAML;
@@ -48,9 +64,7 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(InlineeSite)
LLVM_YAML_IS_SEQUENCE_VECTOR(InlineeInfo)
LLVM_YAML_IS_SEQUENCE_VECTOR(CrossModuleExport)
LLVM_YAML_IS_SEQUENCE_VECTOR(YAMLCrossModuleImport)
-LLVM_YAML_IS_SEQUENCE_VECTOR(StringRef)
LLVM_YAML_IS_SEQUENCE_VECTOR(YAMLFrameData)
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t)
LLVM_YAML_DECLARE_SCALAR_TRAITS(HexFormattedString, false)
LLVM_YAML_DECLARE_ENUM_TRAITS(DebugSubsectionKind)
@@ -70,21 +84,25 @@ LLVM_YAML_DECLARE_MAPPING_TRAITS(InlineeSite)
namespace llvm {
namespace CodeViewYAML {
namespace detail {
+
struct YAMLSubsectionBase {
explicit YAMLSubsectionBase(DebugSubsectionKind Kind) : Kind(Kind) {}
- DebugSubsectionKind Kind;
- virtual ~YAMLSubsectionBase() {}
+ virtual ~YAMLSubsectionBase() = default;
virtual void map(IO &IO) = 0;
virtual std::shared_ptr<DebugSubsection>
toCodeViewSubsection(BumpPtrAllocator &Allocator,
const codeview::StringsAndChecksums &SC) const = 0;
+
+ DebugSubsectionKind Kind;
};
-}
-}
-}
+
+} // end namespace detail
+} // end namespace CodeViewYAML
+} // end namespace llvm
namespace {
+
struct YAMLChecksumsSubsection : public YAMLSubsectionBase {
YAMLChecksumsSubsection()
: YAMLSubsectionBase(DebugSubsectionKind::FileChecksums) {}
@@ -215,7 +233,8 @@ struct YAMLCoffSymbolRVASubsection : public YAMLSubsectionBase {
std::vector<uint32_t> RVAs;
};
-}
+
+} // end anonymous namespace
void ScalarBitSetTraits<LineFlags>::bitset(IO &io, LineFlags &Flags) {
io.bitSetCase(Flags, "HasColumnInfo", LF_HaveColumns);
@@ -743,8 +762,9 @@ llvm::CodeViewYAML::toCodeViewSubsectionList(
}
namespace {
+
struct SubsectionConversionVisitor : public DebugSubsectionVisitor {
- SubsectionConversionVisitor() {}
+ SubsectionConversionVisitor() = default;
Error visitUnknown(DebugUnknownSubsectionRef &Unknown) override;
Error visitLines(DebugLinesSubsectionRef &Lines,
@@ -769,6 +789,8 @@ struct SubsectionConversionVisitor : public DebugSubsectionVisitor {
YAMLDebugSubsection Subsection;
};
+} // end anonymous namespace
+
Error SubsectionConversionVisitor::visitUnknown(
DebugUnknownSubsectionRef &Unknown) {
return make_error<CodeViewError>(cv_error_code::operation_unsupported);
@@ -865,7 +887,6 @@ Error SubsectionConversionVisitor::visitCOFFSymbolRVAs(
Subsection.Subsection = *Result;
return Error::success();
}
-}
Expected<YAMLDebugSubsection>
YAMLDebugSubsection::fromCodeViewSubection(const StringsAndChecksumsRef &SC,
diff --git a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
index 83f3d55b8e55..dbe4e2a6d6fd 100644
--- a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
+++ b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
@@ -13,13 +13,25 @@
//===----------------------------------------------------------------------===//
#include "llvm/ObjectYAML/CodeViewYAMLSymbols.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/CodeViewError.h"
#include "llvm/DebugInfo/CodeView/EnumTables.h"
+#include "llvm/DebugInfo/CodeView/RecordSerialization.h"
#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/CodeView/SymbolSerializer.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/ObjectYAML/YAML.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <algorithm>
+#include <cstdint>
+#include <cstring>
+#include <string>
+#include <vector>
using namespace llvm;
using namespace llvm::codeview;
@@ -27,7 +39,6 @@ using namespace llvm::CodeViewYAML;
using namespace llvm::CodeViewYAML::detail;
using namespace llvm::yaml;
-LLVM_YAML_IS_SEQUENCE_VECTOR(StringRef)
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(TypeIndex)
// We only need to declare these, the definitions are in CodeViewYAMLTypes.cpp
@@ -49,15 +60,16 @@ LLVM_YAML_DECLARE_ENUM_TRAITS(RegisterId)
LLVM_YAML_DECLARE_ENUM_TRAITS(TrampolineType)
LLVM_YAML_DECLARE_ENUM_TRAITS(ThunkOrdinal)
-LLVM_YAML_STRONG_TYPEDEF(llvm::StringRef, TypeName)
+LLVM_YAML_STRONG_TYPEDEF(StringRef, TypeName)
LLVM_YAML_DECLARE_SCALAR_TRAITS(TypeName, true)
StringRef ScalarTraits<TypeName>::input(StringRef S, void *V, TypeName &T) {
return ScalarTraits<StringRef>::input(S, V, T.value);
}
+
void ScalarTraits<TypeName>::output(const TypeName &T, void *V,
- llvm::raw_ostream &R) {
+ raw_ostream &R) {
ScalarTraits<StringRef>::output(T.value, V, R);
}
@@ -174,9 +186,10 @@ namespace detail {
struct SymbolRecordBase {
codeview::SymbolKind Kind;
+
explicit SymbolRecordBase(codeview::SymbolKind K) : Kind(K) {}
+ virtual ~SymbolRecordBase() = default;
- virtual ~SymbolRecordBase() {}
virtual void map(yaml::IO &io) = 0;
virtual codeview::CVSymbol
toCodeViewSymbol(BumpPtrAllocator &Allocator,
@@ -195,6 +208,7 @@ template <typename T> struct SymbolRecordImpl : public SymbolRecordBase {
CodeViewContainer Container) const override {
return SymbolSerializer::writeOneSymbol(Symbol, Allocator, Container);
}
+
Error fromCodeViewSymbol(codeview::CVSymbol CVS) override {
return SymbolDeserializer::deserializeAs<T>(CVS, Symbol);
}
@@ -218,6 +232,7 @@ struct UnknownSymbolRecord : public SymbolRecordBase {
::memcpy(Buffer + sizeof(RecordPrefix), Data.data(), Data.size());
return CVSymbol(Kind, ArrayRef<uint8_t>(Buffer, TotalLen));
}
+
Error fromCodeViewSymbol(CVSymbol CVS) override {
this->Kind = CVS.kind();
Data = CVS.RecordData.drop_front(sizeof(RecordPrefix));
@@ -497,9 +512,10 @@ template <> void SymbolRecordImpl<ThreadLocalDataSym>::map(IO &IO) {
IO.mapOptional("Segment", Symbol.Segment, uint16_t(0));
IO.mapRequired("DisplayName", Symbol.Name);
}
-}
-}
-}
+
+} // end namespace detail
+} // end namespace CodeViewYAML
+} // end namespace llvm
CVSymbol CodeViewYAML::SymbolRecord::toCodeViewSymbol(
BumpPtrAllocator &Allocator, CodeViewContainer Container) const {
@@ -508,11 +524,13 @@ CVSymbol CodeViewYAML::SymbolRecord::toCodeViewSymbol(
namespace llvm {
namespace yaml {
+
template <> struct MappingTraits<SymbolRecordBase> {
static void mapping(IO &io, SymbolRecordBase &Record) { Record.map(io); }
};
-}
-}
+
+} // end namespace yaml
+} // end namespace llvm
template <typename SymbolType>
static inline Expected<CodeViewYAML::SymbolRecord>
diff --git a/lib/ObjectYAML/CodeViewYAMLTypes.cpp b/lib/ObjectYAML/CodeViewYAMLTypes.cpp
index 2d1cb4b1b27b..0b2ea61c5fe0 100644
--- a/lib/ObjectYAML/CodeViewYAMLTypes.cpp
+++ b/lib/ObjectYAML/CodeViewYAMLTypes.cpp
@@ -13,14 +13,29 @@
//===----------------------------------------------------------------------===//
#include "llvm/ObjectYAML/CodeViewYAMLTypes.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/COFF.h"
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/CodeViewError.h"
-#include "llvm/DebugInfo/CodeView/EnumTables.h"
#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h"
+#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <vector>
using namespace llvm;
using namespace llvm::codeview;
@@ -29,7 +44,6 @@ using namespace llvm::CodeViewYAML::detail;
using namespace llvm::yaml;
LLVM_YAML_IS_SEQUENCE_VECTOR(OneMethodRecord)
-LLVM_YAML_IS_SEQUENCE_VECTOR(StringRef)
LLVM_YAML_IS_SEQUENCE_VECTOR(VFTableSlotKind)
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(TypeIndex)
@@ -63,9 +77,10 @@ namespace detail {
struct LeafRecordBase {
TypeLeafKind Kind;
+
explicit LeafRecordBase(TypeLeafKind K) : Kind(K) {}
+ virtual ~LeafRecordBase() = default;
- virtual ~LeafRecordBase() {}
virtual void map(yaml::IO &io) = 0;
virtual CVType toCodeViewRecord(TypeTableBuilder &TTB) const = 0;
virtual Error fromCodeViewRecord(CVType Type) = 0;
@@ -101,9 +116,10 @@ template <> struct LeafRecordImpl<FieldListRecord> : public LeafRecordBase {
struct MemberRecordBase {
TypeLeafKind Kind;
+
explicit MemberRecordBase(TypeLeafKind K) : Kind(K) {}
+ virtual ~MemberRecordBase() = default;
- virtual ~MemberRecordBase() {}
virtual void map(yaml::IO &io) = 0;
virtual void writeTo(FieldListRecordBuilder &FLRB) = 0;
};
@@ -111,6 +127,7 @@ struct MemberRecordBase {
template <typename T> struct MemberRecordImpl : public MemberRecordBase {
explicit MemberRecordImpl(TypeLeafKind K)
: MemberRecordBase(K), Record(static_cast<TypeRecordKind>(K)) {}
+
void map(yaml::IO &io) override;
void writeTo(FieldListRecordBuilder &FLRB) override {
@@ -119,12 +136,13 @@ template <typename T> struct MemberRecordImpl : public MemberRecordBase {
mutable T Record;
};
-}
-}
-}
+
+} // end namespace detail
+} // end namespace CodeViewYAML
+} // end namespace llvm
void ScalarTraits<TypeIndex>::output(const TypeIndex &S, void *,
- llvm::raw_ostream &OS) {
+ raw_ostream &OS) {
OS << S.getIndex();
}
@@ -136,8 +154,7 @@ StringRef ScalarTraits<TypeIndex>::input(StringRef Scalar, void *Ctx,
return Result;
}
-void ScalarTraits<APSInt>::output(const APSInt &S, void *,
- llvm::raw_ostream &OS) {
+void ScalarTraits<APSInt>::output(const APSInt &S, void *, raw_ostream &OS) {
S.print(OS, S.isSigned());
}
@@ -346,6 +363,7 @@ void MappingTraits<MemberPointerInfo>::mapping(IO &IO, MemberPointerInfo &MPI) {
namespace llvm {
namespace CodeViewYAML {
namespace detail {
+
template <> void LeafRecordImpl<ModifierRecord>::map(IO &IO) {
IO.mapRequired("ModifiedType", Record.ModifiedType);
IO.mapRequired("Modifiers", Record.Modifiers);
@@ -404,11 +422,13 @@ template <> void LeafRecordImpl<ArrayRecord>::map(IO &IO) {
void LeafRecordImpl<FieldListRecord>::map(IO &IO) {
IO.mapRequired("FieldList", Members);
}
-}
-}
-}
+
+} // end namespace detail
+} // end namespace CodeViewYAML
+} // end namespace llvm
namespace {
+
class MemberRecordConversionVisitor : public TypeVisitorCallbacks {
public:
explicit MemberRecordConversionVisitor(std::vector<MemberRecord> &Records)
@@ -433,7 +453,8 @@ private:
std::vector<MemberRecord> &Records;
};
-}
+
+} // end anonymous namespace
Error LeafRecordImpl<FieldListRecord>::fromCodeViewRecord(CVType Type) {
MemberRecordConversionVisitor V(Members);
@@ -461,13 +482,13 @@ void MappingTraits<OneMethodRecord>::mapping(IO &io, OneMethodRecord &Record) {
namespace llvm {
namespace CodeViewYAML {
namespace detail {
+
template <> void LeafRecordImpl<ClassRecord>::map(IO &IO) {
IO.mapRequired("MemberCount", Record.MemberCount);
IO.mapRequired("Options", Record.Options);
IO.mapRequired("FieldList", Record.FieldList);
IO.mapRequired("Name", Record.Name);
IO.mapRequired("UniqueName", Record.UniqueName);
-
IO.mapRequired("DerivationList", Record.DerivationList);
IO.mapRequired("VTableShape", Record.VTableShape);
IO.mapRequired("Size", Record.Size);
@@ -479,7 +500,6 @@ template <> void LeafRecordImpl<UnionRecord>::map(IO &IO) {
IO.mapRequired("FieldList", Record.FieldList);
IO.mapRequired("Name", Record.Name);
IO.mapRequired("UniqueName", Record.UniqueName);
-
IO.mapRequired("Size", Record.Size);
}
@@ -489,7 +509,6 @@ template <> void LeafRecordImpl<EnumRecord>::map(IO &IO) {
IO.mapRequired("FieldList", Record.FieldList);
IO.mapRequired("Name", Record.Name);
IO.mapRequired("UniqueName", Record.UniqueName);
-
IO.mapRequired("UnderlyingType", Record.UnderlyingType);
}
@@ -603,9 +622,10 @@ template <> void MemberRecordImpl<VirtualBaseClassRecord>::map(IO &IO) {
template <> void MemberRecordImpl<ListContinuationRecord>::map(IO &IO) {
IO.mapRequired("ContinuationIndex", Record.ContinuationIndex);
}
-}
-}
-}
+
+} // end namespace detail
+} // end namespace CodeViewYAML
+} // end namespace llvm
template <typename T>
static inline Expected<LeafRecord> fromCodeViewRecordImpl(CVType Type) {
@@ -628,7 +648,8 @@ Expected<LeafRecord> LeafRecord::fromCodeViewRecord(CVType Type) {
#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, AliasName, ClassName)
switch (Type.kind()) {
#include "llvm/DebugInfo/CodeView/CodeViewTypes.def"
- default: { llvm_unreachable("Unknown leaf kind!"); }
+ default:
+ llvm_unreachable("Unknown leaf kind!");
}
return make_error<CodeViewError>(cv_error_code::corrupt_record);
}
@@ -644,6 +665,7 @@ CVType LeafRecord::toCodeViewRecord(TypeTableBuilder &TTB) const {
namespace llvm {
namespace yaml {
+
template <> struct MappingTraits<LeafRecordBase> {
static void mapping(IO &io, LeafRecordBase &Record) { Record.map(io); }
};
@@ -651,8 +673,9 @@ template <> struct MappingTraits<LeafRecordBase> {
template <> struct MappingTraits<MemberRecordBase> {
static void mapping(IO &io, MemberRecordBase &Record) { Record.map(io); }
};
-}
-}
+
+} // end namespace yaml
+} // end namespace llvm
template <typename ConcreteType>
static void mapLeafRecordImpl(IO &IO, const char *Class, TypeLeafKind Kind,
diff --git a/lib/ObjectYAML/DWARFEmitter.cpp b/lib/ObjectYAML/DWARFEmitter.cpp
index 91c928771a65..89fc652035ca 100644
--- a/lib/ObjectYAML/DWARFEmitter.cpp
+++ b/lib/ObjectYAML/DWARFEmitter.cpp
@@ -13,15 +13,25 @@
//===----------------------------------------------------------------------===//
#include "llvm/ObjectYAML/DWARFEmitter.h"
+#include "DWARFVisitor.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ObjectYAML/DWARFYAML.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/Host.h"
#include "llvm/Support/LEB128.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SwapByteOrder.h"
+#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
-
-#include "DWARFVisitor.h"
-
#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
using namespace llvm;
@@ -127,7 +137,7 @@ class DumpVisitor : public DWARFYAML::ConstVisitor {
raw_ostream &OS;
protected:
- virtual void onStartCompileUnit(const DWARFYAML::Unit &CU) {
+ void onStartCompileUnit(const DWARFYAML::Unit &CU) override {
writeInitialLength(CU.Length, OS, DebugInfo.IsLittleEndian);
writeInteger((uint16_t)CU.Version, OS, DebugInfo.IsLittleEndian);
if(CU.Version >= 5) {
@@ -141,41 +151,43 @@ protected:
}
- virtual void onStartDIE(const DWARFYAML::Unit &CU,
- const DWARFYAML::Entry &DIE) {
+ void onStartDIE(const DWARFYAML::Unit &CU,
+ const DWARFYAML::Entry &DIE) override {
encodeULEB128(DIE.AbbrCode, OS);
}
- virtual void onValue(const uint8_t U) {
+ void onValue(const uint8_t U) override {
writeInteger(U, OS, DebugInfo.IsLittleEndian);
}
- virtual void onValue(const uint16_t U) {
+ void onValue(const uint16_t U) override {
writeInteger(U, OS, DebugInfo.IsLittleEndian);
}
- virtual void onValue(const uint32_t U) {
+
+ void onValue(const uint32_t U) override {
writeInteger(U, OS, DebugInfo.IsLittleEndian);
}
- virtual void onValue(const uint64_t U, const bool LEB = false) {
+
+ void onValue(const uint64_t U, const bool LEB = false) override {
if (LEB)
encodeULEB128(U, OS);
else
writeInteger(U, OS, DebugInfo.IsLittleEndian);
}
- virtual void onValue(const int64_t S, const bool LEB = false) {
+ void onValue(const int64_t S, const bool LEB = false) override {
if (LEB)
encodeSLEB128(S, OS);
else
writeInteger(S, OS, DebugInfo.IsLittleEndian);
}
- virtual void onValue(const StringRef String) {
+ void onValue(const StringRef String) override {
OS.write(String.data(), String.size());
OS.write('\0');
}
- virtual void onValue(const MemoryBufferRef MBR) {
+ void onValue(const MemoryBufferRef MBR) override {
OS.write(MBR.getBufferStart(), MBR.getBufferSize());
}
@@ -280,7 +292,7 @@ void DWARFYAML::EmitDebugLine(raw_ostream &OS, const DWARFYAML::Data &DI) {
}
}
-typedef void (*EmitFuncType)(raw_ostream &, const DWARFYAML::Data &);
+using EmitFuncType = void (*)(raw_ostream &, const DWARFYAML::Data &);
static void
EmitDebugSectionImpl(const DWARFYAML::Data &DI, EmitFuncType EmitFunc,
diff --git a/lib/ObjectYAML/DWARFYAML.cpp b/lib/ObjectYAML/DWARFYAML.cpp
index edb9545f14b1..d6c09e1a35d7 100644
--- a/lib/ObjectYAML/DWARFYAML.cpp
+++ b/lib/ObjectYAML/DWARFYAML.cpp
@@ -171,6 +171,6 @@ void MappingTraits<DWARFYAML::InitialLength>::mapping(
IO.mapRequired("TotalLength64", InitialLength.TotalLength64);
}
-} // namespace llvm::yaml
+} // end namespace yaml
-} // namespace llvm
+} // end namespace llvm
diff --git a/lib/ObjectYAML/ELFYAML.cpp b/lib/ObjectYAML/ELFYAML.cpp
index dbd5498e003d..39741dab327a 100644
--- a/lib/ObjectYAML/ELFYAML.cpp
+++ b/lib/ObjectYAML/ELFYAML.cpp
@@ -12,12 +12,18 @@
//===----------------------------------------------------------------------===//
#include "llvm/ObjectYAML/ELFYAML.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MipsABIFlags.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <cassert>
+#include <cstdint>
namespace llvm {
-ELFYAML::Section::~Section() {}
+ELFYAML::Section::~Section() = default;
namespace yaml {
@@ -542,6 +548,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_REL>::enumeration(
llvm_unreachable("Unsupported architecture");
}
#undef ELF_RELOC
+ IO.enumFallback<Hex32>(Value);
}
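// With the fallback in place, relocation values that have no named
// enumerator round-trip as raw Hex32 literals instead of raising a YAML
// error.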
void ScalarEnumerationTraits<ELFYAML::MIPS_AFL_REG>::enumeration(
@@ -643,6 +650,7 @@ void MappingTraits<ELFYAML::FileHeader>::mapping(IO &IO,
}
namespace {
+
struct NormalizedOther {
NormalizedOther(IO &)
: Visibility(ELFYAML::ELF_STV(0)), Other(ELFYAML::ELF_STO(0)) {}
@@ -654,7 +662,8 @@ struct NormalizedOther {
ELFYAML::ELF_STV Visibility;
ELFYAML::ELF_STO Other;
};
-}
+
+} // end anonymous namespace
void MappingTraits<ELFYAML::Symbol>::mapping(IO &IO, ELFYAML::Symbol &Symbol) {
IO.mapOptional("Name", Symbol.Name, StringRef());
@@ -777,6 +786,7 @@ StringRef MappingTraits<std::unique_ptr<ELFYAML::Section>>::validate(
}
namespace {
+
struct NormalizedMips64RelType {
NormalizedMips64RelType(IO &)
: Type(ELFYAML::ELF_REL(ELF::R_MIPS_NONE)),
@@ -797,7 +807,8 @@ struct NormalizedMips64RelType {
ELFYAML::ELF_REL Type3;
ELFYAML::ELF_RSS SpecSym;
};
-}
+
+} // end anonymous namespace
void MappingTraits<ELFYAML::Relocation>::mapping(IO &IO,
ELFYAML::Relocation &Rel) {
@@ -838,4 +849,5 @@ LLVM_YAML_STRONG_TYPEDEF(uint32_t, MIPS_AFL_ASE)
LLVM_YAML_STRONG_TYPEDEF(uint32_t, MIPS_AFL_FLAGS1)
} // end namespace yaml
+
} // end namespace llvm
diff --git a/lib/ObjectYAML/MachOYAML.cpp b/lib/ObjectYAML/MachOYAML.cpp
index 461684827872..ab452a7bf6ef 100644
--- a/lib/ObjectYAML/MachOYAML.cpp
+++ b/lib/ObjectYAML/MachOYAML.cpp
@@ -12,16 +12,19 @@
//===----------------------------------------------------------------------===//
#include "llvm/ObjectYAML/MachOYAML.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/MachO.h"
-#include "llvm/Support/Casting.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/Host.h"
-
-#include <string.h> // For memcpy, memset and strnlen.
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cinttypes>
+#include <cstdint>
+#include <cstring>
namespace llvm {
-MachOYAML::LoadCommand::~LoadCommand() {}
+MachOYAML::LoadCommand::~LoadCommand() = default;
bool MachOYAML::LinkEditData::isEmpty() const {
return 0 ==
@@ -33,7 +36,7 @@ bool MachOYAML::LinkEditData::isEmpty() const {
namespace yaml {
void ScalarTraits<char_16>::output(const char_16 &Val, void *,
- llvm::raw_ostream &Out) {
+ raw_ostream &Out) {
auto Len = strnlen(&Val[0], 16);
Out << StringRef(&Val[0], Len);
}
@@ -51,8 +54,7 @@ StringRef ScalarTraits<char_16>::input(StringRef Scalar, void *, char_16 &Val) {
bool ScalarTraits<char_16>::mustQuote(StringRef S) { return needsQuotes(S); }
-void ScalarTraits<uuid_t>::output(const uuid_t &Val, void *,
- llvm::raw_ostream &Out) {
+void ScalarTraits<uuid_t>::output(const uuid_t &Val, void *, raw_ostream &Out) {
for (int Idx = 0; Idx < 16; ++Idx) {
Out << format("%02" PRIX32, Val[Idx]);
if (Idx == 3 || Idx == 5 || Idx == 7 || Idx == 9)
@@ -154,7 +156,7 @@ void MappingTraits<MachOYAML::LinkEditData>::mapping(
IO.mapOptional("BindOpcodes", LinkEditData.BindOpcodes);
IO.mapOptional("WeakBindOpcodes", LinkEditData.WeakBindOpcodes);
IO.mapOptional("LazyBindOpcodes", LinkEditData.LazyBindOpcodes);
- if(LinkEditData.ExportTrie.Children.size() > 0 || !IO.outputting())
+ if (!LinkEditData.ExportTrie.Children.empty() || !IO.outputting())
IO.mapOptional("ExportTrie", LinkEditData.ExportTrie);
IO.mapOptional("NameList", LinkEditData.NameList);
IO.mapOptional("StringTable", LinkEditData.StringTable);
@@ -308,13 +310,11 @@ void MappingTraits<MachO::dylib_command>::mapping(
void MappingTraits<MachO::dylinker_command>::mapping(
IO &IO, MachO::dylinker_command &LoadCommand) {
-
IO.mapRequired("name", LoadCommand.name);
}
void MappingTraits<MachO::dysymtab_command>::mapping(
IO &IO, MachO::dysymtab_command &LoadCommand) {
-
IO.mapRequired("ilocalsym", LoadCommand.ilocalsym);
IO.mapRequired("nlocalsym", LoadCommand.nlocalsym);
IO.mapRequired("iextdefsym", LoadCommand.iextdefsym);
@@ -337,7 +337,6 @@ void MappingTraits<MachO::dysymtab_command>::mapping(
void MappingTraits<MachO::encryption_info_command>::mapping(
IO &IO, MachO::encryption_info_command &LoadCommand) {
-
IO.mapRequired("cryptoff", LoadCommand.cryptoff);
IO.mapRequired("cryptsize", LoadCommand.cryptsize);
IO.mapRequired("cryptid", LoadCommand.cryptid);
@@ -345,7 +344,6 @@ void MappingTraits<MachO::encryption_info_command>::mapping(
void MappingTraits<MachO::encryption_info_command_64>::mapping(
IO &IO, MachO::encryption_info_command_64 &LoadCommand) {
-
IO.mapRequired("cryptoff", LoadCommand.cryptoff);
IO.mapRequired("cryptsize", LoadCommand.cryptsize);
IO.mapRequired("cryptid", LoadCommand.cryptid);
@@ -354,14 +352,12 @@ void MappingTraits<MachO::encryption_info_command_64>::mapping(
void MappingTraits<MachO::entry_point_command>::mapping(
IO &IO, MachO::entry_point_command &LoadCommand) {
-
IO.mapRequired("entryoff", LoadCommand.entryoff);
IO.mapRequired("stacksize", LoadCommand.stacksize);
}
void MappingTraits<MachO::fvmfile_command>::mapping(
IO &IO, MachO::fvmfile_command &LoadCommand) {
-
IO.mapRequired("name", LoadCommand.name);
IO.mapRequired("header_addr", LoadCommand.header_addr);
}
@@ -374,7 +370,6 @@ void MappingTraits<MachO::fvmlib>::mapping(IO &IO, MachO::fvmlib &FVMLib) {
void MappingTraits<MachO::fvmlib_command>::mapping(
IO &IO, MachO::fvmlib_command &LoadCommand) {
-
IO.mapRequired("fvmlib", LoadCommand.fvmlib);
}
@@ -383,20 +378,17 @@ void MappingTraits<MachO::ident_command>::mapping(
void MappingTraits<MachO::linkedit_data_command>::mapping(
IO &IO, MachO::linkedit_data_command &LoadCommand) {
-
IO.mapRequired("dataoff", LoadCommand.dataoff);
IO.mapRequired("datasize", LoadCommand.datasize);
}
void MappingTraits<MachO::linker_option_command>::mapping(
IO &IO, MachO::linker_option_command &LoadCommand) {
-
IO.mapRequired("count", LoadCommand.count);
}
void MappingTraits<MachO::prebind_cksum_command>::mapping(
IO &IO, MachO::prebind_cksum_command &LoadCommand) {
-
IO.mapRequired("cksum", LoadCommand.cksum);
}
@@ -405,7 +397,6 @@ void MappingTraits<MachO::load_command>::mapping(
void MappingTraits<MachO::prebound_dylib_command>::mapping(
IO &IO, MachO::prebound_dylib_command &LoadCommand) {
-
IO.mapRequired("name", LoadCommand.name);
IO.mapRequired("nmodules", LoadCommand.nmodules);
IO.mapRequired("linked_modules", LoadCommand.linked_modules);
@@ -413,7 +404,6 @@ void MappingTraits<MachO::prebound_dylib_command>::mapping(
void MappingTraits<MachO::routines_command>::mapping(
IO &IO, MachO::routines_command &LoadCommand) {
-
IO.mapRequired("init_address", LoadCommand.init_address);
IO.mapRequired("init_module", LoadCommand.init_module);
IO.mapRequired("reserved1", LoadCommand.reserved1);
@@ -426,7 +416,6 @@ void MappingTraits<MachO::routines_command>::mapping(
void MappingTraits<MachO::routines_command_64>::mapping(
IO &IO, MachO::routines_command_64 &LoadCommand) {
-
IO.mapRequired("init_address", LoadCommand.init_address);
IO.mapRequired("init_module", LoadCommand.init_module);
IO.mapRequired("reserved1", LoadCommand.reserved1);
@@ -439,7 +428,6 @@ void MappingTraits<MachO::routines_command_64>::mapping(
void MappingTraits<MachO::rpath_command>::mapping(
IO &IO, MachO::rpath_command &LoadCommand) {
-
IO.mapRequired("path", LoadCommand.path);
}
@@ -475,7 +463,6 @@ void MappingTraits<MachO::section_64>::mapping(IO &IO,
void MappingTraits<MachO::segment_command>::mapping(
IO &IO, MachO::segment_command &LoadCommand) {
-
IO.mapRequired("segname", LoadCommand.segname);
IO.mapRequired("vmaddr", LoadCommand.vmaddr);
IO.mapRequired("vmsize", LoadCommand.vmsize);
@@ -489,7 +476,6 @@ void MappingTraits<MachO::segment_command>::mapping(
void MappingTraits<MachO::segment_command_64>::mapping(
IO &IO, MachO::segment_command_64 &LoadCommand) {
-
IO.mapRequired("segname", LoadCommand.segname);
IO.mapRequired("vmaddr", LoadCommand.vmaddr);
IO.mapRequired("vmsize", LoadCommand.vmsize);
@@ -503,44 +489,37 @@ void MappingTraits<MachO::segment_command_64>::mapping(
void MappingTraits<MachO::source_version_command>::mapping(
IO &IO, MachO::source_version_command &LoadCommand) {
-
IO.mapRequired("version", LoadCommand.version);
}
void MappingTraits<MachO::sub_client_command>::mapping(
IO &IO, MachO::sub_client_command &LoadCommand) {
-
IO.mapRequired("client", LoadCommand.client);
}
void MappingTraits<MachO::sub_framework_command>::mapping(
IO &IO, MachO::sub_framework_command &LoadCommand) {
-
IO.mapRequired("umbrella", LoadCommand.umbrella);
}
void MappingTraits<MachO::sub_library_command>::mapping(
IO &IO, MachO::sub_library_command &LoadCommand) {
-
IO.mapRequired("sub_library", LoadCommand.sub_library);
}
void MappingTraits<MachO::sub_umbrella_command>::mapping(
IO &IO, MachO::sub_umbrella_command &LoadCommand) {
-
IO.mapRequired("sub_umbrella", LoadCommand.sub_umbrella);
}
void MappingTraits<MachO::symseg_command>::mapping(
IO &IO, MachO::symseg_command &LoadCommand) {
-
IO.mapRequired("offset", LoadCommand.offset);
IO.mapRequired("size", LoadCommand.size);
}
void MappingTraits<MachO::symtab_command>::mapping(
IO &IO, MachO::symtab_command &LoadCommand) {
-
IO.mapRequired("symoff", LoadCommand.symoff);
IO.mapRequired("nsyms", LoadCommand.nsyms);
IO.mapRequired("stroff", LoadCommand.stroff);
@@ -552,27 +531,23 @@ void MappingTraits<MachO::thread_command>::mapping(
void MappingTraits<MachO::twolevel_hints_command>::mapping(
IO &IO, MachO::twolevel_hints_command &LoadCommand) {
-
IO.mapRequired("offset", LoadCommand.offset);
IO.mapRequired("nhints", LoadCommand.nhints);
}
void MappingTraits<MachO::uuid_command>::mapping(
IO &IO, MachO::uuid_command &LoadCommand) {
-
IO.mapRequired("uuid", LoadCommand.uuid);
}
void MappingTraits<MachO::version_min_command>::mapping(
IO &IO, MachO::version_min_command &LoadCommand) {
-
IO.mapRequired("version", LoadCommand.version);
IO.mapRequired("sdk", LoadCommand.sdk);
}
void MappingTraits<MachO::note_command>::mapping(
IO &IO, MachO::note_command &LoadCommand) {
-
IO.mapRequired("data_owner", LoadCommand.data_owner);
IO.mapRequired("offset", LoadCommand.offset);
IO.mapRequired("size", LoadCommand.size);
@@ -580,13 +555,12 @@ void MappingTraits<MachO::note_command>::mapping(
void MappingTraits<MachO::build_version_command>::mapping(
IO &IO, MachO::build_version_command &LoadCommand) {
-
IO.mapRequired("platform", LoadCommand.platform);
IO.mapRequired("minos", LoadCommand.minos);
IO.mapRequired("sdk", LoadCommand.sdk);
IO.mapRequired("ntools", LoadCommand.ntools);
}
-} // namespace llvm::yaml
+} // end namespace yaml
-} // namespace llvm
+} // end namespace llvm
diff --git a/lib/ObjectYAML/ObjectYAML.cpp b/lib/ObjectYAML/ObjectYAML.cpp
index 4b7154ebb7c1..850c1a5a06c0 100644
--- a/lib/ObjectYAML/ObjectYAML.cpp
+++ b/lib/ObjectYAML/ObjectYAML.cpp
@@ -12,7 +12,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/ObjectYAML/ObjectYAML.h"
-#include "llvm/ObjectYAML/YAML.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/YAMLParser.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <string>
using namespace llvm;
using namespace yaml;
@@ -53,8 +56,8 @@ void MappingTraits<YamlObjectFile>::mapping(IO &IO,
IO.setError("YAML Object File missing document type tag!");
else
IO.setError(
- llvm::Twine("YAML Object File unsupported document type tag '") +
- llvm::Twine(Tag) + llvm::Twine("'!"));
+ Twine("YAML Object File unsupported document type tag '") +
+ Twine(Tag) + Twine("'!"));
}
}
}
diff --git a/lib/ObjectYAML/WasmYAML.cpp b/lib/ObjectYAML/WasmYAML.cpp
index 65703c6cf683..2040efdc9d11 100644
--- a/lib/ObjectYAML/WasmYAML.cpp
+++ b/lib/ObjectYAML/WasmYAML.cpp
@@ -12,9 +12,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/ObjectYAML/WasmYAML.h"
-#include "llvm/Object/Wasm.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/MipsABIFlags.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/YAMLTraits.h"
namespace llvm {
@@ -22,7 +23,7 @@ namespace WasmYAML {
// Declared here rather than in the header to comply with:
// http://llvm.org/docs/CodingStandards.html#provide-a-virtual-method-anchor-for-classes-in-headers
-Section::~Section() {}
+Section::~Section() = default;
} // end namespace WasmYAML
@@ -56,6 +57,8 @@ static void sectionMapping(IO &IO, WasmYAML::NameSection &Section) {
static void sectionMapping(IO &IO, WasmYAML::LinkingSection &Section) {
commonSectionMapping(IO, Section);
IO.mapRequired("Name", Section.Name);
+ IO.mapRequired("DataSize", Section.DataSize);
+ IO.mapRequired("DataAlignment", Section.DataAlignment);
IO.mapRequired("SymbolInfo", Section.SymbolInfos);
}
@@ -403,4 +406,5 @@ void ScalarEnumerationTraits<WasmYAML::RelocType>::enumeration(
}
} // end namespace yaml
+
} // end namespace llvm
diff --git a/lib/ObjectYAML/YAML.cpp b/lib/ObjectYAML/YAML.cpp
index 75cf1fbccc80..67b5764eadaa 100644
--- a/lib/ObjectYAML/YAML.cpp
+++ b/lib/ObjectYAML/YAML.cpp
@@ -16,11 +16,12 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cctype>
+#include <cstdint>
using namespace llvm;
void yaml::ScalarTraits<yaml::BinaryRef>::output(
- const yaml::BinaryRef &Val, void *, llvm::raw_ostream &Out) {
+ const yaml::BinaryRef &Val, void *, raw_ostream &Out) {
Val.writeAsHex(Out);
}
@@ -34,7 +35,7 @@ StringRef yaml::ScalarTraits<yaml::BinaryRef>::input(StringRef Scalar, void *,
if (!isxdigit(Scalar[I]))
return "BinaryRef hex string must contain only hex digits.";
Val = yaml::BinaryRef(Scalar);
- return StringRef();
+ return {};
}
void yaml::BinaryRef::writeAsBinary(raw_ostream &OS) const {
diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp
index 78d5ea955e64..0380bd991d71 100644
--- a/lib/Passes/PassBuilder.cpp
+++ b/lib/Passes/PassBuilder.cpp
@@ -161,8 +161,8 @@ static cl::opt<bool>
cl::desc("Run NewGVN instead of GVN"));
static cl::opt<bool> EnableEarlyCSEMemSSA(
- "enable-npm-earlycse-memssa", cl::init(false), cl::Hidden,
- cl::desc("Enable the EarlyCSE w/ MemorySSA pass for the new PM (default = off)"));
+ "enable-npm-earlycse-memssa", cl::init(true), cl::Hidden,
+ cl::desc("Enable the EarlyCSE w/ MemorySSA pass for the new PM (default = on)"));
static cl::opt<bool> EnableGVNHoist(
"enable-npm-gvn-hoist", cl::init(false), cl::Hidden,
@@ -480,6 +480,14 @@ static void addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
MPM.addPass(PGOInstrumentationUse(ProfileUseFile));
}
+static InlineParams
+getInlineParamsFromOptLevel(PassBuilder::OptimizationLevel Level) {
+ auto O3 = PassBuilder::O3;
+ unsigned OptLevel = Level > O3 ? 2 : Level;
+ unsigned SizeLevel = Level > O3 ? Level - O3 : 0;
+ return getInlineParams(OptLevel, SizeLevel);
+}
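+// For illustration (hypothetical call sites; assumes the OptimizationLevel
+// enum orders O0..O3 before Os and Oz, so Level - O3 gives the size level):
+//   getInlineParamsFromOptLevel(PassBuilder::O2) -> getInlineParams(2, 0)
+//   getInlineParamsFromOptLevel(PassBuilder::Os) -> getInlineParams(2, 1)
+//   getInlineParamsFromOptLevel(PassBuilder::Oz) -> getInlineParams(2, 2)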
+
ModulePassManager
PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
bool DebugLogging) {
@@ -527,13 +535,17 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
// Add all the requested passes for PGO, if requested.
if (PGOOpt) {
- assert(PGOOpt->RunProfileGen || PGOOpt->SamplePGO ||
+ assert(PGOOpt->RunProfileGen || !PGOOpt->SampleProfileFile.empty() ||
!PGOOpt->ProfileUseFile.empty());
- addPGOInstrPasses(MPM, DebugLogging, Level, PGOOpt->RunProfileGen,
- PGOOpt->ProfileGenFile, PGOOpt->ProfileUseFile);
+ if (PGOOpt->SampleProfileFile.empty())
+ addPGOInstrPasses(MPM, DebugLogging, Level, PGOOpt->RunProfileGen,
+ PGOOpt->ProfileGenFile, PGOOpt->ProfileUseFile);
+ else
+ MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile));
// Indirect call promotion that promotes intra-module targets only.
- MPM.addPass(PGOIndirectCallPromotion(false, PGOOpt && PGOOpt->SamplePGO));
+ MPM.addPass(PGOIndirectCallPromotion(
+ false, PGOOpt && !PGOOpt->SampleProfileFile.empty()));
}
// Require the GlobalsAA analysis for the module so we can query it within
@@ -558,8 +570,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
// Run the inliner first. The theory is that we are walking bottom-up and so
// the callees have already been fully optimized, and we want to inline them
// into the callers so that our optimizations can reflect that.
- // FIXME; Customize the threshold based on optimization level.
- MainCGPipeline.addPass(InlinerPass());
+ MainCGPipeline.addPass(InlinerPass(getInlineParamsFromOptLevel(Level)));
// Now deduce any function attributes based in the current code.
MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
@@ -751,9 +762,6 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level,
// Reduce the size of the IR as much as possible.
MPM.addPass(GlobalOptPass());
- // Rename anon globals to be able to export them in the summary.
- MPM.addPass(NameAnonGlobalPass());
-
return MPM;
}
@@ -772,9 +780,9 @@ PassBuilder::buildThinLTODefaultPipeline(OptimizationLevel Level,
// During the ThinLTO backend phase we perform early indirect call promotion
// here, before globalopt. Otherwise imported available_externally functions
// look unreferenced and are removed.
- MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */,
- PGOOpt && PGOOpt->SamplePGO &&
- !PGOOpt->ProfileUseFile.empty()));
+ MPM.addPass(PGOIndirectCallPromotion(
+ true /* InLTO */, PGOOpt && !PGOOpt->SampleProfileFile.empty() &&
+ !PGOOpt->ProfileUseFile.empty()));
// Add the core simplification pipeline.
MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging));
@@ -814,8 +822,8 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
// left by the earlier promotion pass that promotes intra-module targets.
// This two-step promotion is to save the compile time. For LTO, it should
// produce the same result as if we only do promotion here.
- MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */,
- PGOOpt && PGOOpt->SamplePGO));
+ MPM.addPass(PGOIndirectCallPromotion(
+ true /* InLTO */, PGOOpt && !PGOOpt->SampleProfileFile.empty()));
// Propagate constants at call sites into the functions they call. This
// opens opportunities for globalopt (and inlining) by substituting function
@@ -868,7 +876,8 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
// valuable as the inliner doesn't currently care whether it is inlining an
// invoke or a call.
// Run the inliner now.
- MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(InlinerPass()));
+ MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
+ InlinerPass(getInlineParamsFromOptLevel(Level))));
// Optimize globals again after we ran the inliner.
MPM.addPass(GlobalOptPass());
diff --git a/lib/ProfileData/Coverage/CoverageMapping.cpp b/lib/ProfileData/Coverage/CoverageMapping.cpp
index 4534e086b39e..8c5f136ea270 100644
--- a/lib/ProfileData/Coverage/CoverageMapping.cpp
+++ b/lib/ProfileData/Coverage/CoverageMapping.cpp
@@ -54,26 +54,26 @@ Counter CounterExpressionBuilder::get(const CounterExpression &E) {
return Counter::getExpression(I);
}
-void CounterExpressionBuilder::extractTerms(
- Counter C, int Sign, SmallVectorImpl<std::pair<unsigned, int>> &Terms) {
+void CounterExpressionBuilder::extractTerms(Counter C, int Factor,
+ SmallVectorImpl<Term> &Terms) {
switch (C.getKind()) {
case Counter::Zero:
break;
case Counter::CounterValueReference:
- Terms.push_back(std::make_pair(C.getCounterID(), Sign));
+ Terms.emplace_back(C.getCounterID(), Factor);
break;
case Counter::Expression:
const auto &E = Expressions[C.getExpressionID()];
- extractTerms(E.LHS, Sign, Terms);
- extractTerms(E.RHS, E.Kind == CounterExpression::Subtract ? -Sign : Sign,
- Terms);
+ extractTerms(E.LHS, Factor, Terms);
+ extractTerms(
+ E.RHS, E.Kind == CounterExpression::Subtract ? -Factor : Factor, Terms);
break;
}
}
Counter CounterExpressionBuilder::simplify(Counter ExpressionTree) {
// Gather constant terms.
- SmallVector<std::pair<unsigned, int>, 32> Terms;
+ SmallVector<Term, 32> Terms;
extractTerms(ExpressionTree, +1, Terms);
// If there are no terms, this is just a zero. The algorithm below assumes at
@@ -82,17 +82,15 @@ Counter CounterExpressionBuilder::simplify(Counter ExpressionTree) {
return Counter::getZero();
// Group the terms by counter ID.
- std::sort(Terms.begin(), Terms.end(),
- [](const std::pair<unsigned, int> &LHS,
- const std::pair<unsigned, int> &RHS) {
- return LHS.first < RHS.first;
+ std::sort(Terms.begin(), Terms.end(), [](const Term &LHS, const Term &RHS) {
+ return LHS.CounterID < RHS.CounterID;
});
// Combine terms by counter ID to eliminate counters that sum to zero.
auto Prev = Terms.begin();
for (auto I = Prev + 1, E = Terms.end(); I != E; ++I) {
- if (I->first == Prev->first) {
- Prev->second += I->second;
+ if (I->CounterID == Prev->CounterID) {
+ Prev->Factor += I->Factor;
continue;
}
++Prev;
@@ -103,24 +101,24 @@ Counter CounterExpressionBuilder::simplify(Counter ExpressionTree) {
Counter C;
// Create additions. We do this before subtractions to avoid constructs like
// ((0 - X) + Y), as opposed to (Y - X).
- for (auto Term : Terms) {
- if (Term.second <= 0)
+ for (auto T : Terms) {
+ if (T.Factor <= 0)
continue;
- for (int I = 0; I < Term.second; ++I)
+ for (int I = 0; I < T.Factor; ++I)
if (C.isZero())
- C = Counter::getCounter(Term.first);
+ C = Counter::getCounter(T.CounterID);
else
C = get(CounterExpression(CounterExpression::Add, C,
- Counter::getCounter(Term.first)));
+ Counter::getCounter(T.CounterID)));
}
// Create subtractions.
- for (auto Term : Terms) {
- if (Term.second >= 0)
+ for (auto T : Terms) {
+ if (T.Factor >= 0)
continue;
- for (int I = 0; I < -Term.second; ++I)
+ for (int I = 0; I < -T.Factor; ++I)
C = get(CounterExpression(CounterExpression::Subtract, C,
- Counter::getCounter(Term.first)));
+ Counter::getCounter(T.CounterID)));
}
return C;
}
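// A worked example of the rewritten pipeline: simplify((c0 + c1) - c1)
// extracts the terms {c0: +1, c1: +1, c1: -1}, sorts them by CounterID,
// combines the two c1 entries into a zero factor, and rebuilds just
// Counter c0 from the single remaining positive term.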
@@ -247,18 +245,6 @@ Error CoverageMapping::loadFunctionRecord(
return Error::success();
}
-Expected<std::unique_ptr<CoverageMapping>>
-CoverageMapping::load(CoverageMappingReader &CoverageReader,
- IndexedInstrProfReader &ProfileReader) {
- auto Coverage = std::unique_ptr<CoverageMapping>(new CoverageMapping());
-
- for (const auto &Record : CoverageReader)
- if (Error E = Coverage->loadFunctionRecord(Record, ProfileReader))
- return std::move(E);
-
- return std::move(Coverage);
-}
-
Expected<std::unique_ptr<CoverageMapping>> CoverageMapping::load(
ArrayRef<std::unique_ptr<CoverageMappingReader>> CoverageReaders,
IndexedInstrProfReader &ProfileReader) {
diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp
index 005061c4f068..a1d18724fcd5 100644
--- a/lib/ProfileData/InstrProf.cpp
+++ b/lib/ProfileData/InstrProf.cpp
@@ -504,9 +504,11 @@ void InstrProfRecord::mergeValueProfData(uint32_t ValueKind,
SIPE.addError(instrprof_error::value_site_count_mismatch);
return;
}
+ if (!ThisNumValueSites)
+ return;
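+  // Bailing out early keeps getOrCreateValueSitesForKind below from
+  // allocating an empty site vector when there is nothing to merge.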
std::vector<InstrProfValueSiteRecord> &ThisSiteRecords =
- getValueSitesForKind(ValueKind);
- std::vector<InstrProfValueSiteRecord> &OtherSiteRecords =
+ getOrCreateValueSitesForKind(ValueKind);
+ MutableArrayRef<InstrProfValueSiteRecord> OtherSiteRecords =
Src.getValueSitesForKind(ValueKind);
for (uint32_t I = 0; I < ThisNumValueSites; I++)
ThisSiteRecords[I].merge(SIPE, OtherSiteRecords[I], Weight);
@@ -533,11 +535,8 @@ void InstrProfRecord::merge(InstrProfRecord &Other, uint64_t Weight) {
}
void InstrProfRecord::scaleValueProfData(uint32_t ValueKind, uint64_t Weight) {
- uint32_t ThisNumValueSites = getNumValueSites(ValueKind);
- std::vector<InstrProfValueSiteRecord> &ThisSiteRecords =
- getValueSitesForKind(ValueKind);
- for (uint32_t I = 0; I < ThisNumValueSites; I++)
- ThisSiteRecords[I].scale(SIPE, Weight);
+ for (auto &R : getValueSitesForKind(ValueKind))
+ R.scale(SIPE, Weight);
}
void InstrProfRecord::scale(uint64_t Weight) {
@@ -583,7 +582,7 @@ void InstrProfRecord::addValueData(uint32_t ValueKind, uint32_t Site,
VData[I].Value = remapValue(VData[I].Value, ValueKind, ValueMap);
}
std::vector<InstrProfValueSiteRecord> &ValueSites =
- getValueSitesForKind(ValueKind);
+ getOrCreateValueSitesForKind(ValueKind);
if (N == 0)
ValueSites.emplace_back();
else
@@ -642,8 +641,9 @@ static ValueProfRecordClosure InstrProfRecordClosure = {
// Wrapper implementation using the closure mechanism.
uint32_t ValueProfData::getSize(const InstrProfRecord &Record) {
- InstrProfRecordClosure.Record = &Record;
- return getValueProfDataSize(&InstrProfRecordClosure);
+ auto Closure = InstrProfRecordClosure;
+ Closure.Record = &Record;
+ return getValueProfDataSize(&Closure);
}
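// Copying the closure into a local before setting Record leaves the
// file-scope InstrProfRecordClosure unmodified, so concurrent getSize()
// callers presumably no longer race on the shared Record pointer.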
// Wrapper implementation using the closure mechanism.
diff --git a/lib/Support/AMDGPUCodeObjectMetadata.cpp b/lib/Support/AMDGPUCodeObjectMetadata.cpp
index a00e371415a3..863093ab7def 100644
--- a/lib/Support/AMDGPUCodeObjectMetadata.cpp
+++ b/lib/Support/AMDGPUCodeObjectMetadata.cpp
@@ -20,8 +20,6 @@
using namespace llvm::AMDGPU;
using namespace llvm::AMDGPU::CodeObject;
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t)
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string)
LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Arg::Metadata)
LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata)
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index 234f7439a546..232efe648b03 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -327,6 +327,7 @@ enum ProcessorSubtypes {
INTEL_COREI7_SKYLAKE_AVX512,
INTEL_ATOM_BONNELL,
INTEL_ATOM_SILVERMONT,
+ INTEL_ATOM_GOLDMONT,
INTEL_KNIGHTS_LANDING,
AMDPENTIUM_K6,
AMDPENTIUM_K62,
@@ -707,7 +708,12 @@ getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model,
*Type = INTEL_ATOM;
*Subtype = INTEL_ATOM_SILVERMONT;
break; // "silvermont"
-
+ // Goldmont:
+ case 0x5c:
+ case 0x5f:
+ *Type = INTEL_ATOM;
+ *Subtype = INTEL_ATOM_GOLDMONT;
+ break; // "goldmont"
case 0x57:
*Type = INTEL_XEONPHI; // knl
*Subtype = INTEL_KNIGHTS_LANDING;
@@ -1070,6 +1076,8 @@ StringRef sys::getHostCPUName() {
switch (Subtype) {
case INTEL_ATOM_BONNELL:
return "bonnell";
+ case INTEL_ATOM_GOLDMONT:
+ return "goldmont";
case INTEL_ATOM_SILVERMONT:
return "silvermont";
default:
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index 227e792d83dc..85e782b2c048 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -240,11 +240,9 @@ getMemoryBufferForStream(int FD, const Twine &BufferName) {
// Read into Buffer until we hit EOF.
do {
Buffer.reserve(Buffer.size() + ChunkSize);
- ReadBytes = read(FD, Buffer.end(), ChunkSize);
- if (ReadBytes == -1) {
- if (errno == EINTR) continue;
+ ReadBytes = sys::RetryAfterSignal(-1, read, FD, Buffer.end(), ChunkSize);
+ if (ReadBytes == -1)
return std::error_code(errno, std::generic_category());
- }
Buffer.set_size(Buffer.size() + ReadBytes);
} while (ReadBytes != 0);
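// sys::RetryAfterSignal retries a raw syscall while it fails with EINTR.
// A minimal sketch of the idea, under the assumption that the real helper
// (in llvm/Support/Errno.h) has roughly this shape; requires <cerrno>:
template <typename FailT, typename Fun, typename... Args>
auto RetryAfterSignalSketch(const FailT &Fail, const Fun &F,
                            const Args &... As) -> decltype(F(As...)) {
  decltype(F(As...)) Res;
  do
    Res = F(As...);                      // reissue the interrupted call
  while (Res == Fail && errno == EINTR); // retry only on signal interruption
  return Res;                            // any other failure bubbles up
}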
@@ -391,13 +389,12 @@ getOpenFileImpl(int FD, const Twine &Filename, uint64_t FileSize,
while (BytesLeft) {
#ifdef HAVE_PREAD
- ssize_t NumRead = ::pread(FD, BufPtr, BytesLeft, MapSize-BytesLeft+Offset);
+ ssize_t NumRead = sys::RetryAfterSignal(-1, ::pread, FD, BufPtr, BytesLeft,
+ MapSize - BytesLeft + Offset);
#else
- ssize_t NumRead = ::read(FD, BufPtr, BytesLeft);
+ ssize_t NumRead = sys::RetryAfterSignal(-1, ::read, FD, BufPtr, BytesLeft);
#endif
if (NumRead == -1) {
- if (errno == EINTR)
- continue;
// Error while reading.
return std::error_code(errno, std::generic_category());
}
diff --git a/lib/Support/TargetParser.cpp b/lib/Support/TargetParser.cpp
index b16351906a4c..13bb6f23bc83 100644
--- a/lib/Support/TargetParser.cpp
+++ b/lib/Support/TargetParser.cpp
@@ -784,6 +784,42 @@ unsigned llvm::ARM::parseArchVersion(StringRef Arch) {
return 0;
}
+StringRef llvm::ARM::computeDefaultTargetABI(const Triple &TT, StringRef CPU) {
+ StringRef ArchName =
+ CPU.empty() ? TT.getArchName() : ARM::getArchName(ARM::parseCPUArch(CPU));
+
+ if (TT.isOSBinFormatMachO()) {
+ if (TT.getEnvironment() == Triple::EABI ||
+ TT.getOS() == Triple::UnknownOS ||
+ llvm::ARM::parseArchProfile(ArchName) == ARM::PK_M)
+ return "aapcs";
+ if (TT.isWatchABI())
+ return "aapcs16";
+ return "apcs-gnu";
+ } else if (TT.isOSWindows())
+ // FIXME: this is invalid for WindowsCE.
+ return "aapcs";
+
+ // Select the default based on the platform.
+ switch (TT.getEnvironment()) {
+ case Triple::Android:
+ case Triple::GNUEABI:
+ case Triple::GNUEABIHF:
+ case Triple::MuslEABI:
+ case Triple::MuslEABIHF:
+ return "aapcs-linux";
+ case Triple::EABIHF:
+ case Triple::EABI:
+ return "aapcs";
+ default:
+ if (TT.isOSNetBSD())
+ return "apcs-gnu";
+ if (TT.isOSOpenBSD())
+ return "aapcs-linux";
+ return "aapcs";
+ }
+}
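+// Illustrative results of the helper, read off the cases above
+// (hypothetical triples):
+//   armv7a-unknown-linux-gnueabihf -> "aapcs-linux" (Triple::GNUEABIHF)
+//   armv7-unknown-windows-msvc     -> "aapcs"       (isOSWindows path)
+//   armv7-unknown-netbsd           -> "apcs-gnu"    (default + isOSNetBSD)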
+
StringRef llvm::AArch64::getCanonicalArchName(StringRef Arch) {
return ARM::getCanonicalArchName(Arch);
}
diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc
index b6774692595b..45097eb918b7 100644
--- a/lib/Support/Unix/Path.inc
+++ b/lib/Support/Unix/Path.inc
@@ -737,10 +737,8 @@ std::error_code openFileForRead(const Twine &Name, int &ResultFD,
#ifdef O_CLOEXEC
OpenFlags |= O_CLOEXEC;
#endif
- while ((ResultFD = open(P.begin(), OpenFlags)) < 0) {
- if (errno != EINTR)
- return std::error_code(errno, std::generic_category());
- }
+ if ((ResultFD = sys::RetryAfterSignal(-1, open, P.begin(), OpenFlags)) < 0)
+ return std::error_code(errno, std::generic_category());
#ifndef O_CLOEXEC
int r = fcntl(ResultFD, F_SETFD, FD_CLOEXEC);
(void)r;
@@ -800,10 +798,8 @@ std::error_code openFileForWrite(const Twine &Name, int &ResultFD,
SmallString<128> Storage;
StringRef P = Name.toNullTerminatedStringRef(Storage);
- while ((ResultFD = open(P.begin(), OpenFlags, Mode)) < 0) {
- if (errno != EINTR)
- return std::error_code(errno, std::generic_category());
- }
+ if ((ResultFD = sys::RetryAfterSignal(-1, open, P.begin(), OpenFlags, Mode)) < 0)
+ return std::error_code(errno, std::generic_category());
#ifndef O_CLOEXEC
int r = fcntl(ResultFD, F_SETFD, FD_CLOEXEC);
(void)r;
diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc
index 1d0143c6716e..2d4662094682 100644
--- a/lib/Support/Unix/Process.inc
+++ b/lib/Support/Unix/Process.inc
@@ -207,13 +207,10 @@ std::error_code Process::FixupStandardFileDescriptors() {
for (int StandardFD : StandardFDs) {
struct stat st;
errno = 0;
- while (fstat(StandardFD, &st) < 0) {
+ if (RetryAfterSignal(-1, fstat, StandardFD, &st) < 0) {
assert(errno && "expected errno to be set if fstat failed!");
// fstat should return EBADF if the file descriptor is closed.
- if (errno == EBADF)
- break;
- // retry fstat if we got EINTR, otherwise bubble up the failure.
- if (errno != EINTR)
+ if (errno != EBADF)
return std::error_code(errno, std::generic_category());
}
// if fstat succeeds, move on to the next FD.
@@ -222,11 +219,8 @@ std::error_code Process::FixupStandardFileDescriptors() {
assert(errno == EBADF && "expected errno to have EBADF at this point!");
if (NullFD < 0) {
- while ((NullFD = open("/dev/null", O_RDWR)) < 0) {
- if (errno == EINTR)
- continue;
+ if ((NullFD = RetryAfterSignal(-1, open, "/dev/null", O_RDWR)) < 0)
return std::error_code(errno, std::generic_category());
- }
}
if (NullFD == StandardFD)
diff --git a/lib/Target/AArch64/AArch64CondBrTuning.cpp b/lib/Target/AArch64/AArch64CondBrTuning.cpp
index f27bc97ec3f3..0a948812ff33 100644
--- a/lib/Target/AArch64/AArch64CondBrTuning.cpp
+++ b/lib/Target/AArch64/AArch64CondBrTuning.cpp
@@ -22,7 +22,7 @@
/// cbz w8, .LBB1_2 -> b.eq .LBB1_2
///
/// 3) sub w8, w0, w1 -> subs w8, w0, w1 ; w8 has multiple uses.
-/// tbz w8, #31, .LBB6_2 -> b.ge .LBB6_2
+/// tbz w8, #31, .LBB6_2 -> b.pl .LBB6_2
///
//===----------------------------------------------------------------------===//
@@ -129,11 +129,11 @@ MachineInstr *AArch64CondBrTuning::convertToCondBr(MachineInstr &MI) {
break;
case AArch64::TBZW:
case AArch64::TBZX:
- CC = AArch64CC::GE;
+ CC = AArch64CC::PL;
break;
case AArch64::TBNZW:
case AArch64::TBNZX:
- CC = AArch64CC::LT;
+ CC = AArch64CC::MI;
break;
}
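// PL/MI rather than GE/LT: tbz/tbnz on bit 31 test only the sign bit, which
// after conversion to a flag-setting instruction corresponds to the N flag
// alone: PL is N == 0 and MI is N == 1. GE/LT also involve the V flag, so
// they can disagree with the bit test when signed overflow occurs.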
return BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::Bcc))
@@ -271,6 +271,7 @@ bool AArch64CondBrTuning::tryToTuneBranch(MachineInstr &MI,
}
break;
}
+ (void)NewCmp;
+ (void)NewBr;
assert(NewCmp && NewBr && "Expected new instructions.");
DEBUG(dbgs() << " with instruction:\n ");
diff --git a/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/lib/Target/AArch64/AArch64ConditionalCompares.cpp
index 00a0111f2bd2..9eda56c825a9 100644
--- a/lib/Target/AArch64/AArch64ConditionalCompares.cpp
+++ b/lib/Target/AArch64/AArch64ConditionalCompares.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -139,6 +140,7 @@ class SSACCmpConv {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
MachineRegisterInfo *MRI;
+ const MachineBranchProbabilityInfo *MBPI;
public:
/// The first block containing a conditional branch, dominating everything
@@ -186,8 +188,10 @@ private:
public:
/// runOnMachineFunction - Initialize per-function data structures.
- void runOnMachineFunction(MachineFunction &MF) {
+ void runOnMachineFunction(MachineFunction &MF,
+ const MachineBranchProbabilityInfo *MBPI) {
this->MF = &MF;
+ this->MBPI = MBPI;
TII = MF.getSubtarget().getInstrInfo();
TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
@@ -564,8 +568,40 @@ void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
// All CmpBB instructions are moved into Head, and CmpBB is deleted.
// Update the CFG first.
updateTailPHIs();
- Head->removeSuccessor(CmpBB, true);
- CmpBB->removeSuccessor(Tail, true);
+
+ // Save successor probabilities before removing CmpBB and Tail from their
+ // parents.
+ BranchProbability Head2CmpBB = MBPI->getEdgeProbability(Head, CmpBB);
+ BranchProbability CmpBB2Tail = MBPI->getEdgeProbability(CmpBB, Tail);
+
+ Head->removeSuccessor(CmpBB);
+ CmpBB->removeSuccessor(Tail);
+
+ // If Head and CmpBB had successor probabilities, update the probabilities to
+ // reflect the ccmp-conversion.
+ if (Head->hasSuccessorProbabilities() && CmpBB->hasSuccessorProbabilities()) {
+
+ // Head is allowed two successors. We've removed CmpBB, so the remaining
+ // successor is Tail. We need to increase the successor probability for
+ // Tail to account for the CmpBB path we removed.
+ //
+ // Pr(Tail|Head) += Pr(CmpBB|Head) * Pr(Tail|CmpBB).
+ assert(*Head->succ_begin() == Tail && "Head successor is not Tail");
+ BranchProbability Head2Tail = MBPI->getEdgeProbability(Head, Tail);
+ Head->setSuccProbability(Head->succ_begin(),
+ Head2Tail + Head2CmpBB * CmpBB2Tail);
+
+ // We will transfer successors of CmpBB to Head in a moment without
+ // normalizing the successor probabilities. Set the successor probabilities
+ // before doing so.
+ //
+ // Pr(I|Head) = Pr(CmpBB|Head) * Pr(I|CmpBB).
+ for (auto I = CmpBB->succ_begin(), E = CmpBB->succ_end(); I != E; ++I) {
+ BranchProbability CmpBB2I = MBPI->getEdgeProbability(CmpBB, *I);
+ CmpBB->setSuccProbability(I, Head2CmpBB * CmpBB2I);
+ }
+ }
+
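+  // A hypothetical worked example of the update above: with
+  // Pr(CmpBB|Head) = 0.6, Pr(Tail|Head) = 0.4, and Pr(Tail|CmpBB) = 0.5,
+  // Tail's probability from Head becomes 0.4 + 0.6 * 0.5 = 0.7, and each
+  // remaining CmpBB successor I contributes 0.6 * Pr(I|CmpBB), so the
+  // successor probabilities still sum to 1.0 after the transfer below.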
Head->transferSuccessorsAndUpdatePHIs(CmpBB);
DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc();
TII->removeBranch(*Head);
@@ -717,6 +753,7 @@ int SSACCmpConv::expectedCodeSizeDelta() const {
namespace {
class AArch64ConditionalCompares : public MachineFunctionPass {
+ const MachineBranchProbabilityInfo *MBPI;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
MCSchedModel SchedModel;
@@ -753,6 +790,7 @@ char AArch64ConditionalCompares::ID = 0;
INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares, "aarch64-ccmp",
"AArch64 CCMP Pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
INITIALIZE_PASS_END(AArch64ConditionalCompares, "aarch64-ccmp",
@@ -763,6 +801,7 @@ FunctionPass *llvm::createAArch64ConditionalCompares() {
}
void AArch64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
@@ -892,12 +931,13 @@ bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
DomTree = &getAnalysis<MachineDominatorTree>();
Loops = getAnalysisIfAvailable<MachineLoopInfo>();
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
Traces = &getAnalysis<MachineTraceMetrics>();
MinInstr = nullptr;
MinSize = MF.getFunction()->optForMinSize();
bool Changed = false;
- CmpConv.runOnMachineFunction(MF);
+ CmpConv.runOnMachineFunction(MF, MBPI);
// Visit blocks in dominator tree pre-order. The pre-order enables multiple
// cmp-conversions from the same head block.
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2965106fd270..aaf32a499bc3 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -7561,8 +7561,9 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
// Convert the integer vector to pointer vector if the element is pointer.
if (EltTy->isPointerTy())
- SubVec = Builder.CreateIntToPtr(SubVec, SVI->getType());
-
+ SubVec = Builder.CreateIntToPtr(
+ SubVec, VectorType::get(SVI->getType()->getVectorElementType(),
+ VecTy->getVectorNumElements()));
SubVecs[SVI].push_back(SubVec);
}
}
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index ad24612239fa..6cb723d187af 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -735,7 +735,7 @@ def : ShiftAlias<"rorv", RORVWr, GPR32>;
def : ShiftAlias<"rorv", RORVXr, GPR64>;
// Multiply-add
-let AddedComplexity = 7 in {
+let AddedComplexity = 5 in {
defm MADD : MulAccum<0, "madd", add>;
defm MSUB : MulAccum<1, "msub", sub>;
@@ -752,7 +752,7 @@ def : Pat<(i32 (mul (ineg GPR32:$Rn), GPR32:$Rm)),
(MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
def : Pat<(i64 (mul (ineg GPR64:$Rn), GPR64:$Rm)),
(MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
-} // AddedComplexity = 7
+} // AddedComplexity = 5
let AddedComplexity = 5 in {
def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>;
diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 9bfd570e9a82..07ce0e863c5e 100644
--- a/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -947,7 +947,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
if (DstRB.getID() != SrcRB.getID()) {
- DEBUG(dbgs() << "G_TRUNC input/output on different banks\n");
+ DEBUG(dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
return false;
}
@@ -964,16 +964,21 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
- DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
+ DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
return false;
}
if (DstRC == SrcRC) {
// Nothing to be done
+ } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
+ SrcTy == LLT::scalar(64)) {
+ llvm_unreachable("TableGen can import this case");
+ return false;
} else if (DstRC == &AArch64::GPR32RegClass &&
SrcRC == &AArch64::GPR64RegClass) {
I.getOperand(1).setSubReg(AArch64::sub_32);
} else {
+ DEBUG(dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
return false;
}
diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
index 01196817f311..4b568f3fba2b 100644
--- a/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -39,6 +39,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
const LLT v4s32 = LLT::vector(4, 32);
const LLT v2s64 = LLT::vector(2, 64);
+ for (auto Ty : {p0, s1, s8, s16, s32, s64})
+ setAction({G_IMPLICIT_DEF, Ty}, Legal);
+
for (unsigned BinOp : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR, G_SHL}) {
// These operations naturally get the right answer when used on
// GPR32, even if the actual type is narrower.
@@ -99,6 +102,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
// G_INSERT (It seems entirely reasonable that inputs shouldn't overlap).
}
+ for (auto Ty : {s1, s8, s16, s32, s64, p0})
+ setAction({G_EXTRACT, Ty}, Legal);
+
+ for (auto Ty : {s32, s64})
+ setAction({G_EXTRACT, 1, Ty}, Legal);
+
for (unsigned MemOp : {G_LOAD, G_STORE}) {
for (auto Ty : {s8, s16, s32, s64, p0, v2s32})
setAction({MemOp, Ty}, Legal);
diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp
index 45083df7ab45..f82b9dbc2c9f 100644
--- a/lib/Target/AArch64/AArch64MCInstLower.cpp
+++ b/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -151,13 +151,24 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO,
return MCOperand::createExpr(Expr);
}
+MCOperand AArch64MCInstLower::lowerSymbolOperandCOFF(const MachineOperand &MO,
+ MCSymbol *Sym) const {
+ MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
+ const MCExpr *Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx);
+ if (!MO.isJTI() && MO.getOffset())
+ Expr = MCBinaryExpr::createAdd(
+ Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
+ return MCOperand::createExpr(Expr);
+}
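+// The COFF lowering mirrors the ELF helper: a plain VK_None symbol
+// reference with a constant offset folded in only for non-jump-table
+// operands.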
+
MCOperand AArch64MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
MCSymbol *Sym) const {
if (Printer.TM.getTargetTriple().isOSDarwin())
return lowerSymbolOperandDarwin(MO, Sym);
+ if (Printer.TM.getTargetTriple().isOSBinFormatCOFF())
+ return lowerSymbolOperandCOFF(MO, Sym);
- assert(Printer.TM.getTargetTriple().isOSBinFormatELF() &&
- "Expect Darwin or ELF target");
+ assert(Printer.TM.getTargetTriple().isOSBinFormatELF() && "Invalid target");
return lowerSymbolOperandELF(MO, Sym);
}
diff --git a/lib/Target/AArch64/AArch64MCInstLower.h b/lib/Target/AArch64/AArch64MCInstLower.h
index 1e29b80c2d62..aa30fe1fa707 100644
--- a/lib/Target/AArch64/AArch64MCInstLower.h
+++ b/lib/Target/AArch64/AArch64MCInstLower.h
@@ -42,6 +42,8 @@ public:
MCSymbol *Sym) const;
MCOperand lowerSymbolOperandELF(const MachineOperand &MO,
MCSymbol *Sym) const;
+ MCOperand lowerSymbolOperandCOFF(const MachineOperand &MO,
+ MCSymbol *Sym) const;
MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp
index baf15ac540cf..fab92e139dd0 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -94,7 +94,7 @@ const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const {
if (TT.isOSDarwin())
return CSR_AArch64_TLS_Darwin_RegMask;
- assert(TT.isOSBinFormatELF() && "only expect Darwin or ELF TLS");
+ assert(TT.isOSBinFormatELF() && "Invalid target");
return CSR_AArch64_TLS_ELF_RegMask;
}
diff --git a/lib/Target/AArch64/AArch64SchedThunderX2T99.td b/lib/Target/AArch64/AArch64SchedThunderX2T99.td
index 3654eeca530a..10df50bcf156 100644
--- a/lib/Target/AArch64/AArch64SchedThunderX2T99.td
+++ b/lib/Target/AArch64/AArch64SchedThunderX2T99.td
@@ -1,4 +1,4 @@
-//=- AArch64SchedThunderX2T99.td - Cavium ThunderX T99 Scheduling ---*- tablegen -*-=//
+//=- AArch64SchedThunderX2T99.td - Cavium ThunderX T99 ---*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -79,75 +79,207 @@ def THX2T99LS01 : ProcResGroup<[THX2T99P4, THX2T99P5]>;
// 60 entry unified scheduler.
def THX2T99Any : ProcResGroup<[THX2T99P0, THX2T99P1, THX2T99P2,
- THX2T99P3, THX2T99P4, THX2T99P5]> {
- let BufferSize=60;
+ THX2T99P3, THX2T99P4, THX2T99P5]> {
+ let BufferSize = 60;
}
// Define commonly used write types for InstRW specializations.
// All definitions follow the format: THX2T99Write_<NumCycles>Cyc_<Resources>.
// 3 cycles on I1.
-def THX2T99Write_3Cyc_I1 : SchedWriteRes<[THX2T99I1]> { let Latency = 3; }
+def THX2T99Write_3Cyc_I1 : SchedWriteRes<[THX2T99I1]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+// 1 cycle on I2.
+def THX2T99Write_1Cyc_I2 : SchedWriteRes<[THX2T99I2]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
// 4 cycles on I1.
-def THX2T99Write_4Cyc_I1 : SchedWriteRes<[THX2T99I1]> { let Latency = 4; }
+def THX2T99Write_4Cyc_I1 : SchedWriteRes<[THX2T99I1]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+// 23 cycles on I1.
+def THX2T99Write_23Cyc_I1 : SchedWriteRes<[THX2T99I1]> {
+ let Latency = 23;
+ let ResourceCycles = [13, 23];
+ let NumMicroOps = 4;
+}
+
+// 39 cycles on I1.
+def THX2T99Write_39Cyc_I1 : SchedWriteRes<[THX2T99I1]> {
+ let Latency = 39;
+ let ResourceCycles = [13, 39];
+ let NumMicroOps = 4;
+}
// 1 cycle on I0, I1, or I2.
-def THX2T99Write_1Cyc_I012 : SchedWriteRes<[THX2T99I012]> { let Latency = 1; }
+def THX2T99Write_1Cyc_I012 : SchedWriteRes<[THX2T99I012]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+// 2 cycles on I0, I1, or I2.
+def THX2T99Write_2Cyc_I012 : SchedWriteRes<[THX2T99I012]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+// 4 cycles on I0, I1, or I2.
+def THX2T99Write_4Cyc_I012 : SchedWriteRes<[THX2T99I012]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+
+// 5 cycles on I0, I1, or I2.
+def THX2T99Write_5Cyc_I012 : SchedWriteRes<[THX2T99I012]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
// 5 cycles on F1.
-def THX2T99Write_5Cyc_F1 : SchedWriteRes<[THX2T99F1]> { let Latency = 5; }
+def THX2T99Write_5Cyc_F1 : SchedWriteRes<[THX2T99F1]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
// 7 cycles on F1.
-def THX2T99Write_7Cyc_F1 : SchedWriteRes<[THX2T99F1]> { let Latency = 7; }
+def THX2T99Write_7Cyc_F1 : SchedWriteRes<[THX2T99F1]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
// 4 cycles on F0 or F1.
-def THX2T99Write_4Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 4; }
+def THX2T99Write_4Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
// 5 cycles on F0 or F1.
-def THX2T99Write_5Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 5; }
+def THX2T99Write_5Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
// 6 cycles on F0 or F1.
-def THX2T99Write_6Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 6; }
+def THX2T99Write_6Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
// 7 cycles on F0 or F1.
-def THX2T99Write_7Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 7; }
+def THX2T99Write_7Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
// 8 cycles on F0 or F1.
-def THX2T99Write_8Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 8; }
+def THX2T99Write_8Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+
+// 10 cycles on F0 or F1.
+def THX2T99Write_10Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+}
// 16 cycles on F0 or F1.
def THX2T99Write_16Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
let Latency = 16;
+ let NumMicroOps = 3;
let ResourceCycles = [8];
}
// 23 cycles on F0 or F1.
def THX2T99Write_23Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
let Latency = 23;
+ let NumMicroOps = 3;
let ResourceCycles = [11];
}
// 1 cycle on LS0 or LS1.
-def THX2T99Write_1Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 1; }
+def THX2T99Write_1Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> {
+ let Latency = 0;
+}
+
+// 1 cycle on LS0 or LS1 and I0, I1, or I2.
+def THX2T99Write_1Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> {
+ let Latency = 0;
+ let NumMicroOps = 2;
+}
+
+// 1 cycle on LS0 or LS1 and 2 of I0, I1, or I2.
+def THX2T99Write_1Cyc_LS01_I012_I012 :
+ SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> {
+ let Latency = 0;
+ let NumMicroOps = 3;
+}
+
+// 2 cycles on LS0 or LS1.
+def THX2T99Write_2Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
// 4 cycles on LS0 or LS1.
-def THX2T99Write_4Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 4; }
+def THX2T99Write_4Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
// 5 cycles on LS0 or LS1.
-def THX2T99Write_5Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 5; }
+def THX2T99Write_5Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
// 6 cycles on LS0 or LS1.
-def THX2T99Write_6Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 6; }
+def THX2T99Write_6Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+// 4 cycles on LS0 or LS1 and I0, I1, or I2.
+def THX2T99Write_4Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+
+// 4 cycles on LS0 or LS1 and 2 of I0, I1, or I2.
+def THX2T99Write_4Cyc_LS01_I012_I012 :
+ SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
// 5 cycles on LS0 or LS1 and I0, I1, or I2.
def THX2T99Write_5Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> {
let Latency = 5;
- let NumMicroOps = 2;
+ let NumMicroOps = 3;
}
// 5 cycles on LS0 or LS1 and 2 of I0, I1, or I2.
-def THX2T99Write_6Cyc_LS01_I012_I012 :
+def THX2T99Write_5Cyc_LS01_I012_I012 :
+ SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+
+// 6 cycles on LS0 or LS1 and I0, I1, or I2.
+def THX2T99Write_6Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+}
+
+// 6 cycles on LS0 or LS1 and 2 of I0, I1, or I2.
+def THX2T99Write_6Cyc_LS01_I012_I012 :
SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> {
let Latency = 6;
let NumMicroOps = 3;
@@ -162,25 +294,25 @@ def THX2T99Write_1Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> {
// 5 cycles on LS0 or LS1 and F0 or F1.
def THX2T99Write_5Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> {
let Latency = 5;
- let NumMicroOps = 2;
+ let NumMicroOps = 3;
}
// 6 cycles on LS0 or LS1 and F0 or F1.
def THX2T99Write_6Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> {
let Latency = 6;
- let NumMicroOps = 2;
+ let NumMicroOps = 3;
}
// 7 cycles on LS0 or LS1 and F0 or F1.
def THX2T99Write_7Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> {
let Latency = 7;
- let NumMicroOps = 2;
+ let NumMicroOps = 3;
}
// 8 cycles on LS0 or LS1 and F0 or F1.
def THX2T99Write_8Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> {
let Latency = 8;
- let NumMicroOps = 2;
+ let NumMicroOps = 3;
}
// Define commonly used read types.
@@ -195,10 +327,8 @@ def : ReadAdvance<ReadID, 0>;
def : ReadAdvance<ReadExtrHi, 0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadVLD, 0>;
-
}
-
//===----------------------------------------------------------------------===//
// 3. Instruction Tables.
@@ -211,88 +341,217 @@ let SchedModel = ThunderX2T99Model in {
// Branch, immed
// Branch and link, immed
// Compare and branch
-def : WriteRes<WriteBr, [THX2T99I2]> { let Latency = 1; }
+def : WriteRes<WriteBr, [THX2T99I2]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+// Branch, register
+// Branch and link, register != LR
+// Branch and link, register = LR
+def : WriteRes<WriteBrReg, [THX2T99I2]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
def : WriteRes<WriteSys, []> { let Latency = 1; }
def : WriteRes<WriteBarrier, []> { let Latency = 1; }
def : WriteRes<WriteHint, []> { let Latency = 1; }
-def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+def : WriteRes<WriteAtomic, []> {
+ let Unsupported = 1;
+ let NumMicroOps = 2;
+}
-// Branch, register
-// Branch and link, register != LR
-// Branch and link, register = LR
-def : WriteRes<WriteBrReg, [THX2T99I2]> { let Latency = 1; }
+//---
+// Branch
+//---
+def : InstRW<[THX2T99Write_1Cyc_I2], (instrs B, BL, BR, BLR)>;
+def : InstRW<[THX2T99Write_1Cyc_I2], (instrs RET)>;
+def : InstRW<[THX2T99Write_1Cyc_I2], (instregex "^B.*")>;
+def : InstRW<[THX2T99Write_1Cyc_I2],
+ (instregex "^CBZ", "^CBNZ", "^TBZ", "^TBNZ")>;
//---
// 3.2 Arithmetic and Logical Instructions
// 3.3 Move and Shift Instructions
//---
+
// ALU, basic
// Conditional compare
// Conditional select
// Address generation
-def : WriteRes<WriteI, [THX2T99I012]> { let Latency = 1; }
+def : WriteRes<WriteI, [THX2T99I012]> {
+ let Latency = 1;
+ let ResourceCycles = [1, 3];
+ let NumMicroOps = 2;
+}
+
+def : InstRW<[WriteI],
+ (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
+ "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)",
+ "ADC?(W|X)r(i|r|s|x)", "ADCS?(W|X)r(i|r|s|x)",
+ "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
+ "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)",
+ "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)",
+ "SUBS?(W|X)r(i|r|s|x)", "SBC?(W|X)r(i|r|s|x)",
+ "SBCS?(W|X)r(i|r|s|x)", "CCMN?(W|X)r(i|r|s|x)",
+ "CCMP?(W|X)r(i|r|s|x)", "CSEL?(W|X)r(i|r|s|x)",
+ "CSINC?(W|X)r(i|r|s|x)", "CSINV?(W|X)r(i|r|s|x)",
+ "CSNEG?(W|X)r(i|r|s|x)")>;
+
def : InstRW<[WriteI], (instrs COPY)>;
// ALU, extend and/or shift
def : WriteRes<WriteISReg, [THX2T99I012]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ResourceCycles = [2, 3];
+ let NumMicroOps = 2;
}
+def : InstRW<[WriteISReg],
+ (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
+ "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)",
+ "ADC?(W|X)r(i|r|s|x)", "ADCS?(W|X)r(i|r|s|x)",
+ "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
+ "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)",
+ "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)",
+ "SUBS?(W|X)r(i|r|s|x)", "SBC?(W|X)r(i|r|s|x)",
+ "SBCS?(W|X)r(i|r|s|x)", "CCMN?(W|X)r(i|r|s|x)",
+ "CCMP?(W|X)r(i|r|s|x)", "CSEL?(W|X)r(i|r|s|x)",
+ "CSINC?(W|X)r(i|r|s|x)", "CSINV?(W|X)r(i|r|s|x)",
+ "CSNEG?(W|X)r(i|r|s|x)")>;
+
def : WriteRes<WriteIEReg, [THX2T99I012]> {
- let Latency = 2;
- let ResourceCycles = [2];
+ let Latency = 1;
+ let ResourceCycles = [1, 3];
+ let NumMicroOps = 2;
}
+def : InstRW<[WriteIEReg],
+ (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
+ "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)",
+ "ADC?(W|X)r(i|r|s|x)", "ADCS?(W|X)r(i|r|s|x)",
+ "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
+ "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)",
+ "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)",
+ "SUBS?(W|X)r(i|r|s|x)", "SBC?(W|X)r(i|r|s|x)",
+ "SBCS?(W|X)r(i|r|s|x)", "CCMN?(W|X)r(i|r|s|x)",
+ "CCMP?(W|X)r(i|r|s|x)", "CSEL?(W|X)r(i|r|s|x)",
+ "CSINC?(W|X)r(i|r|s|x)", "CSINV?(W|X)r(i|r|s|x)",
+ "CSNEG?(W|X)r(i|r|s|x)")>;
+
// Move immed
-def : WriteRes<WriteImm, [THX2T99I012]> { let Latency = 1; }
+def : WriteRes<WriteImm, [THX2T99I012]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+def : InstRW<[THX2T99Write_1Cyc_I012],
+ (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>;
+
+def : InstRW<[THX2T99Write_1Cyc_I012],
+ (instrs ASRVWr, ASRVXr, LSLVWr, LSLVXr, RORVWr, RORVXr)>;
// Variable shift
-def : WriteRes<WriteIS, [THX2T99I012]> { let Latency = 1; }
+def : WriteRes<WriteIS, [THX2T99I012]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
//---
// 3.4 Divide and Multiply Instructions
//---
// Divide, W-form
-// Latency range of 13-23. Take the average.
+// Latency range of 13-23 (W-form) / 13-39 (X-form).
def : WriteRes<WriteID32, [THX2T99I1]> {
- let Latency = 18;
- let ResourceCycles = [18];
+ let Latency = 23;
+ let ResourceCycles = [13, 23];
+ let NumMicroOps = 4;
}
// Divide, X-form
-// Latency range of 13-39. Take the average.
def : WriteRes<WriteID64, [THX2T99I1]> {
- let Latency = 26;
- let ResourceCycles = [26];
+ let Latency = 39;
+ let ResourceCycles = [13, 39];
+ let NumMicroOps = 4;
}
// Multiply accumulate, W-form
-def : WriteRes<WriteIM32, [THX2T99I012]> { let Latency = 5; }
+def : WriteRes<WriteIM32, [THX2T99I012]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
// Multiply accumulate, X-form
-def : WriteRes<WriteIM64, [THX2T99I012]> { let Latency = 5; }
+def : WriteRes<WriteIM64, [THX2T99I012]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+
+//def : InstRW<[WriteIM32, ReadIM, ReadIM, ReadIMA, THX2T99Write_5Cyc_I012],
+// (instrs MADDWrrr, MSUBWrrr)>;
+def : InstRW<[WriteIM32], (instrs MADDWrrr, MSUBWrrr)>;
+def : InstRW<[WriteIM32], (instrs MADDXrrr, MSUBXrrr)>;
+def : InstRW<[THX2T99Write_5Cyc_I012],
+ (instregex "(S|U)(MADDL|MSUBL)rrr")>;
+
+def : InstRW<[WriteID32], (instrs SDIVWr, UDIVWr)>;
+def : InstRW<[WriteID64], (instrs SDIVXr, UDIVXr)>;
// Bitfield extract, two reg
-def : WriteRes<WriteExtr, [THX2T99I012]> { let Latency = 1; }
+def : WriteRes<WriteExtr, [THX2T99I012]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+// Multiply high
+def : InstRW<[THX2T99Write_4Cyc_I1], (instrs SMULHrr, UMULHrr)>;
+
+// Miscellaneous Data-Processing Instructions
+// Bitfield extract
+def : InstRW<[THX2T99Write_1Cyc_I012], (instrs EXTRWrri, EXTRXrri)>;
+
+// Bitfield move, basic
+def : InstRW<[THX2T99Write_1Cyc_I012],
+ (instrs SBFMWri, SBFMXri, UBFMWri, UBFMXri)>;
-// Bitfield move, basic
// Bitfield move, insert
-// NOTE: Handled by WriteIS.
+def : InstRW<[THX2T99Write_1Cyc_I012], (instregex "^BFM")>;
+def : InstRW<[THX2T99Write_1Cyc_I012], (instregex "(S|U)?BFM.*")>;
// Count leading
def : InstRW<[THX2T99Write_3Cyc_I1], (instregex "^CLS(W|X)r$",
- "^CLZ(W|X)r$")>;
+ "^CLZ(W|X)r$")>;
+
+// Reverse bits
+def : InstRW<[THX2T99Write_1Cyc_I012], (instrs RBITWr, RBITXr)>;
+
+// Cryptography Extensions
+def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^AES[DE]")>;
+def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^AESI?MC")>;
+def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^PMULL")>;
+def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA1SU0")>;
+def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA1(H|SU1)")>;
+def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA1[CMP]")>;
+def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA256SU0")>;
+def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA256(H|H2|SU1)")>;
+
+// CRC Instructions
+// def : InstRW<[THX2T99Write_4Cyc_I1], (instregex "^CRC32", "^CRC32C")>;
+def : InstRW<[THX2T99Write_4Cyc_I1],
+ (instrs CRC32Brr, CRC32Hrr, CRC32Wrr, CRC32Xrr)>;
+
+def : InstRW<[THX2T99Write_4Cyc_I1],
+ (instrs CRC32CBrr, CRC32CHrr, CRC32CWrr, CRC32CXrr)>;
// Reverse bits/bytes
// NOTE: Handled by WriteI.
//---
-// 3.6 Load Instructions
+// 3.6 Load Instructions
// 3.10 FP Load Instructions
//---
@@ -300,13 +559,29 @@ def : InstRW<[THX2T99Write_3Cyc_I1], (instregex "^CLS(W|X)r$",
// Load register, unscaled immed
// Load register, immed unprivileged
// Load register, unsigned immed
-def : WriteRes<WriteLD, [THX2T99LS01]> { let Latency = 4; }
+def : WriteRes<WriteLD, [THX2T99LS01]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
// Load register, immed post-index
// NOTE: Handled by WriteLD, WriteI.
// Load register, immed pre-index
// NOTE: Handled by WriteLD, WriteAdr.
-def : WriteRes<WriteAdr, [THX2T99I012]> { let Latency = 1; }
+def : WriteRes<WriteAdr, [THX2T99I012]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+// Load pair, immed offset, normal
+// Load pair, immed offset, signed words, base != SP
+// Load pair, immed offset, signed words, base = SP
+// LDP only breaks into *one* LS micro-op. Thus
+// the resources are handled by WriteLD.
+def : WriteRes<WriteLDHi, []> {
+ let Latency = 5;
+ let NumMicroOps = 5;
+}
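+
+// In the load-pair tables below each LDP therefore carries two writes:
+// the first write class models the single load/store micro-op and the
+// first destination register, while WriteLDHi only delays the second
+// destination register, consuming no additional resources.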
// Load register offset, basic
// Load register, register offset, scale by 4/8
@@ -324,23 +599,229 @@ def THX2T99ReadAdrBase : SchedReadVariant<[
SchedVar<NoSchedPred, [ReadDefault]>]>;
def : SchedAlias<ReadAdrBase, THX2T99ReadAdrBase>;
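// A SchedReadVariant selects among read types using per-instruction
// predicates, with NoSchedPred as the fall-through arm; the SchedAlias
// then stands in for the generic ReadAdrBase operand read wherever the
// instruction tables reference it.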
-// Load pair, immed offset, normal
-// Load pair, immed offset, signed words, base != SP
-// Load pair, immed offset signed words, base = SP
-// LDP only breaks into *one* LS micro-op. Thus
-// the resources are handling by WriteLD.
-def : WriteRes<WriteLDHi, []> {
- let Latency = 5;
-}
-
// Load pair, immed pre-index, normal
// Load pair, immed pre-index, signed words
// Load pair, immed post-index, normal
// Load pair, immed post-index, signed words
// NOTE: Handled by WriteLD, WriteLDHi, WriteAdr.
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPDi)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPQi)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPSi)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPWi)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPXi)>;
+
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPDi)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPQi)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPSi)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPSWi)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPWi)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPXi)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRBui)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRDui)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRHui)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01], (instrs LDRQui)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01], (instrs LDRSui)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRDl)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRQl)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRWl)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRXl)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRBi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRHi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRWi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRXi)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSBWi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSBXi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSHWi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSHXi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSWi)>;
+
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPDpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPQpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPSpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPWpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPXpre)>;
+
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRBpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRDpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRHpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRQpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRSpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRWpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRXpre)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBWpre)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBXpre)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBWpost)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBXpost)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHWpre)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHXpre)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHWpost)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHXpost)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRBBpre)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRBBpost)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRHHpre)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRHHpost)>;
+
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPDpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPQpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPSpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPWpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPXpost)>;
+
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRBpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRDpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRHpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRQpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRSpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRWpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRXpost)>;
+
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPDpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPQpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPSpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPWpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPXpre)>;
+
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRBpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRDpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRHpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRQpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRSpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRWpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRXpre)>;
+
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPDpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPQpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPSpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPWpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPXpost)>;
+
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRBpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRDpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRHpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRQpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRSpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRWpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRXpost)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRBroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRDroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHHroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRQroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHWroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHXroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRWroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRXroW)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRBroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRDroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHHroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRQroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHWroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHXroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRWroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRXroX)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRBroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRDroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRHroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRHHroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRQroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRSroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRSHWroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRSHXroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRWroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRXroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRBroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRDroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRHroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRHHroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRQroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRSroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRSHWroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRSHXroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRWroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRXroX)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURBi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURBBi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURDi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURHi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURHHi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURQi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURXi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSBWi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSBXi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSHWi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSHXi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSWi)>;
+
+//---
+// Prefetch
+//---
+def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMl)>;
+def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFUMi)>;
+def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMui)>;
+def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMroW)>;
+def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMroX)>;
+
//--
-// 3.7 Store Instructions
+// 3.7 Store Instructions
// 3.11 FP Store Instructions
//--
@@ -382,6 +863,195 @@ def : WriteRes<WriteSTP, [THX2T99LS01, THX2T99SD]> {
// Store pair, immed pre-index, X-form
// NOTE: Handled by WriteAdr, WriteSTP.
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURBi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURBBi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURDi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURHi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURHHi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURQi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURSi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURWi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURXi)>;
+
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRBi)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRHi)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRWi)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRXi)>;
+
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPDi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPQi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPXi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPWi)>;
+
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPDi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPQi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPXi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPWi)>;
+
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRBui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRBui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRDui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRDui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRHui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRHui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRQui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRQui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRXui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRXui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRWui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRWui)>;
+
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STPDpre, STPDpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STPDpre, STPDpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STPDpre, STPDpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STPDpre, STPDpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STPQpre, STPQpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STPQpre, STPQpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STPQpre, STPQpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STPQpre, STPQpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STPSpre, STPSpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STPSpre, STPSpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STPSpre, STPSpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STPSpre, STPSpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STPWpre, STPWpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STPWpre, STPWpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STPWpre, STPWpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STPWpre, STPWpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STPXpre, STPXpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STPXpre, STPXpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STPXpre, STPXpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STPXpre, STPXpost)>;
+
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STRBpre, STRBpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRBpre, STRBpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STRBpre, STRBpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRBpre, STRBpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STRBBpre, STRBBpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRBBpre, STRBBpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STRBBpre, STRBBpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRBBpre, STRBBpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STRDpre, STRDpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRDpre, STRDpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STRDpre, STRDpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRDpre, STRDpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STRHpre, STRHpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRHpre, STRHpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STRHpre, STRHpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRHpre, STRHpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STRHHpre, STRHHpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRHHpre, STRHHpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STRHHpre, STRHHpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRHHpre, STRHHpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STRQpre, STRQpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRQpre, STRQpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STRQpre, STRQpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRQpre, STRQpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STRSpre, STRSpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRSpre, STRSpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STRSpre, STRSpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRSpre, STRSpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STRWpre, STRWpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRWpre, STRWpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STRWpre, STRWpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRWpre, STRWpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STRXpre, STRXpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRXpre, STRXpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STRXpre, STRXpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRXpre, STRXpost)>;
+
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRBroW, STRBroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRBroW, STRBroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRBBroW, STRBBroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRBBroW, STRBBroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRDroW, STRDroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRDroW, STRDroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRHroW, STRHroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRHroW, STRHroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRHHroW, STRHHroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRHHroW, STRHHroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRQroW, STRQroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRQroW, STRQroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRSroW, STRSroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRSroW, STRSroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRWroW, STRWroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRWroW, STRWroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRXroW, STRXroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRXroW, STRXroX)>;
+
//---
// 3.8 FP Data Processing Instructions
//---
@@ -389,28 +1059,95 @@ def : WriteRes<WriteSTP, [THX2T99LS01, THX2T99SD]> {
// FP absolute value
// FP min/max
// FP negate
-def : WriteRes<WriteF, [THX2T99F01]> { let Latency = 5; }
+def : WriteRes<WriteF, [THX2T99F01]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
// FP arithmetic
def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FADD", "^FSUB")>;
// FP compare
-def : WriteRes<WriteFCmp, [THX2T99F01]> { let Latency = 5; }
+def : WriteRes<WriteFCmp, [THX2T99F01]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
-// FP divide, S-form
-// FP square root, S-form
-def : WriteRes<WriteFDiv, [THX2T99F01]> {
+// FP Mul, Div, Sqrt
+def : WriteRes<WriteFDiv, [THX2T99F01]> {
+ let Latency = 22;
+ let ResourceCycles = [19];
+}
+
+def THX2T99XWriteFDiv : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 16;
+ let ResourceCycles = [8];
+ let NumMicroOps = 4;
+}
+
+def THX2T99XWriteFDivSP : SchedWriteRes<[THX2T99F01]> {
let Latency = 16;
let ResourceCycles = [8];
+ let NumMicroOps = 4;
}
+def THX2T99XWriteFDivDP : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 23;
+ let ResourceCycles = [12];
+ let NumMicroOps = 4;
+}
+
+def THX2T99XWriteFSqrtSP : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 16;
+ let ResourceCycles = [8];
+ let NumMicroOps = 4;
+}
+
+def THX2T99XWriteFSqrtDP : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 23;
+ let ResourceCycles = [12];
+ let NumMicroOps = 4;
+}
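+
+// In these divide and square-root classes ResourceCycles models a
+// partially pipelined unit: each op occupies the F0/F1 pipe for the
+// stated number of cycles, so a following divide can start only once
+// that occupancy ends, even though the result itself takes Latency
+// cycles. As an illustrative sketch (derived from the numbers above,
+// not a separate measurement): two back-to-back double-precision
+// divides start 12 cycles apart and complete at cycles 23 and 35.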
+
+// FP divide, S-form
+// FP square root, S-form
+def : InstRW<[THX2T99XWriteFDivSP], (instrs FDIVSrr)>;
+def : InstRW<[THX2T99XWriteFSqrtSP], (instrs FSQRTSr)>;
+def : InstRW<[THX2T99XWriteFDivSP], (instregex "^FDIVv.*32$")>;
+def : InstRW<[THX2T99XWriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
+def : InstRW<[THX2T99Write_16Cyc_F01], (instregex "^FDIVSrr", "^FSQRTSrr")>;
+
// FP divide, D-form
// FP square root, D-form
-def : InstRW<[THX2T99Write_23Cyc_F01], (instrs FDIVDrr, FSQRTDr)>;
+def : InstRW<[THX2T99XWriteFDivDP], (instrs FDIVDrr)>;
+def : InstRW<[THX2T99XWriteFSqrtDP], (instrs FSQRTDr)>;
+def : InstRW<[THX2T99XWriteFDivDP], (instregex "^FDIVv.*64$")>;
+def : InstRW<[THX2T99XWriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
+def : InstRW<[THX2T99Write_23Cyc_F01], (instregex "^FDIVDrr", "^FSQRTDrr")>;
// FP multiply
// FP multiply accumulate
-def : WriteRes<WriteFMul, [THX2T99F01]> { let Latency = 6; }
+def : WriteRes<WriteFMul, [THX2T99F01]> {
+ let Latency = 6;
+ let ResourceCycles = [2];
+ let NumMicroOps = 3;
+}
+
+def THX2T99XWriteFMul : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 6;
+ let ResourceCycles = [2];
+ let NumMicroOps = 3;
+}
+
+def THX2T99XWriteFMulAcc : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 6;
+ let ResourceCycles = [2];
+ let NumMicroOps = 3;
+}
+
+def : InstRW<[THX2T99XWriteFMul], (instregex "^FMUL", "^FNMUL")>;
+def : InstRW<[THX2T99XWriteFMulAcc],
+ (instregex "^FMADD", "^FMSUB", "^FNMADD", "^FNMSUB")>;
// FP round to integral
def : InstRW<[THX2T99Write_7Cyc_F01],
@@ -426,15 +1163,25 @@ def : InstRW<[THX2T99Write_4Cyc_F01], (instregex "^FCSEL")>;
// FP convert, from vec to vec reg
// FP convert, from gen to vec reg
// FP convert, from vec to gen reg
-def : WriteRes<WriteFCvt, [THX2T99F01]> { let Latency = 7; }
+def : WriteRes<WriteFCvt, [THX2T99F01]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
// FP move, immed
// FP move, register
-def : WriteRes<WriteFImm, [THX2T99F01]> { let Latency = 4; }
+def : WriteRes<WriteFImm, [THX2T99F01]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
// FP transfer, from gen to vec reg
// FP transfer, from vec to gen reg
-def : WriteRes<WriteFCopy, [THX2T99F01]> { let Latency = 4; }
+def : WriteRes<WriteFCopy, [THX2T99F01]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
def : InstRW<[THX2T99Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>;
//---
@@ -470,19 +1217,135 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>;
// ASIMD shift by register, basic, Q-form
// ASIMD shift by register, complex, D-form
// ASIMD shift by register, complex, Q-form
-def : WriteRes<WriteV, [THX2T99F01]> { let Latency = 7; }
+def : WriteRes<WriteV, [THX2T99F01]> {
+ let Latency = 7;
+ let NumMicroOps = 4;
+ let ResourceCycles = [4, 23];
+}
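+
+// WriteV is the catch-all ASIMD write type; the InstRW entries below
+// refine latency and resource usage for specific operation groups and
+// take precedence over this default.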
// ASIMD arith, reduce, 4H/4S
// ASIMD arith, reduce, 8B/8H
// ASIMD arith, reduce, 16B
-def : InstRW<[THX2T99Write_5Cyc_F01],
- (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>;
// ASIMD logical (MOV, MVN, ORN, ORR)
-def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^ORRv", "^ORNv", "^NOTv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01],
+ (instregex "^ANDv", "^BICv", "^EORv", "^MOVv", "^MVNv",
+ "^ORRv", "^ORNv", "^NOTv")>;
+// ASIMD arith, reduce
+def : InstRW<[THX2T99Write_10Cyc_F01],
+ (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>;
// ASIMD polynomial (8x8) multiply long
-def : InstRW<[THX2T99Write_5Cyc_F01], (instrs PMULLv8i8, PMULLv16i8)>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^(S|U|SQD)MULL")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>;
+def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^PMULL(v8i8|v16i8)")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^PMULL(v1i64|v2i64)")>;
+
+// ASIMD absolute diff accum, D-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>;
+// ASIMD absolute diff accum, Q-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>;
+// ASIMD absolute diff accum long
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^[SU]ABAL")>;
+// ASIMD arith, reduce, 8B/4H/2S
+def : InstRW<[THX2T99Write_5Cyc_F01],
+ (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>;
+// ASIMD arith, reduce, 8H/4S
+def : InstRW<[THX2T99Write_5Cyc_F01],
+ (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>;
+// ASIMD arith, reduce, 16B
+def : InstRW<[THX2T99Write_10Cyc_F01],
+ (instregex "^[SU]?ADDL?Vv16i8v$")>;
+// ASIMD max/min, reduce, 4H/4S
+def : InstRW<[THX2T99Write_10Cyc_F01],
+ (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>;
+// ASIMD max/min, reduce, 8B/8H
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>;
+// ASIMD max/min, reduce, 16B
+def : InstRW<[THX2T99Write_10Cyc_F01],
+ (instregex "^[SU](MIN|MAX)Vv16i8v$")>;
+// ASIMD multiply, D-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^(P?MUL|SQR?DMULH)" #
+ "(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)" #
+ "(_indexed)?$")>;
+// ASIMD multiply, Q-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^(P?MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
+// ASIMD multiply accumulate, D-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>;
+// ASIMD multiply accumulate, Q-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>;
+// ASIMD shift accumulate
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "SRSRAv","SSRAv","URSRAv","USRAv")>;
+
+// ASIMD shift by immed, basic
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "RSHRNv","SHRNv", "SQRSHRNv","SQRSHRUNv",
+ "SQSHRNv","SQSHRUNv", "UQRSHRNv",
+ "UQSHRNv","SQXTNv","SQXTUNv","UQXTNv")>;
+// ASIMD shift by immed, complex
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^[SU]?(Q|R){1,2}SHR")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SQSHLU")>;
+// ASIMD shift by register, basic, Q-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>;
+// ASIMD shift by register, complex, D-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^[SU][QR]{1,2}SHL" #
+ "(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>;
+// ASIMD shift by register, complex, Q-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>;
+
+// ASIMD Arithmetic
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "(ADD|SUB)(v8i8|v4i16|v2i32|v1i64)")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "(ADD|SUB)(v16i8|v8i16|v4i32|v2i64)")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "(ADD|SUB)HNv.*")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "(RADD|RSUB)HNv.*")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^SQADD", "^SQNEG", "^SQSUB", "^SRHADD",
+ "^SUQADD", "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "ADDP(v16i8|v8i16|v4i32|v2i64)")>;
+def : InstRW<[THX2T99Write_5Cyc_F01],
+ (instregex "((AND|ORN|EOR|EON)S?(Xr[rsi]|v16i8|v8i16|v4i32)|" #
+ "(ORR|BIC)S?(Xr[rs]|v16i8|v8i16|v4i32))")>;
+def : InstRW<[THX2T99Write_5Cyc_F01],
+ (instregex "(CLS|CLZ|CNT)(v4i32|v8i16|v16i8)")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SADALP","^UADALP")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SADDLPv","^UADDLPv")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SADDLV","^UADDLV")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^ADDVv","^SMAXVv","^UMAXVv","^SMINVv","^UMINVv")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^SABAv","^UABAv","^SABALv","^UABALv")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^SQADDv","^SQSUBv","^UQADDv","^UQSUBv")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SUQADDv","^USQADDv")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^ADDHNv","^RADDHNv", "^RSUBHNv",
+ "^SQABS", "^SQADD", "^SQNEG", "^SQSUB",
+ "^SRHADD", "^SUBHNv", "^SUQADD",
+ "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^CMEQv","^CMGEv","^CMGTv",
+ "^CMLEv","^CMLTv", "^CMHIv","^CMHSv")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^SMAXv","^SMINv","^UMAXv","^UMINv",
+ "^SMAXPv","^SMINPv","^UMAXPv","^UMINPv")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^SABDv","^UABDv", "^SABDLv","^UABDLv")>;
//---
// 3.13 ASIMD Floating-point Instructions
@@ -493,7 +1356,8 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FABSv")>;
// ASIMD FP arith, normal, D-form
// ASIMD FP arith, normal, Q-form
-def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FABDv", "^FADDv", "^FSUBv")>;
+def : InstRW<[THX2T99Write_6Cyc_F01],
+ (instregex "^FABDv", "^FADDv", "^FSUBv")>;
// ASIMD FP arith,pairwise, D-form
// ASIMD FP arith, pairwise, Q-form
@@ -503,8 +1367,15 @@ def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FADDPv")>;
// ASIMD FP compare, Q-form
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FACGEv", "^FACGTv")>;
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FCMEQv", "^FCMGEv",
- "^FCMGTv", "^FCMLEv",
- "^FCMLTv")>;
+ "^FCMGTv", "^FCMLEv",
+ "^FCMLTv")>;
+
+// ASIMD FP round, D-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^FRINT[AIMNPXZ](v2f32)")>;
+// ASIMD FP round, Q-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>;
// ASIMD FP convert, long
// ASIMD FP convert, narrow
@@ -512,14 +1383,26 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FCMEQv", "^FCMGEv",
// ASIMD FP convert, other, Q-form
// NOTE: Handled by WriteV.
+// ASIMD FP convert, long and narrow
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^FCVT(L|N|XN)v")>;
+// ASIMD FP convert, other, D-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>;
+// ASIMD FP convert, other, Q-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>;
+
// ASIMD FP divide, D-form, F32
def : InstRW<[THX2T99Write_16Cyc_F01], (instrs FDIVv2f32)>;
+def : InstRW<[THX2T99Write_16Cyc_F01], (instregex "FDIVv2f32")>;
// ASIMD FP divide, Q-form, F32
def : InstRW<[THX2T99Write_16Cyc_F01], (instrs FDIVv4f32)>;
+def : InstRW<[THX2T99Write_16Cyc_F01], (instregex "FDIVv4f32")>;
// ASIMD FP divide, Q-form, F64
def : InstRW<[THX2T99Write_23Cyc_F01], (instrs FDIVv2f64)>;
+def : InstRW<[THX2T99Write_23Cyc_F01], (instregex "FDIVv2f64")>;
// ASIMD FP max/min, normal, D-form
// ASIMD FP max/min, normal, Q-form
@@ -540,20 +1423,24 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMAXVv", "^FMAXNMVv",
// ASIMD FP multiply, Q-form, FZ
// ASIMD FP multiply, Q-form, no FZ
def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FMULv", "^FMULXv")>;
+def : InstRW<[THX2T99Write_6Cyc_F01],
+ (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>;
+def : InstRW<[THX2T99Write_6Cyc_F01],
+ (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>;
// ASIMD FP multiply accumulate, D-form, FZ
// ASIMD FP multiply accumulate, D-form, no FZ
// ASIMD FP multiply accumulate, Q-form, FZ
// ASIMD FP multiply accumulate, Q-form, no FZ
def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FMLAv", "^FMLSv")>;
+def : InstRW<[THX2T99Write_6Cyc_F01],
+ (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>;
+def : InstRW<[THX2T99Write_6Cyc_F01],
+ (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>;
// ASIMD FP negate
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FNEGv")>;
-// ASIMD FP round, D-form
-// ASIMD FP round, Q-form
-// NOTE: Handled by WriteV.
-
//--
// 3.14 ASIMD Miscellaneous Instructions
//--
@@ -563,37 +1450,66 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^RBITv")>;
// ASIMD bitwise insert, D-form
// ASIMD bitwise insert, Q-form
-def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^BIFv", "^BITv", "^BSLv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01],
+ (instregex "^BIFv", "^BITv", "^BSLv")>;
// ASIMD count, D-form
// ASIMD count, Q-form
-def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^CLSv", "^CLZv", "^CNTv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01],
+ (instregex "^CLSv", "^CLZv", "^CNTv")>;
// ASIMD duplicate, gen reg
// ASIMD duplicate, element
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUPv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^CPY")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUPv.+gpr")>;
// ASIMD extract
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^EXTv")>;
// ASIMD extract narrow
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^XTNv")>;
+
// ASIMD extract narrow, saturating
-// NOTE: Handled by WriteV.
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^SQXTNv", "^SQXTUNv", "^UQXTNv")>;
// ASIMD insert, element to element
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^INSv")>;
+// ASIMD transfer, element to gen reg
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^[SU]MOVv")>;
+
// ASIMD move, integer immed
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^MOVIv", "^MOVIDv")>;
// ASIMD move, FP immed
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMOVv")>;
+// ASIMD table lookup, D-form
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8One")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Two")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Three")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Four")>;
+
+// ASIMD table lookup, Q-form
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8One")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Two")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Three")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Four")>;
+
+// ASIMD transpose
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^TRN1", "^TRN2")>;
+
+// ASIMD unzip/zip
+def : InstRW<[THX2T99Write_5Cyc_F01],
+ (instregex "^UZP1", "^UZP2", "^ZIP1", "^ZIP2")>;
+
// ASIMD reciprocal estimate, D-form
// ASIMD reciprocal estimate, Q-form
-def : InstRW<[THX2T99Write_5Cyc_F01],
+def : InstRW<[THX2T99Write_5Cyc_F01],
(instregex "^FRECPEv", "^FRECPXv", "^URECPEv",
- "^FRSQRTEv", "^URSQRTEv")>;
+ "^FRSQRTEv", "^URSQRTEv")>;
// ASIMD reciprocal step, D-form, FZ
// ASIMD reciprocal step, D-form, no FZ
@@ -602,7 +1518,7 @@ def : InstRW<[THX2T99Write_5Cyc_F01],
def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FRECPSv", "^FRSQRTSv")>;
// ASIMD reverse
-def : InstRW<[THX2T99Write_5Cyc_F01],
+def : InstRW<[THX2T99Write_5Cyc_F01],
(instregex "^REV16v", "^REV32v", "^REV64v")>;
// ASIMD table lookup, D-form
@@ -610,135 +1526,135 @@ def : InstRW<[THX2T99Write_5Cyc_F01],
def : InstRW<[THX2T99Write_8Cyc_F01], (instregex "^TBLv", "^TBXv")>;
// ASIMD transfer, element to word or word
-def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^UMOVv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^[SU]MOVv")>;
// ASIMD transfer, element to gen reg
-def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^SMOVv", "^UMOVv")>;
+def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "(S|U)MOVv.*")>;
// ASIMD transfer gen reg to element
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^INSv")>;
// ASIMD transpose
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^TRN1v", "^TRN2v",
- "^UZP1v", "^UZP2v")>;
+ "^UZP1v", "^UZP2v")>;
// ASIMD unzip/zip
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^ZIP1v", "^ZIP2v")>;
//--
-// 3.15 ASIMD Load Instructions
+// 3.15 ASIMD Load Instructions
//--
// ASIMD load, 1 element, multiple, 1 reg, D-form
// ASIMD load, 1 element, multiple, 1 reg, Q-form
-def : InstRW<[THX2T99Write_4Cyc_LS01],
+def : InstRW<[THX2T99Write_4Cyc_LS01],
(instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr],
(instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 1 element, multiple, 2 reg, D-form
// ASIMD load, 1 element, multiple, 2 reg, Q-form
-def : InstRW<[THX2T99Write_4Cyc_LS01],
+def : InstRW<[THX2T99Write_4Cyc_LS01],
(instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr],
(instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 1 element, multiple, 3 reg, D-form
// ASIMD load, 1 element, multiple, 3 reg, Q-form
-def : InstRW<[THX2T99Write_5Cyc_LS01],
+def : InstRW<[THX2T99Write_5Cyc_LS01],
(instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_5Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_5Cyc_LS01, WriteAdr],
(instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 1 element, multiple, 4 reg, D-form
// ASIMD load, 1 element, multiple, 4 reg, Q-form
-def : InstRW<[THX2T99Write_6Cyc_LS01],
+def : InstRW<[THX2T99Write_6Cyc_LS01],
(instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_6Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_6Cyc_LS01, WriteAdr],
(instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 1 element, one lane, B/H/S
// ASIMD load, 1 element, one lane, D
def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD1i(8|16|32|64)$")>;
-def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
(instregex "^LD1i(8|16|32|64)_POST$")>;
// ASIMD load, 1 element, all lanes, D-form, B/H/S
// ASIMD load, 1 element, all lanes, D-form, D
// ASIMD load, 1 element, all lanes, Q-form
-def : InstRW<[THX2T99Write_5Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01],
(instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
(instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 2 element, multiple, D-form, B/H/S
// ASIMD load, 2 element, multiple, Q-form, D
-def : InstRW<[THX2T99Write_5Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01],
(instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
(instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 2 element, one lane, B/H
// ASIMD load, 2 element, one lane, S
// ASIMD load, 2 element, one lane, D
def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD2i(8|16|32|64)$")>;
-def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
(instregex "^LD2i(8|16|32|64)_POST$")>;
// ASIMD load, 2 element, all lanes, D-form, B/H/S
// ASIMD load, 2 element, all lanes, D-form, D
// ASIMD load, 2 element, all lanes, Q-form
-def : InstRW<[THX2T99Write_5Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01],
(instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
(instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 3 element, multiple, D-form, B/H/S
// ASIMD load, 3 element, multiple, Q-form, B/H/S
// ASIMD load, 3 element, multiple, Q-form, D
-def : InstRW<[THX2T99Write_8Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_8Cyc_LS01_F01],
(instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr],
(instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 3 element, one lane, B/H
// ASIMD load, 3 element, one lane, S
// ASIMD load, 3 element, one lane, D
def : InstRW<[THX2T99Write_7Cyc_LS01_F01], (instregex "^LD3i(8|16|32|64)$")>;
-def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr],
(instregex "^LD3i(8|16|32|64)_POST$")>;
// ASIMD load, 3 element, all lanes, D-form, B/H/S
// ASIMD load, 3 element, all lanes, D-form, D
// ASIMD load, 3 element, all lanes, Q-form, B/H/S
// ASIMD load, 3 element, all lanes, Q-form, D
-def : InstRW<[THX2T99Write_7Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_7Cyc_LS01_F01],
(instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr],
(instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 4 element, multiple, D-form, B/H/S
// ASIMD load, 4 element, multiple, Q-form, B/H/S
// ASIMD load, 4 element, multiple, Q-form, D
-def : InstRW<[THX2T99Write_8Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_8Cyc_LS01_F01],
(instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr],
(instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 4 element, one lane, B/H
// ASIMD load, 4 element, one lane, S
// ASIMD load, 4 element, one lane, D
def : InstRW<[THX2T99Write_6Cyc_LS01_F01], (instregex "^LD4i(8|16|32|64)$")>;
-def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr],
(instregex "^LD4i(8|16|32|64)_POST$")>;
// ASIMD load, 4 element, all lanes, D-form, B/H/S
// ASIMD load, 4 element, all lanes, D-form, D
// ASIMD load, 4 element, all lanes, Q-form, B/H/S
// ASIMD load, 4 element, all lanes, Q-form, D
-def : InstRW<[THX2T99Write_6Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_6Cyc_LS01_F01],
(instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr],
(instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
//--
@@ -747,106 +1663,83 @@ def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr],
// ASIMD store, 1 element, multiple, 1 reg, D-form
// ASIMD store, 1 element, multiple, 1 reg, Q-form
-def : InstRW<[THX2T99Write_1Cyc_LS01],
+def : InstRW<[THX2T99Write_1Cyc_LS01],
(instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
(instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, multiple, 2 reg, D-form
// ASIMD store, 1 element, multiple, 2 reg, Q-form
-def : InstRW<[THX2T99Write_1Cyc_LS01],
+def : InstRW<[THX2T99Write_1Cyc_LS01],
(instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
(instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, multiple, 3 reg, D-form
// ASIMD store, 1 element, multiple, 3 reg, Q-form
-def : InstRW<[THX2T99Write_1Cyc_LS01],
+def : InstRW<[THX2T99Write_1Cyc_LS01],
(instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
(instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, multiple, 4 reg, D-form
// ASIMD store, 1 element, multiple, 4 reg, Q-form
-def : InstRW<[THX2T99Write_1Cyc_LS01],
+def : InstRW<[THX2T99Write_1Cyc_LS01],
(instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
(instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, one lane, B/H/S
// ASIMD store, 1 element, one lane, D
-def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
(instregex "^ST1i(8|16|32|64)$")>;
-def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST1i(8|16|32|64)_POST$")>;
// ASIMD store, 2 element, multiple, D-form, B/H/S
// ASIMD store, 2 element, multiple, Q-form, B/H/S
// ASIMD store, 2 element, multiple, Q-form, D
-def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
(instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 2 element, one lane, B/H/S
// ASIMD store, 2 element, one lane, D
-def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
(instregex "^ST2i(8|16|32|64)$")>;
-def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST2i(8|16|32|64)_POST$")>;
// ASIMD store, 3 element, multiple, D-form, B/H/S
// ASIMD store, 3 element, multiple, Q-form, B/H/S
// ASIMD store, 3 element, multiple, Q-form, D
-def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
(instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 3 element, one lane, B/H
// ASIMD store, 3 element, one lane, S
// ASIMD store, 3 element, one lane, D
def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST3i(8|16|32|64)$")>;
-def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST3i(8|16|32|64)_POST$")>;
// ASIMD store, 4 element, multiple, D-form, B/H/S
// ASIMD store, 4 element, multiple, Q-form, B/H/S
// ASIMD store, 4 element, multiple, Q-form, D
-def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
(instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 4 element, one lane, B/H
// ASIMD store, 4 element, one lane, S
// ASIMD store, 4 element, one lane, D
def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST4i(8|16|32|64)$")>;
-def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST4i(8|16|32|64)_POST$")>;
-//--
-// 3.17 Cryptography Extensions
-//--
-
-// Crypto AES ops
-def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^AES")>;
-
-// Crypto polynomial (64x64) multiply long
-def : InstRW<[THX2T99Write_5Cyc_F1], (instrs PMULLv1i64, PMULLv2i64)>;
-
-// Crypto SHA1 xor ops
-// Crypto SHA1 schedule acceleration ops
-// Crypto SHA256 schedule acceleration op (1 u-op)
-// Crypto SHA256 schedule acceleration op (2 u-ops)
-// Crypto SHA256 hash acceleration ops
-def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA")>;
-
-//--
-// 3.18 CRC
-//--
-
-// CRC checksum ops
-def : InstRW<[THX2T99Write_4Cyc_I1], (instregex "^CRC32")>;
-
} // SchedModel = ThunderX2T99Model
+
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index 6660f0babb8a..1252f9403812 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -167,6 +167,8 @@ extern "C" void LLVMInitializeAArch64Target() {
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
if (TT.isOSBinFormatMachO())
return llvm::make_unique<AArch64_MachoTargetObjectFile>();
+ if (TT.isOSBinFormatCOFF())
+ return llvm::make_unique<AArch64_COFFTargetObjectFile>();
return llvm::make_unique<AArch64_ELFTargetObjectFile>();
}
@@ -179,6 +181,8 @@ static std::string computeDataLayout(const Triple &TT,
return "e-m:e-p:32:32-i8:8-i16:16-i64:64-S128";
if (TT.isOSBinFormatMachO())
return "e-m:o-i64:64-i128:128-n32:64-S128";
+ if (TT.isOSBinFormatCOFF())
+ return "e-m:w-i64:64-i128:128-n32:64-S128";
if (LittleEndian)
return "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128";
return "E-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128";
diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h
index 2c75a3258c1c..fefa7e26b79f 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/lib/Target/AArch64/AArch64TargetMachine.h
@@ -36,6 +36,7 @@ public:
~AArch64TargetMachine() override;
const AArch64Subtarget *getSubtargetImpl(const Function &F) const override;
+ const AArch64Subtarget *getSubtargetImpl() const = delete;
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.h b/lib/Target/AArch64/AArch64TargetObjectFile.h
index 47e3bce43f6e..9077eb7902fd 100644
--- a/lib/Target/AArch64/AArch64TargetObjectFile.h
+++ b/lib/Target/AArch64/AArch64TargetObjectFile.h
@@ -45,6 +45,9 @@ public:
const TargetMachine &TM) const override;
};
+/// This implementation is used for AArch64 COFF targets.
+class AArch64_COFFTargetObjectFile : public TargetLoweringObjectFileCOFF {};
+
} // end namespace llvm
#endif
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index a4328682b93c..a76f080530bb 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -20,6 +20,23 @@ using namespace llvm;
#define DEBUG_TYPE "aarch64tti"
+static cl::opt<bool> EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix",
+ cl::init(true), cl::Hidden);
+
+bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
+ const Function *Callee) const {
+ const TargetMachine &TM = getTLI()->getTargetMachine();
+
+ const FeatureBitset &CallerBits =
+ TM.getSubtargetImpl(*Caller)->getFeatureBits();
+ const FeatureBitset &CalleeBits =
+ TM.getSubtargetImpl(*Callee)->getFeatureBits();
+
+ // Inline a callee if its target-features are a subset of the caller's
+ // target-features.
+ return (CallerBits & CalleeBits) == CalleeBits;
+}
+
/// \brief Calculate the cost of materializing a 64-bit value. This helper
/// method might only calculate a fraction of a larger immediate. Therefore it
/// is valid to return a cost of ZERO.
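The subset test in areInlineCompatible above is plain bit arithmetic: the callee may be inlined only when every feature bit it requires is also set in the caller. A minimal standalone sketch of the same check, using std::bitset as a stand-in for LLVM's FeatureBitset (an assumption made purely for illustration):

#include <bitset>
#include <cassert>

// Stand-in for llvm::FeatureBitset; the width is arbitrary here.
using FeatureBits = std::bitset<64>;

// The callee is inline-compatible when its required features are a
// subset of the caller's: (Caller & Callee) == Callee.
static bool areInlineCompatible(const FeatureBits &Caller,
                                const FeatureBits &Callee) {
  return (Caller & Callee) == Callee;
}

int main() {
  FeatureBits Caller("1011"), Subset("0011"), Extra("0100");
  assert(areInlineCompatible(Caller, Subset));  // caller has both bits
  assert(!areInlineCompatible(Caller, Extra));  // bit 2 missing in caller
}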
@@ -631,10 +648,62 @@ unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
return ST->getMaxInterleaveFactor();
}
-void AArch64TTIImpl::getUnrollingPreferences(Loop *L,
+// For Falkor, we want to avoid having too many strided loads in a loop since
+// that can exhaust the HW prefetcher resources. We adjust the unroller
+// MaxCount preference below to attempt to ensure unrolling doesn't create too
+// many strided loads.
+static void
+getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TargetTransformInfo::UnrollingPreferences &UP) {
+ enum { MaxStridedLoads = 7 };
+ auto countStridedLoads = [](Loop *L, ScalarEvolution &SE) {
+ int StridedLoads = 0;
+ // FIXME? We could make this more precise by looking at the CFG and
+ // e.g. not counting loads in each side of an if-then-else diamond.
+ for (const auto BB : L->blocks()) {
+ for (auto &I : *BB) {
+ LoadInst *LMemI = dyn_cast<LoadInst>(&I);
+ if (!LMemI)
+ continue;
+
+ Value *PtrValue = LMemI->getPointerOperand();
+ if (L->isLoopInvariant(PtrValue))
+ continue;
+
+ const SCEV *LSCEV = SE.getSCEV(PtrValue);
+ const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
+ if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
+ continue;
+
+ // FIXME? We could take pairing of unrolled load copies into account
+ // by looking at the AddRec, but we would probably have to limit this
+ // to loops with no stores or other memory optimization barriers.
+ ++StridedLoads;
+ // We've seen enough strided loads that seeing more won't make a
+ // difference.
+ if (StridedLoads > MaxStridedLoads / 2)
+ return StridedLoads;
+ }
+ }
+ return StridedLoads;
+ };
+
+ int StridedLoads = countStridedLoads(L, SE);
+ DEBUG(dbgs() << "falkor-hwpf: detected " << StridedLoads
+ << " strided loads\n");
+ // Pick the largest power of 2 unroll count that won't result in too many
+ // strided loads.
+ if (StridedLoads) {
+ UP.MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads);
+ DEBUG(dbgs() << "falkor-hwpf: setting unroll MaxCount to " << UP.MaxCount
+ << '\n');
+ }
+}
+
+void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
// Enable partial unrolling and runtime unrolling.
- BaseT::getUnrollingPreferences(L, UP);
+ BaseT::getUnrollingPreferences(L, SE, UP);
// For inner loop, it is more likely to be a hot one, and the runtime check
// can be promoted out from LICM pass, so the overhead is less, let's try
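To make the MaxCount arithmetic above concrete: with MaxStridedLoads = 7 and three strided loads detected, 7/3 = 2 and Log2_32(2) = 1, so unrolling is capped at 2 copies (2 * 3 = 6 strided loads, still within budget). A small self-contained sketch of the same computation, with log2u32 standing in for llvm::Log2_32:

#include <cassert>

// Floor of log2 for nonzero inputs, as llvm::Log2_32 computes.
static unsigned log2u32(unsigned V) {
  unsigned R = 0;
  while (V >>= 1)
    ++R;
  return R;
}

int main() {
  const unsigned MaxStridedLoads = 7;
  // Largest power-of-2 unroll count whose strided-load total stays
  // at or below the budget: 1 << log2(7 / StridedLoads).
  unsigned StridedLoads = 3;
  unsigned MaxCount = 1u << log2u32(MaxStridedLoads / StridedLoads);
  assert(MaxCount == 2 && MaxCount * StridedLoads <= MaxStridedLoads);
}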
@@ -644,6 +713,10 @@ void AArch64TTIImpl::getUnrollingPreferences(Loop *L,
// Disable partial & runtime unrolling on -Os.
UP.PartialOptSizeThreshold = 0;
+
+ if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
+ EnableFalkorHWPFUnrollFix)
+ getFalkorUnrollingPreferences(L, SE, UP);
}
Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 290a1ca1f24b..31c037354925 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -51,6 +51,9 @@ public:
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
+ bool areInlineCompatible(const Function *Caller,
+ const Function *Callee) const;
+
/// \name Scalar TTI Implementations
/// @{
@@ -119,7 +122,8 @@ public:
int getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);
- void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
+ void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP);
Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
Type *ExpectedType);
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 3d075018904c..475f91016840 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -541,14 +541,13 @@ public:
return createAArch64ELFObjectWriter(OS, OSABI, IsLittleEndian, IsILP32);
}
- void processFixupValue(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target, bool &IsResolved) override;
+ bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target) override;
};
-void ELFAArch64AsmBackend::processFixupValue(const MCAssembler &Asm,
- const MCFixup &Fixup,
- const MCValue &Target,
- bool &IsResolved) {
+bool ELFAArch64AsmBackend::shouldForceRelocation(const MCAssembler &Asm,
+ const MCFixup &Fixup,
+ const MCValue &Target) {
// The ADRP instruction adds some multiple of 0x1000 to the current PC &
// ~0xfff. This means that the required offset to reach a symbol can vary by
// up to one step depending on where the ADRP is in memory. For example:
@@ -562,11 +561,24 @@ void ELFAArch64AsmBackend::processFixupValue(const MCAssembler &Asm,
// section isn't 0x1000-aligned, we therefore need to delegate this decision
// to the linker -- a relocation!
if ((uint32_t)Fixup.getKind() == AArch64::fixup_aarch64_pcrel_adrp_imm21)
- IsResolved = false;
+ return true;
+ return false;
}
}
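The comment above can be made concrete with a little page arithmetic: ADRP materializes (PC & ~0xfff) plus a page-sized immediate, so the immediate needed to reach a fixed symbol depends on which 4 KiB page the ADRP itself lands in. A hedged sketch of that effect (the addresses are invented for illustration):

#include <cassert>
#include <cstdint>

// Page delta an ADRP at PC needs to reach Sym: both are rounded down
// to their 4 KiB page before subtracting.
static int64_t adrpPageDelta(uint64_t PC, uint64_t Sym) {
  return (int64_t)((Sym & ~0xfffULL) - (PC & ~0xfffULL)) >> 12;
}

int main() {
  uint64_t Sym = 0x2010;
  // Moving the ADRP by 8 bytes across a page boundary changes the
  // required immediate, which is why the fixup must be delegated to
  // the linker as a relocation when final placement is unknown.
  assert(adrpPageDelta(0x0ff8, Sym) == 2);
  assert(adrpPageDelta(0x1000, Sym) == 1);
}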
+namespace {
+class COFFAArch64AsmBackend : public AArch64AsmBackend {
+public:
+ COFFAArch64AsmBackend(const Target &T, const Triple &TheTriple)
+ : AArch64AsmBackend(T, /*IsLittleEndian*/true) {}
+
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
+ return createAArch64WinCOFFObjectWriter(OS);
+ }
+};
+}
+
MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
const Triple &TheTriple,
@@ -575,7 +587,11 @@ MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T,
if (TheTriple.isOSBinFormatMachO())
return new DarwinAArch64AsmBackend(T, MRI);
- assert(TheTriple.isOSBinFormatELF() && "Expect either MachO or ELF target");
+ if (TheTriple.isOSBinFormatCOFF())
+ return new COFFAArch64AsmBackend(T, TheTriple);
+
+ assert(TheTriple.isOSBinFormatELF() && "Invalid target");
+
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
bool IsILP32 = Options.getABIName() == "ilp32";
return new ELFAArch64AsmBackend(T, OSABI, /*IsLittleEndian=*/true, IsILP32);
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index f7dda92fb551..89c3e5b4c76e 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -49,10 +49,11 @@ AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI,
/*HasRelocationAddend*/ true),
IsILP32(IsILP32) {}
-#define R_CLS(rtype) \
- IsILP32 ? ELF::R_AARCH64_P32_##rtype : ELF::R_AARCH64_##rtype
-#define BAD_ILP32_MOV(lp64rtype) "ILP32 absolute MOV relocation not "\
- "supported (LP64 eqv: " #lp64rtype ")"
+#define R_CLS(rtype) \
+ IsILP32 ? ELF::R_AARCH64_P32_##rtype : ELF::R_AARCH64_##rtype
+#define BAD_ILP32_MOV(lp64rtype) \
+ "ILP32 absolute MOV relocation not " \
+ "supported (LP64 eqv: " #lp64rtype ")"
// assumes IsILP32 is true
static bool isNonILP32reloc(const MCFixup &Fixup,
@@ -60,44 +61,45 @@ static bool isNonILP32reloc(const MCFixup &Fixup,
MCContext &Ctx) {
if ((unsigned)Fixup.getKind() != AArch64::fixup_aarch64_movw)
return false;
- switch(RefKind) {
- case AArch64MCExpr::VK_ABS_G3:
- Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G3));
- return true;
- case AArch64MCExpr::VK_ABS_G2:
- Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G2));
- return true;
- case AArch64MCExpr::VK_ABS_G2_S:
- Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_SABS_G2));
- return true;
- case AArch64MCExpr::VK_ABS_G2_NC:
- Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G2_NC));
- return true;
- case AArch64MCExpr::VK_ABS_G1_S:
- Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_SABS_G1));
- return true;
- case AArch64MCExpr::VK_ABS_G1_NC:
- Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G1_NC));
- return true;
- case AArch64MCExpr::VK_DTPREL_G2:
- Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLD_MOVW_DTPREL_G2));
- return true;
- case AArch64MCExpr::VK_DTPREL_G1_NC:
- Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLD_MOVW_DTPREL_G1_NC));
- return true;
- case AArch64MCExpr::VK_TPREL_G2:
- Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLE_MOVW_TPREL_G2));
- return true;
- case AArch64MCExpr::VK_TPREL_G1_NC:
- Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLE_MOVW_TPREL_G1_NC));
- return true;
- case AArch64MCExpr::VK_GOTTPREL_G1:
- Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSIE_MOVW_GOTTPREL_G1));
- return true;
- case AArch64MCExpr::VK_GOTTPREL_G0_NC:
- Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSIE_MOVW_GOTTPREL_G0_NC));
- return true;
- default: return false;
+ switch (RefKind) {
+ case AArch64MCExpr::VK_ABS_G3:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G3));
+ return true;
+ case AArch64MCExpr::VK_ABS_G2:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G2));
+ return true;
+ case AArch64MCExpr::VK_ABS_G2_S:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_SABS_G2));
+ return true;
+ case AArch64MCExpr::VK_ABS_G2_NC:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G2_NC));
+ return true;
+ case AArch64MCExpr::VK_ABS_G1_S:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_SABS_G1));
+ return true;
+ case AArch64MCExpr::VK_ABS_G1_NC:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G1_NC));
+ return true;
+ case AArch64MCExpr::VK_DTPREL_G2:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLD_MOVW_DTPREL_G2));
+ return true;
+ case AArch64MCExpr::VK_DTPREL_G1_NC:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLD_MOVW_DTPREL_G1_NC));
+ return true;
+ case AArch64MCExpr::VK_TPREL_G2:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLE_MOVW_TPREL_G2));
+ return true;
+ case AArch64MCExpr::VK_TPREL_G1_NC:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLE_MOVW_TPREL_G1_NC));
+ return true;
+ case AArch64MCExpr::VK_GOTTPREL_G1:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSIE_MOVW_GOTTPREL_G1));
+ return true;
+ case AArch64MCExpr::VK_GOTTPREL_G0_NC:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSIE_MOVW_GOTTPREL_G0_NC));
+ return true;
+ default:
+ return false;
}
return false;
}
@@ -130,7 +132,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
return R_CLS(PREL32);
case FK_Data_8:
if (IsILP32) {
- Ctx.reportError(Fixup.getLoc(), "ILP32 8 byte PC relative data "
+ Ctx.reportError(Fixup.getLoc(),
+ "ILP32 8 byte PC relative data "
"relocation not supported (LP64 eqv: PREL64)");
return ELF::R_AARCH64_NONE;
} else
@@ -178,7 +181,7 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
}
} else {
if (IsILP32 && isNonILP32reloc(Fixup, RefKind, Ctx))
- return ELF::R_AARCH64_NONE;
+ return ELF::R_AARCH64_NONE;
switch ((unsigned)Fixup.getKind()) {
case FK_Data_1:
Ctx.reportError(Fixup.getLoc(), "1-byte data relocations not supported");
@@ -189,8 +192,9 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
return R_CLS(ABS32);
case FK_Data_8:
if (IsILP32) {
- Ctx.reportError(Fixup.getLoc(), "ILP32 8 byte absolute data "
- "relocation not supported (LP64 eqv: ABS64)");
+ Ctx.reportError(Fixup.getLoc(),
+ "ILP32 8 byte absolute data "
+ "relocation not supported (LP64 eqv: ABS64)");
return ELF::R_AARCH64_NONE;
} else
return ELF::R_AARCH64_ABS64;
@@ -262,7 +266,7 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
} else {
Ctx.reportError(Fixup.getLoc(),
"LP64 4 byte unchecked GOT load/store relocation "
- "not supported (ILP32 eqv: LD32_GOT_LO12_NC");
+ "not supported (ILP32 eqv: LD32_GOT_LO12_NC");
return ELF::R_AARCH64_NONE;
}
}
@@ -270,12 +274,12 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
if (IsILP32) {
Ctx.reportError(Fixup.getLoc(),
"ILP32 4 byte checked GOT load/store relocation "
- "not supported (unchecked eqv: LD32_GOT_LO12_NC)");
+ "not supported (unchecked eqv: LD32_GOT_LO12_NC)");
} else {
Ctx.reportError(Fixup.getLoc(),
"LP64 4 byte checked GOT load/store relocation "
- "not supported (unchecked/ILP32 eqv: "
- "LD32_GOT_LO12_NC)");
+ "not supported (unchecked/ILP32 eqv: "
+ "LD32_GOT_LO12_NC)");
}
return ELF::R_AARCH64_NONE;
}
@@ -283,7 +287,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
if (IsILP32) {
return ELF::R_AARCH64_P32_TLSIE_LD32_GOTTPREL_LO12_NC;
} else {
- Ctx.reportError(Fixup.getLoc(), "LP64 32-bit load/store "
+ Ctx.reportError(Fixup.getLoc(),
+ "LP64 32-bit load/store "
"relocation not supported (ILP32 eqv: "
"TLSIE_LD32_GOTTPREL_LO12_NC)");
return ELF::R_AARCH64_NONE;
@@ -295,14 +300,14 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
} else {
Ctx.reportError(Fixup.getLoc(),
"LP64 4 byte TLSDESC load/store relocation "
- "not supported (ILP32 eqv: TLSDESC_LD64_LO12)");
+ "not supported (ILP32 eqv: TLSDESC_LD64_LO12)");
return ELF::R_AARCH64_NONE;
}
}
Ctx.reportError(Fixup.getLoc(),
"invalid fixup for 32-bit load/store instruction "
- "fixup_aarch64_ldst_imm12_scale4");
+ "fixup_aarch64_ldst_imm12_scale4");
return ELF::R_AARCH64_NONE;
case AArch64::fixup_aarch64_ldst_imm12_scale8:
if (SymLoc == AArch64MCExpr::VK_ABS && IsNC)
@@ -312,8 +317,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
return ELF::R_AARCH64_LD64_GOT_LO12_NC;
} else {
Ctx.reportError(Fixup.getLoc(), "ILP32 64-bit load/store "
- "relocation not supported (LP64 eqv: "
- "LD64_GOT_LO12_NC)");
+ "relocation not supported (LP64 eqv: "
+ "LD64_GOT_LO12_NC)");
return ELF::R_AARCH64_NONE;
}
}
@@ -330,8 +335,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
return ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC;
} else {
Ctx.reportError(Fixup.getLoc(), "ILP32 64-bit load/store "
- "relocation not supported (LP64 eqv: "
- "TLSIE_LD64_GOTTPREL_LO12_NC)");
+ "relocation not supported (LP64 eqv: "
+ "TLSIE_LD64_GOTTPREL_LO12_NC)");
return ELF::R_AARCH64_NONE;
}
}
@@ -340,8 +345,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
return ELF::R_AARCH64_TLSDESC_LD64_LO12;
} else {
Ctx.reportError(Fixup.getLoc(), "ILP32 64-bit load/store "
- "relocation not supported (LP64 eqv: "
- "TLSDESC_LD64_LO12)");
+ "relocation not supported (LP64 eqv: "
+ "TLSDESC_LD64_LO12)");
return ELF::R_AARCH64_NONE;
}
}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index 031aa8b81e35..a0de3c39562b 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "AArch64TargetStreamer.h"
+#include "AArch64WinCOFFStreamer.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
@@ -30,6 +31,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCWinCOFFStreamer.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
@@ -210,6 +212,8 @@ createAArch64ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
const Triple &TT = STI.getTargetTriple();
if (TT.isOSBinFormatELF())
return new AArch64TargetELFStreamer(S);
+ if (TT.isOSBinFormatCOFF())
+ return new AArch64TargetWinCOFFStreamer(S);
return nullptr;
}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
index 0f5b765c7697..4293dcba955e 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
@@ -16,53 +16,47 @@ namespace llvm {
namespace AArch64 {
enum Fixups {
- // fixup_aarch64_pcrel_adr_imm21 - A 21-bit pc-relative immediate inserted into
- // an ADR instruction.
+ // A 21-bit pc-relative immediate inserted into an ADR instruction.
fixup_aarch64_pcrel_adr_imm21 = FirstTargetFixupKind,
- // fixup_aarch64_pcrel_adrp_imm21 - A 21-bit pc-relative immediate inserted into
- // an ADRP instruction.
+ // A 21-bit pc-relative immediate inserted into an ADRP instruction.
fixup_aarch64_pcrel_adrp_imm21,
- // fixup_aarch64_imm12 - 12-bit fixup for add/sub instructions.
- // No alignment adjustment. All value bits are encoded.
+ // 12-bit fixup for add/sub instructions. No alignment adjustment. All value
+ // bits are encoded.
fixup_aarch64_add_imm12,
- // fixup_aarch64_ldst_imm12_* - unsigned 12-bit fixups for load and
- // store instructions.
+ // Unsigned 12-bit fixups for load and store instructions.
fixup_aarch64_ldst_imm12_scale1,
fixup_aarch64_ldst_imm12_scale2,
fixup_aarch64_ldst_imm12_scale4,
fixup_aarch64_ldst_imm12_scale8,
fixup_aarch64_ldst_imm12_scale16,
- // fixup_aarch64_ldr_pcrel_imm19 - The high 19 bits of a 21-bit pc-relative
- // immediate. Same encoding as fixup_aarch64_pcrel_adrhi, except this is used by
- // pc-relative loads and generates relocations directly when necessary.
+ // The high 19 bits of a 21-bit pc-relative immediate. Same encoding as
+ // fixup_aarch64_pcrel_adrhi, except this is used by pc-relative loads and
+ // generates relocations directly when necessary.
fixup_aarch64_ldr_pcrel_imm19,
// FIXME: comment
fixup_aarch64_movw,
- // fixup_aarch64_pcrel_imm14 - The high 14 bits of a 21-bit pc-relative
- // immediate.
+ // The high 14 bits of a 21-bit pc-relative immediate.
fixup_aarch64_pcrel_branch14,
- // fixup_aarch64_pcrel_branch19 - The high 19 bits of a 21-bit pc-relative
- // immediate. Same encoding as fixup_aarch64_pcrel_adrhi, except this is use by
- // b.cc and generates relocations directly when necessary.
+ // The high 19 bits of a 21-bit pc-relative immediate. Same encoding as
+ // fixup_aarch64_pcrel_adrhi, except this is used by b.cc and generates
+ // relocations directly when necessary.
fixup_aarch64_pcrel_branch19,
- // fixup_aarch64_pcrel_branch26 - The high 26 bits of a 28-bit pc-relative
- // immediate.
+ // The high 26 bits of a 28-bit pc-relative immediate.
fixup_aarch64_pcrel_branch26,
- // fixup_aarch64_pcrel_call26 - The high 26 bits of a 28-bit pc-relative
- // immediate. Distinguished from branch26 only on ELF.
+ // The high 26 bits of a 28-bit pc-relative immediate. Distinguished from
+ // branch26 only on ELF.
fixup_aarch64_pcrel_call26,
- // fixup_aarch64_tlsdesc_call - zero-space placeholder for the ELF
- // R_AARCH64_TLSDESC_CALL relocation.
+ // Zero-space placeholder for the ELF R_AARCH64_TLSDESC_CALL relocation.
fixup_aarch64_tlsdesc_call,
// Marker
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index 1b28df963b40..fc808ee0cdd6 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -100,3 +100,7 @@ AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(const Triple &T) {
HasIdentDirective = true;
}
+
+AArch64MCAsmInfoCOFF::AArch64MCAsmInfoCOFF() {
+ CommentString = ";";
+}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
index 253cd30f26ee..2d7107a37244 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
@@ -14,6 +14,7 @@
#ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64MCASMINFO_H
#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64MCASMINFO_H
+#include "llvm/MC/MCAsmInfoCOFF.h"
#include "llvm/MC/MCAsmInfoDarwin.h"
#include "llvm/MC/MCAsmInfoELF.h"
@@ -33,6 +34,10 @@ struct AArch64MCAsmInfoELF : public MCAsmInfoELF {
explicit AArch64MCAsmInfoELF(const Triple &T);
};
+struct AArch64MCAsmInfoCOFF : public MCAsmInfoCOFF {
+ explicit AArch64MCAsmInfoCOFF();
+};
+
} // namespace llvm
#endif
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index f710065d9bc7..a2555496cdb9 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -14,6 +14,7 @@
#include "AArch64MCTargetDesc.h"
#include "AArch64ELFStreamer.h"
#include "AArch64MCAsmInfo.h"
+#include "AArch64WinCOFFStreamer.h"
#include "InstPrinter/AArch64InstPrinter.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -59,8 +60,10 @@ static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI,
MCAsmInfo *MAI;
if (TheTriple.isOSBinFormatMachO())
MAI = new AArch64MCAsmInfoDarwin();
+ else if (TheTriple.isOSBinFormatCOFF())
+ MAI = new AArch64MCAsmInfoCOFF();
else {
- assert(TheTriple.isOSBinFormatELF() && "Only expect Darwin or ELF");
+ assert(TheTriple.isOSBinFormatELF() && "Invalid target");
MAI = new AArch64MCAsmInfoELF(TheTriple);
}
@@ -74,8 +77,8 @@ static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI,
static void adjustCodeGenOpts(const Triple &TT, Reloc::Model RM,
CodeModel::Model &CM) {
- assert((TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()) &&
- "Only expect Darwin and ELF targets");
+ assert((TT.isOSBinFormatELF() || TT.isOSBinFormatMachO() ||
+ TT.isOSBinFormatCOFF()) && "Invalid target");
if (CM == CodeModel::Default)
CM = CodeModel::Small;
@@ -122,6 +125,14 @@ static MCStreamer *createMachOStreamer(MCContext &Ctx, MCAsmBackend &TAB,
/*LabelSections*/ true);
}
+static MCStreamer *createWinCOFFStreamer(MCContext &Ctx, MCAsmBackend &TAB,
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll,
+ bool IncrementalLinkerCompatible) {
+ return createAArch64WinCOFFStreamer(Ctx, TAB, OS, Emitter, RelaxAll,
+ IncrementalLinkerCompatible);
+}
+
static MCInstrAnalysis *createAArch64InstrAnalysis(const MCInstrInfo *Info) {
return new MCInstrAnalysis(Info);
}
@@ -154,6 +165,7 @@ extern "C" void LLVMInitializeAArch64TargetMC() {
// Register the obj streamers.
TargetRegistry::RegisterELFStreamer(*T, createELFStreamer);
TargetRegistry::RegisterMachOStreamer(*T, createMachOStreamer);
+ TargetRegistry::RegisterCOFFStreamer(*T, createWinCOFFStreamer);
// Register the obj target streamer.
TargetRegistry::RegisterObjectTargetStreamer(
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
index 615d7dab2c51..1404926b8124 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -60,6 +60,8 @@ MCObjectWriter *createAArch64MachObjectWriter(raw_pwrite_stream &OS,
uint32_t CPUType,
uint32_t CPUSubtype);
+MCObjectWriter *createAArch64WinCOFFObjectWriter(raw_pwrite_stream &OS);
+
MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S,
formatted_raw_ostream &OS,
MCInstPrinter *InstPrint,
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp
new file mode 100644
index 000000000000..7862a03e771c
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp
@@ -0,0 +1,65 @@
+//= AArch64WinCOFFObjectWriter.cpp - AArch64 Windows COFF Object Writer C++ =//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+
+#include "MCTargetDesc/AArch64FixupKinds.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/MC/MCWinCOFFObjectWriter.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+
+using namespace llvm;
+
+namespace {
+
+class AArch64WinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter {
+public:
+ AArch64WinCOFFObjectWriter()
+ : MCWinCOFFObjectTargetWriter(COFF::IMAGE_FILE_MACHINE_ARM64) {
+ }
+
+ ~AArch64WinCOFFObjectWriter() override = default;
+
+ unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
+ const MCFixup &Fixup, bool IsCrossSection,
+ const MCAsmBackend &MAB) const override;
+
+ bool recordRelocation(const MCFixup &) const override;
+};
+
+} // end anonymous namespace
+
+unsigned
+AArch64WinCOFFObjectWriter::getRelocType(MCContext &Ctx,
+ const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsCrossSection,
+ const MCAsmBackend &MAB) const {
+ const MCFixupKindInfo &Info = MAB.getFixupKindInfo(Fixup.getKind());
+ report_fatal_error(Twine("unsupported relocation type: ") + Info.Name);
+}
+
+bool AArch64WinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const {
+ return true;
+}
+
+namespace llvm {
+
+MCObjectWriter *createAArch64WinCOFFObjectWriter(raw_pwrite_stream &OS) {
+ MCWinCOFFObjectTargetWriter *MOTW = new AArch64WinCOFFObjectWriter();
+ return createWinCOFFObjectWriter(MOTW, OS);
+}
+
+} // end namespace llvm
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
new file mode 100644
index 000000000000..6c8da27e398f
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
@@ -0,0 +1,37 @@
+//===-- AArch64WinCOFFStreamer.cpp - AArch64 Target WinCOFF Streamer -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64WinCOFFStreamer.h"
+
+using namespace llvm;
+
+namespace {
+
+class AArch64WinCOFFStreamer : public MCWinCOFFStreamer {
+public:
+ friend class AArch64TargetWinCOFFStreamer;
+
+ AArch64WinCOFFStreamer(MCContext &C, MCAsmBackend &AB, MCCodeEmitter &CE,
+ raw_pwrite_stream &OS)
+ : MCWinCOFFStreamer(C, AB, CE, OS) {}
+};
+} // end anonymous namespace
+
+namespace llvm {
+MCWinCOFFStreamer
+*createAArch64WinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB,
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll,
+ bool IncrementalLinkerCompatible) {
+ auto *S = new AArch64WinCOFFStreamer(Context, MAB, *Emitter, OS);
+ S->getAssembler().setIncrementalLinkerCompatible(IncrementalLinkerCompatible);
+ return S;
+}
+
+} // end llvm namespace
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h
new file mode 100644
index 000000000000..1b4fcd6804e2
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h
@@ -0,0 +1,43 @@
+//===-- AArch64WinCOFFStreamer.h - WinCOFF Streamer for AArch64 -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements WinCOFF streamer information for the AArch64 backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64WINCOFFSTREAMER_H
+#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64WINCOFFSTREAMER_H
+
+#include "AArch64TargetStreamer.h"
+#include "llvm/MC/MCWinCOFFStreamer.h"
+
+namespace {
+class AArch64WinCOFFStreamer;
+
+class AArch64TargetWinCOFFStreamer : public llvm::AArch64TargetStreamer {
+private:
+ AArch64WinCOFFStreamer &getStreamer();
+
+public:
+ AArch64TargetWinCOFFStreamer(llvm::MCStreamer &S)
+ : AArch64TargetStreamer(S) {}
+};
+
+} // end anonymous namespace
+
+namespace llvm {
+
+MCWinCOFFStreamer
+*createAArch64WinCOFFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll,
+ bool IncrementalLinkerCompatible);
+} // end llvm namespace
+
+#endif
diff --git a/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
index 6d8be5e63fbb..56eeba8a1d4b 100644
--- a/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
@@ -8,6 +8,8 @@ add_llvm_library(LLVMAArch64Desc
AArch64MCTargetDesc.cpp
AArch64MachObjectWriter.cpp
AArch64TargetStreamer.cpp
+ AArch64WinCOFFObjectWriter.cpp
+ AArch64WinCOFFStreamer.cpp
)
add_dependencies(LLVMAArch64Desc AArch64CommonTableGen)
diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h
index 55d18c3f3646..5a799b2d88d0 100644
--- a/lib/Target/AMDGPU/AMDGPU.h
+++ b/lib/Target/AMDGPU/AMDGPU.h
@@ -36,7 +36,6 @@ FunctionPass *createR600ControlFlowFinalizer();
FunctionPass *createAMDGPUCFGStructurizerPass();
// SI Passes
-FunctionPass *createSITypeRewriter();
FunctionPass *createSIAnnotateControlFlowPass();
FunctionPass *createSIFoldOperandsPass();
FunctionPass *createSIPeepholeSDWAPass();
diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td
index 7494e5decd6f..f1d899c4d003 100644
--- a/lib/Target/AMDGPU/AMDGPU.td
+++ b/lib/Target/AMDGPU/AMDGPU.td
@@ -262,8 +262,8 @@ def FeatureSDWAMac : SubtargetFeature<"sdwa-mav",
"Support v_mac_f32/f16 with SDWA (Sub-DWORD Addressing) extension"
>;
-def FeatureSDWAClampVOPC : SubtargetFeature<"sdwa-clamp-vopc",
- "HasSDWAClampVOPC",
+def FeatureSDWAOutModsVOPC : SubtargetFeature<"sdwa-out-mods-vopc",
+ "HasSDWAOutModsVOPC",
"true",
"Support clamp for VOPC with SDWA (Sub-DWORD Addressing) extension"
>;
@@ -452,7 +452,7 @@ def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
FeatureScalarStores, FeatureInv2PiInlineImm,
- FeatureSDWA, FeatureSDWAClampVOPC, FeatureSDWAMac, FeatureDPP
+ FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP
]
>;
diff --git a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
index 2071b6f157cd..9a391d06c9ea 100644
--- a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
+++ b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
@@ -1776,7 +1776,7 @@ static void removeExternalCFGEdges(MachineBasicBlock *StartMBB,
E = EndMBB->succ_end();
PI != E; ++PI) {
// Either we have a back-edge to the entry block, or a back-edge to the
- // succesor of the entry block since the block may be split.
+ // successor of the entry block since the block may be split.
if ((*PI) != StartMBB &&
!((*PI) == StartMBBSucc && StartMBB != EndMBB && SuccSize == 1)) {
Succs.insert(
@@ -1831,7 +1831,7 @@ MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfBlock(
IfBB->addSuccessor(CodeBBStart);
DEBUG(dbgs() << "Created If block: " << IfBB->getNumber() << "\n");
- // Ensure that the MergeBB is a succesor of the CodeEndBB.
+ // Ensure that the MergeBB is a successor of the CodeEndBB.
if (!CodeBBEnd->isSuccessor(MergeBB))
CodeBBEnd->addSuccessor(MergeBB);
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index ab5abf2039a5..be47b900c6f0 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -128,7 +128,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
HasSDWAScalar(false),
HasSDWASdst(false),
HasSDWAMac(false),
- HasSDWAClampVOPC(false),
+ HasSDWAOutModsVOPC(false),
HasDPP(false),
FlatAddressSpace(false),
FlatInstOffsets(false),
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 2b16289c723e..22cede59086a 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -153,7 +153,7 @@ protected:
bool HasSDWAScalar;
bool HasSDWASdst;
bool HasSDWAMac;
- bool HasSDWAClampVOPC;
+ bool HasSDWAOutModsVOPC;
bool HasDPP;
bool FlatAddressSpace;
bool FlatInstOffsets;
@@ -452,8 +452,8 @@ public:
return HasSDWAMac;
}
- bool hasSDWAClampVOPC() const {
- return HasSDWAClampVOPC;
+ bool hasSDWAOutModsVOPC() const {
+ return HasSDWAOutModsVOPC;
}
/// \brief Returns the offset in bytes from the start of the input buffer
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 04fe9f689806..425fd35d47de 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -720,7 +720,6 @@ bool GCNPassConfig::addPreISel() {
addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions
}
addPass(createSinkingPass());
- addPass(createSITypeRewriter());
addPass(createAMDGPUAnnotateUniformValues());
if (!LateCFGStructurize) {
addPass(createSIAnnotateControlFlowPass());
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 88245b01683a..89a03902dc69 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -63,7 +63,7 @@ static bool dependsOnLocalPhi(const Loop *L, const Value *Cond,
return false;
}
-void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L,
+void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
UP.Threshold = 300; // Twice the default.
UP.MaxCount = UINT_MAX;
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 485e20411ab4..9a320bdfcc3d 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -68,7 +68,8 @@ public:
bool hasBranchDivergence() { return true; }
- void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
+ void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP);
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
diff --git a/lib/Target/AMDGPU/CMakeLists.txt b/lib/Target/AMDGPU/CMakeLists.txt
index e30844f082cd..917d9cfa6905 100644
--- a/lib/Target/AMDGPU/CMakeLists.txt
+++ b/lib/Target/AMDGPU/CMakeLists.txt
@@ -96,7 +96,6 @@ add_llvm_target(AMDGPUCodeGen
SIPeepholeSDWA.cpp
SIRegisterInfo.cpp
SIShrinkInstructions.cpp
- SITypeRewriter.cpp
SIWholeQuadMode.cpp
GCNIterativeScheduler.cpp
GCNMinRegStrategy.cpp
diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 04308fb3aaf6..f26e49295e69 100644
--- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -626,7 +626,9 @@ MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
using namespace AMDGPU::SDWA;
if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) {
- if (SDWA9EncValues::SRC_VGPR_MIN <= Val &&
+ // XXX: static_cast<int> is needed to avoid a spurious warning that the
+ // comparison with unsigned is always true.
+ if (SDWA9EncValues::SRC_VGPR_MIN <= static_cast<int>(Val) &&
Val <= SDWA9EncValues::SRC_VGPR_MAX) {
return createRegOperand(getVgprClassId(Width),
Val - SDWA9EncValues::SRC_VGPR_MIN);
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index d0f4e00994de..d39b345bdf03 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4314,6 +4314,23 @@ SDValue SITargetLowering::splitBinaryBitConstantOp(
return SDValue();
}
+// Returns true if the argument is a boolean value that is not serialized into
+// memory or an argument and does not require v_cndmask_b32 to be deserialized.
+static bool isBoolSGPR(SDValue V) {
+ if (V.getValueType() != MVT::i1)
+ return false;
+ switch (V.getOpcode()) {
+ default: break;
+ case ISD::SETCC:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case AMDGPUISD::FP_CLASS:
+ return true;
+ }
+ return false;
+}
+
SDValue SITargetLowering::performAndCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
if (DCI.isBeforeLegalize())
@@ -4402,6 +4419,16 @@ SDValue SITargetLowering::performAndCombine(SDNode *N,
}
}
+ if (VT == MVT::i32 &&
+ (RHS.getOpcode() == ISD::SIGN_EXTEND || LHS.getOpcode() == ISD::SIGN_EXTEND)) {
+ // and x, (sext cc from i1) => select cc, x, 0
+ if (RHS.getOpcode() != ISD::SIGN_EXTEND)
+ std::swap(LHS, RHS);
+ if (isBoolSGPR(RHS.getOperand(0)))
+ return DAG.getSelect(SDLoc(N), MVT::i32, RHS.getOperand(0),
+ LHS, DAG.getConstant(0, SDLoc(N), MVT::i32));
+ }
+
return SDValue();
}
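The fold added above leans on sign-extension from i1 producing all-zeros or all-ones, so the and reduces to a select between x and 0. A standalone sketch of the identity on plain integers (an illustration of the algebra, not of the DAG combine machinery):

#include <cassert>
#include <cstdint>

int main() {
  for (int cc = 0; cc <= 1; ++cc) {
    int32_t x = 0x12345678;
    int32_t sext = -cc;          // sext i1 -> i32: 0 or -1
    int32_t folded = cc ? x : 0; // select cc, x, 0
    assert((x & sext) == folded);
  }
}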
@@ -4941,8 +4968,7 @@ SDValue SITargetLowering::performAddCombine(SDNode *N,
case ISD::SIGN_EXTEND:
case ISD::ANY_EXTEND: {
auto Cond = RHS.getOperand(0);
- if (Cond.getOpcode() != ISD::SETCC &&
- Cond.getOpcode() != AMDGPUISD::FP_CLASS)
+ if (!isBoolSGPR(Cond))
break;
SDVTList VTList = DAG.getVTList(MVT::i32, MVT::i1);
SDValue Args[] = { LHS, DAG.getConstant(0, SL, MVT::i32), Cond };
@@ -5109,6 +5135,35 @@ SDValue SITargetLowering::performSetCCCombine(SDNode *N,
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
EVT VT = LHS.getValueType();
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+
+ auto CRHS = dyn_cast<ConstantSDNode>(RHS);
+ if (!CRHS) {
+ CRHS = dyn_cast<ConstantSDNode>(LHS);
+ if (CRHS) {
+ std::swap(LHS, RHS);
+ CC = getSetCCSwappedOperands(CC);
+ }
+ }
+
+ if (CRHS && VT == MVT::i32 && LHS.getOpcode() == ISD::SIGN_EXTEND &&
+ isBoolSGPR(LHS.getOperand(0))) {
+ // setcc (sext from i1 cc), -1, ne|sgt|ult => not cc => xor cc, -1
+ // setcc (sext from i1 cc), -1, eq|sle|uge => cc
+ // setcc (sext from i1 cc), 0, eq|sge|ule => not cc => xor cc, -1
+ // setcc (sext from i1 cc), 0, ne|ugt|slt => cc
+ if ((CRHS->isAllOnesValue() &&
+ (CC == ISD::SETNE || CC == ISD::SETGT || CC == ISD::SETULT)) ||
+ (CRHS->isNullValue() &&
+ (CC == ISD::SETEQ || CC == ISD::SETGE || CC == ISD::SETULE)))
+ return DAG.getNode(ISD::XOR, SL, MVT::i1, LHS.getOperand(0),
+ DAG.getConstant(-1, SL, MVT::i1));
+ if ((CRHS->isAllOnesValue() &&
+ (CC == ISD::SETEQ || CC == ISD::SETLE || CC == ISD::SETUGE)) ||
+ (CRHS->isNullValue() &&
+ (CC == ISD::SETNE || CC == ISD::SETUGT || CC == ISD::SETLT)))
+ return LHS.getOperand(0);
+ }
if (VT != MVT::f32 && VT != MVT::f64 && (Subtarget->has16BitInsts() &&
VT != MVT::f16))
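The four rewrite rules in the comment above follow from sext i1 taking only the values 0 and -1, so each comparison against 0 or -1 collapses to cc or its negation. A quick exhaustive check of the table on plain integers (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  for (int cc = 0; cc <= 1; ++cc) {
    int32_t s = -cc;                // sext i1 cc -> i32
    assert((s != -1) == !cc);       // setcc ..., -1, ne  => not cc
    assert((s == -1) == (cc != 0)); // setcc ..., -1, eq  => cc
    assert((s == 0) == !cc);        // setcc ..., 0, eq   => not cc
    assert((s != 0) == (cc != 0));  // setcc ..., 0, ne   => cc
  }
}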
@@ -5116,7 +5171,6 @@ SDValue SITargetLowering::performSetCCCombine(SDNode *N,
// Match isinf pattern
// (fcmp oeq (fabs x), inf) -> (fp_class x, (p_infinity | n_infinity))
- ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
if (CC == ISD::SETOEQ && LHS.getOpcode() == ISD::FABS) {
const ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS);
if (!CRHS)
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index c9b48fea7225..b6784ec14e9f 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -770,7 +770,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
if (ST.hasScalarStores()) {
// m0 is used for offset to scalar stores if used to spill.
- Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine);
+ Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead);
}
return;
@@ -871,7 +871,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
if (ST.hasScalarStores()) {
// m0 is used for offset to scalar stores if used to spill.
- Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine);
+ Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead);
}
return;
@@ -2444,8 +2444,6 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
- if ( DstIdx == -1)
- DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::sdst);
const int OpIndicies[] = { DstIdx, Src0Idx, Src1Idx, Src2Idx };
@@ -2488,14 +2486,20 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";
return false;
}
- } else if (!ST.hasSDWAClampVOPC()) {
+ } else if (!ST.hasSDWAOutModsVOPC()) {
// No clamp allowed on GFX9 for VOPC
const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
- if (Clamp != nullptr &&
- (!Clamp->isImm() || Clamp->getImm() != 0)) {
+ if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) {
ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI";
return false;
}
+
+ // No omod allowed on GFX9 for VOPC
+ const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod);
+ if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) {
+ ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI";
+ return false;
+ }
}
}
}
diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td
index 3b4a8b5d1e81..4a81fb3b463a 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/lib/Target/AMDGPU/SIInstrInfo.td
@@ -336,6 +336,10 @@ def NegSubInlineConst16 : ImmLeaf<i16, [{
return Imm < -16 && Imm >= -64;
}], NegateImm>;
+def ShiftAmt32Imm : PatLeaf <(imm), [{
+ return N->getZExtValue() < 32;
+}]>;
+
//===----------------------------------------------------------------------===//
// Custom Operands
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
index 3b4bdc864253..bcc685015cf5 100644
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -929,6 +929,14 @@ def : UMad24Pat<V_MAD_U32_U24>;
defm : BFIPatterns <V_BFI_B32, S_MOV_B32, SReg_64>;
def : ROTRPattern <V_ALIGNBIT_B32>;
+def : Pat<(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))),
+ (V_ALIGNBIT_B32 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
+ (i32 (EXTRACT_SUBREG (i64 $src0), sub0)), $src1)>;
+
+def : Pat<(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))),
+ (V_ALIGNBIT_B32 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
+ (i32 (EXTRACT_SUBREG (i64 $src0), sub0)), $src1)>;
+
/********** ====================== **********/
/********** Indirect addressing **********/
/********** ====================== **********/
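Both new patterns rest on the same funnel-shift identity: for shift amounts below 32, the low 32 bits of a 64-bit right shift can be read out of the two 32-bit halves, which is what V_ALIGNBIT_B32 computes. A standalone sketch of the identity (alignbit here models the instruction's hi/lo/shift semantics):

#include <cassert>
#include <cstdint>

// V_ALIGNBIT_B32-style funnel shift: the low 32 bits of (Hi:Lo) >> N,
// with the shift amount taken modulo 32.
static uint32_t alignbit(uint32_t Hi, uint32_t Lo, unsigned N) {
  uint64_t Wide = ((uint64_t)Hi << 32) | Lo;
  return (uint32_t)(Wide >> (N & 31));
}

int main() {
  uint64_t Src = 0x123456789abcdef0ULL;
  for (unsigned N = 0; N < 32; ++N)
    assert((uint32_t)(Src >> N) ==
           alignbit((uint32_t)(Src >> 32), (uint32_t)Src, N));
}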
diff --git a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index 4ac23ef03cb3..e2ac6631d2f3 100644
--- a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -627,10 +627,13 @@ bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI,
return false;
}
- if (!ST.hasSDWAClampVOPC() && TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))
+ if (!ST.hasSDWAOutModsVOPC() &&
+ (TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) ||
+ TII->hasModifiersSet(MI, AMDGPU::OpName::omod)))
return false;
- } else if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst)) {
+ } else if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst) ||
+ !TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) {
return false;
}
@@ -649,25 +652,24 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(MI.getOpcode()));
assert(SDWAOpcode != -1);
- // Copy dst, if it is present in original then should also be present in SDWA
- MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
- if (!Dst && !TII->isVOPC(MI))
- return false;
-
const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode);
// Create SDWA version of instruction MI and initialize its operands
MachineInstrBuilder SDWAInst =
BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc);
+ // Copy dst; if it is present in the original it should also be present in SDWA
+ MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
if (Dst) {
assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1);
SDWAInst.add(*Dst);
- } else {
- Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
+ } else if ((Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst))) {
assert(Dst &&
AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1);
SDWAInst.add(*Dst);
+ } else {
+ assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1);
+ SDWAInst.addReg(AMDGPU::VCC, RegState::Define);
}
// Copy src0, initialize src0_modifiers. All sdwa instructions have src0 and
@@ -714,20 +716,22 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
}
// Copy omod if present, initialize otherwise if needed
- MachineOperand *OMod = TII->getNamedOperand(MI, AMDGPU::OpName::omod);
- if (OMod) {
- assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1);
- SDWAInst.add(*OMod);
- } else if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1) {
- SDWAInst.addImm(0);
+ if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1) {
+ MachineOperand *OMod = TII->getNamedOperand(MI, AMDGPU::OpName::omod);
+ if (OMod) {
+ SDWAInst.add(*OMod);
+ } else {
+ SDWAInst.addImm(0);
+ }
}
- // Initialize dst_sel and dst_unused if present
- if (Dst) {
- assert(
- AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_sel) != -1 &&
- AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_unused) != -1);
+ // Initialize dst_sel if present
+ if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_sel) != -1) {
SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);
+ }
+
+ // Initialize dst_unused if present
+ if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_unused) != -1) {
SDWAInst.addImm(AMDGPU::SDWA::DstUnused::UNUSED_PAD);
}
diff --git a/lib/Target/AMDGPU/SITypeRewriter.cpp b/lib/Target/AMDGPU/SITypeRewriter.cpp
deleted file mode 100644
index aad68537f779..000000000000
--- a/lib/Target/AMDGPU/SITypeRewriter.cpp
+++ /dev/null
@@ -1,156 +0,0 @@
-//===-- SITypeRewriter.cpp - Remove unwanted types ------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// This pass performs the following type substitution on all
-/// non-compute shaders:
-///
-/// v16i8 => i128
-/// - v16i8 is used for constant memory resource descriptors. This type is
-/// legal for some compute APIs, and we don't want to declare it as legal
-/// in the backend, because we want the legalizer to expand all v16i8
-/// operations.
-/// v1* => *
-/// - Having v1* types complicates the legalizer and we can easily replace
-///   them with the element type.
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstVisitor.h"
-
-using namespace llvm;
-
-namespace {
-
-class SITypeRewriter : public FunctionPass,
- public InstVisitor<SITypeRewriter> {
-
- static char ID;
- Module *Mod;
- Type *v16i8;
- Type *v4i32;
-
-public:
- SITypeRewriter() : FunctionPass(ID) { }
- bool doInitialization(Module &M) override;
- bool runOnFunction(Function &F) override;
- StringRef getPassName() const override { return "SI Type Rewriter"; }
- void visitLoadInst(LoadInst &I);
- void visitCallInst(CallInst &I);
- void visitBitCast(BitCastInst &I);
-};
-
-} // End anonymous namespace
-
-char SITypeRewriter::ID = 0;
-
-bool SITypeRewriter::doInitialization(Module &M) {
- Mod = &M;
- v16i8 = VectorType::get(Type::getInt8Ty(M.getContext()), 16);
- v4i32 = VectorType::get(Type::getInt32Ty(M.getContext()), 4);
- return false;
-}
-
-bool SITypeRewriter::runOnFunction(Function &F) {
- if (!AMDGPU::isShader(F.getCallingConv()))
- return false;
-
- visit(F);
- visit(F);
-
- return false;
-}
-
-void SITypeRewriter::visitLoadInst(LoadInst &I) {
- Value *Ptr = I.getPointerOperand();
- Type *PtrTy = Ptr->getType();
- Type *ElemTy = PtrTy->getPointerElementType();
- IRBuilder<> Builder(&I);
- if (ElemTy == v16i8) {
- Value *BitCast = Builder.CreateBitCast(Ptr,
- PointerType::get(v4i32,PtrTy->getPointerAddressSpace()));
- LoadInst *Load = Builder.CreateLoad(BitCast);
- SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
- I.getAllMetadataOtherThanDebugLoc(MD);
- for (unsigned i = 0, e = MD.size(); i != e; ++i) {
- Load->setMetadata(MD[i].first, MD[i].second);
- }
- Value *BitCastLoad = Builder.CreateBitCast(Load, I.getType());
- I.replaceAllUsesWith(BitCastLoad);
- I.eraseFromParent();
- }
-}
-
-void SITypeRewriter::visitCallInst(CallInst &I) {
- IRBuilder<> Builder(&I);
-
- SmallVector <Value*, 8> Args;
- SmallVector <Type*, 8> Types;
- bool NeedToReplace = false;
- Function *F = I.getCalledFunction();
- if (!F)
- return;
-
- std::string Name = F->getName();
- for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
- Value *Arg = I.getArgOperand(i);
- if (Arg->getType() == v16i8) {
- Args.push_back(Builder.CreateBitCast(Arg, v4i32));
- Types.push_back(v4i32);
- NeedToReplace = true;
- Name = Name + ".v4i32";
- } else if (Arg->getType()->isVectorTy() &&
- Arg->getType()->getVectorNumElements() == 1 &&
- Arg->getType()->getVectorElementType() ==
- Type::getInt32Ty(I.getContext())){
- Type *ElementTy = Arg->getType()->getVectorElementType();
- std::string TypeName = "i32";
- InsertElementInst *Def = cast<InsertElementInst>(Arg);
- Args.push_back(Def->getOperand(1));
- Types.push_back(ElementTy);
- std::string VecTypeName = "v1" + TypeName;
- Name = Name.replace(Name.find(VecTypeName), VecTypeName.length(), TypeName);
- NeedToReplace = true;
- } else {
- Args.push_back(Arg);
- Types.push_back(Arg->getType());
- }
- }
-
- if (!NeedToReplace) {
- return;
- }
- Function *NewF = Mod->getFunction(Name);
- if (!NewF) {
- NewF = Function::Create(FunctionType::get(F->getReturnType(), Types, false), GlobalValue::ExternalLinkage, Name, Mod);
- NewF->setAttributes(F->getAttributes());
- }
- I.replaceAllUsesWith(Builder.CreateCall(NewF, Args));
- I.eraseFromParent();
-}
-
-void SITypeRewriter::visitBitCast(BitCastInst &I) {
- IRBuilder<> Builder(&I);
- if (I.getDestTy() != v4i32) {
- return;
- }
-
- if (BitCastInst *Op = dyn_cast<BitCastInst>(I.getOperand(0))) {
- if (Op->getSrcTy() == v4i32) {
- I.replaceAllUsesWith(Op->getOperand(0));
- I.eraseFromParent();
- }
- }
-}
-
-FunctionPass *llvm::createSITypeRewriter() {
- return new SITypeRewriter();
-}
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 6f67183df6a1..c40b4450a5b5 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -222,6 +222,13 @@ def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop",
def FeatureHasRetAddrStack : SubtargetFeature<"ret-addr-stack", "HasRetAddrStack", "true",
"Has return address stack">;
+// Some processors have no branch predictor, which changes the expected cost of
+// taking a branch, and so affects the choice of whether to use predicated
+// instructions.
+def FeatureHasNoBranchPredictor : SubtargetFeature<"no-branch-predictor",
+ "HasBranchPredictor", "false",
+ "Has no branch predictor">;
+
/// DSP extension.
def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true",
"Supports DSP instructions in ARM and/or Thumb2">;
@@ -262,6 +269,10 @@ def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true",
"Generate calls via indirect call "
"instructions">;
+def FeatureExecuteOnly
+ : SubtargetFeature<"execute-only", "GenExecuteOnly", "true",
+ "Enable the generation of execute only code.">;
+
def FeatureReserveR9 : SubtargetFeature<"reserve-r9", "ReserveR9", "true",
"Reserve R9, making it unavailable as "
"GPR">;
@@ -540,7 +551,7 @@ def ARMv7s : Architecture<"armv7s", "ARMv7a", [ARMv7a]>;
//
// Dummy CPU, used to target architectures
-def : ProcNoItin<"generic", []>;
+def : ProcessorModel<"generic", CortexA8Model, []>;
def : ProcNoItin<"arm8", [ARMv4]>;
def : ProcNoItin<"arm810", [ARMv4]>;
@@ -756,13 +767,19 @@ def : ProcessorModel<"cortex-r8", CortexA8Model, [ARMv7r,
FeatureHasSlowFPVMLx,
FeatureAvoidPartialCPSR]>;
-def : ProcNoItin<"cortex-m3", [ARMv7m, ProcM3]>;
-def : ProcNoItin<"sc300", [ARMv7m, ProcM3]>;
+def : ProcessorModel<"cortex-m3", CortexM3Model, [ARMv7m,
+ ProcM3,
+ FeatureHasNoBranchPredictor]>;
+
+def : ProcessorModel<"sc300", CortexM3Model, [ARMv7m,
+ ProcM3,
+ FeatureHasNoBranchPredictor]>;
-def : ProcNoItin<"cortex-m4", [ARMv7em,
+def : ProcessorModel<"cortex-m4", CortexM3Model, [ARMv7em,
FeatureVFP4,
FeatureVFPOnlySP,
- FeatureD16]>;
+ FeatureD16,
+ FeatureHasNoBranchPredictor]>;
def : ProcNoItin<"cortex-m7", [ARMv7em,
FeatureFPARMv8,
@@ -771,11 +788,12 @@ def : ProcNoItin<"cortex-m7", [ARMv7em,
def : ProcNoItin<"cortex-m23", [ARMv8mBaseline,
FeatureNoMovt]>;
-def : ProcNoItin<"cortex-m33", [ARMv8mMainline,
+def : ProcessorModel<"cortex-m33", CortexM3Model, [ARMv8mMainline,
FeatureDSP,
FeatureFPARMv8,
FeatureD16,
- FeatureVFPOnlySP]>;
+ FeatureVFPOnlySP,
+ FeatureHasNoBranchPredictor]>;
def : ProcNoItin<"cortex-a32", [ARMv8a,
FeatureHWDivThumb,
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index e0810c358f2d..1ec6b24b2ed6 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1851,9 +1851,9 @@ isProfitableToIfCvt(MachineBasicBlock &MBB,
}
bool ARMBaseInstrInfo::
-isProfitableToIfCvt(MachineBasicBlock &,
+isProfitableToIfCvt(MachineBasicBlock &TBB,
unsigned TCycles, unsigned TExtra,
- MachineBasicBlock &,
+ MachineBasicBlock &FBB,
unsigned FCycles, unsigned FExtra,
BranchProbability Probability) const {
if (!TCycles)
@@ -1863,14 +1863,43 @@ isProfitableToIfCvt(MachineBasicBlock &,
// Here we scale up each component of UnpredCost to avoid precision issue when
// scaling TCycles/FCycles by Probability.
const unsigned ScalingUpFactor = 1024;
- unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
- unsigned FUnpredCost =
+
+ unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
+ unsigned UnpredCost;
+ if (!Subtarget.hasBranchPredictor()) {
+ // When we don't have a branch predictor it's always cheaper not to take a
+ // branch than to take one, so we have to take that into account.
+ unsigned NotTakenBranchCost = 1;
+ unsigned TakenBranchCost = Subtarget.getMispredictionPenalty();
+ unsigned TUnpredCycles, FUnpredCycles;
+ if (!FCycles) {
+ // Triangle: TBB is the fallthrough
+ TUnpredCycles = TCycles + NotTakenBranchCost;
+ FUnpredCycles = TakenBranchCost;
+ } else {
+ // Diamond: TBB is the block that is branched to, FBB is the fallthrough
+ TUnpredCycles = TCycles + TakenBranchCost;
+ FUnpredCycles = FCycles + NotTakenBranchCost;
+ }
+ // The total cost is the cost of each path scaled by their probabilities.
+ unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
+ unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor);
+ UnpredCost = TUnpredCost + FUnpredCost;
+ // When predicating, assume that the first IT can be folded away but later
+ // ones cost one cycle each.
+ if (Subtarget.isThumb2() && TCycles + FCycles > 4) {
+ PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
+ }
+ } else {
+ unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
+ unsigned FUnpredCost =
Probability.getCompl().scale(FCycles * ScalingUpFactor);
- unsigned UnpredCost = TUnpredCost + FUnpredCost;
- UnpredCost += 1 * ScalingUpFactor; // The branch itself
- UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
+ UnpredCost = TUnpredCost + FUnpredCost;
+ UnpredCost += 1 * ScalingUpFactor; // The branch itself
+ UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
+ }
- return (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor <= UnpredCost;
+ return PredCost <= UnpredCost;
}
bool
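For readers tracing the new if-conversion heuristic above, the arithmetic is easier to follow outside of BranchProbability's fixed-point scaling. Below is a minimal standalone C++ sketch of the branch-predictor-less cost model (illustrative names; the Thumb2 IT-folding adjustment is omitted) -- it is not LLVM code:

#include <cassert>

// Decide whether predicating both paths beats keeping the branch on a core
// with no branch predictor. ProbPercent is the probability (in percent) of
// taking the TBB path; all costs are in cycles.
static bool profitableToPredicate(unsigned TCycles, unsigned FCycles,
                                  unsigned TExtra, unsigned FExtra,
                                  unsigned MispredictPenalty,
                                  unsigned ProbPercent) {
  const unsigned NotTakenBranchCost = 1;
  const unsigned TakenBranchCost = MispredictPenalty; // best-case taken cost
  unsigned TUnpred, FUnpred;
  if (FCycles == 0) { // Triangle: TBB is the fallthrough
    TUnpred = TCycles + NotTakenBranchCost;
    FUnpred = TakenBranchCost;
  } else {            // Diamond: TBB is branched to, FBB is the fallthrough
    TUnpred = TCycles + TakenBranchCost;
    FUnpred = FCycles + NotTakenBranchCost;
  }
  unsigned PredCost = TCycles + FCycles + TExtra + FExtra;
  // Expected cost of the branchy version, scaled by 100 to stay integral.
  unsigned UnpredCost = ProbPercent * TUnpred + (100 - ProbPercent) * FUnpred;
  return PredCost * 100 <= UnpredCost;
}

int main() {
  // Cortex-M3-like core (MispredictPenalty = 2): a 2-cycle triangle taken
  // half the time costs 2 cycles predicated vs. an expected 2.5 branchy.
  assert(profitableToPredicate(2, 0, 0, 0, 2, 50));
}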
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 2bcc707e9fc3..e42514acd76f 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -7580,6 +7580,9 @@ static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
SDValue VHi = DAG.getAnyExtOrTrunc(
DAG.getNode(ISD::SRL, dl, MVT::i64, V, DAG.getConstant(32, dl, MVT::i32)),
dl, MVT::i32);
+ bool isBigEndian = DAG.getDataLayout().isBigEndian();
+ if (isBigEndian)
+ std::swap(VLo, VHi);
SDValue RegClass =
DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
@@ -7607,10 +7610,14 @@ static void ReplaceCMP_SWAP_64Results(SDNode *N,
MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
- Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_0, SDLoc(N), MVT::i32,
- SDValue(CmpSwap, 0)));
- Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_1, SDLoc(N), MVT::i32,
- SDValue(CmpSwap, 0)));
+ bool isBigEndian = DAG.getDataLayout().isBigEndian();
+
+ Results.push_back(
+ DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_1 : ARM::gsub_0,
+ SDLoc(N), MVT::i32, SDValue(CmpSwap, 0)));
+ Results.push_back(
+ DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_0 : ARM::gsub_1,
+ SDLoc(N), MVT::i32, SDValue(CmpSwap, 0)));
Results.push_back(SDValue(CmpSwap, 2));
}
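Both hunks above enforce the same invariant, so a compact model may help: on big-endian targets, the GPRPair subregister holding the low 32 bits flips. The following sketch (plain C++, not LLVM code; names are illustrative) shows which half lands in which subregister:

#include <cstdint>
#include <utility>

// Split a 64-bit value into the {gsub_0, gsub_1} halves of an ARM GPRPair.
// On little-endian, gsub_0 holds bits 31..0; on big-endian the halves swap,
// which is why both the pair construction and the CMP_SWAP_64 result
// extraction above are endian-aware.
static std::pair<uint32_t, uint32_t> splitForGPRPair(uint64_t V,
                                                     bool IsBigEndian) {
  uint32_t Lo = static_cast<uint32_t>(V);       // bits 31..0
  uint32_t Hi = static_cast<uint32_t>(V >> 32); // bits 63..32
  if (IsBigEndian)
    std::swap(Lo, Hi);
  return {Lo, Hi}; // {gsub_0, gsub_1}
}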
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 423f97ccacd6..891a8f482f0a 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -1416,12 +1416,12 @@ def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd),
let Size = 2, isBranch = 1, isTerminator = 1, isBarrier = 1,
isIndirectBranch = 1 in {
def tTBB_JT : tPseudoInst<(outs),
- (ins tGPR:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>,
- Sched<[WriteBr]>;
+ (ins tGPRwithpc:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0,
+ IIC_Br, []>, Sched<[WriteBr]>;
def tTBH_JT : tPseudoInst<(outs),
- (ins tGPR:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>,
- Sched<[WriteBr]>;
+ (ins tGPRwithpc:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0,
+ IIC_Br, []>, Sched<[WriteBr]>;
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMInstructionSelector.cpp b/lib/Target/ARM/ARMInstructionSelector.cpp
index 4cb0eca5ee5f..374176d1d737 100644
--- a/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -46,6 +46,10 @@ private:
MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
const RegisterBankInfo &RBI) const;
+ bool selectSelect(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII,
+ MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
+ const RegisterBankInfo &RBI) const;
+
const ARMBaseInstrInfo &TII;
const ARMBaseRegisterInfo &TRI;
const ARMBaseTargetMachine &TM;
@@ -346,6 +350,50 @@ bool ARMInstructionSelector::selectICmp(MachineInstrBuilder &MIB,
return true;
}
+bool ARMInstructionSelector::selectSelect(MachineInstrBuilder &MIB,
+ const ARMBaseInstrInfo &TII,
+ MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ const RegisterBankInfo &RBI) const {
+ auto &MBB = *MIB->getParent();
+ auto InsertBefore = std::next(MIB->getIterator());
+ auto &DebugLoc = MIB->getDebugLoc();
+
+ // Compare the condition to 0.
+ auto CondReg = MIB->getOperand(1).getReg();
+ assert(MRI.getType(CondReg).getSizeInBits() == 1 &&
+ RBI.getRegBank(CondReg, MRI, TRI)->getID() == ARM::GPRRegBankID &&
+ "Unsupported types for select operation");
+ auto CmpI = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::CMPri))
+ .addUse(CondReg)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
+ if (!constrainSelectedInstRegOperands(*CmpI, TII, TRI, RBI))
+ return false;
+
+ // Move a value into the result register based on the result of the
+ // comparison.
+ auto ResReg = MIB->getOperand(0).getReg();
+ auto TrueReg = MIB->getOperand(2).getReg();
+ auto FalseReg = MIB->getOperand(3).getReg();
+ assert(MRI.getType(ResReg) == MRI.getType(TrueReg) &&
+ MRI.getType(TrueReg) == MRI.getType(FalseReg) &&
+ MRI.getType(FalseReg).getSizeInBits() == 32 &&
+ RBI.getRegBank(TrueReg, MRI, TRI)->getID() == ARM::GPRRegBankID &&
+ RBI.getRegBank(FalseReg, MRI, TRI)->getID() == ARM::GPRRegBankID &&
+ "Unsupported types for select operation");
+ auto Mov1I = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::MOVCCr))
+ .addDef(ResReg)
+ .addUse(TrueReg)
+ .addUse(FalseReg)
+ .add(predOps(ARMCC::EQ, ARM::CPSR));
+ if (!constrainSelectedInstRegOperands(*Mov1I, TII, TRI, RBI))
+ return false;
+
+ MIB->eraseFromParent();
+ return true;
+}
+
bool ARMInstructionSelector::select(MachineInstr &I) const {
assert(I.getParent() && "Instruction should be in a basic block!");
assert(I.getParent()->getParent() && "Instruction should be in a function!");
@@ -448,6 +496,8 @@ bool ARMInstructionSelector::select(MachineInstr &I) const {
}
case G_ICMP:
return selectICmp(MIB, TII, MRI, TRI, RBI);
+ case G_SELECT:
+ return selectSelect(MIB, TII, MRI, TRI, RBI);
case G_GEP:
I.setDesc(TII.get(ARM::ADDrr));
MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
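The net effect of selectSelect above is the classic compare-plus-conditional-move sequence. A small semantic model (illustrative, not LLVM code): the tied first use operand of MOVCCr supplies the value when the predicate fails, and the EQ predicate fires exactly when the i1 condition compared equal to zero.

#include <cstdint>

// Models the CMPri + MOVCCr pair emitted for G_SELECT.
static uint32_t selectLowering(uint32_t Cond, uint32_t TrueV,
                               uint32_t FalseV) {
  uint32_t Res = TrueV;  // MOVCCr's tied operand: value if predicate fails
  bool EQ = (Cond == 0); // CMPri Cond, #0 sets the flags
  if (EQ)
    Res = FalseV;        // conditional move taken
  return Res;            // == (Cond != 0 ? TrueV : FalseV)
}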
diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp
index 5873c7fb3872..f3e62d09cc30 100644
--- a/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -55,10 +55,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
for (unsigned Op : {G_SDIV, G_UDIV}) {
for (auto Ty : {s8, s16})
- // FIXME: We need WidenScalar here, but in the case of targets with
- // software division we'll also need Libcall afterwards. Treat as Custom
- // until we have better support for chaining legalization actions.
- setAction({Op, Ty}, Custom);
+ setAction({Op, Ty}, WidenScalar);
if (ST.hasDivideInARMMode())
setAction({Op, s32}, Legal);
else
@@ -84,6 +81,10 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
setAction({G_GEP, p0}, Legal);
setAction({G_GEP, 1, s32}, Legal);
+ setAction({G_SELECT, s32}, Legal);
+ setAction({G_SELECT, p0}, Legal);
+ setAction({G_SELECT, 1, s1}, Legal);
+
setAction({G_CONSTANT, s32}, Legal);
setAction({G_ICMP, s1}, Legal);
@@ -118,40 +119,6 @@ bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI,
switch (MI.getOpcode()) {
default:
return false;
- case G_SDIV:
- case G_UDIV: {
- LLT Ty = MRI.getType(MI.getOperand(0).getReg());
- if (Ty != LLT::scalar(16) && Ty != LLT::scalar(8))
- return false;
-
- // We need to widen to 32 bits and then maybe, if the target requires,
- // transform into a libcall.
- LegalizerHelper Helper(MIRBuilder.getMF());
-
- MachineInstr *NewMI = nullptr;
- Helper.MIRBuilder.recordInsertions([&](MachineInstr *MI) {
- // Store the new, 32-bit div instruction.
- if (MI->getOpcode() == G_SDIV || MI->getOpcode() == G_UDIV)
- NewMI = MI;
- });
-
- auto Result = Helper.widenScalar(MI, 0, LLT::scalar(32));
- Helper.MIRBuilder.stopRecordingInsertions();
- if (Result == LegalizerHelper::UnableToLegalize) {
- return false;
- }
- assert(NewMI && "Couldn't find widened instruction");
- assert((NewMI->getOpcode() == G_SDIV || NewMI->getOpcode() == G_UDIV) &&
- "Unexpected widened instruction");
- assert(MRI.getType(NewMI->getOperand(0).getReg()).getSizeInBits() == 32 &&
- "Unexpected type for the widened instruction");
-
- Result = Helper.legalizeInstrStep(*NewMI);
- if (Result == LegalizerHelper::UnableToLegalize) {
- return false;
- }
- return true;
- }
case G_SREM:
case G_UREM: {
unsigned OriginalResult = MI.getOperand(0).getReg();
diff --git a/lib/Target/ARM/ARMRegisterBankInfo.cpp b/lib/Target/ARM/ARMRegisterBankInfo.cpp
index 2350d0c6ef69..11fb81a4f9fe 100644
--- a/lib/Target/ARM/ARMRegisterBankInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterBankInfo.cpp
@@ -255,6 +255,18 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OperandsMapping =
getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr});
break;
+ case G_SELECT: {
+ LLT Ty2 = MRI.getType(MI.getOperand(1).getReg());
+ (void)Ty2;
+ assert(Ty.getSizeInBits() == 32 && "Unsupported size for G_SELECT");
+ assert(Ty2.getSizeInBits() == 1 && "Unsupported size for G_SELECT");
+ OperandsMapping =
+ getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx],
+ &ARM::ValueMappings[ARM::GPR3OpsIdx],
+ &ARM::ValueMappings[ARM::GPR3OpsIdx],
+ &ARM::ValueMappings[ARM::GPR3OpsIdx]});
+ break;
+ }
case G_ICMP: {
LLT Ty2 = MRI.getType(MI.getOperand(2).getReg());
(void)Ty2;
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index 02cbfb1fa9f1..b10583bc7983 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -245,6 +245,10 @@ def rGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, SP, PC)> {
// the general GPR register class above (MOV, e.g.)
def tGPR : RegisterClass<"ARM", [i32], 32, (trunc GPR, 8)>;
+// Thumb registers R0-R7 and the PC. Some instructions like TBB or TBH allow
+// the PC to be used as the base operand as well.
+def tGPRwithpc : RegisterClass<"ARM", [i32], 32, (add tGPR, PC)>;
+
// The high registers in thumb mode, R8-R15.
def hGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, tGPR)>;
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index 1c7902520f2d..53e012f13ee2 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -424,3 +424,4 @@ include "ARMScheduleA9.td"
include "ARMScheduleSwift.td"
include "ARMScheduleR52.td"
include "ARMScheduleA57.td"
+include "ARMScheduleM3.td"
diff --git a/lib/Target/ARM/ARMScheduleM3.td b/lib/Target/ARM/ARMScheduleM3.td
new file mode 100644
index 000000000000..93f8299f9bd0
--- /dev/null
+++ b/lib/Target/ARM/ARMScheduleM3.td
@@ -0,0 +1,21 @@
+//=- ARMScheduleM3.td - ARM Cortex-M3 Scheduling Definitions -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for the ARM Cortex-M3 processor.
+//
+//===----------------------------------------------------------------------===//
+
+def CortexM3Model : SchedMachineModel {
+ let IssueWidth = 1; // Only IT can be dual-issued, so assume single-issue
+ let MicroOpBufferSize = 0; // In-order
+ let LoadLatency = 2; // Latency when not pipelined, not pc-relative
+ let MispredictPenalty = 2; // Best case branch taken cost
+
+ let CompleteModel = 0;
+}
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index d9d0c27c6304..2c42a1336166 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -11,6 +11,13 @@
//
//===----------------------------------------------------------------------===//
+#include "ARM.h"
+
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+#include "ARMCallLowering.h"
+#include "ARMLegalizerInfo.h"
+#include "ARMRegisterBankInfo.h"
+#endif
#include "ARMSubtarget.h"
#include "ARMFrameLowering.h"
#include "ARMInstrInfo.h"
@@ -23,6 +30,13 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
+#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
+#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
+#endif
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
@@ -78,11 +92,6 @@ ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU,
return *this;
}
-/// EnableExecuteOnly - Enables the generation of execute-only code on supported
-/// targets
-static cl::opt<bool>
-EnableExecuteOnly("arm-execute-only");
-
ARMFrameLowering *ARMSubtarget::initializeFrameLowering(StringRef CPU,
StringRef FS) {
ARMSubtarget &STI = initializeSubtargetDependencies(CPU, FS);
@@ -92,13 +101,41 @@ ARMFrameLowering *ARMSubtarget::initializeFrameLowering(StringRef CPU,
return new ARMFrameLowering(STI);
}
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+namespace {
+
+struct ARMGISelActualAccessor : public GISelAccessor {
+ std::unique_ptr<CallLowering> CallLoweringInfo;
+ std::unique_ptr<InstructionSelector> InstSelector;
+ std::unique_ptr<LegalizerInfo> Legalizer;
+ std::unique_ptr<RegisterBankInfo> RegBankInfo;
+
+ const CallLowering *getCallLowering() const override {
+ return CallLoweringInfo.get();
+ }
+
+ const InstructionSelector *getInstructionSelector() const override {
+ return InstSelector.get();
+ }
+
+ const LegalizerInfo *getLegalizerInfo() const override {
+ return Legalizer.get();
+ }
+
+ const RegisterBankInfo *getRegBankInfo() const override {
+ return RegBankInfo.get();
+ }
+};
+
+} // end anonymous namespace
+#endif
+
ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const ARMBaseTargetMachine &TM, bool IsLittle)
: ARMGenSubtargetInfo(TT, CPU, FS), UseMulOps(UseFusedMulOps),
- GenExecuteOnly(EnableExecuteOnly), CPUString(CPU), IsLittle(IsLittle),
- TargetTriple(TT), Options(TM.Options), TM(TM),
- FrameLowering(initializeFrameLowering(CPU, FS)),
+ CPUString(CPU), IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options),
+ TM(TM), FrameLowering(initializeFrameLowering(CPU, FS)),
// At this point initializeSubtargetDependencies has been called so
// we can query directly.
InstrInfo(isThumb1Only()
@@ -106,7 +143,29 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
: !isThumb()
? (ARMBaseInstrInfo *)new ARMInstrInfo(*this)
: (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)),
- TLInfo(TM, *this) {}
+ TLInfo(TM, *this) {
+ assert((isThumb() || hasARMOps()) &&
+ "Target must either be thumb or support ARM operations!");
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+ GISelAccessor *GISel = new GISelAccessor();
+#else
+ ARMGISelActualAccessor *GISel = new ARMGISelActualAccessor();
+ GISel->CallLoweringInfo.reset(new ARMCallLowering(*getTargetLowering()));
+ GISel->Legalizer.reset(new ARMLegalizerInfo(*this));
+
+ auto *RBI = new ARMRegisterBankInfo(*getRegisterInfo());
+
+ // FIXME: At this point, we can't rely on Subtarget having RBI.
+ // It's awkward to mix passing RBI and the Subtarget; should we pass
+ // TII/TRI as well?
+ GISel->InstSelector.reset(createARMInstructionSelector(
+ *static_cast<const ARMBaseTargetMachine *>(&TM), *this, *RBI));
+
+ GISel->RegBankInfo.reset(RBI);
+#endif
+ setGISelAccessor(*GISel);
+}
const CallLowering *ARMSubtarget::getCallLowering() const {
assert(GISel && "Access to GlobalISel APIs not set");
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index d890d0fa777e..e15b17512c96 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -246,6 +246,11 @@ protected:
/// avoid issue "normal" call instructions to callees which do not return.
bool HasRetAddrStack = false;
+ /// HasBranchPredictor - True if the subtarget has a branch predictor. Having
+ /// a branch predictor or not changes the expected cost of taking a branch,
+ /// which affects the choice of whether to use predicated instructions.
+ bool HasBranchPredictor = true;
+
/// HasMPExtension - True if the subtarget supports Multiprocessing
/// extension (ARMv7 only).
bool HasMPExtension = false;
@@ -554,6 +559,7 @@ public:
bool cheapPredicableCPSRDef() const { return CheapPredicableCPSRDef; }
bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; }
bool hasRetAddrStack() const { return HasRetAddrStack; }
+ bool hasBranchPredictor() const { return HasBranchPredictor; }
bool hasMPExtension() const { return HasMPExtension; }
bool hasDSP() const { return HasDSP; }
bool useNaClTrap() const { return UseNaClTrap; }
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index eb71e557ec91..c323a1d368de 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -11,11 +11,6 @@
//===----------------------------------------------------------------------===//
#include "ARM.h"
-#include "ARMCallLowering.h"
-#include "ARMLegalizerInfo.h"
-#ifdef LLVM_BUILD_GLOBAL_ISEL
-#include "ARMRegisterBankInfo.h"
-#endif
#include "ARMSubtarget.h"
#include "ARMMacroFusion.h"
#include "ARMTargetMachine.h"
@@ -29,7 +24,6 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/ExecutionDepsFix.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
-#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
@@ -110,60 +104,20 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
static ARMBaseTargetMachine::ARMABI
computeTargetABI(const Triple &TT, StringRef CPU,
const TargetOptions &Options) {
- if (Options.MCOptions.getABIName() == "aapcs16")
+ StringRef ABIName = Options.MCOptions.getABIName();
+
+ if (ABIName.empty())
+ ABIName = ARM::computeDefaultTargetABI(TT, CPU);
+
+ if (ABIName == "aapcs16")
return ARMBaseTargetMachine::ARM_ABI_AAPCS16;
- else if (Options.MCOptions.getABIName().startswith("aapcs"))
+ else if (ABIName.startswith("aapcs"))
return ARMBaseTargetMachine::ARM_ABI_AAPCS;
- else if (Options.MCOptions.getABIName().startswith("apcs"))
+ else if (ABIName.startswith("apcs"))
return ARMBaseTargetMachine::ARM_ABI_APCS;
- assert(Options.MCOptions.getABIName().empty() &&
- "Unknown target-abi option!");
-
- ARMBaseTargetMachine::ARMABI TargetABI =
- ARMBaseTargetMachine::ARM_ABI_UNKNOWN;
-
- unsigned ArchKind = ARM::parseCPUArch(CPU);
- StringRef ArchName = ARM::getArchName(ArchKind);
- // FIXME: This is duplicated code from the front end and should be unified.
- if (TT.isOSBinFormatMachO()) {
- if (TT.getEnvironment() == Triple::EABI ||
- (TT.getOS() == Triple::UnknownOS && TT.isOSBinFormatMachO()) ||
- ARM::parseArchProfile(ArchName) == ARM::PK_M) {
- TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
- } else if (TT.isWatchABI()) {
- TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS16;
- } else {
- TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
- }
- } else if (TT.isOSWindows()) {
- // FIXME: this is invalid for WindowsCE
- TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
- } else {
- // Select the default based on the platform.
- switch (TT.getEnvironment()) {
- case Triple::Android:
- case Triple::GNUEABI:
- case Triple::GNUEABIHF:
- case Triple::MuslEABI:
- case Triple::MuslEABIHF:
- case Triple::EABIHF:
- case Triple::EABI:
- TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
- break;
- case Triple::GNU:
- TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
- break;
- default:
- if (TT.isOSNetBSD())
- TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
- else
- TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
- break;
- }
- }
-
- return TargetABI;
+ llvm_unreachable("Unhandled/unknown ABI Name!");
+ return ARMBaseTargetMachine::ARM_ABI_UNKNOWN;
}
static std::string computeDataLayout(const Triple &TT, StringRef CPU,
@@ -248,61 +202,39 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT,
CPU, FS, Options, getEffectiveRelocModel(TT, RM), CM,
OL),
TargetABI(computeTargetABI(TT, CPU, Options)),
- TLOF(createTLOF(getTargetTriple())),
- Subtarget(TT, CPU, FS, *this, isLittle), isLittle(isLittle) {
+ TLOF(createTLOF(getTargetTriple())), isLittle(isLittle) {
// Default to triple-appropriate float ABI
- if (Options.FloatABIType == FloatABI::Default)
- this->Options.FloatABIType =
- Subtarget.isTargetHardFloat() ? FloatABI::Hard : FloatABI::Soft;
+ if (Options.FloatABIType == FloatABI::Default) {
+ if (TargetTriple.getEnvironment() == Triple::GNUEABIHF ||
+ TargetTriple.getEnvironment() == Triple::MuslEABIHF ||
+ TargetTriple.getEnvironment() == Triple::EABIHF ||
+ TargetTriple.isOSWindows() ||
+ TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16)
+ this->Options.FloatABIType = FloatABI::Hard;
+ else
+ this->Options.FloatABIType = FloatABI::Soft;
+ }
// Default to triple-appropriate EABI
if (Options.EABIVersion == EABI::Default ||
Options.EABIVersion == EABI::Unknown) {
// musl is compatible with glibc with regard to EABI version
- if (Subtarget.isTargetGNUAEABI() || Subtarget.isTargetMuslAEABI())
+ if ((TargetTriple.getEnvironment() == Triple::GNUEABI ||
+ TargetTriple.getEnvironment() == Triple::GNUEABIHF ||
+ TargetTriple.getEnvironment() == Triple::MuslEABI ||
+ TargetTriple.getEnvironment() == Triple::MuslEABIHF) &&
+ !(TargetTriple.isOSWindows() || TargetTriple.isOSDarwin()))
this->Options.EABIVersion = EABI::GNU;
else
this->Options.EABIVersion = EABI::EABI5;
}
initAsmInfo();
- if (!Subtarget.isThumb() && !Subtarget.hasARMOps())
- report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "
- "support ARM mode execution!");
}
ARMBaseTargetMachine::~ARMBaseTargetMachine() = default;
-#ifdef LLVM_BUILD_GLOBAL_ISEL
-namespace {
-
-struct ARMGISelActualAccessor : public GISelAccessor {
- std::unique_ptr<CallLowering> CallLoweringInfo;
- std::unique_ptr<InstructionSelector> InstSelector;
- std::unique_ptr<LegalizerInfo> Legalizer;
- std::unique_ptr<RegisterBankInfo> RegBankInfo;
-
- const CallLowering *getCallLowering() const override {
- return CallLoweringInfo.get();
- }
-
- const InstructionSelector *getInstructionSelector() const override {
- return InstSelector.get();
- }
-
- const LegalizerInfo *getLegalizerInfo() const override {
- return Legalizer.get();
- }
-
- const RegisterBankInfo *getRegBankInfo() const override {
- return RegBankInfo.get();
- }
-};
-
-} // end anonymous namespace
-#endif
-
const ARMSubtarget *
ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
Attribute CPUAttr = F.getFnAttribute("target-cpu");
@@ -334,24 +266,6 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
// function that reside in TargetOptions.
resetTargetOptions(F);
I = llvm::make_unique<ARMSubtarget>(TargetTriple, CPU, FS, *this, isLittle);
-
-#ifndef LLVM_BUILD_GLOBAL_ISEL
- GISelAccessor *GISel = new GISelAccessor();
-#else
- ARMGISelActualAccessor *GISel = new ARMGISelActualAccessor();
- GISel->CallLoweringInfo.reset(new ARMCallLowering(*I->getTargetLowering()));
- GISel->Legalizer.reset(new ARMLegalizerInfo(*I));
-
- auto *RBI = new ARMRegisterBankInfo(*I->getRegisterInfo());
-
- // FIXME: At this point, we can't rely on Subtarget having RBI.
- // It's awkward to mix passing RBI and the Subtarget; should we pass
- // TII/TRI as well?
- GISel->InstSelector.reset(createARMInstructionSelector(*this, *I, *RBI));
-
- GISel->RegBankInfo.reset(RBI);
-#endif
- I->setGISelAccessor(*GISel);
}
return I.get();
}
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index 2fcee73228fe..f41da3e8e223 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -36,7 +36,6 @@ public:
protected:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- ARMSubtarget Subtarget;
bool isLittle;
mutable StringMap<std::unique_ptr<ARMSubtarget>> SubtargetMap;
@@ -47,8 +46,8 @@ public:
CodeGenOpt::Level OL, bool isLittle);
~ARMBaseTargetMachine() override;
- const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; }
const ARMSubtarget *getSubtargetImpl(const Function &F) const override;
+ const ARMSubtarget *getSubtargetImpl() const = delete;
bool isLittleEndian() const { return isLittle; }
/// \brief Get the TargetIRAnalysis for this target.
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index a5b27abeb27f..88bab64ffaf2 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -32,7 +32,7 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
const ARMBaseTargetMachine &ARM_TM = static_cast<const ARMBaseTargetMachine &>(TM);
bool isAAPCS_ABI = ARM_TM.TargetABI == ARMBaseTargetMachine::ARMABI::ARM_ABI_AAPCS;
- genExecuteOnly = ARM_TM.getSubtargetImpl()->genExecuteOnly();
+ // genExecuteOnly = ARM_TM.getSubtargetImpl()->genExecuteOnly();
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
InitializeELF(isAAPCS_ABI);
@@ -43,16 +43,6 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
AttributesSection =
getContext().getELFSection(".ARM.attributes", ELF::SHT_ARM_ATTRIBUTES, 0);
-
- // Make code section unreadable when in execute-only mode
- if (genExecuteOnly) {
- unsigned Type = ELF::SHT_PROGBITS;
- unsigned Flags = ELF::SHF_EXECINSTR | ELF::SHF_ALLOC | ELF::SHF_ARM_PURECODE;
- // Since we cannot modify flags for an existing section, we create a new
- // section with the right flags, and use 0 as the unique ID for
- // execute-only text
- TextSection = Ctx.getELFSection(".text", Type, Flags, 0, "", 0U);
- }
}
const MCExpr *ARMElfTargetObjectFile::getTTypeGlobalReference(
@@ -74,21 +64,27 @@ getDebugThreadLocalSymbol(const MCSymbol *Sym) const {
getContext());
}
-MCSection *
-ARMElfTargetObjectFile::getExplicitSectionGlobal(const GlobalObject *GO,
- SectionKind SK, const TargetMachine &TM) const {
+static bool isExecuteOnlyFunction(const GlobalObject *GO, SectionKind SK,
+ const TargetMachine &TM) {
+ if (const Function *F = dyn_cast<Function>(GO))
+ if (TM.getSubtarget<ARMSubtarget>(*F).genExecuteOnly() && SK.isText())
+ return true;
+ return false;
+}
+
+MCSection *ARMElfTargetObjectFile::getExplicitSectionGlobal(
+ const GlobalObject *GO, SectionKind SK, const TargetMachine &TM) const {
// Set execute-only access for the explicit section
- if (genExecuteOnly && SK.isText())
+ if (isExecuteOnlyFunction(GO, SK, TM))
SK = SectionKind::getExecuteOnly();
return TargetLoweringObjectFileELF::getExplicitSectionGlobal(GO, SK, TM);
}
-MCSection *
-ARMElfTargetObjectFile::SelectSectionForGlobal(const GlobalObject *GO,
- SectionKind SK, const TargetMachine &TM) const {
+MCSection *ARMElfTargetObjectFile::SelectSectionForGlobal(
+ const GlobalObject *GO, SectionKind SK, const TargetMachine &TM) const {
// Place the global in the execute-only text section
- if (genExecuteOnly && SK.isText())
+ if (isExecuteOnlyFunction(GO, SK, TM))
SK = SectionKind::getExecuteOnly();
return TargetLoweringObjectFileELF::SelectSectionForGlobal(GO, SK, TM);
diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h
index dbb8128269dc..bd7aa1cfe02b 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.h
+++ b/lib/Target/ARM/ARMTargetObjectFile.h
@@ -16,8 +16,6 @@
namespace llvm {
class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF {
- mutable bool genExecuteOnly = false;
-
protected:
const MCSection *AttributesSection = nullptr;
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 585726208a8d..5ab236b7fd4c 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -486,7 +486,7 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
}
- Size = 0;
+ Size = 4;
return MCDisassembler::Fail;
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 81760f03940a..22de728fe06e 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -738,13 +738,13 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
}
}
-void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
- const MCFixup &Fixup,
- const MCValue &Target, bool &IsResolved) {
+bool ARMAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
+ const MCFixup &Fixup,
+ const MCValue &Target) {
const MCSymbolRefExpr *A = Target.getSymA();
const MCSymbol *Sym = A ? &A->getSymbol() : nullptr;
const unsigned FixupKind = Fixup.getKind();
- if (IsResolved && (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) {
+ if ((unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) {
assert(Sym && "How did we resolve this?");
// If the symbol is external the linker will handle it.
@@ -753,7 +753,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
// If the symbol is out of range, produce a relocation and hope the
// linker can handle it. GNU AS produces an error in this case.
if (Sym->isExternal())
- IsResolved = false;
+ return true;
}
// Create relocations for unconditional branches to function symbols with
// different execution mode in ELF binaries.
@@ -761,12 +761,12 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
unsigned Type = dyn_cast<MCSymbolELF>(Sym)->getType();
if ((Type == ELF::STT_FUNC || Type == ELF::STT_GNU_IFUNC)) {
if (Asm.isThumbFunc(Sym) && (FixupKind == ARM::fixup_arm_uncondbranch))
- IsResolved = false;
+ return true;
if (!Asm.isThumbFunc(Sym) && (FixupKind == ARM::fixup_arm_thumb_br ||
FixupKind == ARM::fixup_arm_thumb_bl ||
FixupKind == ARM::fixup_t2_condbranch ||
FixupKind == ARM::fixup_t2_uncondbranch))
- IsResolved = false;
+ return true;
}
}
// We must always generate a relocation for BL/BLX instructions if we have
@@ -776,7 +776,8 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
FixupKind == ARM::fixup_arm_blx ||
FixupKind == ARM::fixup_arm_uncondbl ||
FixupKind == ARM::fixup_arm_condbl))
- IsResolved = false;
+ return true;
+ return false;
}
/// getFixupKindNumBytes - The number of bytes the fixup may change.
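The processFixupValue-to-shouldForceRelocation migration (here and in the AVR backend below) narrows the hook's contract: instead of mutating IsResolved, the backend answers a single yes/no question and the generic assembler combines the answers. A sketch of the intended composition -- this is the concept, not the actual MCAssembler source:

// What the generic layer now computes, conceptually:
//   IsResolved = TargetCanResolve &&
//                !Backend.shouldForceRelocation(Asm, Fixup, Target);
static bool finalIsResolved(bool TargetCanResolve, bool ForceRelocation) {
  return TargetCanResolve && !ForceRelocation;
}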
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
index 6a0ba2ed41c1..84b54bbb9a49 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
@@ -38,10 +38,8 @@ public:
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
- /// processFixupValue - Target hook to process the literal value of a fixup
- /// if necessary.
- void processFixupValue(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target, bool &IsResolved) override;
+ bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target) override;
unsigned adjustFixupValue(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target, uint64_t Value, bool IsPCRel,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
index 9f6c5d7bf920..831589ba0581 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
@@ -15,55 +15,47 @@
namespace llvm {
namespace ARM {
enum Fixups {
- // fixup_arm_ldst_pcrel_12 - 12-bit PC relative relocation for symbol
- // addresses
+ // 12-bit PC relative relocation for symbol addresses
fixup_arm_ldst_pcrel_12 = FirstTargetFixupKind,
- // fixup_t2_ldst_pcrel_12 - Equivalent to fixup_arm_ldst_pcrel_12, with
- // the 16-bit halfwords reordered.
+ // Equivalent to fixup_arm_ldst_pcrel_12, with the 16-bit halfwords reordered.
fixup_t2_ldst_pcrel_12,
- // fixup_arm_pcrel_10_unscaled - 10-bit PC relative relocation for symbol
- // addresses used in LDRD/LDRH/LDRB/etc. instructions. All bits are encoded.
+ // 10-bit PC relative relocation for symbol addresses used in
+ // LDRD/LDRH/LDRB/etc. instructions. All bits are encoded.
fixup_arm_pcrel_10_unscaled,
- // fixup_arm_pcrel_10 - 10-bit PC relative relocation for symbol addresses
- // used in VFP instructions where the lower 2 bits are not encoded
- // (so it's encoded as an 8-bit immediate).
+ // 10-bit PC relative relocation for symbol addresses used in VFP instructions
+ // where the lower 2 bits are not encoded (so it's encoded as an 8-bit
+ // immediate).
fixup_arm_pcrel_10,
- // fixup_t2_pcrel_10 - Equivalent to fixup_arm_pcrel_10, accounting for
- // the short-swapped encoding of Thumb2 instructions.
+ // Equivalent to fixup_arm_pcrel_10, accounting for the short-swapped encoding
+ // of Thumb2 instructions.
fixup_t2_pcrel_10,
- // fixup_arm_pcrel_9 - 9-bit PC relative relocation for symbol addresses
- // used in VFP instructions where bit 0 not encoded (so it's encoded as an
- // 8-bit immediate).
+ // 9-bit PC relative relocation for symbol addresses used in VFP instructions
+ // where bit 0 is not encoded (so it's encoded as an 8-bit immediate).
fixup_arm_pcrel_9,
- // fixup_t2_pcrel_9 - Equivalent to fixup_arm_pcrel_9, accounting for
- // the short-swapped encoding of Thumb2 instructions.
+ // Equivalent to fixup_arm_pcrel_9, accounting for the short-swapped encoding
+ // of Thumb2 instructions.
fixup_t2_pcrel_9,
- // fixup_thumb_adr_pcrel_10 - 10-bit PC relative relocation for symbol
- // addresses where the lower 2 bits are not encoded (so it's encoded as an
- // 8-bit immediate).
+ // 10-bit PC relative relocation for symbol addresses where the lower 2 bits
+ // are not encoded (so it's encoded as an 8-bit immediate).
fixup_thumb_adr_pcrel_10,
- // fixup_arm_adr_pcrel_12 - 12-bit PC relative relocation for the ADR
- // instruction.
+ // 12-bit PC relative relocation for the ADR instruction.
fixup_arm_adr_pcrel_12,
- // fixup_t2_adr_pcrel_12 - 12-bit PC relative relocation for the ADR
- // instruction.
+ // 12-bit PC relative relocation for the ADR instruction.
fixup_t2_adr_pcrel_12,
- // fixup_arm_condbranch - 24-bit PC relative relocation for conditional branch
- // instructions.
+ // 24-bit PC relative relocation for conditional branch instructions.
fixup_arm_condbranch,
- // fixup_arm_uncondbranch - 24-bit PC relative relocation for
- // branch instructions. (unconditional)
+ // 24-bit PC relative relocation for unconditional branch instructions.
fixup_arm_uncondbranch,
- // fixup_t2_condbranch - 20-bit PC relative relocation for Thumb2 direct
- // uconditional branch instructions.
+ // 20-bit PC relative relocation for Thumb2 direct conditional branch
+ // instructions.
fixup_t2_condbranch,
- // fixup_t2_uncondbranch - 20-bit PC relative relocation for Thumb2 direct
- // branch unconditional branch instructions.
+ // 20-bit PC relative relocation for Thumb2 direct unconditional branch
+ // instructions.
fixup_t2_uncondbranch,
- // fixup_arm_thumb_br - 12-bit fixup for Thumb B instructions.
+ // 12-bit fixup for Thumb B instructions.
fixup_arm_thumb_br,
// The following fixups handle the ARM BL instructions. These can be
@@ -75,42 +67,41 @@ enum Fixups {
// MachO does not draw a distinction between the two cases, so it will treat
// fixup_arm_uncondbl and fixup_arm_condbl as identical fixups.
- // fixup_arm_uncondbl - Fixup for unconditional ARM BL instructions.
+ // Fixup for unconditional ARM BL instructions.
fixup_arm_uncondbl,
- // fixup_arm_condbl - Fixup for ARM BL instructions with nontrivial
- // conditionalisation.
+ // Fixup for ARM BL instructions with nontrivial conditionalisation.
fixup_arm_condbl,
- // fixup_arm_blx - Fixup for ARM BLX instructions.
+ // Fixup for ARM BLX instructions.
fixup_arm_blx,
- // fixup_arm_thumb_bl - Fixup for Thumb BL instructions.
+ // Fixup for Thumb BL instructions.
fixup_arm_thumb_bl,
- // fixup_arm_thumb_blx - Fixup for Thumb BLX instructions.
+ // Fixup for Thumb BLX instructions.
fixup_arm_thumb_blx,
- // fixup_arm_thumb_cb - Fixup for Thumb branch instructions.
+ // Fixup for Thumb branch instructions.
fixup_arm_thumb_cb,
- // fixup_arm_thumb_cp - Fixup for Thumb load/store from constant pool instrs.
+ // Fixup for Thumb load/store from constant pool instrs.
fixup_arm_thumb_cp,
- // fixup_arm_thumb_bcc - Fixup for Thumb conditional branching instructions.
+ // Fixup for Thumb conditional branching instructions.
fixup_arm_thumb_bcc,
// The next two are for the movt/movw pair
// the 16-bit imm field is split into imm{15-12} and imm{11-0}
fixup_arm_movt_hi16, // :upper16:
fixup_arm_movw_lo16, // :lower16:
- fixup_t2_movt_hi16, // :upper16:
- fixup_t2_movw_lo16, // :lower16:
+ fixup_t2_movt_hi16, // :upper16:
+ fixup_t2_movw_lo16, // :lower16:
- // fixup_arm_mod_imm - Fixup for mod_imm
+ // Fixup for mod_imm
fixup_arm_mod_imm,
- // fixup_t2_so_imm - Fixup for Thumb2 8-bit rotated operand
+ // Fixup for Thumb2 8-bit rotated operand
fixup_t2_so_imm,
// Marker
@@ -118,6 +109,6 @@ enum Fixups {
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
};
}
-}
+} // namespace llvm
#endif
diff --git a/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp b/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
index 5c3b45ac2328..d18298385adf 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
@@ -230,13 +230,25 @@ void ms8(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
namespace llvm {
// Prepare value for the target space for it
-void AVRAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t &Value,
+void AVRAsmBackend::adjustFixupValue(const MCFixup &Fixup,
+ const MCValue &Target,
+ uint64_t &Value,
MCContext *Ctx) const {
// The size of the fixup in bits.
uint64_t Size = AVRAsmBackend::getFixupKindInfo(Fixup.getKind()).TargetSize;
unsigned Kind = Fixup.getKind();
+ // Parsed LLVM-generated temporary labels are already
+ // adjusted for instruction size, but normal labels aren't.
+ //
+ // To handle both cases, we simply un-adjust the temporary label
+ // case so it acts like all other labels.
+ if (const MCSymbolRefExpr *A = Target.getSymA()) {
+ if (A->getSymbol().isTemporary())
+ Value += 2;
+ }
+
switch (Kind) {
default:
llvm_unreachable("unhandled fixup");
@@ -333,9 +345,10 @@ MCObjectWriter *AVRAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
MCELFObjectTargetWriter::getOSABI(OSType));
}
-void AVRAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
- unsigned DataSize, uint64_t Value,
- bool IsPCRel, MCContext &Ctx) const {
+void AVRAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, MutableArrayRef<char> Data,
+ uint64_t Value, bool IsPCRel) const {
+ adjustFixupValue(Fixup, Target, Value, &Asm.getContext());
if (Value == 0)
return; // Doesn't change encoding.
@@ -349,7 +362,7 @@ void AVRAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
Value <<= Info.TargetOffset;
unsigned Offset = Fixup.getOffset();
- assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
+ assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!");
// For each byte of the fragment that the fixup touches, mask in the
// bits from the fixup value.
@@ -436,30 +449,16 @@ bool AVRAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
return true;
}
-void AVRAsmBackend::processFixupValue(const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFixup &Fixup,
- const MCFragment *DF,
- const MCValue &Target, uint64_t &Value,
- bool &IsResolved) {
+bool AVRAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
+ const MCFixup &Fixup,
+ const MCValue &Target) {
switch ((unsigned) Fixup.getKind()) {
+ default: return false;
// Fixups which should always be recorded as relocations.
case AVR::fixup_7_pcrel:
case AVR::fixup_13_pcrel:
case AVR::fixup_call:
- IsResolved = false;
- break;
- default:
- // Parsed LLVM-generated temporary labels are already
- // adjusted for instruction size, but normal labels aren't.
- //
- // To handle both cases, we simply un-adjust the temporary label
- // case so it acts like all other labels.
- if (Target.getSymA()->getSymbol().isTemporary())
- Value += 2;
-
- adjustFixupValue(Fixup, Value, &Asm.getContext());
- break;
+ return true;
}
}
diff --git a/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h b/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
index f2be2494684a..4a75e3b0d22d 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
+++ b/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
@@ -35,13 +35,14 @@ public:
AVRAsmBackend(Triple::OSType OSType)
: MCAsmBackend(), OSType(OSType) {}
- void adjustFixupValue(const MCFixup &Fixup, uint64_t &Value,
- MCContext *Ctx = nullptr) const;
+ void adjustFixupValue(const MCFixup &Fixup, const MCValue &Target,
+ uint64_t &Value, MCContext *Ctx = nullptr) const;
MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override;
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
+ void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, MutableArrayRef<char> Data,
+ uint64_t Value, bool IsPCRel) const override;
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
@@ -63,10 +64,8 @@ public:
bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
- void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFixup &Fixup, const MCFragment *DF,
- const MCValue &Target, uint64_t &Value,
- bool &IsResolved) override;
+ bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target) override;
private:
Triple::OSType OSType;
diff --git a/lib/Target/BPF/BPFISelDAGToDAG.cpp b/lib/Target/BPF/BPFISelDAGToDAG.cpp
index c6ddd6bdad5e..f48429ee57b0 100644
--- a/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -16,6 +16,7 @@
#include "BPFRegisterInfo.h"
#include "BPFSubtarget.h"
#include "BPFTargetMachine.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -57,6 +58,11 @@ private:
bool SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset);
bool SelectFIAddr(SDValue Addr, SDValue &Base, SDValue &Offset);
+ // Node preprocessing cases
+ void PreprocessLoad(SDNode *Node, SelectionDAG::allnodes_iterator I);
+ void PreprocessCopyToReg(SDNode *Node);
+ void PreprocessTrunc(SDNode *Node, SelectionDAG::allnodes_iterator I);
+
// Find constants from a constant structure
typedef std::vector<unsigned char> val_vec_type;
bool fillGenericConstant(const DataLayout &DL, const Constant *CV,
@@ -69,9 +75,12 @@ private:
val_vec_type &Vals, int Offset);
bool getConstantFieldValue(const GlobalAddressSDNode *Node, uint64_t Offset,
uint64_t Size, unsigned char *ByteSeq);
+ bool checkLoadDef(unsigned DefReg, unsigned match_load_op);
// Mapping from ConstantStruct global value to corresponding byte-list values
std::map<const void *, val_vec_type> cs_vals_;
+ // Mapping from vreg to load memory opcode
+ std::map<unsigned, unsigned> load_to_vreg_;
};
} // namespace
@@ -203,89 +212,110 @@ void BPFDAGToDAGISel::Select(SDNode *Node) {
SelectCode(Node);
}
+void BPFDAGToDAGISel::PreprocessLoad(SDNode *Node,
+ SelectionDAG::allnodes_iterator I) {
+ union {
+ uint8_t c[8];
+ uint16_t s;
+ uint32_t i;
+ uint64_t d;
+ } new_val; // holds the constant value replacing the load.
+ bool to_replace = false;
+ SDLoc DL(Node);
+ const LoadSDNode *LD = cast<LoadSDNode>(Node);
+ uint64_t size = LD->getMemOperand()->getSize();
+
+ if (!size || size > 8 || (size & (size - 1)))
+ return;
+
+ SDNode *LDAddrNode = LD->getOperand(1).getNode();
+ // Match LDAddr against either global_addr or (global_addr + offset)
+ unsigned opcode = LDAddrNode->getOpcode();
+ if (opcode == ISD::ADD) {
+ SDValue OP1 = LDAddrNode->getOperand(0);
+ SDValue OP2 = LDAddrNode->getOperand(1);
+
+ // We want to find the pattern global_addr + offset
+ SDNode *OP1N = OP1.getNode();
+ if (OP1N->getOpcode() <= ISD::BUILTIN_OP_END || OP1N->getNumOperands() == 0)
+ return;
+
+ DEBUG(dbgs() << "Check candidate load: "; LD->dump(); dbgs() << '\n');
+
+ const GlobalAddressSDNode *GADN =
+ dyn_cast<GlobalAddressSDNode>(OP1N->getOperand(0).getNode());
+ const ConstantSDNode *CDN = dyn_cast<ConstantSDNode>(OP2.getNode());
+ if (GADN && CDN)
+ to_replace =
+ getConstantFieldValue(GADN, CDN->getZExtValue(), size, new_val.c);
+ } else if (LDAddrNode->getOpcode() > ISD::BUILTIN_OP_END &&
+ LDAddrNode->getNumOperands() > 0) {
+ DEBUG(dbgs() << "Check candidate load: "; LD->dump(); dbgs() << '\n');
+
+ SDValue OP1 = LDAddrNode->getOperand(0);
+ if (const GlobalAddressSDNode *GADN =
+ dyn_cast<GlobalAddressSDNode>(OP1.getNode()))
+ to_replace = getConstantFieldValue(GADN, 0, size, new_val.c);
+ }
+
+ if (!to_replace)
+ return;
+
+ // Replace the old value with the new one.
+ uint64_t val;
+ if (size == 1)
+ val = new_val.c[0];
+ else if (size == 2)
+ val = new_val.s;
+ else if (size == 4)
+ val = new_val.i;
+ else {
+ val = new_val.d;
+ }
+
+ DEBUG(dbgs() << "Replacing load of size " << size << " with constant " << val
+ << '\n');
+ SDValue NVal = CurDAG->getConstant(val, DL, MVT::i64);
+
+ // After replacement the current node is dead, so step the iterator
+ // back one to keep it valid.
+ I--;
+ SDValue From[] = {SDValue(Node, 0), SDValue(Node, 1)};
+ SDValue To[] = {NVal, NVal};
+ CurDAG->ReplaceAllUsesOfValuesWith(From, To, 2);
+ I++;
+ // It is safe to delete the node now.
+ CurDAG->DeleteNode(Node);
+}
+
void BPFDAGToDAGISel::PreprocessISelDAG() {
- // Iterate through all nodes, only interested in loads from ConstantStruct
- // ConstantArray should have converted by IR->DAG processing
+ // Iterate through all nodes, interested in the following cases:
+ //
+ // . loads from a ConstantStruct or ConstantArray, which can be turned
+ //   into constants themselves; with this we can avoid reading from a
+ //   read-only section at runtime.
+ //
+ // . register truncation, often the result of an 8/16/32-bit -> 64-bit
+ //   or 8/16-bit -> 32-bit conversion. If the register value is loaded
+ //   with the masked byte width, the AND operation can be removed since
+ //   a BPF LOAD already zero-extends.
+ //
+ // The latter also fixes a correctness issue: in BPF socket-related
+ // programs, e.g., __sk_buff->{data, data_end} are 32-bit registers, but
+ // the kernel verifier will later rewrite them with 64-bit values.
+ // Truncating the value after the load would therefore produce incorrect
+ // code.
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
E = CurDAG->allnodes_end();
I != E;) {
SDNode *Node = &*I++;
unsigned Opcode = Node->getOpcode();
- if (Opcode != ISD::LOAD)
- continue;
-
- union {
- uint8_t c[8];
- uint16_t s;
- uint32_t i;
- uint64_t d;
- } new_val; // hold up the constant values replacing loads.
- bool to_replace = false;
- SDLoc DL(Node);
- const LoadSDNode *LD = cast<LoadSDNode>(Node);
- uint64_t size = LD->getMemOperand()->getSize();
- if (!size || size > 8 || (size & (size - 1)))
- continue;
-
- SDNode *LDAddrNode = LD->getOperand(1).getNode();
- // Match LDAddr against either global_addr or (global_addr + offset)
- unsigned opcode = LDAddrNode->getOpcode();
- if (opcode == ISD::ADD) {
- SDValue OP1 = LDAddrNode->getOperand(0);
- SDValue OP2 = LDAddrNode->getOperand(1);
-
- // We want to find the pattern global_addr + offset
- SDNode *OP1N = OP1.getNode();
- if (OP1N->getOpcode() <= ISD::BUILTIN_OP_END ||
- OP1N->getNumOperands() == 0)
- continue;
-
- DEBUG(dbgs() << "Check candidate load: "; LD->dump(); dbgs() << '\n');
-
- const GlobalAddressSDNode *GADN =
- dyn_cast<GlobalAddressSDNode>(OP1N->getOperand(0).getNode());
- const ConstantSDNode *CDN = dyn_cast<ConstantSDNode>(OP2.getNode());
- if (GADN && CDN)
- to_replace =
- getConstantFieldValue(GADN, CDN->getZExtValue(), size, new_val.c);
- } else if (LDAddrNode->getOpcode() > ISD::BUILTIN_OP_END &&
- LDAddrNode->getNumOperands() > 0) {
- DEBUG(dbgs() << "Check candidate load: "; LD->dump(); dbgs() << '\n');
-
- SDValue OP1 = LDAddrNode->getOperand(0);
- if (const GlobalAddressSDNode *GADN =
- dyn_cast<GlobalAddressSDNode>(OP1.getNode()))
- to_replace = getConstantFieldValue(GADN, 0, size, new_val.c);
- }
-
- if (!to_replace)
- continue;
-
- // replacing the old with a new value
- uint64_t val;
- if (size == 1)
- val = new_val.c[0];
- else if (size == 2)
- val = new_val.s;
- else if (size == 4)
- val = new_val.i;
- else {
- val = new_val.d;
- }
-
- DEBUG(dbgs() << "Replacing load of size " << size << " with constant "
- << val << '\n');
- SDValue NVal = CurDAG->getConstant(val, DL, MVT::i64);
-
- // After replacement, the current node is dead, we need to
- // go backward one step to make iterator still work
- I--;
- SDValue From[] = {SDValue(Node, 0), SDValue(Node, 1)};
- SDValue To[] = {NVal, NVal};
- CurDAG->ReplaceAllUsesOfValuesWith(From, To, 2);
- I++;
- // It is safe to delete node now
- CurDAG->DeleteNode(Node);
+ if (Opcode == ISD::LOAD)
+ PreprocessLoad(Node, I);
+ else if (Opcode == ISD::CopyToReg)
+ PreprocessCopyToReg(Node);
+ else if (Opcode == ISD::AND)
+ PreprocessTrunc(Node, I);
}
}
@@ -415,6 +445,134 @@ bool BPFDAGToDAGISel::fillConstantStruct(const DataLayout &DL,
return true;
}
+void BPFDAGToDAGISel::PreprocessCopyToReg(SDNode *Node) {
+ const RegisterSDNode *RegN = dyn_cast<RegisterSDNode>(Node->getOperand(1));
+ if (!RegN || !TargetRegisterInfo::isVirtualRegister(RegN->getReg()))
+ return;
+
+ const LoadSDNode *LD = dyn_cast<LoadSDNode>(Node->getOperand(2));
+ if (!LD)
+ return;
+
+ // Assign a load value to a virtual register and record its load width.
+ unsigned mem_load_op = 0;
+ switch (LD->getMemOperand()->getSize()) {
+ default:
+ return;
+ case 4:
+ mem_load_op = BPF::LDW;
+ break;
+ case 2:
+ mem_load_op = BPF::LDH;
+ break;
+ case 1:
+ mem_load_op = BPF::LDB;
+ break;
+ }
+
+ DEBUG(dbgs() << "Find Load Value to VReg "
+ << TargetRegisterInfo::virtReg2Index(RegN->getReg()) << '\n');
+ load_to_vreg_[RegN->getReg()] = mem_load_op;
+}
+
+void BPFDAGToDAGISel::PreprocessTrunc(SDNode *Node,
+ SelectionDAG::allnodes_iterator I) {
+ ConstantSDNode *MaskN = dyn_cast<ConstantSDNode>(Node->getOperand(1));
+ if (!MaskN)
+ return;
+
+ unsigned match_load_op = 0;
+ switch (MaskN->getZExtValue()) {
+ default:
+ return;
+ case 0xFFFFFFFF:
+ match_load_op = BPF::LDW;
+ break;
+ case 0xFFFF:
+ match_load_op = BPF::LDH;
+ break;
+ case 0xFF:
+ match_load_op = BPF::LDB;
+ break;
+ }
+
+ // The Reg operand should be a virtual register, which is defined
+ // outside the current basic block. The DAG combiner has done a pretty
+ // good job of removing truncations within a single basic block.
+ SDValue BaseV = Node->getOperand(0);
+ if (BaseV.getOpcode() != ISD::CopyFromReg)
+ return;
+
+ const RegisterSDNode *RegN =
+ dyn_cast<RegisterSDNode>(BaseV.getNode()->getOperand(1));
+ if (!RegN || !TargetRegisterInfo::isVirtualRegister(RegN->getReg()))
+ return;
+ unsigned AndOpReg = RegN->getReg();
+ DEBUG(dbgs() << "Examine %vreg" << TargetRegisterInfo::virtReg2Index(AndOpReg)
+ << '\n');
+
+ // Examine the PHI insns in the MachineBasicBlock to find out the
+ // definitions of this virtual register. At this stage (DAG2DAG
+ // transformation), only PHI machine insns are available in the machine basic
+ // block.
+ MachineBasicBlock *MBB = FuncInfo->MBB;
+ MachineInstr *MII = nullptr;
+ for (auto &MI : *MBB) {
+ for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
+ const MachineOperand &MOP = MI.getOperand(i);
+ if (!MOP.isReg() || !MOP.isDef())
+ continue;
+ unsigned Reg = MOP.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg) && Reg == AndOpReg) {
+ MII = &MI;
+ break;
+ }
+ }
+ }
+
+ if (MII == nullptr) {
+ // No phi definition in this block.
+ if (!checkLoadDef(AndOpReg, match_load_op))
+ return;
+ } else {
+ // The PHI node looks like:
+ // %vreg2<def> = PHI %vreg0, <BB#1>, %vreg1, <BB#3>
+ // Trace each incoming definition, e.g., (%vreg0, BB#1) and (%vreg1, BB#3)
+ // The AND operation can be removed if both %vreg0 in BB#1 and %vreg1 in
+ // BB#3 are defined with a load matching the MaskN.
+ DEBUG(dbgs() << "Check PHI Insn: "; MII->dump(); dbgs() << '\n');
+ unsigned PrevReg = -1;
+ for (unsigned i = 0; i < MII->getNumOperands(); ++i) {
+ const MachineOperand &MOP = MII->getOperand(i);
+ if (MOP.isReg()) {
+ if (MOP.isDef())
+ continue;
+ PrevReg = MOP.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(PrevReg))
+ return;
+ if (!checkLoadDef(PrevReg, match_load_op))
+ return;
+ }
+ }
+ }
+
+ DEBUG(dbgs() << "Remove the redundant AND operation in: "; Node->dump();
+ dbgs() << '\n');
+
+ I--;
+ CurDAG->ReplaceAllUsesWith(SDValue(Node, 0), BaseV);
+ I++;
+ CurDAG->DeleteNode(Node);
+}
+
+bool BPFDAGToDAGISel::checkLoadDef(unsigned DefReg, unsigned match_load_op) {
+ auto it = load_to_vreg_.find(DefReg);
+ if (it == load_to_vreg_.end())
+ return false; // The definition of the register is not exported yet.
+
+ return it->second == match_load_op;
+}
+
FunctionPass *llvm::createBPFISelDag(BPFTargetMachine &TM) {
return new BPFDAGToDAGISel(TM);
}
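To see why PreprocessTrunc is safe, consider the C source below (an illustration, not from the patch): BPF's LDB/LDH/LDW loads already zero-extend into the 64-bit register, so the AND mask introduced by the widening is a no-op whenever every reaching definition of the register is a load of the matching width -- which is exactly what checkLoadDef and the PHI walk verify.

// Compiled for BPF, the return expression becomes roughly:
//   r0 = *(u8 *)(&g);   // LDB: upper 56 bits are already zero
//   r0 &= 0xff;         // redundant AND that PreprocessTrunc deletes
unsigned char g;
unsigned long f(void) { return g; }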
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 2b0ceaa66258..97a53dcbaed7 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -178,8 +178,8 @@ static cl::opt<bool> EnableSaveRestoreLong("enable-save-restore-long",
cl::Hidden, cl::desc("Enable long calls for save-restore stubs."),
cl::init(false), cl::ZeroOrMore);
-static cl::opt<bool> UseAllocframe("use-allocframe", cl::init(true),
- cl::Hidden, cl::desc("Use allocframe more conservatively"));
+static cl::opt<bool> EliminateFramePointer("hexagon-fp-elim", cl::init(true),
+ cl::Hidden, cl::desc("Refrain from using FP whenever possible"));
static cl::opt<bool> OptimizeSpillSlots("hexagon-opt-spill", cl::Hidden,
cl::init(true), cl::desc("Optimize spill slots"));
@@ -550,7 +550,6 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB,
auto &HST = MF.getSubtarget<HexagonSubtarget>();
auto &HII = *HST.getInstrInfo();
auto &HRI = *HST.getRegisterInfo();
- DebugLoc dl;
unsigned MaxAlign = std::max(MFI.getMaxAlignment(), getStackAlignment());
@@ -584,77 +583,56 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB,
MI->eraseFromParent();
}
- if (!hasFP(MF))
- return;
-
- // Check for overflow.
- // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used?
- const unsigned int ALLOCFRAME_MAX = 16384;
+ DebugLoc dl = MBB.findDebugLoc(InsertPt);
- // Create a dummy memory operand to avoid allocframe from being treated as
- // a volatile memory reference.
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore,
- 4, 4);
-
- if (NumBytes >= ALLOCFRAME_MAX) {
- // Emit allocframe(#0).
- BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
- .addImm(0)
- .addMemOperand(MMO);
-
- // Subtract offset from frame pointer.
- // We use a caller-saved non-parameter register for that.
- unsigned CallerSavedReg = HRI.getFirstCallerSavedNonParamReg();
- BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::CONST32),
- CallerSavedReg).addImm(NumBytes);
- BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_sub), SP)
+ if (hasFP(MF)) {
+ insertAllocframe(MBB, InsertPt, NumBytes);
+ if (AlignStack) {
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_andir), SP)
+ .addReg(SP)
+ .addImm(-int64_t(MaxAlign));
+ }
+ // If the stack-checking is enabled, and we spilled the callee-saved
+ // registers inline (i.e. did not use a spill function), then call
+ // the stack checker directly.
+ if (EnableStackOVFSanitizer && !PrologueStubs)
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::PS_call_stk))
+ .addExternalSymbol("__runtime_stack_check");
+ } else if (NumBytes > 0) {
+ assert(alignTo(NumBytes, 8) == NumBytes);
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
.addReg(SP)
- .addReg(CallerSavedReg);
- } else {
- BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
- .addImm(NumBytes)
- .addMemOperand(MMO);
+ .addImm(-int(NumBytes));
}
-
- if (AlignStack) {
- BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_andir), SP)
- .addReg(SP)
- .addImm(-int64_t(MaxAlign));
- }
-
- // If the stack-checking is enabled, and we spilled the callee-saved
- // registers inline (i.e. did not use a spill function), then call
- // the stack checker directly.
- if (EnableStackOVFSanitizer && !PrologueStubs)
- BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::PS_call_stk))
- .addExternalSymbol("__runtime_stack_check");
}
void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const {
MachineFunction &MF = *MBB.getParent();
- if (!hasFP(MF))
- return;
-
auto &HST = MF.getSubtarget<HexagonSubtarget>();
auto &HII = *HST.getInstrInfo();
auto &HRI = *HST.getRegisterInfo();
unsigned SP = HRI.getStackRegister();
+ MachineBasicBlock::iterator InsertPt = MBB.getFirstTerminator();
+ DebugLoc dl = MBB.findDebugLoc(InsertPt);
+
+ if (!hasFP(MF)) {
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (unsigned NumBytes = MFI.getStackSize()) {
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
+ .addReg(SP)
+ .addImm(NumBytes);
+ }
+ return;
+ }
+
MachineInstr *RetI = getReturn(MBB);
unsigned RetOpc = RetI ? RetI->getOpcode() : 0;
- MachineBasicBlock::iterator InsertPt = MBB.getFirstTerminator();
- DebugLoc DL;
- if (InsertPt != MBB.end())
- DL = InsertPt->getDebugLoc();
- else if (!MBB.empty())
- DL = std::prev(MBB.end())->getDebugLoc();
-
// Handle EH_RETURN.
if (RetOpc == Hexagon::EH_RETURN_JMPR) {
- BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::L2_deallocframe));
- BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::A2_add), SP)
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe));
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_add), SP)
.addReg(SP)
.addReg(Hexagon::R28);
return;
@@ -699,16 +677,52 @@ void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const {
// otherwise just add deallocframe. The function could be returning via a
// tail call.
if (RetOpc != Hexagon::PS_jmpret || DisableDeallocRet) {
- BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::L2_deallocframe));
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe));
return;
}
unsigned NewOpc = Hexagon::L4_return;
- MachineInstr *NewI = BuildMI(MBB, RetI, DL, HII.get(NewOpc));
+ MachineInstr *NewI = BuildMI(MBB, RetI, dl, HII.get(NewOpc));
// Transfer the function live-out registers.
NewI->copyImplicitOps(MF, *RetI);
MBB.erase(RetI);
}
+void HexagonFrameLowering::insertAllocframe(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertPt, unsigned NumBytes) const {
+ MachineFunction &MF = *MBB.getParent();
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ auto &HII = *HST.getInstrInfo();
+ auto &HRI = *HST.getRegisterInfo();
+
+ // Check for overflow.
+ // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used?
+ const unsigned int ALLOCFRAME_MAX = 16384;
+
+ // Create a dummy memory operand to prevent allocframe from being treated
+ // as a volatile memory reference.
+ auto *MMO = MF.getMachineMemOperand(MachinePointerInfo::getStack(MF, 0),
+ MachineMemOperand::MOStore, 4, 4);
+
+ DebugLoc dl = MBB.findDebugLoc(InsertPt);
+
+ if (NumBytes >= ALLOCFRAME_MAX) {
+ // Emit allocframe(#0).
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
+ .addImm(0)
+ .addMemOperand(MMO);
+
+ // Subtract the size from the stack pointer.
+ unsigned SP = HRI.getStackRegister();
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
+ .addReg(SP)
+ .addImm(-int(NumBytes));
+ } else {
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
+ .addImm(NumBytes)
+ .addMemOperand(MMO);
+ }
+}
+
void HexagonFrameLowering::updateEntryPaths(MachineFunction &MF,
MachineBasicBlock &SaveB) const {
SetVector<unsigned> Worklist;
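The allocframe immediate tops out at 16 KiB, so insertAllocframe splits oversized frames into allocframe(#0) followed by an explicit SP adjustment. A standalone sketch of that split, with printf emitters standing in for the BuildMI calls (the Hexagon SP is r29):

#include <cstdio>

// Hypothetical emitters standing in for the BuildMI calls above.
static void emitAllocframe(unsigned Imm) { std::printf("allocframe(#%u)\n", Imm); }
static void addToSP(int Delta) { std::printf("r29 = add(r29,#%d)\n", Delta); }

static void allocateFrame(unsigned NumBytes) {
  const unsigned ALLOCFRAME_MAX = 16384; // allocframe's immediate limit
  if (NumBytes >= ALLOCFRAME_MAX) {
    emitAllocframe(0);        // allocframe(#0) still saves LR:FP and sets FP
    addToSP(-int(NumBytes));  // then grow the frame with an explicit add
  } else {
    emitAllocframe(NumBytes); // the frame size fits the immediate field
  }
}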
@@ -928,12 +942,11 @@ void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB,
}
bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {
+ if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
+ return false;
+
auto &MFI = MF.getFrameInfo();
auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
-
- bool HasFixed = MFI.getNumFixedObjects();
- bool HasPrealloc = const_cast<MachineFrameInfo&>(MFI)
- .getLocalFrameObjectCount();
bool HasExtraAlign = HRI.needsStackRealignment(MF);
bool HasAlloca = MFI.hasVarSizedObjects();
@@ -947,18 +960,35 @@ bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {
// By default we want to use SP (since it's always there). FP requires
// some setup (i.e. ALLOCFRAME).
- // Fixed and preallocated objects need FP if the distance from them to
- // the SP is unknown (as is with alloca or aligna).
- if ((HasFixed || HasPrealloc) && (HasAlloca || HasExtraAlign))
+ // Both alloca and stack realignment modify the stack pointer by an
+ // undetermined value, so we need to save it at the entry to the function
+ // (i.e. use allocframe).
+ if (HasAlloca || HasExtraAlign)
return true;
if (MFI.getStackSize() > 0) {
- if (EnableStackOVFSanitizer || UseAllocframe)
+ // If FP-elimination is disabled, we have to use FP at this point.
+ const TargetMachine &TM = MF.getTarget();
+ if (TM.Options.DisableFramePointerElim(MF) || !EliminateFramePointer)
+ return true;
+ if (EnableStackOVFSanitizer)
return true;
}
- if (MFI.hasCalls() ||
- MF.getInfo<HexagonMachineFunctionInfo>()->hasClobberLR())
+ const auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
+ if (MFI.hasCalls() || HMFI.hasClobberLR())
+ return true;
+
+ // Frame pointer elimination is a possibility at this point, but
+ // to know if FP is necessary we need to know if spill/restore
+ // functions will be used (they require FP to be valid).
+ // This means that hasFP shouldn't really be called before CSI is
+ // calculated, and some measures are taken to make sure of that
+ // (e.g. default implementations of virtual functions that call it
+ // are overridden appropriately).
+ assert(MFI.isCalleeSavedInfoValid() && "Need to know CSI");
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+ if (useSpillFunction(MF, CSI) || useRestoreFunction(MF, CSI))
return true;
return false;
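Condensed, the rewritten hasFP is a short decision list. The sketch below restates that order in plain C++; the boolean parameters are stand-ins for the MachineFunction queries made above, not a real API:

// Decision order of hasFP above, flattened for illustration.
static bool needsFramePointer(bool IsNaked, bool HasAlloca, bool HasExtraAlign,
                              unsigned StackSize, bool NoFPElim,
                              bool StackSanitizer, bool HasCallsOrClobberLR,
                              bool UsesSpillRestoreFns) {
  if (IsNaked)
    return false;                 // naked functions never get a frame pointer
  if (HasAlloca || HasExtraAlign)
    return true;                  // SP moves by an unknown amount; save it
  if (StackSize > 0 && (NoFPElim || StackSanitizer))
    return true;
  if (HasCallsOrClobberLR)
    return true;
  return UsesSpillRestoreFns;     // spill/restore stubs require a valid FP
}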
@@ -1051,9 +1081,10 @@ int HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF,
bool HasExtraAlign = HRI.needsStackRealignment(MF);
bool NoOpt = MF.getTarget().getOptLevel() == CodeGenOpt::None;
- unsigned FrameSize = MFI.getStackSize();
- unsigned SP = HRI.getStackRegister(), FP = HRI.getFrameRegister();
auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
+ unsigned FrameSize = MFI.getStackSize();
+ unsigned SP = HRI.getStackRegister();
+ unsigned FP = HRI.getFrameRegister();
unsigned AP = HMFI.getStackAlignBasePhysReg();
// It may happen that AP will be absent even when HasAlloca && HasExtraAlign
// is true. HasExtraAlign may be set because of vector spills, without
@@ -1135,7 +1166,7 @@ int HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF,
// there will be no SP -= FrameSize), so the frame size should not be
// added to the calculated offset.
int RealOffset = Offset;
- if (!UseFP && !UseAP && HasFP)
+ if (!UseFP && !UseAP)
RealOffset = FrameSize+Offset;
return RealOffset;
}
@@ -2402,7 +2433,7 @@ void HexagonFrameLowering::addCalleeSaveRegistersAsImpOperand(MachineInstr *MI,
/// be generated via inline code. If this function returns "true", inline
/// code will be generated. If this function returns "false", additional
/// checks are performed, which may still lead to the inline code.
-bool HexagonFrameLowering::shouldInlineCSR(MachineFunction &MF,
+bool HexagonFrameLowering::shouldInlineCSR(const MachineFunction &MF,
const CSIVect &CSI) const {
if (MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn())
return true;
@@ -2432,7 +2463,7 @@ bool HexagonFrameLowering::shouldInlineCSR(MachineFunction &MF,
return false;
}
-bool HexagonFrameLowering::useSpillFunction(MachineFunction &MF,
+bool HexagonFrameLowering::useSpillFunction(const MachineFunction &MF,
const CSIVect &CSI) const {
if (shouldInlineCSR(MF, CSI))
return false;
@@ -2445,7 +2476,7 @@ bool HexagonFrameLowering::useSpillFunction(MachineFunction &MF,
return Threshold < NumCSI;
}
-bool HexagonFrameLowering::useRestoreFunction(MachineFunction &MF,
+bool HexagonFrameLowering::useRestoreFunction(const MachineFunction &MF,
const CSIVect &CSI) const {
if (shouldInlineCSR(MF, CSI))
return false;
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h
index 529a61d4a5b5..f4d4e1b61a26 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.h
+++ b/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -48,6 +48,15 @@ public:
return true;
}
+ bool hasReservedCallFrame(const MachineFunction &MF) const override {
+ // We always reserve the call frame as part of the initial stack allocation.
+ return true;
+ }
+ bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override {
+ // Override this function to avoid calling hasFP before CSI is set
+ // (the default implementation calls hasFP).
+ return true;
+ }
MachineBasicBlock::iterator
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const override;
@@ -94,6 +103,8 @@ private:
unsigned SP, unsigned CF) const;
void insertPrologueInBlock(MachineBasicBlock &MBB, bool PrologueStubs) const;
void insertEpilogueInBlock(MachineBasicBlock &MBB) const;
+ void insertAllocframe(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertPt, unsigned NumBytes) const;
bool insertCSRSpillsInBlock(MachineBasicBlock &MBB, const CSIVect &CSI,
const HexagonRegisterInfo &HRI, bool &PrologueStubs) const;
bool insertCSRRestoresInBlock(MachineBasicBlock &MBB, const CSIVect &CSI,
@@ -148,9 +159,9 @@ private:
void addCalleeSaveRegistersAsImpOperand(MachineInstr *MI, const CSIVect &CSI,
bool IsDef, bool IsKill) const;
- bool shouldInlineCSR(MachineFunction &MF, const CSIVect &CSI) const;
- bool useSpillFunction(MachineFunction &MF, const CSIVect &CSI) const;
- bool useRestoreFunction(MachineFunction &MF, const CSIVect &CSI) const;
+ bool shouldInlineCSR(const MachineFunction &MF, const CSIVect &CSI) const;
+ bool useSpillFunction(const MachineFunction &MF, const CSIVect &CSI) const;
+ bool useRestoreFunction(const MachineFunction &MF, const CSIVect &CSI) const;
bool mayOverflowFrameOffset(MachineFunction &MF) const;
};
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index afed894cfb9a..2daacf795555 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1002,51 +1002,46 @@ bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
SDValue
HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
- SDNode *Node = Op.getNode();
MachineFunction &MF = DAG.getMachineFunction();
- auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>();
- switch (Node->getOpcode()) {
- case ISD::INLINEASM: {
- unsigned NumOps = Node->getNumOperands();
- if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
- --NumOps; // Ignore the flag operand.
-
- for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
- if (FuncInfo.hasClobberLR())
- break;
- unsigned Flags =
- cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
- unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
- ++i; // Skip the ID value.
-
- switch (InlineAsm::getKind(Flags)) {
- default: llvm_unreachable("Bad flags!");
- case InlineAsm::Kind_RegDef:
- case InlineAsm::Kind_RegUse:
- case InlineAsm::Kind_Imm:
- case InlineAsm::Kind_Clobber:
- case InlineAsm::Kind_Mem: {
- for (; NumVals; --NumVals, ++i) {}
- break;
- }
- case InlineAsm::Kind_RegDefEarlyClobber: {
- for (; NumVals; --NumVals, ++i) {
- unsigned Reg =
- cast<RegisterSDNode>(Node->getOperand(i))->getReg();
-
- // Check it to be lr
- const HexagonRegisterInfo *QRI = Subtarget.getRegisterInfo();
- if (Reg == QRI->getRARegister()) {
- FuncInfo.setHasClobberLR(true);
- break;
- }
- }
- break;
- }
+ auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
+ const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
+ unsigned LR = HRI.getRARegister();
+
+ if (Op.getOpcode() != ISD::INLINEASM || HMFI.hasClobberLR())
+ return Op;
+
+ unsigned NumOps = Op.getNumOperands();
+ if (Op.getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the flag operand.
+
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+ unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+ ++i; // Skip the ID value.
+
+ switch (InlineAsm::getKind(Flags)) {
+ default:
+ llvm_unreachable("Bad flags!");
+ case InlineAsm::Kind_RegUse:
+ case InlineAsm::Kind_Imm:
+ case InlineAsm::Kind_Mem:
+ i += NumVals;
+ break;
+ case InlineAsm::Kind_Clobber:
+ case InlineAsm::Kind_RegDef:
+ case InlineAsm::Kind_RegDefEarlyClobber: {
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
+ if (Reg != LR)
+ continue;
+ HMFI.setHasClobberLR(true);
+ return Op;
}
+ break;
}
}
- } // Node->getOpcode
+ }
+
return Op;
}
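The rewritten LowerINLINEASM walks the flat operand list: every group begins with a flag word encoding the operand kind and the number of values that follow, and only def/clobber groups can name LR. A self-contained toy version of that walk; the bit layout here is simplified and is not InlineAsm's real encoding:

#include <vector>

// Toy encoding: kind in the low 3 bits, value count above them.
enum Kind { RegUse, RegDef, Clobber, Imm, Mem };

static bool clobbersReg(const std::vector<unsigned> &Ops, unsigned Reg) {
  for (size_t I = 0; I < Ops.size();) {
    unsigned K = Ops[I] & 7, NumVals = Ops[I] >> 3;
    ++I; // skip the flag word, as the "++i" in the patch does
    if (K == RegDef || K == Clobber)
      for (unsigned J = 0; J != NumVals; ++J)
        if (Ops[I + J] == Reg)
          return true; // a def/clobber of Reg (LR in the patch)
    I += NumVals; // advance past this group's values
  }
  return false;
}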
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index fec2dc5ce306..1eac2d3dd8e2 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -1253,10 +1253,16 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
LivePhysRegs LiveAtMI(HRI);
getLiveRegsAt(LiveAtMI, MI);
bool IsDestLive = !LiveAtMI.available(MRI, Op0.getReg());
+ unsigned PReg = Op1.getReg();
+ assert(Op1.getSubReg() == 0);
+ unsigned PState = getRegState(Op1);
+
if (Op0.getReg() != Op2.getReg()) {
+ unsigned S = Op0.getReg() != Op3.getReg() ? PState & ~RegState::Kill
+ : PState;
auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vcmov))
.add(Op0)
- .add(Op1)
+ .addReg(PReg, S)
.add(Op2);
if (IsDestLive)
T.addReg(Op0.getReg(), RegState::Implicit);
@@ -1265,7 +1271,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
if (Op0.getReg() != Op3.getReg()) {
auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vncmov))
.add(Op0)
- .add(Op1)
+ .addReg(PReg, PState)
.add(Op3);
if (IsDestLive)
T.addReg(Op0.getReg(), RegState::Implicit);
@@ -1282,12 +1288,18 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
LivePhysRegs LiveAtMI(HRI);
getLiveRegsAt(LiveAtMI, MI);
bool IsDestLive = !LiveAtMI.available(MRI, Op0.getReg());
+ unsigned PReg = Op1.getReg();
+ assert(Op1.getSubReg() == 0);
+ unsigned PState = getRegState(Op1);
if (Op0.getReg() != Op2.getReg()) {
+ unsigned S = Op0.getReg() != Op3.getReg() ? PState & ~RegState::Kill
+ : PState;
unsigned SrcLo = HRI.getSubReg(Op2.getReg(), Hexagon::vsub_lo);
unsigned SrcHi = HRI.getSubReg(Op2.getReg(), Hexagon::vsub_hi);
auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vccombine))
.add(Op0)
+ .addReg(PReg, S)
- .add(Op1)
.addReg(SrcHi)
.addReg(SrcLo);
@@ -1300,7 +1312,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
unsigned SrcHi = HRI.getSubReg(Op3.getReg(), Hexagon::vsub_hi);
auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vnccombine))
.add(Op0)
- .add(Op1)
+ .addReg(PReg, PState)
.addReg(SrcHi)
.addReg(SrcLo);
if (IsDestLive)
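The PState handling above exists because one pseudo expands into up to two predicated instructions that both read the predicate register: when a second read follows, the first use must not carry the kill flag. A standalone sketch of that rule (the Kill bit is a simplified stand-in for RegState::Kill):

#include <cstdio>

enum : unsigned { Kill = 1u << 0 }; // simplified stand-in for RegState::Kill

static void emitUse(unsigned Reg, unsigned State) {
  std::printf("use p%u%s\n", Reg, (State & Kill) ? " (kill)" : "");
}

static void expandToTwoUses(unsigned PReg, unsigned PState, bool EmitSecond) {
  // First use: drop Kill if the register will be read again afterwards.
  unsigned FirstState = EmitSecond ? (PState & ~Kill) : PState;
  emitUse(PReg, FirstState);
  if (EmitSecond)
    emitUse(PReg, PState); // the last use keeps the original kill state
}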
diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp
index de6b203015d8..e93f075f4ccd 100644
--- a/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -69,9 +69,7 @@ namespace {
public:
static char ID;
- HexagonNewValueJump() : MachineFunctionPass(ID) {
- initializeHexagonNewValueJumpPass(*PassRegistry::getPassRegistry());
- }
+ HexagonNewValueJump() : MachineFunctionPass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineBranchProbabilityInfo>();
@@ -445,8 +443,6 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
unsigned predReg = 0; // predicate reg of the jump.
unsigned cmpReg1 = 0;
int cmpOp2 = 0;
- bool MO1IsKill = false;
- bool MO2IsKill = false;
MachineBasicBlock::iterator jmpPos;
MachineBasicBlock::iterator cmpPos;
MachineInstr *cmpInstr = nullptr, *jmpInstr = nullptr;
@@ -548,14 +544,10 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
// We need cmpReg1 and cmpOp2(imm or reg) while building
// new value jump instruction.
cmpReg1 = MI.getOperand(1).getReg();
- if (MI.getOperand(1).isKill())
- MO1IsKill = true;
- if (isSecondOpReg) {
+ if (isSecondOpReg)
cmpOp2 = MI.getOperand(2).getReg();
- if (MI.getOperand(2).isKill())
- MO2IsKill = true;
- } else
+ else
cmpOp2 = MI.getOperand(2).getImm();
continue;
}
@@ -605,11 +597,8 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
if ((COp == Hexagon::C2_cmpeq || COp == Hexagon::C4_cmpneq) &&
(feederReg == (unsigned) cmpOp2)) {
unsigned tmp = cmpReg1;
- bool tmpIsKill = MO1IsKill;
cmpReg1 = cmpOp2;
- MO1IsKill = MO2IsKill;
cmpOp2 = tmp;
- MO2IsKill = tmpIsKill;
}
// Now we have swapped the operands, all we need to check is,
@@ -623,31 +612,33 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
// make sure we are respecting the kill values of
// the operands of the feeder.
- bool updatedIsKill = false;
- for (unsigned i = 0; i < MI.getNumOperands(); i++) {
- MachineOperand &MO = MI.getOperand(i);
- if (MO.isReg() && MO.isUse()) {
- unsigned feederReg = MO.getReg();
- for (MachineBasicBlock::iterator localII = feederPos,
- end = cmpInstr->getIterator(); localII != end; localII++) {
- MachineInstr &localMI = *localII;
- for (unsigned j = 0; j < localMI.getNumOperands(); j++) {
- MachineOperand &localMO = localMI.getOperand(j);
- if (localMO.isReg() && localMO.isUse() &&
- localMO.isKill() && feederReg == localMO.getReg()) {
- // We found that there is kill of a use register
- // Set up a kill flag on the register
- localMO.setIsKill(false);
- MO.setIsKill();
- updatedIsKill = true;
- break;
- }
+ auto TransferKills = [jmpPos,cmpPos] (MachineInstr &MI) {
+ for (MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned UseR = MO.getReg();
+ for (auto I = std::next(MI.getIterator()); I != jmpPos; ++I) {
+ if (I == cmpPos)
+ continue;
+ for (MachineOperand &Op : I->operands()) {
+ if (!Op.isReg() || !Op.isUse() || !Op.isKill())
+ continue;
+ if (Op.getReg() != UseR)
+ continue;
+ // We found a kill of the use register in a later instruction.
+ // Transfer the kill flag onto this operand.
+ Op.setIsKill(false);
+ MO.setIsKill(true);
+ return;
}
- if (updatedIsKill) break;
}
}
- if (updatedIsKill) break;
- }
+ };
+
+ TransferKills(*feederPos);
+ TransferKills(*cmpPos);
+ bool MO1IsKill = cmpPos->killsRegister(cmpReg1, QRI);
+ bool MO2IsKill = isSecondOpReg && cmpPos->killsRegister(cmpOp2, QRI);
MBB->splice(jmpPos, MI.getParent(), MI);
MBB->splice(jmpPos, MI.getParent(), cmpInstr);
diff --git a/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/lib/Target/Hexagon/HexagonOptAddrMode.cpp
index 27b40f134b1f..a331c978f59d 100644
--- a/lib/Target/Hexagon/HexagonOptAddrMode.cpp
+++ b/lib/Target/Hexagon/HexagonOptAddrMode.cpp
@@ -535,9 +535,9 @@ bool HexagonOptAddrMode::processBlock(NodeAddr<BlockNode *> BA) {
!MI->getOperand(1).isGlobal())
continue;
- DEBUG(dbgs() << "[Analyzing A2_tfrsi]: " << *MI << "\n");
- DEBUG(dbgs() << "\t[InstrNode]: " << Print<NodeAddr<InstrNode *>>(IA, *DFG)
- << "\n");
+ DEBUG(dbgs() << "[Analyzing " << HII->getName(MI->getOpcode()) << "]: "
+ << *MI << "\n\t[InstrNode]: "
+ << Print<NodeAddr<InstrNode *>>(IA, *DFG) << '\n');
NodeList UNodeList;
getAllRealUses(SA, UNodeList);
@@ -605,7 +605,9 @@ bool HexagonOptAddrMode::runOnMachineFunction(MachineFunction &MF) {
const TargetOperandInfo TOI(*HII);
DataFlowGraph G(MF, *HII, TRI, *MDT, MDF, TOI);
- G.build();
+ // Need to keep dead phis because we can propagate uses of registers into
+ // nodes dominated by those would-be phis.
+ G.build(BuildOptions::KeepDeadPhis);
DFG = &G;
Liveness L(MRI, *DFG);
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 031a1bdefafb..76d9b31b005f 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -113,6 +113,7 @@ namespace llvm {
void initializeHexagonLoopIdiomRecognizePass(PassRegistry&);
void initializeHexagonGenMuxPass(PassRegistry&);
void initializeHexagonOptAddrModePass(PassRegistry&);
+ void initializeHexagonNewValueJumpPass(PassRegistry&);
Pass *createHexagonLoopIdiomPass();
FunctionPass *createHexagonBitSimplify();
@@ -158,6 +159,7 @@ extern "C" void LLVMInitializeHexagonTarget() {
initializeHexagonLoopIdiomRecognizePass(PR);
initializeHexagonGenMuxPass(PR);
initializeHexagonOptAddrModePass(PR);
+ initializeHexagonNewValueJumpPass(PR);
}
HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT,
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
index 4dacb1501392..34df2ebcc520 100644
--- a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -49,6 +49,10 @@ static cl::opt<bool> TraceGVPlacement("trace-gv-placement",
cl::Hidden, cl::init(false),
cl::desc("Trace global value placement"));
+static cl::opt<bool>
+ EmitJtInText("hexagon-emit-jt-text", cl::Hidden, cl::init(false),
+ cl::desc("Emit hexagon jump tables in function section"));
+
// TraceGVPlacement controls messages for all builds. For builds with assertions
// (debug or release), messages are also controlled by the usual debug flags
// (e.g. -debug and -debug-only=globallayout)
@@ -256,6 +260,11 @@ unsigned HexagonTargetObjectFile::getSmallDataSize() const {
return SmallDataThreshold;
}
+bool HexagonTargetObjectFile::shouldPutJumpTableInFunctionSection(
+ bool UsesLabelDifference, const Function &F) const {
+ return EmitJtInText;
+}
+
/// Descends any type down to "elementary" components,
/// discovering the smallest addressable one.
/// If zero is returned, declaration will not be modified.
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.h b/lib/Target/Hexagon/HexagonTargetObjectFile.h
index 58dff2b95e19..373d850b53be 100644
--- a/lib/Target/Hexagon/HexagonTargetObjectFile.h
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.h
@@ -33,6 +33,9 @@ namespace llvm {
unsigned getSmallDataSize() const;
+ bool shouldPutJumpTableInFunctionSection(bool UsesLabelDifference,
+ const Function &F) const override;
+
private:
MCSectionELF *SmallDataSection;
MCSectionELF *SmallBSSSection;
diff --git a/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
index d578bfab3658..aac810e29fe9 100644
--- a/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -21,6 +21,10 @@ using namespace llvm;
#define DEBUG_TYPE "hexagontti"
+static cl::opt<bool> EmitLookupTables("hexagon-emit-lookup-tables",
+ cl::init(true), cl::Hidden,
+ cl::desc("Control lookup table emission on Hexagon target"));
+
TargetTransformInfo::PopcntSupportKind
HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const {
// Return Fast Hardware support as every input < 64 bits will be promoted
@@ -29,7 +33,7 @@ HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const {
}
// The Hexagon target can unroll loops with run-time trip counts.
-void HexagonTTIImpl::getUnrollingPreferences(Loop *L,
+void HexagonTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
UP.Runtime = UP.Partial = true;
}
@@ -46,8 +50,9 @@ unsigned HexagonTTIImpl::getCacheLineSize() const {
return getST()->getL1CacheLineSize();
}
-int HexagonTTIImpl::getUserCost(const User *U) {
- auto isCastFoldedIntoLoad = [] (const CastInst *CI) -> bool {
+int HexagonTTIImpl::getUserCost(const User *U,
+ ArrayRef<const Value *> Operands) {
+ auto isCastFoldedIntoLoad = [](const CastInst *CI) -> bool {
if (!CI->isIntegerCast())
return false;
const LoadInst *LI = dyn_cast<const LoadInst>(CI->getOperand(0));
@@ -67,5 +72,9 @@ int HexagonTTIImpl::getUserCost(const User *U) {
if (const CastInst *CI = dyn_cast<const CastInst>(U))
if (isCastFoldedIntoLoad(CI))
return TargetTransformInfo::TCC_Free;
- return BaseT::getUserCost(U);
+ return BaseT::getUserCost(U, Operands);
+}
+
+bool HexagonTTIImpl::shouldBuildLookupTables() const {
+ return EmitLookupTables;
}
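shouldBuildLookupTables feeds SimplifyCFG's switch-to-table conversion, so the new -hexagon-emit-lookup-tables flag decides whether a dense switch like the one below may become a constant-array load. The C++ is only an illustration of the transform being gated:

// A dense switch that SimplifyCFG would typically turn into a lookup table;
// with the flag set to false it stays a chain of branches instead.
int daysInMonth(int Month) {
  switch (Month) {
  case 1: return 31; case 2: return 28; case 3: return 31;
  case 4: return 30; case 5: return 31; case 6: return 30;
  default: return 0;
  }
  // With tables enabled this lowers to roughly:
  //   static const int Table[] = {31, 28, 31, 30, 31, 30};
  //   return (unsigned)(Month - 1) < 6u ? Table[Month - 1] : 0;
}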
diff --git a/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/lib/Target/Hexagon/HexagonTargetTransformInfo.h
index 8414bfc4e197..ab5a6e07d873 100644
--- a/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -46,7 +46,8 @@ public:
TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
// The Hexagon target can unroll loops with run-time trip counts.
- void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
+ void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP);
// L1 cache prefetch.
unsigned getPrefetchDistance() const;
@@ -61,7 +62,10 @@ public:
/// @}
- int getUserCost(const User *U);
+ int getUserCost(const User *U, ArrayRef<const Value *> Operands);
+
+ // Hexagon specific decision to generate a lookup table.
+ bool shouldBuildLookupTables() const;
};
} // end namespace llvm
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index 093ce80bc2e3..34d0b55aa22a 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -199,11 +199,8 @@ public:
return Infos[Kind - FirstTargetFixupKind];
}
- /// processFixupValue - Target hook to adjust the literal value of a fixup
- /// if necessary. IsResolved signals whether the caller believes a relocation
- /// is needed; the target can modify the value. The default does nothing.
- void processFixupValue(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target, bool &IsResolved) override {
+ bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target) override {
MCFixupKind Kind = Fixup.getKind();
switch((unsigned)Kind) {
@@ -299,8 +296,7 @@ public:
case fixup_Hexagon_LD_PLT_B22_PCREL_X:
case fixup_Hexagon_LD_PLT_B32_PCREL_X:
// These relocations should always have a relocation recorded
- IsResolved = false;
- return;
+ return true;
case fixup_Hexagon_B22_PCREL:
//IsResolved = false;
@@ -317,7 +313,7 @@ public:
case fixup_Hexagon_B7_PCREL:
case fixup_Hexagon_B7_PCREL_X:
if (DisableFixup)
- IsResolved = false;
+ return true;
break;
case FK_Data_1:
@@ -326,8 +322,9 @@ public:
case FK_PCRel_4:
case fixup_Hexagon_32:
// Leave these relocations alone as they are used for EH.
- return;
+ return false;
}
+ return false;
}
/// getFixupKindNumBytes - The number of bytes the fixup may change.
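This hunk migrates Hexagon from the mutate-in-place processFixupValue hook to the boolean shouldForceRelocation query. A standalone sketch of the resulting control flow, with the fixup kinds collapsed into a toy enum:

// The old hook flipped an IsResolved out-parameter; the new one answers
// "must this fixup keep a relocation?" directly.
enum FixupKind { GotOrPltPcrel, BranchPcrel, DataOrEH };

static bool shouldForceRelocation(FixupKind Kind, bool DisableFixup) {
  switch (Kind) {
  case GotOrPltPcrel:
    return true;         // GOT/PLT fixups always record a relocation
  case BranchPcrel:
    return DisableFixup; // forced only when branch fixups are disabled
  case DataOrEH:
    return false;        // leave data fixups alone; they are used for EH
  }
  return false;
}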
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 9d5c179a0fd9..69b1ba1528d0 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -2789,6 +2789,7 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr,
bool Is32BitSym, SMLoc IDLoc,
MCStreamer &Out,
const MCSubtargetInfo *STI) {
+ // FIXME: These expansions do not respect -mxgot.
MipsTargetStreamer &TOut = getTargetStreamer();
bool UseSrcReg = SrcReg != Mips::NoRegister;
warnIfNoMacro(IDLoc);
@@ -2808,8 +2809,12 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr,
// symbol in the final relocation is external and not modified with a
// constant then we must use R_MIPS_CALL16 instead of R_MIPS_GOT16.
if ((DstReg == Mips::T9 || DstReg == Mips::T9_64) && !UseSrcReg &&
- Res.getConstant() == 0 && !Res.getSymA()->getSymbol().isInSection() &&
- !Res.getSymA()->getSymbol().isTemporary()) {
+ Res.getConstant() == 0 &&
+ !(Res.getSymA()->getSymbol().isInSection() ||
+ Res.getSymA()->getSymbol().isTemporary() ||
+ (Res.getSymA()->getSymbol().isELF() &&
+ cast<MCSymbolELF>(Res.getSymA()->getSymbol()).getBinding() ==
+ ELF::STB_LOCAL))) {
const MCExpr *CallExpr =
MipsMCExpr::create(MipsMCExpr::MEK_GOT_CALL, SymExpr, getContext());
TOut.emitRRX(Mips::LW, DstReg, ABI.GetGlobalPtr(),
@@ -2865,6 +2870,85 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr,
return false;
}
+ if (inPicMode() && ABI.ArePtrs64bit()) {
+ MCValue Res;
+ if (!SymExpr->evaluateAsRelocatable(Res, nullptr, nullptr)) {
+ Error(IDLoc, "expected relocatable expression");
+ return true;
+ }
+ if (Res.getSymB() != nullptr) {
+ Error(IDLoc, "expected relocatable expression with only one symbol");
+ return true;
+ }
+
+ // The case where the result register is $25 is somewhat special. If the
+ // symbol in the final relocation is external and not modified with a
+ // constant then we must use R_MIPS_CALL16 instead of R_MIPS_GOT_DISP.
+ if ((DstReg == Mips::T9 || DstReg == Mips::T9_64) && !UseSrcReg &&
+ Res.getConstant() == 0 &&
+ !(Res.getSymA()->getSymbol().isInSection() ||
+ Res.getSymA()->getSymbol().isTemporary() ||
+ (Res.getSymA()->getSymbol().isELF() &&
+ cast<MCSymbolELF>(Res.getSymA()->getSymbol()).getBinding() ==
+ ELF::STB_LOCAL))) {
+ const MCExpr *CallExpr =
+ MipsMCExpr::create(MipsMCExpr::MEK_GOT_CALL, SymExpr, getContext());
+ TOut.emitRRX(Mips::LD, DstReg, ABI.GetGlobalPtr(),
+ MCOperand::createExpr(CallExpr), IDLoc, STI);
+ return false;
+ }
+
+ // The remaining cases are:
+ // Small offset: ld $tmp, %got_disp(symbol)($gp)
+ // >daddiu $tmp, $tmp, offset
+ // >daddu $rd, $tmp, $rs
+ // The instructions marked with a '>' may be omitted if they are redundant. If
+ // this happens then the last instruction must use $rd as the result
+ // register.
+ const MipsMCExpr *GotExpr = MipsMCExpr::create(MipsMCExpr::MEK_GOT_DISP,
+ Res.getSymA(),
+ getContext());
+ const MCExpr *LoExpr = nullptr;
+ if (Res.getConstant() != 0) {
+ // Symbols fully resolve with just the %got_disp(symbol) but we
+ // must still account for any offset to the symbol for
+ // expressions like symbol+8.
+ LoExpr = MCConstantExpr::create(Res.getConstant(), getContext());
+
+ // FIXME: Offsets greater than 16 bits are not yet implemented.
+ // FIXME: The correct range is a 32-bit sign-extended number.
+ if (Res.getConstant() < -0x8000 || Res.getConstant() > 0x7fff) {
+ Error(IDLoc, "macro instruction uses large offset, which is not "
+ "currently supported");
+ return true;
+ }
+ }
+
+ unsigned TmpReg = DstReg;
+ if (UseSrcReg &&
+ getContext().getRegisterInfo()->isSuperOrSubRegisterEq(DstReg,
+ SrcReg)) {
+ // If $rs is the same as $rd, we need to use AT.
+ // If it is not available we exit.
+ unsigned ATReg = getATReg(IDLoc);
+ if (!ATReg)
+ return true;
+ TmpReg = ATReg;
+ }
+
+ TOut.emitRRX(Mips::LD, TmpReg, ABI.GetGlobalPtr(),
+ MCOperand::createExpr(GotExpr), IDLoc, STI);
+
+ if (LoExpr)
+ TOut.emitRRX(Mips::DADDiu, TmpReg, TmpReg, MCOperand::createExpr(LoExpr),
+ IDLoc, STI);
+
+ if (UseSrcReg)
+ TOut.emitRRR(Mips::DADDu, DstReg, TmpReg, SrcReg, IDLoc, STI);
+
+ return false;
+ }
+
const MipsMCExpr *HiExpr =
MipsMCExpr::create(MipsMCExpr::MEK_HI, SymExpr, getContext());
const MipsMCExpr *LoExpr =
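A standalone sketch of the 64-bit PIC expansion implemented above for "la $rd, sym+offset($rs)": load the GOT slot via %got_disp, add the small offset if present, then add the source register, switching to $at when $rd aliases $rs. Register numbers and the printf emitters are illustrative stand-ins for the MipsTargetStreamer calls:

#include <cstdint>
#include <cstdio>

static void expandLoadAddress(unsigned Rd, unsigned Rs, bool UseSrcReg,
                              int64_t Ofs, unsigned AT) {
  // If $rs aliases $rd, the intermediate result needs the scratch register.
  unsigned Tmp = (UseSrcReg && Rd == Rs) ? AT : Rd;
  std::printf("ld $%u, %%got_disp(sym)($gp)\n", Tmp);
  if (Ofs != 0) // must fit a signed 16-bit immediate, as checked above
    std::printf("daddiu $%u, $%u, %lld\n", Tmp, Tmp, (long long)Ofs);
  if (UseSrcReg)
    std::printf("daddu $%u, $%u, $%u\n", Rd, Tmp, Rs);
}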
diff --git a/lib/Target/Mips/MicroMips64r6InstrInfo.td b/lib/Target/Mips/MicroMips64r6InstrInfo.td
index 6b7f39e9dd79..38b09d105ddd 100644
--- a/lib/Target/Mips/MicroMips64r6InstrInfo.td
+++ b/lib/Target/Mips/MicroMips64r6InstrInfo.td
@@ -548,3 +548,15 @@ def : MipsInstAlias<"dnegu $rt, $rs",
def : MipsInstAlias<"dnegu $rt",
(DSUBU_MM64R6 GPR64Opnd:$rt, ZERO_64, GPR64Opnd:$rt), 1>,
ISA_MICROMIPS64R6;
+def : MipsInstAlias<"dsll $rd, $rt, $rs",
+ (DSLLV_MM64R6 GPR64Opnd:$rd, GPR64Opnd:$rt,
+ GPR32Opnd:$rs), 0>, ISA_MICROMIPS64R6;
+def : MipsInstAlias<"dsrl $rd, $rt, $rs",
+ (DSRLV_MM64R6 GPR64Opnd:$rd, GPR64Opnd:$rt,
+ GPR32Opnd:$rs), 0>, ISA_MICROMIPS64R6;
+def : MipsInstAlias<"dsrl $rd, $rt",
+ (DSRLV_MM64R6 GPR64Opnd:$rd, GPR64Opnd:$rd,
+ GPR32Opnd:$rt), 0>, ISA_MICROMIPS64R6;
+def : MipsInstAlias<"dsll $rd, $rt",
+ (DSLLV_MM64R6 GPR64Opnd:$rd, GPR64Opnd:$rd,
+ GPR32Opnd:$rt), 0>, ISA_MICROMIPS64R6;
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index 99025fe1341d..3dba7ce30cad 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -748,9 +748,6 @@ let AdditionalPredicates = [NotInMicroMips] in {
defm : OneOrTwoOperandMacroImmediateAlias<"xor", XORi64, GPR64Opnd, imm64>,
GPR_64;
}
-def : MipsInstAlias<"dsll $rd, $rt, $rs",
- (DSLLV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>,
- ISA_MIPS3;
let AdditionalPredicates = [NotInMicroMips] in {
def : MipsInstAlias<"dneg $rt, $rs",
(DSUB GPR64Opnd:$rt, ZERO_64, GPR64Opnd:$rs), 1>,
@@ -793,9 +790,18 @@ def : MipsInstAlias<"dsra $rd, $rt, $rs",
(DSRAV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>,
ISA_MIPS3;
let AdditionalPredicates = [NotInMicroMips] in {
+ def : MipsInstAlias<"dsll $rd, $rt, $rs",
+ (DSLLV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>,
+ ISA_MIPS3;
def : MipsInstAlias<"dsrl $rd, $rt, $rs",
(DSRLV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>,
ISA_MIPS3;
+ def : MipsInstAlias<"dsrl $rd, $rt",
+ (DSRLV GPR64Opnd:$rd, GPR64Opnd:$rd, GPR32Opnd:$rt), 0>,
+ ISA_MIPS3;
+ def : MipsInstAlias<"dsll $rd, $rt",
+ (DSLLV GPR64Opnd:$rd, GPR64Opnd:$rd, GPR32Opnd:$rt), 0>,
+ ISA_MIPS3;
// Two operand (implicit 0 selector) versions:
def : MipsInstAlias<"dmtc0 $rt, $rd",
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index 5d82571ff94f..4a34e3101cb8 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -564,7 +564,7 @@ Iter Filler::replaceWithCompactBranch(MachineBasicBlock &MBB, Iter Branch,
// For given opcode returns opcode of corresponding instruction with short
// delay slot.
-// For the pseudo TAILCALL*_MM instrunctions return the short delay slot
+// For the pseudo TAILCALL*_MM instructions return the short delay slot
// form. Unfortunately, TAILCALL<->b16 is denied as b16 has a limited range
// that is too short to make use of for tail calls.
static int getEquivalentCallShort(int Opcode) {
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 02102d6b22f4..a6ec9fb2e598 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -364,18 +364,6 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
setOperationAction(ISD::UDIV, MVT::i64, Expand);
setOperationAction(ISD::UREM, MVT::i64, Expand);
- if (!(Subtarget.hasDSP() && Subtarget.hasMips32r2())) {
- setOperationAction(ISD::ADDC, MVT::i32, Expand);
- setOperationAction(ISD::ADDE, MVT::i32, Expand);
- }
-
- setOperationAction(ISD::ADDC, MVT::i64, Expand);
- setOperationAction(ISD::ADDE, MVT::i64, Expand);
- setOperationAction(ISD::SUBC, MVT::i32, Expand);
- setOperationAction(ISD::SUBE, MVT::i32, Expand);
- setOperationAction(ISD::SUBC, MVT::i64, Expand);
- setOperationAction(ISD::SUBE, MVT::i64, Expand);
-
// Operations not directly supported by Mips.
setOperationAction(ISD::BR_CC, MVT::f32, Expand);
setOperationAction(ISD::BR_CC, MVT::f64, Expand);
@@ -481,7 +469,6 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::ADD);
- setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::AssertZext);
setTargetDAGCombine(ISD::SHL);
@@ -936,130 +923,14 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
}
}
-static SDValue performMADD_MSUBCombine(SDNode *ROOTNode, SelectionDAG &CurDAG,
- const MipsSubtarget &Subtarget) {
- // ROOTNode must have a multiplication as an operand for the match to be
- // successful.
- if (ROOTNode->getOperand(0).getOpcode() != ISD::MUL &&
- ROOTNode->getOperand(1).getOpcode() != ISD::MUL)
- return SDValue();
-
- // We don't handle vector types here.
- if (ROOTNode->getValueType(0).isVector())
- return SDValue();
-
- // For MIPS64, madd / msub instructions are inefficent to use with 64 bit
- // arithmetic. E.g.
- // (add (mul a b) c) =>
- // let res = (madd (mthi (drotr c 32))x(mtlo c) a b) in
- // MIPS64: (or (dsll (mfhi res) 32) (dsrl (dsll (mflo res) 32) 32)
- // or
- // MIPS64R2: (dins (mflo res) (mfhi res) 32 32)
- //
- // The overhead of setting up the Hi/Lo registers and reassembling the
- // result makes this a dubious optimzation for MIPS64. The core of the
- // problem is that Hi/Lo contain the upper and lower 32 bits of the
- // operand and result.
- //
- // It requires a chain of 4 add/mul for MIPS64R2 to get better code
- // density than doing it naively, 5 for MIPS64. Additionally, using
- // madd/msub on MIPS64 requires the operands actually be 32 bit sign
- // extended operands, not true 64 bit values.
- //
- // FIXME: For the moment, disable this completely for MIPS64.
- if (Subtarget.hasMips64())
- return SDValue();
-
- SDValue Mult = ROOTNode->getOperand(0).getOpcode() == ISD::MUL
- ? ROOTNode->getOperand(0)
- : ROOTNode->getOperand(1);
-
- SDValue AddOperand = ROOTNode->getOperand(0).getOpcode() == ISD::MUL
- ? ROOTNode->getOperand(1)
- : ROOTNode->getOperand(0);
-
- // Transform this to a MADD only if the user of this node is the add.
- // If there are other users of the mul, this function returns here.
- if (!Mult.hasOneUse())
- return SDValue();
-
- // maddu and madd are unusual instructions in that on MIPS64 bits 63..31
- // must be in canonical form, i.e. sign extended. For MIPS32, the operands
- // of the multiply must have 32 or more sign bits, otherwise we cannot
- // perform this optimization. We have to check this here as we're performing
- // this optimization pre-legalization.
- SDValue MultLHS = Mult->getOperand(0);
- SDValue MultRHS = Mult->getOperand(1);
- unsigned LHSSB = CurDAG.ComputeNumSignBits(MultLHS);
- unsigned RHSSB = CurDAG.ComputeNumSignBits(MultRHS);
-
- if (LHSSB < 32 || RHSSB < 32)
- return SDValue();
-
- APInt HighMask =
- APInt::getHighBitsSet(Mult->getValueType(0).getScalarSizeInBits(), 32);
- bool IsUnsigned = CurDAG.MaskedValueIsZero(Mult->getOperand(0), HighMask) &&
- CurDAG.MaskedValueIsZero(Mult->getOperand(1), HighMask) &&
- CurDAG.MaskedValueIsZero(AddOperand, HighMask);
-
- // Initialize accumulator.
- SDLoc DL(ROOTNode);
- SDValue TopHalf;
- SDValue BottomHalf;
- BottomHalf = CurDAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, AddOperand,
- CurDAG.getIntPtrConstant(0, DL));
-
- TopHalf = CurDAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, AddOperand,
- CurDAG.getIntPtrConstant(1, DL));
- SDValue ACCIn = CurDAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped,
- BottomHalf,
- TopHalf);
-
- // Create MipsMAdd(u) / MipsMSub(u) node.
- bool IsAdd = ROOTNode->getOpcode() == ISD::ADD;
- unsigned Opcode = IsAdd ? (IsUnsigned ? MipsISD::MAddu : MipsISD::MAdd)
- : (IsUnsigned ? MipsISD::MSubu : MipsISD::MSub);
- SDValue MAddOps[3] = {
- CurDAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mult->getOperand(0)),
- CurDAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mult->getOperand(1)), ACCIn};
- EVT VTs[2] = {MVT::i32, MVT::i32};
- SDValue MAdd = CurDAG.getNode(Opcode, DL, VTs, MAddOps);
-
- SDValue ResLo = CurDAG.getNode(MipsISD::MFLO, DL, MVT::i32, MAdd);
- SDValue ResHi = CurDAG.getNode(MipsISD::MFHI, DL, MVT::i32, MAdd);
- SDValue Combined =
- CurDAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, ResLo, ResHi);
- return Combined;
-}
-
-static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget &Subtarget) {
- // (sub v0 (mul v1, v2)) => (msub v1, v2, v0)
- if (DCI.isBeforeLegalizeOps()) {
- if (Subtarget.hasMips32() && !Subtarget.hasMips32r6() &&
- !Subtarget.inMips16Mode() && N->getValueType(0) == MVT::i64)
- return performMADD_MSUBCombine(N, DAG, Subtarget);
-
- return SDValue();
- }
-
- return SDValue();
-}
-
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget &Subtarget) {
- // (add v0 (mul v1, v2)) => (madd v1, v2, v0)
- if (DCI.isBeforeLegalizeOps()) {
- if (Subtarget.hasMips32() && !Subtarget.hasMips32r6() &&
- !Subtarget.inMips16Mode() && N->getValueType(0) == MVT::i64)
- return performMADD_MSUBCombine(N, DAG, Subtarget);
+ // (add v0, (add v1, abs_lo(tjt))) => (add (add v0, v1), abs_lo(tjt))
+ if (DCI.isBeforeLegalizeOps())
return SDValue();
- }
- // (add v0, (add v1, abs_lo(tjt))) => (add (add v0, v1), abs_lo(tjt))
SDValue Add = N->getOperand(1);
if (Add.getOpcode() != ISD::ADD)
@@ -1187,8 +1058,6 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
return performAssertZextCombine(N, DAG, DCI, Subtarget);
case ISD::SHL:
return performSHLCombine(N, DAG, DCI, Subtarget);
- case ISD::SUB:
- return performSUBCombine(N, DAG, DCI, Subtarget);
}
return SDValue();
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 4be26dd25dc0..49ae6dd4cd39 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -245,64 +245,46 @@ void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) {
}
}
-void MipsSEDAGToDAGISel::selectAddE(SDNode *Node, const SDLoc &DL) const {
- SDValue InFlag = Node->getOperand(2);
- unsigned Opc = InFlag.getOpcode();
+void MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag,
+ SDValue CmpLHS, const SDLoc &DL,
+ SDNode *Node) const {
+ unsigned Opc = InFlag.getOpcode(); (void)Opc;
+
+ assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
+ (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
+ "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
+
+ unsigned SLTuOp = Mips::SLTu, ADDuOp = Mips::ADDu;
+ if (Subtarget->isGP64bit()) {
+ SLTuOp = Mips::SLTu64;
+ ADDuOp = Mips::DADDu;
+ }
+
+ SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1);
EVT VT = LHS.getValueType();
- // In the base case, we can rely on the carry bit from the addsc
- // instruction.
- if (Opc == ISD::ADDC) {
- SDValue Ops[3] = {LHS, RHS, InFlag};
- CurDAG->SelectNodeTo(Node, Mips::ADDWC, VT, MVT::Glue, Ops);
- return;
+ SDNode *Carry = CurDAG->getMachineNode(SLTuOp, DL, VT, Ops);
+
+ if (Subtarget->isGP64bit()) {
+ // On 64-bit targets, sltu produces an i64 but our backend currently says
+ // that SLTu64 produces an i32. We need to fix this in the long run but for
+ // now, just make the DAG type-correct by asserting the upper bits are zero.
+ Carry = CurDAG->getMachineNode(Mips::SUBREG_TO_REG, DL, VT,
+ CurDAG->getTargetConstant(0, DL, VT),
+ SDValue(Carry, 0),
+ CurDAG->getTargetConstant(Mips::sub_32, DL,
+ VT));
}
- assert(Opc == ISD::ADDE && "ISD::ADDE not in a chain of ADDE nodes!");
-
- // The more complex case is when there is a chain of ISD::ADDE nodes like:
- // (adde (adde (adde (addc a b) c) d) e).
- //
- // The addwc instruction does not write to the carry bit, instead it writes
- // to bit 20 of the dsp control register. To match this series of nodes, each
- // intermediate adde node must be expanded to write the carry bit before the
- // addition.
-
- // Start by reading the overflow field for addsc and moving the value to the
- // carry field. The usage of 1 here with MipsISD::RDDSP / Mips::WRDSP
- // corresponds to reading/writing the entire control register to/from a GPR.
-
- SDValue CstOne = CurDAG->getTargetConstant(1, DL, MVT::i32);
-
- SDValue OuFlag = CurDAG->getTargetConstant(20, DL, MVT::i32);
-
- SDNode *DSPCtrlField =
- CurDAG->getMachineNode(Mips::RDDSP, DL, MVT::i32, MVT::Glue, CstOne, InFlag);
-
- SDNode *Carry = CurDAG->getMachineNode(
- Mips::EXT, DL, MVT::i32, SDValue(DSPCtrlField, 0), OuFlag, CstOne);
+ // Generate a second addition only if we know that RHS is not a
+ // constant-zero node.
+ SDNode *AddCarry = Carry;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS);
+ if (!C || C->getZExtValue())
+ AddCarry = CurDAG->getMachineNode(ADDuOp, DL, VT, SDValue(Carry, 0), RHS);
- SDValue Ops[4] = {SDValue(DSPCtrlField, 0),
- CurDAG->getTargetConstant(6, DL, MVT::i32), CstOne,
- SDValue(Carry, 0)};
- SDNode *DSPCFWithCarry = CurDAG->getMachineNode(Mips::INS, DL, MVT::i32, Ops);
-
- // My reading of the the MIPS DSP 3.01 specification isn't as clear as I
- // would like about whether bit 20 always gets overwritten by addwc.
- // Hence take an extremely conservative view and presume it's sticky. We
- // therefore need to clear it.
-
- SDValue Zero = CurDAG->getRegister(Mips::ZERO, MVT::i32);
-
- SDValue InsOps[4] = {Zero, OuFlag, CstOne, SDValue(DSPCFWithCarry, 0)};
- SDNode *DSPCtrlFinal = CurDAG->getMachineNode(Mips::INS, DL, MVT::i32, InsOps);
-
- SDNode *WrDSP = CurDAG->getMachineNode(Mips::WRDSP, DL, MVT::Glue,
- SDValue(DSPCtrlFinal, 0), CstOne);
-
- SDValue Operands[3] = {LHS, RHS, SDValue(WrDSP, 0)};
- CurDAG->SelectNodeTo(Node, Mips::ADDWC, VT, MVT::Glue, Operands);
+ CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS, SDValue(AddCarry, 0));
}
/// Match frameindex
@@ -783,8 +765,19 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
switch(Opcode) {
default: break;
+ case ISD::SUBE: {
+ SDValue InFlag = Node->getOperand(2);
+ unsigned Opc = Subtarget->isGP64bit() ? Mips::DSUBu : Mips::SUBu;
+ selectAddESubE(Opc, InFlag, InFlag.getOperand(0), DL, Node);
+ return true;
+ }
+
case ISD::ADDE: {
- selectAddE(Node, DL);
+ if (Subtarget->hasDSP()) // Select DSP instructions, ADDSC and ADDWC.
+ break;
+ SDValue InFlag = Node->getOperand(2);
+ unsigned Opc = Subtarget->isGP64bit() ? Mips::DADDu : Mips::ADDu;
+ selectAddESubE(Opc, InFlag, InFlag.getValue(0), DL, Node);
return true;
}
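selectAddESubE recreates the carry that ISD::ADDE consumes with an unsigned compare: once the low words are added, the carry out is exactly (lo < a). A standalone double-word addition showing the sequence it selects; SUBE is analogous, with the borrow computed as (a < b):

#include <cstdint>

// Double-word add the way the selected MIPS sequence computes it: the SLTu
// result (Lo < ALo) is the carry out of the low-word ADDu.
static void add64(uint32_t ALo, uint32_t AHi, uint32_t BLo, uint32_t BHi,
                  uint32_t &Lo, uint32_t &Hi) {
  Lo = ALo + BLo;            // ADDu
  uint32_t Carry = Lo < ALo; // SLTu: 1 exactly when the addition wrapped
  Hi = AHi + BHi + Carry;    // the second ADDu is skipped when BHi is zero
}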
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h
index 6f38289c5a45..f89a350cab04 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -41,7 +41,8 @@ private:
const SDLoc &dl, EVT Ty, bool HasLo,
bool HasHi);
- void selectAddE(SDNode *Node, const SDLoc &DL) const;
+ void selectAddESubE(unsigned MOp, SDValue InFlag, SDValue CmpLHS,
+ const SDLoc &DL, SDNode *Node) const;
bool selectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset) const;
bool selectAddrFrameIndexOffset(SDValue Addr, SDValue &Base, SDValue &Offset,
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index b57bceb3c837..06a97b9d123e 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -179,6 +179,8 @@ MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
setOperationAction(ISD::LOAD, MVT::i32, Custom);
setOperationAction(ISD::STORE, MVT::i32, Custom);
+ setTargetDAGCombine(ISD::ADDE);
+ setTargetDAGCombine(ISD::SUBE);
setTargetDAGCombine(ISD::MUL);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
@@ -419,6 +421,163 @@ SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
return MipsTargetLowering::LowerOperation(Op, DAG);
}
+// selectMADD -
+// Transforms a subgraph in CurDAG if the following pattern is found:
+// (addc multLo, Lo0), (adde multHi, Hi0),
+// where,
+// multHi/Lo: product of multiplication
+// Lo0: initial value of Lo register
+// Hi0: initial value of Hi register
+// Return true if pattern matching was successful.
+static bool selectMADD(SDNode *ADDENode, SelectionDAG *CurDAG) {
+ // ADDENode's second operand must be a flag output of an ADDC node in order
+ // for the matching to be successful.
+ SDNode *ADDCNode = ADDENode->getOperand(2).getNode();
+
+ if (ADDCNode->getOpcode() != ISD::ADDC)
+ return false;
+
+ SDValue MultHi = ADDENode->getOperand(0);
+ SDValue MultLo = ADDCNode->getOperand(0);
+ SDNode *MultNode = MultHi.getNode();
+ unsigned MultOpc = MultHi.getOpcode();
+
+ // MultHi and MultLo must be generated by the same node,
+ if (MultLo.getNode() != MultNode)
+ return false;
+
+ // and it must be a multiplication.
+ if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
+ return false;
+
+ // MultLo and MultHi must be the first and second output of MultNode
+ // respectively.
+ if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
+ return false;
+
+ // Transform this to a MADD only if ADDENode and ADDCNode are the only users
+ // of the values of MultNode, in which case MultNode will be removed in later
+ // phases.
+ // If there exist users other than ADDENode or ADDCNode, this function returns
+ // here, which will result in MultNode being mapped to a single MULT
+ // instruction node rather than a pair of MULT and MADD instructions being
+ // produced.
+ if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
+ return false;
+
+ SDLoc DL(ADDENode);
+
+ // Initialize accumulator.
+ SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped,
+ ADDCNode->getOperand(1),
+ ADDENode->getOperand(1));
+
+ // create MipsMAdd(u) node
+ MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MAddu : MipsISD::MAdd;
+
+ SDValue MAdd = CurDAG->getNode(MultOpc, DL, MVT::Untyped,
+ MultNode->getOperand(0),// Factor 0
+ MultNode->getOperand(1),// Factor 1
+ ACCIn);
+
+ // replace uses of adde and addc here
+ if (!SDValue(ADDCNode, 0).use_empty()) {
+ SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MAdd);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), LoOut);
+ }
+ if (!SDValue(ADDENode, 0).use_empty()) {
+ SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MAdd);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), HiOut);
+ }
+
+ return true;
+}
+
+// selectMSUB -
+// Transforms a subgraph in CurDAG if the following pattern is found:
+// (addc Lo0, multLo), (sube Hi0, multHi),
+// where,
+// multHi/Lo: product of multiplication
+// Lo0: initial value of Lo register
+// Hi0: initial value of Hi register
+// Return true if pattern matching was successful.
+static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) {
+ // SUBENode's second operand must be a flag output of an SUBC node in order
+ // for the matching to be successful.
+ SDNode *SUBCNode = SUBENode->getOperand(2).getNode();
+
+ if (SUBCNode->getOpcode() != ISD::SUBC)
+ return false;
+
+ SDValue MultHi = SUBENode->getOperand(1);
+ SDValue MultLo = SUBCNode->getOperand(1);
+ SDNode *MultNode = MultHi.getNode();
+ unsigned MultOpc = MultHi.getOpcode();
+
+ // MultHi and MultLo must be generated by the same node,
+ if (MultLo.getNode() != MultNode)
+ return false;
+
+ // and it must be a multiplication.
+ if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
+ return false;
+
+ // MultLo and MultHi must be the first and second output of MultNode
+ // respectively.
+ if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
+ return false;
+
+ // Transform this to a MSUB only if SUBENode and SUBCNode are the only users
+ // of the values of MultNode, in which case MultNode will be removed in later
+ // phases.
+ // If there exist users other than SUBENode or SUBCNode, this function returns
+ // here, which will result in MultNode being mapped to a single MULT
+ // instruction node rather than a pair of MULT and MSUB instructions being
+ // produced.
+ if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
+ return false;
+
+ SDLoc DL(SUBENode);
+
+ // Initialize accumulator.
+ SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped,
+ SUBCNode->getOperand(0),
+ SUBENode->getOperand(0));
+
+ // create MipsSub(u) node
+ MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MSubu : MipsISD::MSub;
+
+ SDValue MSub = CurDAG->getNode(MultOpc, DL, MVT::Glue,
+ MultNode->getOperand(0),// Factor 0
+ MultNode->getOperand(1),// Factor 1
+ ACCIn);
+
+ // replace uses of sube and subc here
+ if (!SDValue(SUBCNode, 0).use_empty()) {
+ SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MSub);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), LoOut);
+ }
+ if (!SDValue(SUBENode, 0).use_empty()) {
+ SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MSub);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), HiOut);
+ }
+
+ return true;
+}
+
+static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget &Subtarget) {
+ if (DCI.isBeforeLegalize())
+ return SDValue();
+
+ if (Subtarget.hasMips32() && !Subtarget.hasMips32r6() &&
+ N->getValueType(0) == MVT::i32 && selectMADD(N, &DAG))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
// Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT
//
// Performs the following transformations:
@@ -661,6 +820,19 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget &Subtarget) {
+ if (DCI.isBeforeLegalize())
+ return SDValue();
+
+ if (Subtarget.hasMips32() && N->getValueType(0) == MVT::i32 &&
+ selectMSUB(N, &DAG))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
static SDValue genConstMult(SDValue X, uint64_t C, const SDLoc &DL, EVT VT,
EVT ShiftTy, SelectionDAG &DAG) {
// Clear the upper (64 - VT.sizeInBits) bits.
@@ -938,12 +1110,16 @@ MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
SDValue Val;
switch (N->getOpcode()) {
+ case ISD::ADDE:
+ return performADDECombine(N, DAG, DCI, Subtarget);
case ISD::AND:
Val = performANDCombine(N, DAG, DCI, Subtarget);
break;
case ISD::OR:
Val = performORCombine(N, DAG, DCI, Subtarget);
break;
+ case ISD::SUBE:
+ return performSUBECombine(N, DAG, DCI, Subtarget);
case ISD::MUL:
return performMULCombine(N, DAG, DCI, this);
case ISD::SHL:
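selectMADD and selectMSUB fold an addc/adde (or subc/sube) pair fed by a single SMUL_LOHI/UMUL_LOHI into one HI/LO accumulate. In plain C++, the unsigned variant of what the rewritten DAG computes is:

#include <cstdint>

// The matched pattern before and after the rewrite: a 64-bit accumulator
// split across Hi/Lo, updated with a 32x32->64 product (MADDu semantics).
static uint64_t maddu(uint32_t Hi0, uint32_t Lo0, uint32_t A, uint32_t B) {
  uint64_t Acc = ((uint64_t)Hi0 << 32) | Lo0; // MTLOHI seeds the accumulator
  Acc += (uint64_t)A * B;                     // MADDu: multiply-accumulate
  return Acc;                                 // MFHI/MFLO read the halves back
}

For MSUBu the update is Acc -= (uint64_t)A * B; the signed forms differ only in sign-extending the factors.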
diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index dd7707084948..a64d95512a4a 100644
--- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -141,9 +141,9 @@ int NVPTXTTIImpl::getArithmeticInstrCost(
}
}
-void NVPTXTTIImpl::getUnrollingPreferences(Loop *L,
+void NVPTXTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
- BaseT::getUnrollingPreferences(L, UP);
+ BaseT::getUnrollingPreferences(L, SE, UP);
// Enable partial unrolling and runtime unrolling, but reduce the
// threshold. This partially unrolls small loops which are often
diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index 03075b550429..f987892ba675 100644
--- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -61,7 +61,8 @@ public:
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
ArrayRef<const Value *> Args = ArrayRef<const Value *>());
- void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
+ void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP);
};
} // end namespace llvm
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 6d7eb786a683..7393f3d7a08a 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -131,10 +131,11 @@ public:
}
}
- void processFixupValue(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target, bool &IsResolved) override {
+ bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target) override {
switch ((PPC::Fixups)Fixup.getKind()) {
- default: break;
+ default:
+ return false;
case PPC::fixup_ppc_br24:
case PPC::fixup_ppc_br24abs:
// If the target symbol has a local entry point we must not attempt
@@ -147,10 +148,10 @@ public:
// and thus the shift to pack it.
unsigned Other = S->getOther() << 2;
if ((Other & ELF::STO_PPC64_LOCAL_MASK) != 0)
- IsResolved = false;
+ return true;
}
}
- break;
+ return false;
}
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
index ae43e59d3cb1..dce443997ea5 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -17,35 +17,31 @@
namespace llvm {
namespace PPC {
enum Fixups {
- // fixup_ppc_br24 - 24-bit PC relative relocation for direct branches like 'b'
- // and 'bl'.
+ // 24-bit PC relative relocation for direct branches like 'b' and 'bl'.
fixup_ppc_br24 = FirstTargetFixupKind,
-
- /// fixup_ppc_brcond14 - 14-bit PC relative relocation for conditional
- /// branches.
+
+ /// 14-bit PC relative relocation for conditional branches.
fixup_ppc_brcond14,
-
- /// fixup_ppc_br24abs - 24-bit absolute relocation for direct branches
- /// like 'ba' and 'bla'.
+
+ /// 24-bit absolute relocation for direct branches like 'ba' and 'bla'.
fixup_ppc_br24abs,
- /// fixup_ppc_brcond14abs - 14-bit absolute relocation for conditional
- /// branches.
+ /// 14-bit absolute relocation for conditional branches.
fixup_ppc_brcond14abs,
- /// fixup_ppc_half16 - A 16-bit fixup corresponding to lo16(_foo)
- /// or ha16(_foo) for instrs like 'li' or 'addis'.
+ /// A 16-bit fixup corresponding to lo16(_foo) or ha16(_foo) for instrs like
+ /// 'li' or 'addis'.
fixup_ppc_half16,
-
- /// fixup_ppc_half16ds - A 14-bit fixup corresponding to lo16(_foo) with
- /// implied 2 zero bits for instrs like 'std'.
+
+ /// A 14-bit fixup corresponding to lo16(_foo) with implied 2 zero bits for
+ /// instrs like 'std'.
fixup_ppc_half16ds,
- /// fixup_ppc_nofixup - Not a true fixup, but ties a symbol to a call
- /// to __tls_get_addr for the TLS general and local dynamic models,
- /// or inserts the thread-pointer register number.
+ /// Not a true fixup, but ties a symbol to a call to __tls_get_addr for the
+ /// TLS general and local dynamic models, or inserts the thread-pointer
+ /// register number.
fixup_ppc_nofixup,
-
+
// Marker
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
index 6d591ca964a6..d5506277ca88 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
@@ -219,11 +219,11 @@ bool PPCMachObjectWriter::recordScatteredRelocation(
const MCSymbol *SB = &B->getSymbol();
if (!SB->getFragment())
- report_fatal_error("symbol '" + B->getSymbol().getName() +
+ report_fatal_error("symbol '" + SB->getName() +
"' can not be undefined in a subtraction expression");
// FIXME: is Type correct? see include/llvm/BinaryFormat/MachO.h
- Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout);
+ Value2 = Writer->getSymbolAddress(*SB, Layout);
FixedValue -= Writer->getSectionAddress(SB->getFragment()->getParent());
}
// FIXME: does FixedValue get used??
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index 07c9c1f9f84c..ad92ac8ce120 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -15,6 +15,7 @@
#ifndef LLVM_LIB_TARGET_POWERPC_PPC_H
#define LLVM_LIB_TARGET_POWERPC_PPC_H
+#include "llvm/Support/CodeGen.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
// GCC #defines PPC on Linux but we use it as our namespace name
@@ -28,7 +29,7 @@ namespace llvm {
class AsmPrinter;
class MCInst;
- FunctionPass *createPPCCTRLoops(PPCTargetMachine &TM);
+ FunctionPass *createPPCCTRLoops();
#ifndef NDEBUG
FunctionPass *createPPCCTRLoopsVerify();
#endif
@@ -41,7 +42,7 @@ namespace llvm {
FunctionPass *createPPCMIPeepholePass();
FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCQPXLoadSplatPass();
- FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
+ FunctionPass *createPPCISelDag(PPCTargetMachine &TM, CodeGenOpt::Level OL);
FunctionPass *createPPCTLSDynamicCallPass();
FunctionPass *createPPCBoolRetToIntPass();
FunctionPass *createPPCExpandISELPass();
@@ -51,6 +52,7 @@ namespace llvm {
void initializePPCVSXFMAMutatePass(PassRegistry&);
void initializePPCBoolRetToIntPass(PassRegistry&);
void initializePPCExpandISELPass(PassRegistry &);
+ void initializePPCTLSDynamicCallPass(PassRegistry &);
extern char &PPCVSXFMAMutateID;
namespace PPCII {
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index 24bc027f8106..094d3e6a61b5 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -24,12 +24,14 @@
//===----------------------------------------------------------------------===//
#include "PPC.h"
+#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
@@ -81,10 +83,7 @@ namespace {
public:
static char ID;
- PPCCTRLoops() : FunctionPass(ID), TM(nullptr) {
- initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
- }
- PPCCTRLoops(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) {
+ PPCCTRLoops() : FunctionPass(ID) {
initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
}
@@ -99,16 +98,18 @@ namespace {
}
private:
- bool mightUseCTR(const Triple &TT, BasicBlock *BB);
+ bool mightUseCTR(BasicBlock *BB);
bool convertToCTRLoop(Loop *L);
private:
- PPCTargetMachine *TM;
+ const PPCTargetMachine *TM;
+ const PPCSubtarget *STI;
+ const PPCTargetLowering *TLI;
+ const DataLayout *DL;
+ const TargetLibraryInfo *LibInfo;
LoopInfo *LI;
ScalarEvolution *SE;
- const DataLayout *DL;
DominatorTree *DT;
- const TargetLibraryInfo *LibInfo;
bool PreserveLCSSA;
};
@@ -149,9 +150,7 @@ INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
false, false)
-FunctionPass *llvm::createPPCCTRLoops(PPCTargetMachine &TM) {
- return new PPCCTRLoops(TM);
-}
+FunctionPass *llvm::createPPCCTRLoops() { return new PPCCTRLoops(); }
#ifndef NDEBUG
INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
@@ -169,6 +168,14 @@ bool PPCCTRLoops::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC)
+ return false;
+
+ TM = &TPC->getTM<PPCTargetMachine>();
+ STI = TM->getSubtargetImpl(F);
+ TLI = STI->getTargetLowering();
+
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
@@ -198,8 +205,7 @@ static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) {
// Determining the address of a TLS variable results in a function call in
// certain TLS models.
-static bool memAddrUsesCTR(const PPCTargetMachine *TM,
- const Value *MemAddr) {
+static bool memAddrUsesCTR(const PPCTargetMachine &TM, const Value *MemAddr) {
const auto *GV = dyn_cast<GlobalValue>(MemAddr);
if (!GV) {
// Recurse to check for constants that refer to TLS global variables.
@@ -213,35 +219,35 @@ static bool memAddrUsesCTR(const PPCTargetMachine *TM,
if (!GV->isThreadLocal())
return false;
- if (!TM)
- return true;
- TLSModel::Model Model = TM->getTLSModel(GV);
+ TLSModel::Model Model = TM.getTLSModel(GV);
return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic;
}
-bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
+// Loop through the inline asm constraints and look for something that clobbers
+// ctr.
+static bool asmClobbersCTR(InlineAsm *IA) {
+ InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
+ for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
+ InlineAsm::ConstraintInfo &C = CIV[i];
+ if (C.Type != InlineAsm::isInput)
+ for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
+ if (StringRef(C.Codes[j]).equals_lower("{ctr}"))
+ return true;
+ }
+ return false;
+}
+
+bool PPCCTRLoops::mightUseCTR(BasicBlock *BB) {
for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
J != JE; ++J) {
if (CallInst *CI = dyn_cast<CallInst>(J)) {
+ // Inline ASM is okay, unless it clobbers the ctr register.
if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) {
- // Inline ASM is okay, unless it clobbers the ctr register.
- InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
- for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
- InlineAsm::ConstraintInfo &C = CIV[i];
- if (C.Type != InlineAsm::isInput)
- for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
- if (StringRef(C.Codes[j]).equals_lower("{ctr}"))
- return true;
- }
-
+ if (asmClobbersCTR(IA))
+ return true;
continue;
}
- if (!TM)
- return true;
- const TargetLowering *TLI =
- TM->getSubtargetImpl(*BB->getParent())->getTargetLowering();
-
if (Function *F = CI->getCalledFunction()) {
// Most intrinsics don't become function calls, but some might.
// sin, cos, exp and log are always calls.
@@ -380,9 +386,8 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
}
if (Opcode) {
- auto &DL = CI->getModule()->getDataLayout();
- MVT VTy = TLI->getSimpleValueType(DL, CI->getArgOperand(0)->getType(),
- true);
+ MVT VTy = TLI->getSimpleValueType(
+ *DL, CI->getArgOperand(0)->getType(), true);
if (VTy == MVT::Other)
return true;
@@ -406,17 +411,17 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
CastInst *CI = cast<CastInst>(J);
if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
- isLargeIntegerTy(TT.isArch32Bit(), CI->getSrcTy()->getScalarType()) ||
- isLargeIntegerTy(TT.isArch32Bit(), CI->getDestTy()->getScalarType()))
+ isLargeIntegerTy(!TM->isPPC64(), CI->getSrcTy()->getScalarType()) ||
+ isLargeIntegerTy(!TM->isPPC64(), CI->getDestTy()->getScalarType()))
return true;
- } else if (isLargeIntegerTy(TT.isArch32Bit(),
+ } else if (isLargeIntegerTy(!TM->isPPC64(),
J->getType()->getScalarType()) &&
(J->getOpcode() == Instruction::UDiv ||
J->getOpcode() == Instruction::SDiv ||
J->getOpcode() == Instruction::URem ||
J->getOpcode() == Instruction::SRem)) {
return true;
- } else if (TT.isArch32Bit() &&
+ } else if (!TM->isPPC64() &&
isLargeIntegerTy(false, J->getType()->getScalarType()) &&
(J->getOpcode() == Instruction::Shl ||
J->getOpcode() == Instruction::AShr ||
@@ -428,16 +433,11 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
// On PowerPC, indirect jumps use the counter register.
return true;
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
- if (!TM)
- return true;
- const TargetLowering *TLI =
- TM->getSubtargetImpl(*BB->getParent())->getTargetLowering();
-
if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
return true;
}
- if (TM->getSubtargetImpl(*BB->getParent())->getTargetLowering()->useSoftFloat()) {
+ if (STI->useSoftFloat()) {
switch(J->getOpcode()) {
case Instruction::FAdd:
case Instruction::FSub:
@@ -456,7 +456,7 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
}
for (Value *Operand : J->operands())
- if (memAddrUsesCTR(TM, Operand))
+ if (memAddrUsesCTR(*TM, Operand))
return true;
}
@@ -466,11 +466,6 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
bool MadeChange = false;
- const Triple TT =
- Triple(L->getHeader()->getParent()->getParent()->getTargetTriple());
- if (!TT.isArch32Bit() && !TT.isArch64Bit())
- return MadeChange; // Unknown arch. type.
-
// Process nested loops first.
for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
MadeChange |= convertToCTRLoop(*I);
@@ -495,7 +490,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
// want to use the counter register if the loop contains calls.
for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
I != IE; ++I)
- if (mightUseCTR(TT, *I))
+ if (mightUseCTR(*I))
return MadeChange;
SmallVector<BasicBlock*, 4> ExitingBlocks;
@@ -517,7 +512,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
} else if (!SE->isLoopInvariant(EC, L))
continue;
- if (SE->getTypeSizeInBits(EC->getType()) > (TT.isArch64Bit() ? 64 : 32))
+ if (SE->getTypeSizeInBits(EC->getType()) > (TM->isPPC64() ? 64 : 32))
continue;
// We now have a loop-invariant count of loop iterations (which is not the
@@ -571,7 +566,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
// preheader, then we can use it (except if the preheader contains a use of
// the CTR register because some such uses might be reordered by the
// selection DAG after the mtctr instruction).
- if (!Preheader || mightUseCTR(TT, Preheader))
+ if (!Preheader || mightUseCTR(Preheader))
Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
if (!Preheader)
return MadeChange;
@@ -582,10 +577,9 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
// selected branch.
MadeChange = true;
- SCEVExpander SCEVE(*SE, Preheader->getModule()->getDataLayout(), "loopcnt");
+ SCEVExpander SCEVE(*SE, *DL, "loopcnt");
LLVMContext &C = SE->getContext();
- Type *CountType = TT.isArch64Bit() ? Type::getInt64Ty(C) :
- Type::getInt32Ty(C);
+ Type *CountType = TM->isPPC64() ? Type::getInt64Ty(C) : Type::getInt32Ty(C);
if (!ExitCount->getType()->isPointerTy() &&
ExitCount->getType() != CountType)
ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
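
The PPCCTRLoops rework above is one instance of a recurring pattern in this
import: IR-level codegen passes stop taking the TargetMachine in their
constructor and instead recover it from TargetPassConfig at run time, so the
pass can be default-constructed (e.g. by opt) and silently no-op outside a
codegen pipeline. The shape of the pattern, with an illustrative pass name:

  bool MyPPCPass::runOnFunction(Function &F) {
    // TargetPassConfig is only registered when running inside a codegen
    // pipeline (llc); elsewhere the pass just does nothing.
    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC)
      return false;
    const PPCTargetMachine &TM = TPC->getTM<PPCTargetMachine>();
    const PPCSubtarget *STI = TM.getSubtargetImpl(F);
    const PPCTargetLowering *TLI = STI->getTargetLowering();
    // ... TM/STI/TLI now play the role of the old constructor-captured TM ...
    (void)TLI;
    return false;
  }
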
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index afd2e87078a9..535b9deaefac 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -114,8 +114,8 @@ namespace {
unsigned GlobalBaseReg;
public:
- explicit PPCDAGToDAGISel(PPCTargetMachine &tm)
- : SelectionDAGISel(tm), TM(tm) {}
+ explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(tm, OptLevel), TM(tm) {}
bool runOnMachineFunction(MachineFunction &MF) override {
// Make sure we re-emit a set of the global base reg if necessary
@@ -5116,6 +5116,7 @@ void PPCDAGToDAGISel::PeepholePPC64() {
/// createPPCISelDag - This pass converts a legalized DAG into a
/// PowerPC-specific DAG, ready for instruction scheduling.
///
-FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) {
- return new PPCDAGToDAGISel(TM);
+FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new PPCDAGToDAGISel(TM, OptLevel);
}
diff --git a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 31c50785c2ee..5f8085f4626e 100644
--- a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -52,6 +52,7 @@ namespace {
protected:
bool processBlock(MachineBasicBlock &MBB) {
bool Changed = false;
+ bool NeedFence = true;
bool Is64Bit = MBB.getParent()->getSubtarget<PPCSubtarget>().isPPC64();
for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
@@ -62,6 +63,16 @@ protected:
MI.getOpcode() != PPC::ADDItlsldLADDR &&
MI.getOpcode() != PPC::ADDItlsgdLADDR32 &&
MI.getOpcode() != PPC::ADDItlsldLADDR32) {
+
+ // Although we create ADJCALLSTACKDOWN and ADJCALLSTACKUP
+ // as scheduling fences, we skip creating fences if we already
+ // have existing ADJCALLSTACKDOWN/UP to avoid nesting,
+ // which would cause a verification error with -verify-machineinstrs.
+ if (MI.getOpcode() == PPC::ADJCALLSTACKDOWN)
+ NeedFence = false;
+ else if (MI.getOpcode() == PPC::ADJCALLSTACKUP)
+ NeedFence = true;
+
++I;
continue;
}
@@ -96,11 +107,15 @@ protected:
break;
}
- // Don't really need to save data to the stack - the clobbered
+ // We create ADJCALLSTACKUP and ADJCALLSTACKDOWN around _tls_get_addr
+ // as a scheduling fence, so that the call cannot be scheduled before the
+ // mflr in the prologue, which would clobber the address in LR (PR25839).
+ // We don't really need to save data to the stack - the clobbered
// registers are already saved when the SDNode (e.g. PPCaddiTlsgdLAddr)
// gets translated to the pseudo instruction (e.g. ADDItlsgdLADDR).
- BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKDOWN)).addImm(0)
- .addImm(0);
+ if (NeedFence)
+ BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKDOWN)).addImm(0)
+ .addImm(0);
// Expand into two ops built prior to the existing instruction.
MachineInstr *Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3)
@@ -116,7 +131,8 @@ protected:
.addReg(GPR3));
Call->addOperand(MI.getOperand(3));
- BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKUP)).addImm(0).addImm(0);
+ if (NeedFence)
+ BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKUP)).addImm(0).addImm(0);
BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), OutReg)
.addReg(GPR3);
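
The NeedFence flag above is a one-bit state machine over the block: an
ADJCALLSTACKDOWN already seen means the TLS pseudo sits inside an open
call-setup region, so emitting another DOWN/UP pair would nest and trip
-verify-machineinstrs. The same bookkeeping in isolation, with stand-in
opcodes (a sketch, not the pass's actual data structures):

  #include <cstddef>
  #include <vector>

  enum Op { DOWN, UP, TLS_ADDR, OTHER };

  // May a fresh DOWN/UP fence pair be emitted around the TLS pseudo at
  // position Idx without nesting inside an already-open call-setup region?
  bool mayEmitFence(const std::vector<Op> &Block, std::size_t Idx) {
    bool NeedFence = true;
    for (std::size_t I = 0; I < Idx; ++I) {
      if (Block[I] == DOWN)
        NeedFence = false; // an ADJCALLSTACKDOWN region is open here
      else if (Block[I] == UP)
        NeedFence = true;  // region closed; fencing is safe again
    }
    return NeedFence;
  }

The pass itself keeps the flag updated as it walks the block rather than
rescanning, but the invariant is the same.
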
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index a88a6541e8d0..fe092cc3b858 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -93,6 +93,7 @@ extern "C" void LLVMInitializePowerPCTarget() {
PassRegistry &PR = *PassRegistry::getPassRegistry();
initializePPCBoolRetToIntPass(PR);
initializePPCExpandISELPass(PR);
+ initializePPCTLSDynamicCallPass(PR);
}
/// Return the datalayout string of a subtarget.
@@ -336,7 +337,7 @@ bool PPCPassConfig::addPreISel() {
addPass(createPPCLoopPreIncPrepPass(getPPCTargetMachine()));
if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
- addPass(createPPCCTRLoops(getPPCTargetMachine()));
+ addPass(createPPCCTRLoops());
return false;
}
@@ -352,7 +353,7 @@ bool PPCPassConfig::addILPOpts() {
bool PPCPassConfig::addInstSelector() {
// Install an instruction selector.
- addPass(createPPCISelDag(getPPCTargetMachine()));
+ addPass(createPPCISelDag(getPPCTargetMachine(), getOptLevel()));
#ifndef NDEBUG
if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 5eb6ba785d1b..2dc3828334ac 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -41,6 +41,7 @@ public:
~PPCTargetMachine() override;
const PPCSubtarget *getSubtargetImpl(const Function &F) const override;
+ const PPCSubtarget *getSubtargetImpl() const = delete;
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
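
The '= delete' line above poisons the parameterless getSubtargetImpl()
inherited from the base TargetMachine, so PPC code is forced through the
Function-based overload and always gets a per-function subtarget. The general
C++ idiom, reduced to a toy example:

  struct Base {
    int value() const { return 0; }
  };
  struct Derived : Base {
    int value(int Key) const { return Key; }
    int value() const = delete; // callers on Derived must supply a key
  };
  // Derived D; D.value(42); // fine
  // Derived D; D.value();   // error: use of deleted function
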
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 3dbd5f5b9a92..6110706b01b9 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -189,7 +189,7 @@ int PPCTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
return PPCTTIImpl::getIntImmCost(Imm, Ty);
}
-void PPCTTIImpl::getUnrollingPreferences(Loop *L,
+void PPCTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
if (ST->getDarwinDirective() == PPC::DIR_A2) {
// The A2 is in-order with a deep pipeline, and concatenation unrolling
@@ -201,7 +201,7 @@ void PPCTTIImpl::getUnrollingPreferences(Loop *L,
UP.AllowExpensiveTripCount = true;
}
- BaseT::getUnrollingPreferences(L, UP);
+ BaseT::getUnrollingPreferences(L, SE, UP);
}
bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.h b/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 758c335def08..99ca6394d1be 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -52,7 +52,8 @@ public:
Type *Ty);
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
- void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
+ void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP);
/// @}
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
index c72b47b09085..d4454c271f5a 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
@@ -203,13 +203,14 @@ namespace {
return InfosBE[Kind - FirstTargetFixupKind];
}
- void processFixupValue(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target, bool &IsResolved) override {
+ bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target) override {
switch ((Sparc::Fixups)Fixup.getKind()) {
- default: break;
+ default:
+ return false;
case Sparc::fixup_sparc_wplt30:
if (Target.getSymA()->getSymbol().isTemporary())
- return;
+ return false;
case Sparc::fixup_sparc_tls_gd_hi22:
case Sparc::fixup_sparc_tls_gd_lo10:
case Sparc::fixup_sparc_tls_gd_add:
@@ -227,7 +228,8 @@ namespace {
case Sparc::fixup_sparc_tls_ie_ldx:
case Sparc::fixup_sparc_tls_ie_add:
case Sparc::fixup_sparc_tls_le_hix22:
- case Sparc::fixup_sparc_tls_le_lox10: IsResolved = false; break;
+ case Sparc::fixup_sparc_tls_le_lox10:
+ return true;
}
}
diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index ad05779a9f64..ee23692ad1db 100644
--- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -61,6 +61,7 @@ enum RegisterKind {
VR64Reg,
VR128Reg,
AR32Reg,
+ CR64Reg,
};
enum MemoryKind {
@@ -343,6 +344,7 @@ public:
bool isVF128() const { return false; }
bool isVR128() const { return isReg(VR128Reg); }
bool isAR32() const { return isReg(AR32Reg); }
+ bool isCR64() const { return isReg(CR64Reg); }
bool isAnyReg() const { return (isReg() || isImm(0, 15)); }
bool isBDAddr32Disp12() const { return isMemDisp12(BDMem, ADDR32Reg); }
bool isBDAddr32Disp20() const { return isMemDisp20(BDMem, ADDR32Reg); }
@@ -379,7 +381,8 @@ private:
RegGR,
RegFP,
RegV,
- RegAR
+ RegAR,
+ RegCR
};
struct Register {
RegisterGroup Group;
@@ -487,6 +490,9 @@ public:
OperandMatchResultTy parseAR32(OperandVector &Operands) {
return parseRegister(Operands, RegAR, SystemZMC::AR32Regs, AR32Reg);
}
+ OperandMatchResultTy parseCR64(OperandVector &Operands) {
+ return parseRegister(Operands, RegCR, SystemZMC::CR64Regs, CR64Reg);
+ }
OperandMatchResultTy parseAnyReg(OperandVector &Operands) {
return parseAnyRegister(Operands);
}
@@ -648,6 +654,8 @@ bool SystemZAsmParser::parseRegister(Register &Reg) {
Reg.Group = RegV;
else if (Prefix == 'a' && Reg.Num < 16)
Reg.Group = RegAR;
+ else if (Prefix == 'c' && Reg.Num < 16)
+ Reg.Group = RegCR;
else
return Error(Reg.StartLoc, "invalid register");
@@ -741,6 +749,10 @@ SystemZAsmParser::parseAnyRegister(OperandVector &Operands) {
Kind = AR32Reg;
RegNo = SystemZMC::AR32Regs[Reg.Num];
}
+ else if (Reg.Group == RegCR) {
+ Kind = CR64Reg;
+ RegNo = SystemZMC::CR64Regs[Reg.Num];
+ }
else {
return MatchOperand_ParseFail;
}
@@ -1056,6 +1068,8 @@ bool SystemZAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
RegNo = SystemZMC::VR128Regs[Reg.Num];
else if (Reg.Group == RegAR)
RegNo = SystemZMC::AR32Regs[Reg.Num];
+ else if (Reg.Group == RegCR)
+ RegNo = SystemZMC::CR64Regs[Reg.Num];
StartLoc = Reg.StartLoc;
EndLoc = Reg.EndLoc;
return false;
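
The assembler-side CR64 plumbing above is mechanical: a '%c<n>' prefix
selects the new RegCR group, which is then mapped through the
SystemZMC::CR64Regs table (added further down) to a physical register and the
CR64Reg operand kind. The mapping step on its own, with a hypothetical helper
name:

  #include <cassert>

  // Given the number parsed after a '%c' prefix, return the MC register
  // from a 16-entry table such as SystemZMC::CR64Regs.
  unsigned lookupControlReg(unsigned Num, const unsigned Regs[16]) {
    assert(Num < 16 && "control registers are c0-c15");
    return Regs[Num];
  }
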
diff --git a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
index 27fd70bc6092..8903b57ffd0b 100644
--- a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
+++ b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
@@ -162,6 +162,12 @@ static DecodeStatus DecodeAR32BitRegisterClass(MCInst &Inst, uint64_t RegNo,
return decodeRegisterClass(Inst, RegNo, SystemZMC::AR32Regs, 16);
}
+static DecodeStatus DecodeCR64BitRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::CR64Regs, 16);
+}
+
template<unsigned N>
static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm) {
if (!isUInt<N>(Imm))
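
The new decoder hook mirrors the other register classes: it just indexes the
same 16-entry table through decodeRegisterClass. For reference, the generic
shape such a helper takes, a bounds check followed by an MCOperand append (a
sketch, not the exact in-tree helper):

  static DecodeStatus decodeRegClass(MCInst &Inst, uint64_t RegNo,
                                     const unsigned *Regs, unsigned Size) {
    if (RegNo >= Size)
      return MCDisassembler::Fail;
    Inst.addOperand(MCOperand::createReg(Regs[RegNo]));
    return MCDisassembler::Success;
  }
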
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index dfea7e33fa15..727ab921daf9 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -116,6 +116,13 @@ const unsigned SystemZMC::AR32Regs[16] = {
SystemZ::A12, SystemZ::A13, SystemZ::A14, SystemZ::A15
};
+const unsigned SystemZMC::CR64Regs[16] = {
+ SystemZ::C0, SystemZ::C1, SystemZ::C2, SystemZ::C3,
+ SystemZ::C4, SystemZ::C5, SystemZ::C6, SystemZ::C7,
+ SystemZ::C8, SystemZ::C9, SystemZ::C10, SystemZ::C11,
+ SystemZ::C12, SystemZ::C13, SystemZ::C14, SystemZ::C15
+};
+
unsigned SystemZMC::getFirstReg(unsigned Reg) {
static unsigned Map[SystemZ::NUM_TARGET_REGS];
static bool Initialized = false;
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
index d9926c7e4986..dbca3485290a 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
@@ -55,6 +55,7 @@ extern const unsigned VR32Regs[32];
extern const unsigned VR64Regs[32];
extern const unsigned VR128Regs[32];
extern const unsigned AR32Regs[16];
+extern const unsigned CR64Regs[16];
// Return the 0-based number of the first architectural register that
// contains the given LLVM register. E.g. R1D -> 1.
diff --git a/lib/Target/SystemZ/README.txt b/lib/Target/SystemZ/README.txt
index 74cf653b9d95..9b714157550d 100644
--- a/lib/Target/SystemZ/README.txt
+++ b/lib/Target/SystemZ/README.txt
@@ -67,6 +67,11 @@ We don't use ICM, STCM, or CLM.
--
+We don't use ADD (LOGICAL) HIGH, SUBTRACT (LOGICAL) HIGH,
+or COMPARE (LOGICAL) HIGH yet.
+
+--
+
DAGCombiner doesn't yet fold truncations of extended loads. Functions like:
unsigned long f (unsigned long x, unsigned short *y)
diff --git a/lib/Target/SystemZ/SystemZ.td b/lib/Target/SystemZ/SystemZ.td
index c5f324418da5..41300a1b6295 100644
--- a/lib/Target/SystemZ/SystemZ.td
+++ b/lib/Target/SystemZ/SystemZ.td
@@ -56,6 +56,7 @@ include "SystemZInstrVector.td"
include "SystemZInstrFP.td"
include "SystemZInstrHFP.td"
include "SystemZInstrDFP.td"
+include "SystemZInstrSystem.td"
def SystemZInstrInfo : InstrInfo {}
diff --git a/lib/Target/SystemZ/SystemZFeatures.td b/lib/Target/SystemZ/SystemZFeatures.td
index ffb0b8d1c861..c5faa0d62881 100644
--- a/lib/Target/SystemZ/SystemZFeatures.td
+++ b/lib/Target/SystemZ/SystemZFeatures.td
@@ -68,11 +68,21 @@ def FeaturePopulationCount : SystemZFeature<
"Assume that the population-count facility is installed"
>;
+def FeatureMessageSecurityAssist3 : SystemZFeature<
+ "message-security-assist-extension3", "MessageSecurityAssist3",
+ "Assume that the message-security-assist extension facility 3 is installed"
+>;
+
def FeatureMessageSecurityAssist4 : SystemZFeature<
"message-security-assist-extension4", "MessageSecurityAssist4",
"Assume that the message-security-assist extension facility 4 is installed"
>;
+def FeatureResetReferenceBitsMultiple : SystemZFeature<
+ "reset-reference-bits-multiple", "ResetReferenceBitsMultiple",
+ "Assume that the reset-reference-bits-multiple facility is installed"
+>;
+
def Arch9NewFeatures : SystemZFeatureList<[
FeatureDistinctOps,
FeatureFastSerialization,
@@ -81,7 +91,9 @@ def Arch9NewFeatures : SystemZFeatureList<[
FeatureInterlockedAccess1,
FeatureLoadStoreOnCond,
FeaturePopulationCount,
- FeatureMessageSecurityAssist4
+ FeatureMessageSecurityAssist3,
+ FeatureMessageSecurityAssist4,
+ FeatureResetReferenceBitsMultiple
]>;
//===----------------------------------------------------------------------===//
@@ -120,13 +132,19 @@ def FeatureDFPZonedConversion : SystemZFeature<
"Assume that the DFP zoned-conversion facility is installed"
>;
+def FeatureEnhancedDAT2 : SystemZFeature<
+ "enhanced-dat-2", "EnhancedDAT2",
+ "Assume that the enhanced-DAT facility 2 is installed"
+>;
+
def Arch10NewFeatures : SystemZFeatureList<[
FeatureExecutionHint,
FeatureLoadAndTrap,
FeatureMiscellaneousExtensions,
FeatureProcessorAssist,
FeatureTransactionalExecution,
- FeatureDFPZonedConversion
+ FeatureDFPZonedConversion,
+ FeatureEnhancedDAT2
]>;
//===----------------------------------------------------------------------===//
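
Each SystemZFeature above expands through TableGen into a subtarget boolean
plus an accessor, and a definition guarded with Requires<[Feature...]> (or a
Predicates override) only matches when that boolean is set. Roughly the
generated C++ shape; names follow the feature's second template argument, and
this is an illustration, not a copy of the generated file:

  class SystemZSubtarget {
    bool HasMessageSecurityAssist3 = false;
    bool HasResetReferenceBitsMultiple = false;
    bool HasEnhancedDAT2 = false;

  public:
    bool hasMessageSecurityAssist3() const {
      return HasMessageSecurityAssist3;
    }
    bool hasResetReferenceBitsMultiple() const {
      return HasResetReferenceBitsMultiple;
    }
    bool hasEnhancedDAT2() const { return HasEnhancedDAT2; }
  };
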
diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td
index 5f6115ed86a4..7620e06ccbc9 100644
--- a/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -2468,6 +2468,14 @@ class UnaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let OpType = "reg";
}
+class UnaryTiedRRE<string mnemonic, bits<16> opcode, RegisterOperand cls>
+ : InstRRE<opcode, (outs cls:$R1), (ins cls:$R1src),
+ mnemonic#"\t$R1", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let R2 = 0;
+}
+
class UnaryMemRRFc<string mnemonic, bits<16> opcode,
RegisterOperand cls1, RegisterOperand cls2>
: InstRRFc<opcode, (outs cls2:$R2, cls1:$R1), (ins cls1:$R1src),
@@ -2702,6 +2710,26 @@ class SideEffectBinaryRILPC<string mnemonic, bits<12> opcode,
let AddedComplexity = 7;
}
+class SideEffectBinaryRRE<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRRE<opcode, (outs), (ins cls1:$R1, cls2:$R2),
+ mnemonic#"\t$R1, $R2", []>;
+
+class SideEffectBinaryRRFa<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRRFa<opcode, (outs), (ins cls1:$R1, cls2:$R2),
+ mnemonic#"\t$R1, $R2", []> {
+ let R3 = 0;
+ let M4 = 0;
+}
+
+class SideEffectBinaryRRFc<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRRFc<opcode, (outs), (ins cls1:$R1, cls2:$R2),
+ mnemonic#"\t$R1, $R2", []> {
+ let M3 = 0;
+}
+
class SideEffectBinaryIE<string mnemonic, bits<16> opcode,
Immediate imm1, Immediate imm2>
: InstIE<opcode, (outs), (ins imm1:$I1, imm2:$I2),
@@ -2729,6 +2757,10 @@ class SideEffectBinarySSf<string mnemonic, bits<8> opcode>
: InstSSf<opcode, (outs), (ins bdaddr12only:$BD1, bdladdr12onlylen8:$BDL2),
mnemonic##"\t$BD1, $BDL2", []>;
+class SideEffectBinarySSE<string mnemonic, bits<16> opcode>
+ : InstSSE<opcode, (outs), (ins bdaddr12only:$BD1, bdaddr12only:$BD2),
+ mnemonic#"\t$BD1, $BD2", []>;
+
class SideEffectBinaryMemMemRR<string mnemonic, bits<8> opcode,
RegisterOperand cls1, RegisterOperand cls2>
: InstRR<opcode, (outs cls1:$R1, cls2:$R2), (ins cls1:$R1src, cls2:$R2src),
@@ -3612,6 +3644,22 @@ class SideEffectTernarySSc<string mnemonic, bits<8> opcode>
shift12only:$BD2, imm32zx4:$I3),
mnemonic##"\t$BDL1, $BD2, $I3", []>;
+class SideEffectTernaryRRFa<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2,
+ RegisterOperand cls3>
+ : InstRRFa<opcode, (outs), (ins cls1:$R1, cls2:$R2, cls3:$R3),
+ mnemonic#"\t$R1, $R2, $R3", []> {
+ let M4 = 0;
+}
+
+class SideEffectTernaryRRFb<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2,
+ RegisterOperand cls3>
+ : InstRRFb<opcode, (outs), (ins cls1:$R1, cls2:$R2, cls3:$R3),
+ mnemonic#"\t$R1, $R3, $R2", []> {
+ let M4 = 0;
+}
+
class SideEffectTernaryMemMemMemRRFb<string mnemonic, bits<16> opcode,
RegisterOperand cls1,
RegisterOperand cls2,
@@ -3630,6 +3678,13 @@ class SideEffectTernaryRRFc<string mnemonic, bits<16> opcode,
: InstRRFc<opcode, (outs), (ins cls1:$R1, cls2:$R2, imm:$M3),
mnemonic#"\t$R1, $R2, $M3", []>;
+multiclass SideEffectTernaryRRFcOpt<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1,
+ RegisterOperand cls2> {
+ def "" : SideEffectTernaryRRFc<mnemonic, opcode, cls1, cls2, imm32zx4>;
+ def Opt : SideEffectBinaryRRFc<mnemonic, opcode, cls1, cls2>;
+}
+
class SideEffectTernaryMemMemRRFc<string mnemonic, bits<16> opcode,
RegisterOperand cls1, RegisterOperand cls2,
Immediate imm>
@@ -3720,6 +3775,18 @@ multiclass TernaryRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode,
}
}
+class SideEffectTernaryRS<string mnemonic, bits<8> opcode,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRSa<opcode, (outs),
+ (ins cls1:$R1, cls2:$R3, bdaddr12only:$BD2),
+ mnemonic#"\t$R1, $R3, $BD2", []>;
+
+class SideEffectTernaryRSY<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRSYa<opcode, (outs),
+ (ins cls1:$R1, cls2:$R3, bdaddr20only:$BD2),
+ mnemonic#"\t$R1, $R3, $BD2", []>;
+
class SideEffectTernaryMemMemRS<string mnemonic, bits<8> opcode,
RegisterOperand cls1, RegisterOperand cls2>
: InstRSa<opcode, (outs cls1:$R1, cls2:$R3),
@@ -3997,6 +4064,35 @@ multiclass QuaternaryOptVRRdSPairGeneric<string mnemonic, bits<16> opcode> {
VR128:$V4, imm32zx4:$M5, 0)>;
}
+class SideEffectQuaternaryRRFa<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2,
+ RegisterOperand cls3>
+ : InstRRFa<opcode, (outs), (ins cls1:$R1, cls2:$R2, cls3:$R3, imm32zx4:$M4),
+ mnemonic#"\t$R1, $R2, $R3, $M4", []>;
+
+multiclass SideEffectQuaternaryRRFaOptOpt<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1,
+ RegisterOperand cls2,
+ RegisterOperand cls3> {
+ def "" : SideEffectQuaternaryRRFa<mnemonic, opcode, cls1, cls2, cls3>;
+ def Opt : SideEffectTernaryRRFa<mnemonic, opcode, cls1, cls2, cls3>;
+ def OptOpt : SideEffectBinaryRRFa<mnemonic, opcode, cls1, cls2>;
+}
+
+class SideEffectQuaternaryRRFb<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2,
+ RegisterOperand cls3>
+ : InstRRFb<opcode, (outs), (ins cls1:$R1, cls2:$R2, cls3:$R3, imm32zx4:$M4),
+ mnemonic#"\t$R1, $R3, $R2, $M4", []>;
+
+multiclass SideEffectQuaternaryRRFbOpt<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1,
+ RegisterOperand cls2,
+ RegisterOperand cls3> {
+ def "" : SideEffectQuaternaryRRFb<mnemonic, opcode, cls1, cls2, cls3>;
+ def Opt : SideEffectTernaryRRFb<mnemonic, opcode, cls1, cls2, cls3>;
+}
+
class SideEffectQuaternarySSe<string mnemonic, bits<8> opcode,
RegisterOperand cls>
: InstSSe<opcode, (outs),
@@ -4012,6 +4108,16 @@ class LoadAndOpRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let mayStore = 1;
}
+class CmpSwapRRE<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRRE<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2),
+ mnemonic#"\t$R1, $R2", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let mayLoad = 1;
+ let mayStore = 1;
+}
+
class CmpSwapRS<string mnemonic, bits<8> opcode, SDPatternOperator operator,
RegisterOperand cls, AddressingMode mode = bdaddr12only>
: InstRSa<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, mode:$BD2),
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index 9f5e6288348e..98f66c29ae64 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -883,6 +883,12 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
}
def AGFR : BinaryRRE<"agfr", 0xB918, null_frag, GR64, GR32>;
+ // Addition to a high register.
+ def AHHHR : BinaryRRFa<"ahhhr", 0xB9C8, null_frag, GRH32, GRH32, GRH32>,
+ Requires<[FeatureHighWord]>;
+ def AHHLR : BinaryRRFa<"ahhlr", 0xB9D8, null_frag, GRH32, GRH32, GR32>,
+ Requires<[FeatureHighWord]>;
+
// Addition of signed 16-bit immediates.
defm AHIMux : BinaryRIAndKPseudo<"ahimux", add, GRX32, imm32sx16>;
defm AHI : BinaryRIAndK<"ahi", 0xA7A, 0xECD8, add, GR32, imm32sx16>;
@@ -917,6 +923,12 @@ let Defs = [CC] in {
}
def ALGFR : BinaryRRE<"algfr", 0xB91A, null_frag, GR64, GR32>;
+ // Addition to a high register.
+ def ALHHHR : BinaryRRFa<"alhhhr", 0xB9CA, null_frag, GRH32, GRH32, GRH32>,
+ Requires<[FeatureHighWord]>;
+ def ALHHLR : BinaryRRFa<"alhhlr", 0xB9DA, null_frag, GRH32, GRH32, GR32>,
+ Requires<[FeatureHighWord]>;
+
// Addition of signed 16-bit immediates.
def ALHSIK : BinaryRIE<"alhsik", 0xECDA, addc, GR32, imm32sx16>,
Requires<[FeatureDistinctOps]>;
@@ -927,6 +939,10 @@ let Defs = [CC] in {
def ALFI : BinaryRIL<"alfi", 0xC2B, addc, GR32, uimm32>;
def ALGFI : BinaryRIL<"algfi", 0xC2A, addc, GR64, imm64zx32>;
+ // Addition of signed 32-bit immediates.
+ def ALSIH : BinaryRIL<"alsih", 0xCCA, null_frag, GRH32, simm32>,
+ Requires<[FeatureHighWord]>;
+
// Addition of memory.
defm AL : BinaryRXPair<"al", 0x5E, 0xE35E, addc, GR32, load, 4>;
def ALGF : BinaryRXY<"algf", 0xE31A, addc, GR64, azextloadi32, 4>;
@@ -949,6 +965,10 @@ let Defs = [CC], Uses = [CC] in {
def ALCG : BinaryRXY<"alcg", 0xE388, adde, GR64, load, 8>;
}
+// Addition that does not modify the condition code.
+def ALSIHN : BinaryRIL<"alsihn", 0xCCB, null_frag, GRH32, simm32>,
+ Requires<[FeatureHighWord]>;
+
//===----------------------------------------------------------------------===//
// Subtraction
//===----------------------------------------------------------------------===//
@@ -961,6 +981,12 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
def SGFR : BinaryRRE<"sgfr", 0xB919, null_frag, GR64, GR32>;
defm SGR : BinaryRREAndK<"sgr", 0xB909, 0xB9E9, sub, GR64, GR64>;
+ // Subtraction from a high register.
+ def SHHHR : BinaryRRFa<"shhhr", 0xB9C9, null_frag, GRH32, GRH32, GRH32>,
+ Requires<[FeatureHighWord]>;
+ def SHHLR : BinaryRRFa<"shhlr", 0xB9D9, null_frag, GRH32, GRH32, GR32>,
+ Requires<[FeatureHighWord]>;
+
// Subtraction of memory.
defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, sub, GR32, asextloadi16, 2>;
defm S : BinaryRXPair<"s", 0x5B, 0xE35B, sub, GR32, load, 4>;
@@ -976,6 +1002,12 @@ let Defs = [CC] in {
def SLGFR : BinaryRRE<"slgfr", 0xB91B, null_frag, GR64, GR32>;
defm SLGR : BinaryRREAndK<"slgr", 0xB90B, 0xB9EB, subc, GR64, GR64>;
+ // Subtraction from a high register.
+ def SLHHHR : BinaryRRFa<"slhhhr", 0xB9CB, null_frag, GRH32, GRH32, GRH32>,
+ Requires<[FeatureHighWord]>;
+ def SLHHLR : BinaryRRFa<"slhhlr", 0xB9DB, null_frag, GRH32, GRH32, GR32>,
+ Requires<[FeatureHighWord]>;
+
// Subtraction of unsigned 32-bit immediates. These don't match
// subc because we prefer addc for constants.
def SLFI : BinaryRIL<"slfi", 0xC25, null_frag, GR32, uimm32>;
@@ -1298,6 +1330,12 @@ let Defs = [CC], CCValues = 0xE in {
def CGFR : CompareRRE<"cgfr", 0xB930, null_frag, GR64, GR32>;
def CGR : CompareRRE<"cgr", 0xB920, z_scmp, GR64, GR64>;
+ // Comparison with a high register.
+ def CHHR : CompareRRE<"chhr", 0xB9CD, null_frag, GRH32, GRH32>,
+ Requires<[FeatureHighWord]>;
+ def CHLR : CompareRRE<"chlr", 0xB9DD, null_frag, GRH32, GR32>,
+ Requires<[FeatureHighWord]>;
+
// Comparison with a signed 16-bit immediate. CHIMux expands to CHI or CIH,
// depending on the choice of register.
def CHIMux : CompareRIPseudo<z_scmp, GRX32, imm32sx16>,
@@ -1344,6 +1382,12 @@ let Defs = [CC], CCValues = 0xE, IsLogical = 1 in {
def CLGFR : CompareRRE<"clgfr", 0xB931, null_frag, GR64, GR32>;
def CLGR : CompareRRE<"clgr", 0xB921, z_ucmp, GR64, GR64>;
+ // Comparison with a high register.
+ def CLHHR : CompareRRE<"clhhr", 0xB9CF, null_frag, GRH32, GRH32>,
+ Requires<[FeatureHighWord]>;
+ def CLHLR : CompareRRE<"clhlr", 0xB9DF, null_frag, GRH32, GR32>,
+ Requires<[FeatureHighWord]>;
+
// Comparison with an unsigned 32-bit immediate. CLFIMux expands to CLFI
// or CLIH, depending on the choice of register.
def CLFIMux : CompareRIPseudo<z_ucmp, GRX32, uimm32>,
@@ -1888,54 +1932,12 @@ let mayLoad = 1, Defs = [CC] in
let mayLoad = 1, mayStore = 1, Defs = [CC, R1D], Uses = [R0L, R1D] in
def CMPSC : SideEffectBinaryMemMemRRE<"cmpsc", 0xB263, GR128, GR128>;
-// Supervisor call.
-let hasSideEffects = 1, isCall = 1, Defs = [CC] in
- def SVC : SideEffectUnaryI<"svc", 0x0A, imm32zx8>;
-
-// Monitor call.
-let hasSideEffects = 1, isCall = 1 in
- def MC : SideEffectBinarySI<"mc", 0xAF, imm32zx8>;
-
-// Store clock.
-let hasSideEffects = 1, Defs = [CC] in {
- def STCK : StoreInherentS<"stck", 0xB205, null_frag, 8>;
- def STCKF : StoreInherentS<"stckf", 0xB27C, null_frag, 8>;
- def STCKE : StoreInherentS<"stcke", 0xB278, null_frag, 16>;
-}
-
-// Store facility list.
-let hasSideEffects = 1, Uses = [R0D], Defs = [R0D, CC] in
- def STFLE : StoreInherentS<"stfle", 0xB2B0, null_frag, 0>;
-
-// Extract CPU attribute.
-let hasSideEffects = 1 in
- def ECAG : BinaryRSY<"ecag", 0xEB4C, null_frag, GR64>;
-
-// Extract CPU time.
-let Defs = [R0D, R1D], hasSideEffects = 1, mayLoad = 1 in
- def ECTG : SideEffectTernarySSF<"ectg", 0xC81, GR64>;
-
-// Extract PSW.
-let hasSideEffects = 1, Uses = [CC] in
- def EPSW : InherentDualRRE<"epsw", 0xB98D, GR32>;
-
// Execute.
let hasSideEffects = 1 in {
def EX : SideEffectBinaryRX<"ex", 0x44, GR64>;
def EXRL : SideEffectBinaryRILPC<"exrl", 0xC60, GR64>;
}
-// Program return.
-let hasSideEffects = 1, Defs = [CC] in
- def PR : SideEffectInherentE<"pr", 0x0101>;
-
-// Move with key.
-let mayLoad = 1, mayStore = 1, Defs = [CC] in
- def MVCK : MemoryBinarySSd<"mvck", 0xD9, GR64>;
-
-// Store real address.
-def STRAG : StoreSSE<"strag", 0xE502>;
-
//===----------------------------------------------------------------------===//
// .insn directive instructions
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZInstrSystem.td b/lib/Target/SystemZ/SystemZInstrSystem.td
new file mode 100644
index 000000000000..a9803c2d83e9
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZInstrSystem.td
@@ -0,0 +1,517 @@
+//==- SystemZInstrSystem.td - SystemZ system instructions -*- tblgen-*-----==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The instructions in this file implement SystemZ system-level instructions.
+// Most of these instructions are privileged or semi-privileged. They are
+// not used for code generation, but are provided for use with the assembler
+// and disassembler only.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Program-Status Word Instructions.
+//===----------------------------------------------------------------------===//
+
+// Extract PSW.
+let hasSideEffects = 1, Uses = [CC] in
+ def EPSW : InherentDualRRE<"epsw", 0xB98D, GR32>;
+
+// Load PSW (extended).
+let hasSideEffects = 1, Defs = [CC], mayLoad = 1 in {
+ def LPSW : SideEffectUnaryS<"lpsw", 0x8200, null_frag, 8>;
+ def LPSWE : SideEffectUnaryS<"lpswe", 0xB2B2, null_frag, 16>;
+}
+
+// Insert PSW key.
+let Uses = [R2L], Defs = [R2L] in
+ def IPK : SideEffectInherentS<"ipk", 0xB20B, null_frag>;
+
+// Set PSW key from address.
+let hasSideEffects = 1 in
+ def SPKA : SideEffectAddressS<"spka", 0xB20A, null_frag>;
+
+// Set system mask.
+let hasSideEffects = 1, mayLoad = 1 in
+ def SSM : SideEffectUnaryS<"ssm", 0x8000, null_frag, 1>;
+
+// Store then AND/OR system mask.
+let hasSideEffects = 1 in {
+ def STNSM : StoreSI<"stnsm", 0xAC, null_frag, imm32zx8>;
+ def STOSM : StoreSI<"stosm", 0xAD, null_frag, imm32zx8>;
+}
+
+// Insert address space control.
+let hasSideEffects = 1 in
+ def IAC : InherentRRE<"iac", 0xB224, GR32, null_frag>;
+
+// Set address space control (fast).
+let hasSideEffects = 1 in {
+ def SAC : SideEffectAddressS<"sac", 0xB219, null_frag>;
+ def SACF : SideEffectAddressS<"sacf", 0xB279, null_frag>;
+}
+
+//===----------------------------------------------------------------------===//
+// Control Register Instructions.
+//===----------------------------------------------------------------------===//
+
+// Load control.
+def LCTL : LoadMultipleRS<"lctl", 0xB7, CR64>;
+def LCTLG : LoadMultipleRSY<"lctlg", 0xEB2F, CR64>;
+
+// Store control.
+def STCTL : StoreMultipleRS<"stctl", 0xB6, CR64>;
+def STCTG : StoreMultipleRSY<"stctg", 0xEB25, CR64>;
+
+// Extract primary ASN (and instance).
+let hasSideEffects = 1 in {
+ def EPAR : InherentRRE<"epar", 0xB226, GR32, null_frag>;
+ def EPAIR : InherentRRE<"epair", 0xB99A, GR64, null_frag>;
+}
+
+// Extract secondary ASN (and instance).
+let hasSideEffects = 1 in {
+ def ESAR : InherentRRE<"esar", 0xB227, GR32, null_frag>;
+ def ESAIR : InherentRRE<"esair", 0xB99B, GR64, null_frag>;
+}
+
+// Set secondary ASN (and instance).
+let hasSideEffects = 1 in {
+ def SSAR : SideEffectUnaryRRE<"ssar", 0xB225, GR32, null_frag>;
+ def SSAIR : SideEffectUnaryRRE<"ssair", 0xB99F, GR64, null_frag>;
+}
+
+// Extract and set extended authority.
+let hasSideEffects = 1 in
+ def ESEA : UnaryTiedRRE<"esea", 0xB99D, GR32>;
+
+//===----------------------------------------------------------------------===//
+// Prefix-Register Instructions.
+//===----------------------------------------------------------------------===//
+
+// Set prefix.
+let hasSideEffects = 1 in
+ def SPX : SideEffectUnaryS<"spx", 0xB210, null_frag, 4>;
+
+// Store prefix.
+let hasSideEffects = 1 in
+ def STPX : StoreInherentS<"stpx", 0xB211, null_frag, 4>;
+
+//===----------------------------------------------------------------------===//
+// Storage-Key and Real Memory Instructions.
+//===----------------------------------------------------------------------===//
+
+// Insert storage key extended.
+let hasSideEffects = 1 in
+ def ISKE : BinaryRRE<"iske", 0xB229, null_frag, GR32, GR64>;
+
+// Insert virtual storage key.
+let hasSideEffects = 1 in
+ def IVSK : BinaryRRE<"ivsk", 0xB223, null_frag, GR32, GR64>;
+
+// Set storage key extended.
+let hasSideEffects = 1, Defs = [CC] in
+ defm SSKE : SideEffectTernaryRRFcOpt<"sske", 0xB22B, GR32, GR64>;
+
+// Reset reference bit extended.
+let hasSideEffects = 1, Defs = [CC] in
+ def RRBE : SideEffectBinaryRRE<"rrbe", 0xB22A, GR32, GR64>;
+
+// Reset reference bits multiple.
+let Predicates = [FeatureResetReferenceBitsMultiple], hasSideEffects = 1 in
+ def RRBM : UnaryRRE<"rrbm", 0xB9AE, null_frag, GR64, GR64>;
+
+// Perform frame management function.
+let hasSideEffects = 1 in
+ def PFMF : SideEffectBinaryMemRRE<"pfmf", 0xB9AF, GR32, GR64>;
+
+// Test block.
+let hasSideEffects = 1, mayStore = 1, Uses = [R0D], Defs = [R0D, CC] in
+ def TB : SideEffectBinaryRRE<"tb", 0xB22C, GR64, GR64>;
+
+// Page in / out.
+let mayLoad = 1, mayStore = 1, Defs = [CC] in {
+ def PGIN : SideEffectBinaryRRE<"pgin", 0xB22E, GR64, GR64>;
+ def PGOUT : SideEffectBinaryRRE<"pgout", 0xB22F, GR64, GR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// Dynamic-Address-Translation Instructions.
+//===----------------------------------------------------------------------===//
+
+// Invalidate page table entry.
+let hasSideEffects = 1 in
+ defm IPTE : SideEffectQuaternaryRRFaOptOpt<"ipte", 0xB221, GR64, GR32, GR32>;
+
+// Invalidate DAT table entry.
+let hasSideEffects = 1 in
+ defm IDTE : SideEffectQuaternaryRRFbOpt<"idte", 0xB98E, GR64, GR64, GR64>;
+
+// Compare and replace DAT table entry.
+let Predicates = [FeatureEnhancedDAT2], hasSideEffects = 1, Defs = [CC] in
+ defm CRDTE : SideEffectQuaternaryRRFbOpt<"crdte", 0xB98F, GR128, GR128, GR64>;
+
+// Purge TLB.
+let hasSideEffects = 1 in
+ def PTLB : SideEffectInherentS<"ptlb", 0xB20D, null_frag>;
+
+// Compare and swap and purge.
+let hasSideEffects = 1, Defs = [CC] in {
+ def CSP : CmpSwapRRE<"csp", 0xB250, GR128, GR64>;
+ def CSPG : CmpSwapRRE<"cspg", 0xB98A, GR128, GR64>;
+}
+
+// Load page-table-entry address.
+let hasSideEffects = 1, Defs = [CC] in
+ def LPTEA : TernaryRRFb<"lptea", 0xB9AA, GR64, GR64, GR64>;
+
+// Load real address.
+let hasSideEffects = 1, Defs = [CC] in {
+ defm LRA : LoadAddressRXPair<"lra", 0xB1, 0xE313, null_frag>;
+ def LRAG : LoadAddressRXY<"lrag", 0xE303, null_frag, laaddr20pair>;
+}
+
+// Store real address.
+def STRAG : StoreSSE<"strag", 0xE502>;
+
+// Load using real address.
+let mayLoad = 1 in {
+ def LURA : UnaryRRE<"lura", 0xB24B, null_frag, GR32, GR64>;
+ def LURAG : UnaryRRE<"lurag", 0xB905, null_frag, GR64, GR64>;
+}
+
+// Store using real address.
+let mayStore = 1 in {
+ def STURA : SideEffectBinaryRRE<"stura", 0xB246, GR32, GR64>;
+ def STURG : SideEffectBinaryRRE<"sturg", 0xB925, GR64, GR64>;
+}
+
+// Test protection.
+let hasSideEffects = 1, Defs = [CC] in
+ def TPROT : SideEffectBinarySSE<"tprot", 0xE501>;
+
+//===----------------------------------------------------------------------===//
+// Memory-move Instructions.
+//===----------------------------------------------------------------------===//
+
+// Move with key.
+let mayLoad = 1, mayStore = 1, Defs = [CC] in
+ def MVCK : MemoryBinarySSd<"mvck", 0xD9, GR64>;
+
+// Move to primary / secondary.
+let mayLoad = 1, mayStore = 1, Defs = [CC] in {
+ def MVCP : MemoryBinarySSd<"mvcp", 0xDA, GR64>;
+ def MVCS : MemoryBinarySSd<"mvcs", 0xDB, GR64>;
+}
+
+// Move with source / destination key.
+let mayLoad = 1, mayStore = 1, Uses = [R0L, R1L] in {
+ def MVCSK : SideEffectBinarySSE<"mvcsk", 0xE50E>;
+ def MVCDK : SideEffectBinarySSE<"mvcdk", 0xE50F>;
+}
+
+// Move with optional specifications.
+let mayLoad = 1, mayStore = 1, Uses = [R0L] in
+ def MVCOS : SideEffectTernarySSF<"mvcos", 0xC80, GR64>;
+
+// Move page.
+let mayLoad = 1, mayStore = 1, Uses = [R0L], Defs = [CC] in
+ def MVPG : SideEffectBinaryRRE<"mvpg", 0xB254, GR64, GR64>;
+
+//===----------------------------------------------------------------------===//
+// Address-Space Instructions.
+//===----------------------------------------------------------------------===//
+
+// Load address space parameters.
+let hasSideEffects = 1, Defs = [CC] in
+ def LASP : SideEffectBinarySSE<"lasp", 0xE500>;
+
+// Purge ALB.
+let hasSideEffects = 1 in
+ def PALB : SideEffectInherentRRE<"palb", 0xB248>;
+
+// Program call.
+let hasSideEffects = 1 in
+ def PC : SideEffectAddressS<"pc", 0xB218, null_frag>;
+
+// Program return.
+let hasSideEffects = 1, Defs = [CC] in
+ def PR : SideEffectInherentE<"pr", 0x0101>;
+
+// Program transfer (with instance).
+let hasSideEffects = 1 in {
+ def PT : SideEffectBinaryRRE<"pt", 0xB228, GR32, GR64>;
+ def PTI : SideEffectBinaryRRE<"pti", 0xB99E, GR64, GR64>;
+}
+
+// Resume program.
+let hasSideEffects = 1, Defs = [CC] in
+ def RP : SideEffectAddressS<"rp", 0xB277, null_frag>;
+
+// Branch in subspace group.
+let hasSideEffects = 1 in
+ def BSG : UnaryRRE<"bsg", 0xB258, null_frag, GR64, GR64>;
+
+// Branch and set authority.
+let hasSideEffects = 1 in
+ def BSA : UnaryRRE<"bsa", 0xB25A, null_frag, GR64, GR64>;
+
+// Test access.
+let Defs = [CC] in
+ def TAR : SideEffectBinaryRRE<"tar", 0xB24C, AR32, GR32>;
+
+//===----------------------------------------------------------------------===//
+// Linkage-Stack Instructions.
+//===----------------------------------------------------------------------===//
+
+// Branch and stack.
+let hasSideEffects = 1 in
+ def BAKR : SideEffectBinaryRRE<"bakr", 0xB240, GR64, GR64>;
+
+// Extract stacked registers.
+let hasSideEffects = 1 in {
+ def EREG : SideEffectBinaryRRE<"ereg", 0xB249, GR32, GR32>;
+ def EREGG : SideEffectBinaryRRE<"eregg", 0xB90E, GR64, GR64>;
+}
+
+// Extract stacked state.
+let hasSideEffects = 1, Defs = [CC] in
+ def ESTA : UnaryRRE<"esta", 0xB24A, null_frag, GR128, GR32>;
+
+// Modify stacked state.
+let hasSideEffects = 1 in
+ def MSTA : SideEffectUnaryRRE<"msta", 0xB247, GR128, null_frag>;
+
+//===----------------------------------------------------------------------===//
+// Time-Related Instructions.
+//===----------------------------------------------------------------------===//
+
+// Perform timing facility function.
+let hasSideEffects = 1, mayLoad = 1, Uses = [R0L, R1D], Defs = [CC] in
+ def PTFF : SideEffectInherentE<"ptff", 0x0104>;
+
+// Set clock.
+let hasSideEffects = 1, Defs = [CC] in
+ def SCK : SideEffectUnaryS<"sck", 0xB204, null_frag, 8>;
+
+// Set clock programmable field.
+let hasSideEffects = 1, Uses = [R0L] in
+ def SCKPF : SideEffectInherentE<"sckpf", 0x0107>;
+
+// Set clock comparator.
+let hasSideEffects = 1 in
+ def SCKC : SideEffectUnaryS<"sckc", 0xB206, null_frag, 8>;
+
+// Set CPU timer.
+let hasSideEffects = 1 in
+ def SPT : SideEffectUnaryS<"spt", 0xB208, null_frag, 8>;
+
+// Store clock (fast / extended).
+let hasSideEffects = 1, Defs = [CC] in {
+ def STCK : StoreInherentS<"stck", 0xB205, null_frag, 8>;
+ def STCKF : StoreInherentS<"stckf", 0xB27C, null_frag, 8>;
+ def STCKE : StoreInherentS<"stcke", 0xB278, null_frag, 16>;
+}
+
+// Store clock comparator.
+let hasSideEffects = 1 in
+ def STCKC : StoreInherentS<"stckc", 0xB207, null_frag, 8>;
+
+// Store CPU timer.
+let hasSideEffects = 1 in
+ def STPT : StoreInherentS<"stpt", 0xB209, null_frag, 8>;
+
+//===----------------------------------------------------------------------===//
+// CPU-Related Instructions.
+//===----------------------------------------------------------------------===//
+
+// Store CPU address.
+let hasSideEffects = 1 in
+ def STAP : StoreInherentS<"stap", 0xB212, null_frag, 2>;
+
+// Store CPU ID.
+let hasSideEffects = 1 in
+ def STIDP : StoreInherentS<"stidp", 0xB202, null_frag, 8>;
+
+// Store system information.
+let hasSideEffects = 1, Uses = [R0L, R1L], Defs = [R0L, CC] in
+ def STSI : StoreInherentS<"stsi", 0xB27D, null_frag, 0>;
+
+// Store facility list.
+let hasSideEffects = 1 in
+ def STFL : StoreInherentS<"stfl", 0xB2B1, null_frag, 4>;
+
+// Store facility list extended.
+let hasSideEffects = 1, Uses = [R0D], Defs = [R0D, CC] in
+ def STFLE : StoreInherentS<"stfle", 0xB2B0, null_frag, 0>;
+
+// Extract CPU attribute.
+let hasSideEffects = 1 in
+ def ECAG : BinaryRSY<"ecag", 0xEB4C, null_frag, GR64>;
+
+// Extract CPU time.
+let hasSideEffects = 1, mayLoad = 1, Defs = [R0D, R1D] in
+ def ECTG : SideEffectTernarySSF<"ectg", 0xC81, GR64>;
+
+// Perform topology function.
+let hasSideEffects = 1 in
+ def PTF : UnaryTiedRRE<"ptf", 0xB9A2, GR64>;
+
+// Perform cryptographic key management operation.
+let Predicates = [FeatureMessageSecurityAssist3],
+ hasSideEffects = 1, Uses = [R0L, R1D] in
+ def PCKMO : SideEffectInherentRRE<"pckmo", 0xB928>;
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//===----------------------------------------------------------------------===//
+
+// Supervisor call.
+let hasSideEffects = 1, isCall = 1, Defs = [CC] in
+ def SVC : SideEffectUnaryI<"svc", 0x0A, imm32zx8>;
+
+// Monitor call.
+let hasSideEffects = 1, isCall = 1 in
+ def MC : SideEffectBinarySI<"mc", 0xAF, imm32zx8>;
+
+// Diagnose.
+let hasSideEffects = 1, isCall = 1 in
+ def DIAG : SideEffectTernaryRS<"diag", 0x83, GR32, GR32>;
+
+// Trace.
+let hasSideEffects = 1, mayLoad = 1 in {
+ def TRACE : SideEffectTernaryRS<"trace", 0x99, GR32, GR32>;
+ def TRACG : SideEffectTernaryRSY<"tracg", 0xEB0F, GR64, GR64>;
+}
+
+// Trap.
+let hasSideEffects = 1 in {
+ def TRAP2 : SideEffectInherentE<"trap2", 0x01FF>;
+ def TRAP4 : SideEffectAddressS<"trap4", 0xB2FF, null_frag>;
+}
+
+// Signal processor.
+let hasSideEffects = 1, Defs = [CC] in
+ def SIGP : SideEffectTernaryRS<"sigp", 0xAE, GR64, GR64>;
+
+// Signal adapter.
+let hasSideEffects = 1, Uses = [R0D, R1D, R2D, R3D], Defs = [CC] in
+ def SIGA : SideEffectAddressS<"siga", 0xB274, null_frag>;
+
+// Start interpretive execution.
+let hasSideEffects = 1, Defs = [CC] in
+ def SIE : SideEffectUnaryS<"sie", 0xB214, null_frag, 0>;
+
+//===----------------------------------------------------------------------===//
+// CPU-Measurement Facility Instructions (SA23-2260).
+//===----------------------------------------------------------------------===//
+
+// Load program parameter.
+let hasSideEffects = 1 in
+ def LPP : SideEffectUnaryS<"lpp", 0xB280, null_frag, 8>;
+
+// Extract coprocessor-group address.
+let hasSideEffects = 1, Defs = [CC] in
+ def ECPGA : UnaryRRE<"ecpga", 0xB2ED, null_frag, GR32, GR64>;
+
+// Extract CPU counter.
+let hasSideEffects = 1, Defs = [CC] in
+ def ECCTR : UnaryRRE<"ecctr", 0xB2E4, null_frag, GR64, GR64>;
+
+// Extract peripheral counter.
+let hasSideEffects = 1, Defs = [CC] in
+ def EPCTR : UnaryRRE<"epctr", 0xB2E5, null_frag, GR64, GR64>;
+
+// Load CPU-counter-set controls.
+let hasSideEffects = 1, Defs = [CC] in
+ def LCCTL : SideEffectUnaryS<"lcctl", 0xB284, null_frag, 8>;
+
+// Load peripheral-counter-set controls.
+let hasSideEffects = 1, Defs = [CC] in
+ def LPCTL : SideEffectUnaryS<"lpctl", 0xB285, null_frag, 8>;
+
+// Load sampling controls.
+let hasSideEffects = 1, Defs = [CC] in
+ def LSCTL : SideEffectUnaryS<"lsctl", 0xB287, null_frag, 0>;
+
+// Query sampling information.
+let hasSideEffects = 1 in
+ def QSI : StoreInherentS<"qsi", 0xB286, null_frag, 0>;
+
+// Query counter information.
+let hasSideEffects = 1 in
+ def QCTRI : StoreInherentS<"qctri", 0xB28E, null_frag, 0>;
+
+// Set CPU counter.
+let hasSideEffects = 1, Defs = [CC] in
+ def SCCTR : SideEffectBinaryRRE<"scctr", 0xB2E0, GR64, GR64>;
+
+// Set peripheral counter.
+let hasSideEffects = 1, Defs = [CC] in
+ def SPCTR : SideEffectBinaryRRE<"spctr", 0xB2E1, GR64, GR64>;
+
+//===----------------------------------------------------------------------===//
+// I/O Instructions (Principles of Operation, Chapter 14).
+//===----------------------------------------------------------------------===//
+
+// Clear subchannel.
+let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in
+ def CSCH : SideEffectInherentS<"csch", 0xB230, null_frag>;
+
+// Halt subchannel.
+let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in
+ def HSCH : SideEffectInherentS<"hsch", 0xB231, null_frag>;
+
+// Modify subchannel.
+let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in
+ def MSCH : SideEffectUnaryS<"msch", 0xB232, null_frag, 0>;
+
+// Resume subchannel.
+let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in
+ def RSCH : SideEffectInherentS<"rsch", 0xB238, null_frag>;
+
+// Start subchannel.
+let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in
+ def SSCH : SideEffectUnaryS<"ssch", 0xB233, null_frag, 0>;
+
+// Store subchannel.
+let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in
+ def STSCH : StoreInherentS<"stsch", 0xB234, null_frag, 0>;
+
+// Test subchannel.
+let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in
+ def TSCH : StoreInherentS<"tsch", 0xB235, null_frag, 0>;
+
+// Cancel subchannel.
+let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in
+ def XSCH : SideEffectInherentS<"xsch", 0xB276, null_frag>;
+
+// Reset channel path.
+let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in
+ def RCHP : SideEffectInherentS<"rchp", 0xB23B, null_frag>;
+
+// Set channel monitor.
+let hasSideEffects = 1, mayLoad = 1, Uses = [R1L, R2D] in
+ def SCHM : SideEffectInherentS<"schm", 0xB23C, null_frag>;
+
+// Store channel path status.
+let hasSideEffects = 1 in
+ def STCPS : StoreInherentS<"stcps", 0xB23A, null_frag, 0>;
+
+// Store channel report word.
+let hasSideEffects = 1, Defs = [CC] in
+ def STCRW : StoreInherentS<"stcrw", 0xB239, null_frag, 0>;
+
+// Test pending interruption.
+let hasSideEffects = 1, Defs = [CC] in
+ def TPI : StoreInherentS<"tpi", 0xB236, null_frag, 0>;
+
+// Set address limit.
+let hasSideEffects = 1, Uses = [R1L] in
+ def SAL : SideEffectInherentS<"sal", 0xB237, null_frag>;
+
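These privileged definitions are deliberately opaque to the optimizer: hasSideEffects = 1 marks each instruction as having unmodeled effects, and Uses/Defs record the implicit register contract (most of the I/O instructions read the subchannel identifier from R1L and set CC). A minimal sketch of how generic code observes those flags, assuming only the stock MachineInstr query API:

    // C++ sketch (assumption: standard LLVM CodeGen API only).
    #include "llvm/CodeGen/MachineInstr.h"

    // Instructions defined with hasSideEffects = 1, such as CSCH or SIE
    // above, report hasUnmodeledSideEffects() and must not be hoisted,
    // sunk, or deleted by generic transforms.
    static bool isSafeToMove(const llvm::MachineInstr &MI) {
      return !MI.hasUnmodeledSideEffects() && !MI.mayLoad() && !MI.mayStore();
    }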
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td
index 47d2f75cc11a..36809ea81dc1 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -304,3 +304,13 @@ foreach I = 0-15 in {
defm AR32 : SystemZRegClass<"AR32", [i32], 32,
(add (sequence "A%u", 0, 15)), 0>;
+// Control registers.
+class CREG64<bits<16> num, string n> : SystemZReg<n> {
+ let HWEncoding = num;
+}
+foreach I = 0-15 in {
+ def C#I : CREG64<I, "c"#I>, DwarfRegNum<[!add(I, 32)]>;
+}
+defm CR64 : SystemZRegClass<"CR64", [i64], 64,
+ (add (sequence "C%u", 0, 15)), 0>;
+
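The new CR64 class mirrors the access-register block just above it: sixteen 64-bit control registers, with DWARF numbers assigned by !add(I, 32), so c0 maps to 32 and c15 to 47. A hedged sketch of reading that mapping back through the generated tables (the enumerator value for a control register, e.g. SystemZ::C4, is TableGen-emitted and target-internal, so it is passed in here):

    #include "llvm/MC/MCRegisterInfo.h"

    // Sketch: with this patch, control register c4 should yield DWARF
    // register number 36 (4 + 32). The second argument selects the EH
    // variant of the mapping; DwarfRegNum<[...]> defines both the same.
    int dwarfNumForC4(const llvm::MCRegisterInfo &MRI, unsigned C4Enum) {
      return MRI.getDwarfRegNum(C4Enum, /*isEH=*/false);
    }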
diff --git a/lib/Target/SystemZ/SystemZScheduleZ13.td b/lib/Target/SystemZ/SystemZScheduleZ13.td
index 5f5f2f690e58..adc9f2976f87 100644
--- a/lib/Target/SystemZ/SystemZScheduleZ13.td
+++ b/lib/Target/SystemZ/SystemZScheduleZ13.td
@@ -353,6 +353,9 @@ def : InstRW<[FXa], (instregex "ALGF(I|R)$")>;
def : InstRW<[FXa], (instregex "ALGR(K)?$")>;
def : InstRW<[FXa], (instregex "ALR(K)?$")>;
def : InstRW<[FXa], (instregex "AR(K)?$")>;
+def : InstRW<[FXa], (instregex "A(L)?HHHR$")>;
+def : InstRW<[FXa, Lat2], (instregex "A(L)?HHLR$")>;
+def : InstRW<[FXa], (instregex "ALSIH(N)?$")>;
def : InstRW<[FXb, LSU, Lat5], (instregex "A(L)?(G)?SI$")>;
// Logical addition with carry
@@ -376,6 +379,8 @@ def : InstRW<[FXa], (instregex "SLGF(I|R)$")>;
def : InstRW<[FXa], (instregex "SLGR(K)?$")>;
def : InstRW<[FXa], (instregex "SLR(K)?$")>;
def : InstRW<[FXa], (instregex "SR(K)?$")>;
+def : InstRW<[FXa], (instregex "S(L)?HHHR$")>;
+def : InstRW<[FXa, Lat2], (instregex "S(L)?HHLR$")>;
// Subtraction with borrow
def : InstRW<[FXa, LSU, Lat6, GroupAlone], (instregex "SLB(G)?$")>;
@@ -506,6 +511,8 @@ def : InstRW<[FXb], (instregex "CLIH$")>;
def : InstRW<[FXb, LSU, Lat5], (instregex "CLI(Y)?$")>;
def : InstRW<[FXb], (instregex "CLR$")>;
def : InstRW<[FXb, LSU, Lat5], (instregex "CLRL$")>;
+def : InstRW<[FXb], (instregex "C(L)?HHR$")>;
+def : InstRW<[FXb, Lat2], (instregex "C(L)?HLR$")>;
// Compare halfword
def : InstRW<[FXb, LSU, Lat6], (instregex "CH(Y|RL)?$")>;
@@ -701,38 +708,9 @@ def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UPT$")>;
def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CKSM$")>;
def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CMPSC$")>;
-// Move with key
-def : InstRW<[FXa, FXa, FXb, LSU, Lat8, GroupAlone], (instregex "MVCK$")>;
-
-// Monitor call
-def : InstRW<[FXb], (instregex "MC$")>;
-
-// Extract CPU attribute
-def : InstRW<[FXb, Lat30], (instregex "ECAG$")>;
-
-// Extract CPU Time
-def : InstRW<[FXa, Lat5, LSU], (instregex "ECTG$")>;
-
-// Extract PSW
-def : InstRW<[FXb, Lat30], (instregex "EPSW$")>;
-
// Execute
def : InstRW<[FXb, GroupAlone], (instregex "EX(RL)?$")>;
-// Program return
-def : InstRW<[FXb, Lat30], (instregex "PR$")>;
-
-// Inline assembly
-def : InstRW<[LSU, LSU, LSU, FXa, FXa, FXb, Lat9, GroupAlone],
- (instregex "STCK(F)?$")>;
-def : InstRW<[LSU, LSU, LSU, LSU, FXa, FXa, FXb, FXb, Lat11, GroupAlone],
- (instregex "STCKE$")>;
-def : InstRW<[FXa, LSU, Lat5], (instregex "STFLE$")>;
-def : InstRW<[FXb, Lat30], (instregex "SVC$")>;
-
-// Store real address
-def : InstRW<[FXb, LSU, Lat5], (instregex "STRAG$")>;
-
//===----------------------------------------------------------------------===//
// .insn directive instructions
//===----------------------------------------------------------------------===//
@@ -1364,5 +1342,162 @@ def : InstRW<[VecStr, Lat5], (instregex "VSTRC(B|F|H)S$")>;
def : InstRW<[VecStr], (instregex "VSTRCZ(B|F|H)$")>;
def : InstRW<[VecStr, Lat5], (instregex "VSTRCZ(B|F|H)S$")>;
+
+// -------------------------------- System ---------------------------------- //
+
+//===----------------------------------------------------------------------===//
+// System: Program-Status Word Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, Lat30], (instregex "EPSW$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "LPSW(E)?$")>;
+def : InstRW<[FXa, Lat3], (instregex "IPK$")>;
+def : InstRW<[LSU], (instregex "SPKA$")>;
+def : InstRW<[LSU], (instregex "SSM$")>;
+def : InstRW<[FXb], (instregex "ST(N|O)SM$")>;
+def : InstRW<[FXa, Lat3], (instregex "IAC$")>;
+def : InstRW<[LSU], (instregex "SAC(F)?$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Control Register Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, LSU, Lat30], (instregex "LCTL(G)?$")>;
+def : InstRW<[LSU, Lat30], (instregex "STCT(L|G)$")>;
+def : InstRW<[LSU], (instregex "E(P|S)A(I)?R$")>;
+def : InstRW<[FXb, Lat30], (instregex "SSA(I)?R$")>;
+def : InstRW<[FXb, Lat30], (instregex "ESEA$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Prefix-Register Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, LSU, Lat30], (instregex "SPX$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "STPX$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Storage-Key and Real Memory Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, Lat30], (instregex "ISKE$")>;
+def : InstRW<[FXb, Lat30], (instregex "IVSK$")>;
+def : InstRW<[FXb, Lat30], (instregex "SSKE(Opt)?$")>;
+def : InstRW<[FXb, Lat30], (instregex "RRB(E|M)$")>;
+def : InstRW<[FXb, Lat30], (instregex "PFMF$")>;
+def : InstRW<[FXb, Lat30], (instregex "TB$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "PGIN$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "PGOUT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Dynamic-Address-Translation Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, LSU, Lat30], (instregex "IPTE(Opt)?(Opt)?$")>;
+def : InstRW<[FXb, Lat30], (instregex "IDTE(Opt)?$")>;
+def : InstRW<[FXb, Lat30], (instregex "CRDTE(Opt)?$")>;
+def : InstRW<[FXb, Lat30], (instregex "PTLB$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "CSP(G)?$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "LPTEA$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "LRA(Y|G)?$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "STRAG$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "LURA(G)?$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "STUR(A|G)$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "TPROT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Memory-move Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, FXa, FXb, LSU, Lat8, GroupAlone], (instregex "MVC(K|P|S)$")>;
+def : InstRW<[FXa, LSU, Lat6, GroupAlone], (instregex "MVC(S|D)K$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "MVCOS$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "MVPG$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Address-Space Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, LSU, Lat30], (instregex "LASP$")>;
+def : InstRW<[LSU], (instregex "PALB$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "PC$")>;
+def : InstRW<[FXb, Lat30], (instregex "PR$")>;
+def : InstRW<[FXb, Lat30], (instregex "PT(I)?$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "RP$")>;
+def : InstRW<[FXb, Lat30], (instregex "BS(G|A)$")>;
+def : InstRW<[FXb, Lat20], (instregex "TAR$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Linkage-Stack Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, Lat30], (instregex "BAKR$")>;
+def : InstRW<[FXb, Lat30], (instregex "EREG(G)?$")>;
+def : InstRW<[FXb, Lat30], (instregex "(E|M)STA$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Time-Related Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, Lat30], (instregex "PTFF$")>;
+def : InstRW<[FXb, LSU, Lat20], (instregex "SCK$")>;
+def : InstRW<[FXb, Lat30], (instregex "SCKPF$")>;
+def : InstRW<[FXb, LSU, Lat20], (instregex "SCKC$")>;
+def : InstRW<[LSU, GroupAlone], (instregex "SPT$")>;
+def : InstRW<[LSU, LSU, LSU, FXa, FXa, FXb, Lat9, GroupAlone],
+ (instregex "STCK(F)?$")>;
+def : InstRW<[LSU, LSU, LSU, LSU, FXa, FXa, FXb, FXb, Lat11, GroupAlone],
+ (instregex "STCKE$")>;
+def : InstRW<[FXb, LSU, Lat9], (instregex "STCKC$")>;
+def : InstRW<[LSU, LSU, FXb, Lat3], (instregex "STPT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: CPU-Related Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, LSU, Lat30], (instregex "STAP$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "STIDP$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "STSI$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "STFL(E)?$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "ECAG$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "ECTG$")>;
+def : InstRW<[FXb, Lat30], (instregex "PTF$")>;
+def : InstRW<[FXb, Lat30], (instregex "PCKMO$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Miscellaneous Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, Lat30], (instregex "SVC$")>;
+def : InstRW<[FXb], (instregex "MC$")>;
+def : InstRW<[FXb, Lat30], (instregex "DIAG$")>;
+def : InstRW<[FXb], (instregex "TRAC(E|G)$")>;
+def : InstRW<[FXb, Lat30], (instregex "TRAP(2|4)$")>;
+def : InstRW<[FXb, Lat30], (instregex "SIGP$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "SIGA$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "SIE$")>;
+
+//===----------------------------------------------------------------------===//
+// System: CPU-Measurement Facility Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb], (instregex "LPP$")>;
+def : InstRW<[FXb, Lat30], (instregex "ECPGA$")>;
+def : InstRW<[FXb, Lat30], (instregex "E(C|P)CTR$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "L(C|P|S)CTL$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "Q(S|CTR)I$")>;
+def : InstRW<[FXb, Lat30], (instregex "S(C|P)CTR$")>;
+
+//===----------------------------------------------------------------------===//
+// System: I/O Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, Lat30], (instregex "(C|H|R|X)SCH$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "(M|S|ST|T)SCH$")>;
+def : InstRW<[FXb, Lat30], (instregex "RCHP$")>;
+def : InstRW<[FXb, Lat30], (instregex "SCHM$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "STC(PS|RW)$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "TPI$")>;
+def : InstRW<[FXb, Lat30], (instregex "SAL$")>;
+
}
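Nearly every system instruction added to the z13 model lands in the Lat30 bucket, the model's conservative worst-case latency, paired with the issue units (FXb, LSU) the instruction occupies; cycle-accurate timings for privileged operations are not published, so a pessimistic ceiling keeps the scheduler from packing them optimistically. A rough sketch of reading the modeled latency back, using the standard TargetSchedModel interface:

    #include "llvm/CodeGen/TargetSchedule.h"

    // Sketch: TargetSchedModel resolves InstRW entries like the ones
    // above; e.g. a TPI scheduled on z13 would report the Lat30 ceiling.
    unsigned modeledLatency(const llvm::TargetSchedModel &SM,
                            const llvm::MachineInstr *MI) {
      return SM.computeInstrLatency(MI);
    }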
diff --git a/lib/Target/SystemZ/SystemZScheduleZ196.td b/lib/Target/SystemZ/SystemZScheduleZ196.td
index 126eac2e2072..128049a09086 100644
--- a/lib/Target/SystemZ/SystemZScheduleZ196.td
+++ b/lib/Target/SystemZ/SystemZScheduleZ196.td
@@ -310,6 +310,9 @@ def : InstRW<[FXU], (instregex "ALGF(I|R)$")>;
def : InstRW<[FXU], (instregex "ALGR(K)?$")>;
def : InstRW<[FXU], (instregex "ALR(K)?$")>;
def : InstRW<[FXU], (instregex "AR(K)?$")>;
+def : InstRW<[FXU], (instregex "A(L)?HHHR$")>;
+def : InstRW<[FXU, FXU, Lat3], (instregex "A(L)?HHLR$")>;
+def : InstRW<[FXU], (instregex "ALSIH(N)?$")>;
def : InstRW<[FXU, LSU, Lat5], (instregex "A(L)?G(SI)?$")>;
// Logical addition with carry
@@ -333,6 +336,8 @@ def : InstRW<[FXU], (instregex "SLGF(I|R)$")>;
def : InstRW<[FXU], (instregex "SLGR(K)?$")>;
def : InstRW<[FXU], (instregex "SLR(K)?$")>;
def : InstRW<[FXU], (instregex "SR(K)?$")>;
+def : InstRW<[FXU], (instregex "S(L)?HHHR$")>;
+def : InstRW<[FXU, FXU, Lat3], (instregex "S(L)?HHLR$")>;
// Subtraction with borrow
def : InstRW<[FXU, LSU, Lat7, GroupAlone], (instregex "SLB(G)?$")>;
@@ -468,6 +473,8 @@ def : InstRW<[FXU], (instregex "CLIH$")>;
def : InstRW<[FXU, LSU, Lat5], (instregex "CLI(Y)?$")>;
def : InstRW<[FXU], (instregex "CLR$")>;
def : InstRW<[FXU, LSU, Lat5], (instregex "CLRL$")>;
+def : InstRW<[FXU], (instregex "C(L)?HHR$")>;
+def : InstRW<[FXU, FXU, Lat3], (instregex "C(L)?HLR$")>;
// Compare halfword
def : InstRW<[FXU, LSU, FXU, Lat6, GroupAlone], (instregex "CH(Y|RL)?$")>;
@@ -634,37 +641,9 @@ def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UPT$")>;
def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CKSM$")>;
def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CMPSC$")>;
-// Move with key
-def : InstRW<[LSU, Lat8, GroupAlone], (instregex "MVCK$")>;
-
-// Monitor call
-def : InstRW<[FXU], (instregex "MC$")>;
-
-// Extract CPU attribute
-def : InstRW<[FXU, Lat30], (instregex "ECAG$")>;
-
-// Extract CPU Time
-def : InstRW<[FXU, Lat5, LSU], (instregex "ECTG$")>;
-
-// Extract PSW
-def : InstRW<[FXU, Lat30], (instregex "EPSW$")>;
-
// Execute
def : InstRW<[LSU, GroupAlone], (instregex "EX(RL)?$")>;
-// Program return
-def : InstRW<[FXU, Lat30], (instregex "PR$")>;
-
-// Inline assembly
-def : InstRW<[FXU, LSU, Lat15], (instregex "STCK$")>;
-def : InstRW<[FXU, LSU, Lat12], (instregex "STCKF$")>;
-def : InstRW<[LSU, FXU, Lat5], (instregex "STCKE$")>;
-def : InstRW<[FXU, LSU, Lat5], (instregex "STFLE$")>;
-def : InstRW<[FXU, Lat30], (instregex "SVC$")>;
-
-// Store real address
-def : InstRW<[FXU, LSU, Lat5], (instregex "STRAG$")>;
-
//===----------------------------------------------------------------------===//
// .insn directive instructions
//===----------------------------------------------------------------------===//
@@ -1058,5 +1037,160 @@ def : InstRW<[DFU, Lat9], (instregex "CEXTR$")>;
def : InstRW<[LSU, DFU, Lat15], (instregex "TD(C|G)(E|D)T$")>;
def : InstRW<[LSU, DFU2, DFU2, Lat15, GroupAlone], (instregex "TD(C|G)XT$")>;
+
+// -------------------------------- System ---------------------------------- //
+
+//===----------------------------------------------------------------------===//
+// System: Program-Status Word Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30], (instregex "EPSW$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "LPSW(E)?$")>;
+def : InstRW<[FXU, Lat3], (instregex "IPK$")>;
+def : InstRW<[LSU], (instregex "SPKA$")>;
+def : InstRW<[LSU], (instregex "SSM$")>;
+def : InstRW<[FXU], (instregex "ST(N|O)SM$")>;
+def : InstRW<[FXU, Lat3], (instregex "IAC$")>;
+def : InstRW<[LSU], (instregex "SAC(F)?$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Control Register Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "LCTL(G)?$")>;
+def : InstRW<[LSU, Lat30], (instregex "STCT(L|G)$")>;
+def : InstRW<[LSU], (instregex "E(P|S)A(I)?R$")>;
+def : InstRW<[FXU, Lat30], (instregex "SSA(I)?R$")>;
+def : InstRW<[FXU, Lat30], (instregex "ESEA$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Prefix-Register Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "SPX$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STPX$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Storage-Key and Real Memory Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30], (instregex "ISKE$")>;
+def : InstRW<[FXU, Lat30], (instregex "IVSK$")>;
+def : InstRW<[FXU, Lat30], (instregex "SSKE(Opt)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "RRB(E|M)$")>;
+def : InstRW<[FXU, Lat30], (instregex "PFMF$")>;
+def : InstRW<[FXU, Lat30], (instregex "TB$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "PGIN$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "PGOUT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Dynamic-Address-Translation Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "IPTE(Opt)?(Opt)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "IDTE(Opt)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "PTLB$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "CSP(G)?$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "LPTEA$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "LRA(Y|G)?$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STRAG$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "LURA(G)?$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STUR(A|G)$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "TPROT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Memory-move Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSU, Lat8, GroupAlone], (instregex "MVC(K|P|S)$")>;
+def : InstRW<[LSU, Lat6, GroupAlone], (instregex "MVC(S|D)K$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "MVCOS$")>;
+def : InstRW<[LSU, Lat30], (instregex "MVPG$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Address-Space Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "LASP$")>;
+def : InstRW<[LSU], (instregex "PALB$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "PC$")>;
+def : InstRW<[FXU, Lat30], (instregex "PR$")>;
+def : InstRW<[FXU, Lat30], (instregex "PT(I)?$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "RP$")>;
+def : InstRW<[FXU, Lat30], (instregex "BS(G|A)$")>;
+def : InstRW<[FXU, Lat20], (instregex "TAR$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Linkage-Stack Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "BAKR$")>;
+def : InstRW<[FXU, Lat30], (instregex "EREG(G)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "(E|M)STA$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Time-Related Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30], (instregex "PTFF$")>;
+def : InstRW<[FXU, LSU, Lat20], (instregex "SCK$")>;
+def : InstRW<[FXU, Lat30], (instregex "SCKPF$")>;
+def : InstRW<[FXU, LSU, Lat20], (instregex "SCKC$")>;
+def : InstRW<[FXU, LSU, Lat20], (instregex "SPT$")>;
+def : InstRW<[FXU, LSU, Lat15], (instregex "STCK$")>;
+def : InstRW<[FXU, LSU, Lat12], (instregex "STCKF$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STCKE$")>;
+def : InstRW<[FXU, LSU, Lat9], (instregex "STCKC$")>;
+def : InstRW<[FXU, LSU, Lat8], (instregex "STPT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: CPU-Related Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "STAP$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STIDP$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STSI$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STFL(E)?$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "ECAG$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "ECTG$")>;
+def : InstRW<[FXU, Lat30], (instregex "PTF$")>;
+def : InstRW<[FXU, Lat30], (instregex "PCKMO$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Miscellaneous Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30], (instregex "SVC$")>;
+def : InstRW<[FXU], (instregex "MC$")>;
+def : InstRW<[FXU, Lat30], (instregex "DIAG$")>;
+def : InstRW<[FXU], (instregex "TRAC(E|G)$")>;
+def : InstRW<[FXU, Lat30], (instregex "TRAP(2|4)$")>;
+def : InstRW<[FXU, Lat30], (instregex "SIGP$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "SIGA$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "SIE$")>;
+
+//===----------------------------------------------------------------------===//
+// System: CPU-Measurement Facility Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU], (instregex "LPP$")>;
+def : InstRW<[FXU, Lat30], (instregex "ECPGA$")>;
+def : InstRW<[FXU, Lat30], (instregex "E(C|P)CTR$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "L(C|P|S)CTL$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "Q(S|CTR)I$")>;
+def : InstRW<[FXU, Lat30], (instregex "S(C|P)CTR$")>;
+
+//===----------------------------------------------------------------------===//
+// System: I/O Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30], (instregex "(C|H|R|X)SCH$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "(M|S|ST|T)SCH$")>;
+def : InstRW<[FXU, Lat30], (instregex "RCHP$")>;
+def : InstRW<[FXU, Lat30], (instregex "SCHM$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STC(PS|RW)$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "TPI$")>;
+def : InstRW<[FXU, Lat30], (instregex "SAL$")>;
+
}
diff --git a/lib/Target/SystemZ/SystemZScheduleZEC12.td b/lib/Target/SystemZ/SystemZScheduleZEC12.td
index d38ca64d2e9b..76b378454631 100644
--- a/lib/Target/SystemZ/SystemZScheduleZEC12.td
+++ b/lib/Target/SystemZ/SystemZScheduleZEC12.td
@@ -320,6 +320,9 @@ def : InstRW<[FXU], (instregex "ALGF(I|R)$")>;
def : InstRW<[FXU], (instregex "ALGR(K)?$")>;
def : InstRW<[FXU], (instregex "ALR(K)?$")>;
def : InstRW<[FXU], (instregex "AR(K)?$")>;
+def : InstRW<[FXU], (instregex "A(L)?HHHR$")>;
+def : InstRW<[FXU, Lat2], (instregex "A(L)?HHLR$")>;
+def : InstRW<[FXU], (instregex "ALSIH(N)?$")>;
def : InstRW<[FXU, LSU, Lat5], (instregex "A(L)?G(SI)?$")>;
// Logical addition with carry
@@ -343,6 +346,8 @@ def : InstRW<[FXU], (instregex "SLGF(I|R)$")>;
def : InstRW<[FXU], (instregex "SLGR(K)?$")>;
def : InstRW<[FXU], (instregex "SLR(K)?$")>;
def : InstRW<[FXU], (instregex "SR(K)?$")>;
+def : InstRW<[FXU], (instregex "S(L)?HHHR$")>;
+def : InstRW<[FXU, Lat2], (instregex "S(L)?HHLR$")>;
// Subtraction with borrow
def : InstRW<[FXU, LSU, Lat7, GroupAlone], (instregex "SLB(G)?$")>;
@@ -478,6 +483,8 @@ def : InstRW<[FXU], (instregex "CLIH$")>;
def : InstRW<[FXU, LSU, Lat5], (instregex "CLI(Y)?$")>;
def : InstRW<[FXU], (instregex "CLR$")>;
def : InstRW<[FXU, LSU, Lat5], (instregex "CLRL$")>;
+def : InstRW<[FXU], (instregex "C(L)?HHR$")>;
+def : InstRW<[FXU, Lat2], (instregex "C(L)?HLR$")>;
// Compare halfword
def : InstRW<[FXU, LSU, Lat6], (instregex "CH(Y|RL)?$")>;
@@ -672,37 +679,9 @@ def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UPT$")>;
def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CKSM$")>;
def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CMPSC$")>;
-// Move with key
-def : InstRW<[LSU, Lat8, GroupAlone], (instregex "MVCK$")>;
-
-// Monitor call
-def : InstRW<[FXU], (instregex "MC$")>;
-
-// Extract CPU attribute
-def : InstRW<[FXU, Lat30], (instregex "ECAG$")>;
-
-// Extract CPU Time
-def : InstRW<[FXU, Lat5, LSU], (instregex "ECTG$")>;
-
-// Extract PSW
-def : InstRW<[FXU, Lat30], (instregex "EPSW$")>;
-
// Execute
def : InstRW<[LSU, GroupAlone], (instregex "EX(RL)?$")>;
-// Program return
-def : InstRW<[FXU, Lat30], (instregex "PR$")>;
-
-// Inline assembly
-def : InstRW<[FXU, LSU, LSU, Lat9, GroupAlone], (instregex "STCK(F)?$")>;
-def : InstRW<[LSU, LSU, LSU, LSU, FXU, FXU, Lat20, GroupAlone],
- (instregex "STCKE$")>;
-def : InstRW<[FXU, LSU, Lat5], (instregex "STFLE$")>;
-def : InstRW<[FXU, Lat30], (instregex "SVC$")>;
-
-// Store real address
-def : InstRW<[FXU, LSU, Lat5], (instregex "STRAG$")>;
-
//===----------------------------------------------------------------------===//
// .insn directive instructions
//===----------------------------------------------------------------------===//
@@ -1102,5 +1081,161 @@ def : InstRW<[DFU, Lat9], (instregex "CEXTR$")>;
def : InstRW<[LSU, DFU, Lat15], (instregex "TD(C|G)(E|D)T$")>;
def : InstRW<[LSU, DFU2, DFU2, Lat15, GroupAlone], (instregex "TD(C|G)XT$")>;
+
+// -------------------------------- System ---------------------------------- //
+
+//===----------------------------------------------------------------------===//
+// System: Program-Status Word Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30], (instregex "EPSW$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "LPSW(E)?$")>;
+def : InstRW<[FXU, Lat3], (instregex "IPK$")>;
+def : InstRW<[LSU], (instregex "SPKA$")>;
+def : InstRW<[LSU], (instregex "SSM$")>;
+def : InstRW<[FXU], (instregex "ST(N|O)SM$")>;
+def : InstRW<[FXU, Lat3], (instregex "IAC$")>;
+def : InstRW<[LSU], (instregex "SAC(F)?$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Control Register Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "LCTL(G)?$")>;
+def : InstRW<[LSU, Lat30], (instregex "STCT(L|G)$")>;
+def : InstRW<[LSU], (instregex "E(P|S)A(I)?R$")>;
+def : InstRW<[FXU, Lat30], (instregex "SSA(I)?R$")>;
+def : InstRW<[FXU, Lat30], (instregex "ESEA$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Prefix-Register Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "SPX$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STPX$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Storage-Key and Real Memory Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30], (instregex "ISKE$")>;
+def : InstRW<[FXU, Lat30], (instregex "IVSK$")>;
+def : InstRW<[FXU, Lat30], (instregex "SSKE(Opt)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "RRB(E|M)$")>;
+def : InstRW<[FXU, Lat30], (instregex "PFMF$")>;
+def : InstRW<[FXU, Lat30], (instregex "TB$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "PGIN$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "PGOUT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Dynamic-Address-Translation Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "IPTE(Opt)?(Opt)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "IDTE(Opt)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "CRDTE(Opt)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "PTLB$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "CSP(G)?$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "LPTEA$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "LRA(Y|G)?$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STRAG$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "LURA(G)?$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STUR(A|G)$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "TPROT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Memory-move Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSU, Lat8, GroupAlone], (instregex "MVC(K|P|S)$")>;
+def : InstRW<[LSU, Lat6, GroupAlone], (instregex "MVC(S|D)K$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "MVCOS$")>;
+def : InstRW<[LSU, Lat30], (instregex "MVPG$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Address-Space Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "LASP$")>;
+def : InstRW<[LSU], (instregex "PALB$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "PC$")>;
+def : InstRW<[FXU, Lat30], (instregex "PR$")>;
+def : InstRW<[FXU, Lat30], (instregex "PT(I)?$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "RP$")>;
+def : InstRW<[FXU, Lat30], (instregex "BS(G|A)$")>;
+def : InstRW<[FXU, Lat20], (instregex "TAR$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Linkage-Stack Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "BAKR$")>;
+def : InstRW<[FXU, Lat30], (instregex "EREG(G)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "(E|M)STA$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Time-Related Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30], (instregex "PTFF$")>;
+def : InstRW<[FXU, LSU, Lat20], (instregex "SCK$")>;
+def : InstRW<[FXU, Lat30], (instregex "SCKPF$")>;
+def : InstRW<[FXU, LSU, Lat20], (instregex "SCKC$")>;
+def : InstRW<[FXU, LSU, Lat20], (instregex "SPT$")>;
+def : InstRW<[FXU, LSU, LSU, Lat9, GroupAlone], (instregex "STCK(F)?$")>;
+def : InstRW<[LSU, LSU, LSU, LSU, FXU, FXU, Lat20, GroupAlone],
+ (instregex "STCKE$")>;
+def : InstRW<[FXU, LSU, Lat9], (instregex "STCKC$")>;
+def : InstRW<[FXU, LSU, Lat8], (instregex "STPT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: CPU-Related Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat30], (instregex "STAP$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STIDP$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STSI$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STFL(E)?$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "ECAG$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "ECTG$")>;
+def : InstRW<[FXU, Lat30], (instregex "PTF$")>;
+def : InstRW<[FXU, Lat30], (instregex "PCKMO$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Miscellaneous Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30], (instregex "SVC$")>;
+def : InstRW<[FXU], (instregex "MC$")>;
+def : InstRW<[FXU, Lat30], (instregex "DIAG$")>;
+def : InstRW<[FXU], (instregex "TRAC(E|G)$")>;
+def : InstRW<[FXU, Lat30], (instregex "TRAP(2|4)$")>;
+def : InstRW<[FXU, Lat30], (instregex "SIGP$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "SIGA$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "SIE$")>;
+
+//===----------------------------------------------------------------------===//
+// System: CPU-Measurement Facility Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU], (instregex "LPP$")>;
+def : InstRW<[FXU, Lat30], (instregex "ECPGA$")>;
+def : InstRW<[FXU, Lat30], (instregex "E(C|P)CTR$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "L(C|P|S)CTL$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "Q(S|CTR)I$")>;
+def : InstRW<[FXU, Lat30], (instregex "S(C|P)CTR$")>;
+
+//===----------------------------------------------------------------------===//
+// System: I/O Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat30], (instregex "(C|H|R|X)SCH$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "(M|S|ST|T)SCH$")>;
+def : InstRW<[FXU, Lat30], (instregex "RCHP$")>;
+def : InstRW<[FXU, Lat30], (instregex "SCHM$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "STC(PS|RW)$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "TPI$")>;
+def : InstRW<[FXU, Lat30], (instregex "SAL$")>;
+
}
diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp
index 0ab0c2f25915..eb4a0962f7eb 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -37,12 +37,13 @@ SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU,
const TargetMachine &TM)
: SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false),
HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false),
- HasPopulationCount(false), HasMessageSecurityAssist4(false),
+ HasPopulationCount(false), HasMessageSecurityAssist3(false),
+ HasMessageSecurityAssist4(false), HasResetReferenceBitsMultiple(false),
HasFastSerialization(false), HasInterlockedAccess1(false),
HasMiscellaneousExtensions(false),
HasExecutionHint(false), HasLoadAndTrap(false),
HasTransactionalExecution(false), HasProcessorAssist(false),
- HasDFPZonedConversion(false),
+ HasDFPZonedConversion(false), HasEnhancedDAT2(false),
HasVector(false), HasLoadStoreOnCond2(false),
HasLoadAndZeroRightmostByte(false), HasMessageSecurityAssist5(false),
HasDFPPackedConversion(false),
diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h
index be480f03c572..b05a1bb6cafd 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/lib/Target/SystemZ/SystemZSubtarget.h
@@ -39,7 +39,9 @@ protected:
bool HasHighWord;
bool HasFPExtension;
bool HasPopulationCount;
+ bool HasMessageSecurityAssist3;
bool HasMessageSecurityAssist4;
+ bool HasResetReferenceBitsMultiple;
bool HasFastSerialization;
bool HasInterlockedAccess1;
bool HasMiscellaneousExtensions;
@@ -48,6 +50,7 @@ protected:
bool HasTransactionalExecution;
bool HasProcessorAssist;
bool HasDFPZonedConversion;
+ bool HasEnhancedDAT2;
bool HasVector;
bool HasLoadStoreOnCond2;
bool HasLoadAndZeroRightmostByte;
@@ -109,9 +112,18 @@ public:
bool hasPopulationCount() const { return HasPopulationCount; }
// Return true if the target has the message-security-assist
+ // extension facility 3.
+ bool hasMessageSecurityAssist3() const { return HasMessageSecurityAssist3; }
+
+ // Return true if the target has the message-security-assist
// extension facility 4.
bool hasMessageSecurityAssist4() const { return HasMessageSecurityAssist4; }
+ // Return true if the target has the reset-reference-bits-multiple facility.
+ bool hasResetReferenceBitsMultiple() const {
+ return HasResetReferenceBitsMultiple;
+ }
+
// Return true if the target has the fast-serialization facility.
bool hasFastSerialization() const { return HasFastSerialization; }
@@ -138,6 +150,9 @@ public:
// Return true if the target has the DFP zoned-conversion facility.
bool hasDFPZonedConversion() const { return HasDFPZonedConversion; }
+ // Return true if the target has the enhanced-DAT facility 2.
+ bool hasEnhancedDAT2() const { return HasEnhancedDAT2; }
+
// Return true if the target has the load-and-zero-rightmost-byte facility.
bool hasLoadAndZeroRightmostByte() const {
return HasLoadAndZeroRightmostByte;
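The new facility bits follow the established pattern: the generated ParseSubtargetFeatures fills in the booleans from the feature string, and the inline predicates gate feature-dependent selection. A hedged example of the usual call-site shape (the surrounding lowering code and helpers are hypothetical):

    // Hypothetical call site: only use the MSA3-dependent sequence when
    // the subtarget advertises the facility; otherwise fall back.
    if (Subtarget.hasMessageSecurityAssist3())
      emitMSA3Sequence();   // hypothetical helper
    else
      emitFallback();       // hypothetical helper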
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 422c16b8eb62..ce5c57e0f519 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -238,7 +238,7 @@ SystemZTTIImpl::getPopcntSupport(unsigned TyWidth) {
return TTI::PSK_Software;
}
-void SystemZTTIImpl::getUnrollingPreferences(Loop *L,
+void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
// Find out if L contains a call, what the machine instruction count
// estimate is, and how many stores there are.
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index bdba7601eb78..6923fc6fc910 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -45,7 +45,8 @@ public:
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
- void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
+ void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP);
/// @}
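Threading ScalarEvolution through getUnrollingPreferences lets the target heuristics consult trip-count facts instead of re-deriving them from the IR. A hedged sketch of what the extra parameter enables (the threshold of 8 is illustrative, not SystemZ's actual policy):

    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/Analysis/ScalarEvolution.h"
    #include "llvm/Analysis/TargetTransformInfo.h"
    using namespace llvm;

    // Sketch: prefer fully unrolling short loops whose trip count SCEV
    // knows exactly; getSmallConstantTripCount returns 0 when unknown.
    static void tuneUnrolling(Loop *L, ScalarEvolution &SE,
                              TargetTransformInfo::UnrollingPreferences &UP) {
      if (unsigned TC = SE.getSmallConstantTripCount(L))
        if (TC <= 8)
          UP.Count = TC;
    }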
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/lib/Target/WebAssembly/WebAssemblyInstrControl.td
index 39cb1ca336f2..129794171464 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrControl.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrControl.td
@@ -57,17 +57,19 @@ def BR_TABLE_I64 : I<(outs), (ins I64:$index, variable_ops),
}
} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1
-// Placemarkers to indicate the start or end of a block or loop scope. These
-// use/clobber VALUE_STACK to prevent them from being moved into the middle of
-// an expression tree.
+// Placemarkers to indicate the start or end of a block, loop, or try scope.
+// These use/clobber VALUE_STACK to prevent them from being moved into the
+// middle of an expression tree.
let Uses = [VALUE_STACK], Defs = [VALUE_STACK] in {
def BLOCK : I<(outs), (ins Signature:$sig), [], "block \t$sig", 0x02>;
def LOOP : I<(outs), (ins Signature:$sig), [], "loop \t$sig", 0x03>;
+def TRY : I<(outs), (ins Signature:$sig), [], "try \t$sig", 0x06>;
-// END_BLOCK, END_LOOP, and END_FUNCTION are represented with the same opcode
-// in wasm.
+// END_BLOCK, END_LOOP, END_TRY, and END_FUNCTION are represented with the same
+// opcode in wasm.
def END_BLOCK : I<(outs), (ins), [], "end_block", 0x0b>;
def END_LOOP : I<(outs), (ins), [], "end_loop", 0x0b>;
+def END_TRY : I<(outs), (ins), [], "end_try", 0x0b>;
let isTerminator = 1, isBarrier = 1 in
def END_FUNCTION : I<(outs), (ins), [], "end_function", 0x0b>;
} // Uses = [VALUE_STACK], Defs = [VALUE_STACK]
@@ -112,6 +114,20 @@ let isReturn = 1 in {
def UNREACHABLE : I<(outs), (ins), [(trap)], "unreachable", 0x00>;
+def THROW_I32 : I<(outs), (ins i32imm:$tag, I32:$obj),
+ [(int_wasm_throw imm:$tag, I32:$obj)], "throw \t$tag, $obj",
+ 0x08>;
+def THROW_I64 : I<(outs), (ins i32imm:$tag, I64:$obj),
+ [(int_wasm_throw imm:$tag, I64:$obj)], "throw \t$tag, $obj",
+ 0x08>;
+def RETHROW : I<(outs), (ins i32imm:$rel_depth), [], "rethrow \t$rel_depth",
+ 0x09>;
+
} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1
} // Defs = [ARGUMENTS]
+
+// rethrow takes a relative depth as an argument; for C++, only a depth of 0
+// is currently possible. Once other languages need depths other than 0, the
+// depths will be computed in CFGStackify.
+def : Pat<(int_wasm_rethrow), (RETHROW 0)>;
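The pattern hard-codes depth 0 because, as the comment says, C++ exception handling only ever rethrows the innermost exception; nonzero depths wait on CFGStackify support. A hedged sketch of the front-end side this pattern serves, emitting the intrinsic that selection turns into "rethrow 0":

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"

    // Sketch: emit a call to @llvm.wasm.rethrow(); the Pat above then
    // selects RETHROW with an immediate relative depth of 0.
    void emitRethrow(llvm::Module &M, llvm::IRBuilder<> &B) {
      llvm::Function *F =
          llvm::Intrinsic::getDeclaration(&M, llvm::Intrinsic::wasm_rethrow);
      B.CreateCall(F);
    }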
diff --git a/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
index 947c0329bb6e..f0b6a3e35dba 100644
--- a/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
@@ -897,7 +897,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
}
}
- // Look for orphan landingpads, can occur in blocks with no predecesors
+ // Look for orphan landingpads, which can occur in blocks with no predecessors
for (BasicBlock &BB : F) {
Instruction *I = BB.getFirstNonPHI();
if (auto *LPI = dyn_cast<LandingPadInst>(I))
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index d30cc724c203..825f23dc52d9 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -49,8 +49,11 @@ static const char OpPrecedence[] = {
4, // IC_MINUS
5, // IC_MULTIPLY
5, // IC_DIVIDE
- 6, // IC_RPAREN
- 7, // IC_LPAREN
+ 5, // IC_MOD
+ 6, // IC_NOT
+ 7, // IC_NEG
+ 8, // IC_RPAREN
+ 9, // IC_LPAREN
0, // IC_IMM
0 // IC_REGISTER
};
@@ -92,6 +95,9 @@ private:
IC_MINUS,
IC_MULTIPLY,
IC_DIVIDE,
+ IC_MOD,
+ IC_NOT,
+ IC_NEG,
IC_RPAREN,
IC_LPAREN,
IC_IMM,
@@ -111,6 +117,10 @@ private:
SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
SmallVector<ICToken, 4> PostfixStack;
+ bool isUnaryOperator(const InfixCalculatorTok Op) {
+ return Op == IC_NEG || Op == IC_NOT;
+ }
+
public:
int64_t popOperand() {
assert (!PostfixStack.empty() && "Popped an empty stack!");
@@ -192,6 +202,22 @@ private:
ICToken Op = PostfixStack[i];
if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
OperandStack.push_back(Op);
+ } else if (isUnaryOperator(Op.first)) {
+ assert (OperandStack.size() > 0 && "Too few operands.");
+ ICToken Operand = OperandStack.pop_back_val();
+ assert (Operand.first == IC_IMM &&
+ "Unary operation with a register!");
+ switch (Op.first) {
+ default:
+ report_fatal_error("Unexpected operator!");
+ break;
+ case IC_NEG:
+ OperandStack.push_back(std::make_pair(IC_IMM, -Operand.second));
+ break;
+ case IC_NOT:
+ OperandStack.push_back(std::make_pair(IC_IMM, ~Operand.second));
+ break;
+ }
} else {
assert (OperandStack.size() > 1 && "Too few operands.");
int64_t Val;
@@ -222,6 +248,12 @@ private:
Val = Op1.second / Op2.second;
OperandStack.push_back(std::make_pair(IC_IMM, Val));
break;
+ case IC_MOD:
+ assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
+ "Modulo operation with an immediate and a register!");
+ Val = Op1.second % Op2.second;
+ OperandStack.push_back(std::make_pair(IC_IMM, Val));
+ break;
case IC_OR:
assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
"Or operation with an immediate and a register!");
@@ -271,6 +303,7 @@ private:
IES_NOT,
IES_MULTIPLY,
IES_DIVIDE,
+ IES_MOD,
IES_LBRAC,
IES_RBRAC,
IES_LPAREN,
@@ -421,10 +454,16 @@ private:
default:
State = IES_ERROR;
break;
+ case IES_OR:
+ case IES_XOR:
+ case IES_AND:
+ case IES_LSHIFT:
+ case IES_RSHIFT:
case IES_PLUS:
case IES_NOT:
case IES_MULTIPLY:
case IES_DIVIDE:
+ case IES_MOD:
case IES_LPAREN:
case IES_RPAREN:
case IES_LBRAC:
@@ -432,11 +471,12 @@ private:
case IES_INTEGER:
case IES_REGISTER:
State = IES_MINUS;
- // Only push the minus operator if it is not a unary operator.
- if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
- CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
- CurrState == IES_LPAREN || CurrState == IES_LBRAC))
+ // Push the minus operator if this is a binary minus; otherwise push negate.
+ if (CurrState == IES_REGISTER || CurrState == IES_RPAREN ||
+ CurrState == IES_INTEGER || CurrState == IES_RBRAC)
IC.pushOperator(IC_MINUS);
+ else
+ IC.pushOperator(IC_NEG);
if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
// If we already have a BaseReg, then assume this is the IndexReg with
// a scale of 1.
@@ -458,9 +498,21 @@ private:
default:
State = IES_ERROR;
break;
+ case IES_OR:
+ case IES_XOR:
+ case IES_AND:
+ case IES_LSHIFT:
+ case IES_RSHIFT:
case IES_PLUS:
+ case IES_MINUS:
case IES_NOT:
+ case IES_MULTIPLY:
+ case IES_DIVIDE:
+ case IES_MOD:
+ case IES_LPAREN:
+ case IES_LBRAC:
State = IES_NOT;
+ IC.pushOperator(IC_NOT);
break;
}
PrevState = CurrState;
@@ -525,6 +577,7 @@ private:
case IES_LSHIFT:
case IES_RSHIFT:
case IES_DIVIDE:
+ case IES_MOD:
case IES_MULTIPLY:
case IES_LPAREN:
State = IES_INTEGER;
@@ -539,26 +592,6 @@ private:
}
// Get the scale and replace the 'Register * Scale' with '0'.
IC.popOperator();
- } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
- PrevState == IES_OR || PrevState == IES_AND ||
- PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
- PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
- PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
- PrevState == IES_NOT || PrevState == IES_XOR) &&
- CurrState == IES_MINUS) {
- // Unary minus. No need to pop the minus operand because it was never
- // pushed.
- IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
- } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
- PrevState == IES_OR || PrevState == IES_AND ||
- PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
- PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
- PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
- PrevState == IES_NOT || PrevState == IES_XOR) &&
- CurrState == IES_NOT) {
- // Unary not. No need to pop the not operand because it was never
- // pushed.
- IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm.
} else {
IC.pushOperand(IC_IMM, TmpInt);
}
@@ -594,6 +627,19 @@ private:
break;
}
}
+ void onMod() {
+ PrevState = State;
+ switch (State) {
+ default:
+ State = IES_ERROR;
+ break;
+ case IES_INTEGER:
+ case IES_RPAREN:
+ State = IES_MOD;
+ IC.pushOperator(IC_MOD);
+ break;
+ }
+ }
void onLBrac() {
PrevState = State;
switch (State) {
@@ -647,18 +693,8 @@ private:
case IES_RSHIFT:
case IES_MULTIPLY:
case IES_DIVIDE:
+ case IES_MOD:
case IES_LPAREN:
- // FIXME: We don't handle this type of unary minus or not, yet.
- if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
- PrevState == IES_OR || PrevState == IES_AND ||
- PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
- PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
- PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
- PrevState == IES_NOT || PrevState == IES_XOR) &&
- (CurrState == IES_MINUS || CurrState == IES_NOT)) {
- State = IES_ERROR;
- break;
- }
State = IES_LPAREN;
IC.pushOperator(IC_LPAREN);
break;
@@ -1302,6 +1338,8 @@ bool X86AsmParser::ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine
SM.onXor();
else if (Name.equals_lower("and"))
SM.onAnd();
+ else if (Name.equals_lower("mod"))
+ SM.onMod();
else
return false;
return true;
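The Intel-syntax expression machinery works in two phases: the state machine converts the token stream to postfix form using the OpPrecedence table at the top of the file (MOD binds like * and /, while the new unary NOT and NEG entries bind tighter than any binary operator), and the calculator then evaluates the postfix stack, where a unary operator pops one immediate instead of two. A self-contained sketch of that evaluation step, independent of the LLVM classes:

    #include <cassert>
    #include <cstdint>
    #include <utility>
    #include <vector>

    // Sketch of the postfix evaluation: binary operators pop two
    // immediates, the unary NEG/NOT pop exactly one, mirroring the
    // isUnaryOperator() special case added above.
    enum Tok { IMM, NEG, NOT, MUL, MOD };

    int64_t evalPostfix(const std::vector<std::pair<Tok, int64_t>> &P) {
      std::vector<int64_t> S;
      for (const auto &T : P) {
        switch (T.first) {
        case IMM: S.push_back(T.second); break;
        case NEG: assert(!S.empty()); S.back() = -S.back(); break;
        case NOT: assert(!S.empty()); S.back() = ~S.back(); break;
        case MUL:
        case MOD: {
          assert(S.size() >= 2);
          int64_t R = S.back(); S.pop_back();
          int64_t L = S.back(); S.pop_back();
          S.push_back(T.first == MUL ? L * R : L % R);
          break;
        }
        }
      }
      assert(S.size() == 1);
      return S.back();
    }

    // "not 2 mod 3" becomes {IMM 2, NOT, IMM 3, MOD}: NOT outranks MOD
    // in the precedence table, so the expression reads (~2) mod 3.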
diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
index caf98bffb80d..8f2017e990c5 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -396,7 +396,7 @@ bool X86MachObjectWriter::recordScatteredRelocation(MachObjectWriter *Writer,
if (!SB->getFragment()) {
Asm.getContext().reportError(
Fixup.getLoc(),
- "symbol '" + B->getSymbol().getName() +
+ "symbol '" + SB->getName() +
"' can not be undefined in a subtraction expression");
return false;
}
@@ -408,7 +408,7 @@ bool X86MachObjectWriter::recordScatteredRelocation(MachObjectWriter *Writer,
// pedantic compatibility with 'as'.
Type = A->isExternal() ? (unsigned)MachO::GENERIC_RELOC_SECTDIFF
: (unsigned)MachO::GENERIC_RELOC_LOCAL_SECTDIFF;
- Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout);
+ Value2 = Writer->getSymbolAddress(*SB, Layout);
FixedValue -= Writer->getSectionAddress(SB->getFragment()->getParent());
}
@@ -468,8 +468,8 @@ void X86MachObjectWriter::recordTLVPRelocation(MachObjectWriter *Writer,
const MCFixup &Fixup,
MCValue Target,
uint64_t &FixedValue) {
- assert(Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP &&
- !is64Bit() &&
+ const MCSymbolRefExpr *SymA = Target.getSymA();
+ assert(SymA->getKind() == MCSymbolRefExpr::VK_TLVP && !is64Bit() &&
"Should only be called with a 32-bit TLVP relocation!");
unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
@@ -480,15 +480,14 @@ void X86MachObjectWriter::recordTLVPRelocation(MachObjectWriter *Writer,
// subtraction from the picbase. For 32-bit pic the addend is the difference
// between the picbase and the next address. For 32-bit static the addend is
// zero.
- if (Target.getSymB()) {
+ if (auto *SymB = Target.getSymB()) {
// If this is a subtraction then we're pcrel.
uint32_t FixupAddress =
Writer->getFragmentAddress(Fragment, Layout) + Fixup.getOffset();
IsPCRel = 1;
- FixedValue =
- FixupAddress -
- Writer->getSymbolAddress(Target.getSymB()->getSymbol(), Layout) +
- Target.getConstant();
+ FixedValue = FixupAddress -
+ Writer->getSymbolAddress(SymB->getSymbol(), Layout) +
+ Target.getConstant();
FixedValue += 1ULL << Log2Size;
} else {
FixedValue = 0;
@@ -499,8 +498,7 @@ void X86MachObjectWriter::recordTLVPRelocation(MachObjectWriter *Writer,
MRE.r_word0 = Value;
MRE.r_word1 =
(IsPCRel << 24) | (Log2Size << 25) | (MachO::GENERIC_RELOC_TLV << 28);
- Writer->addRelocation(&Target.getSymA()->getSymbol(), Fragment->getParent(),
- MRE);
+ Writer->addRelocation(&SymA->getSymbol(), Fragment->getParent(), MRE);
}
void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
index 5892f1de33ee..807f7a6ddb19 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
@@ -44,7 +44,7 @@ unsigned X86WinCOFFObjectWriter::getRelocType(MCContext &Ctx,
const MCAsmBackend &MAB) const {
unsigned FixupKind = Fixup.getKind();
if (IsCrossSection) {
- if (FixupKind != FK_Data_4) {
+ if (FixupKind != FK_Data_4 && FixupKind != llvm::X86::reloc_signed_4byte) {
Ctx.reportError(Fixup.getLoc(), "Cannot represent this expression");
return COFF::IMAGE_REL_AMD64_ADDR32;
}
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index fe105298f5c1..7437ebacfac3 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -300,6 +300,8 @@ def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom",
"Intel Atom processors">;
def ProcIntelSLM : SubtargetFeature<"slm", "X86ProcFamily", "IntelSLM",
"Intel Silvermont processors">;
+def ProcIntelGLM : SubtargetFeature<"glm", "X86ProcFamily", "IntelGLM",
+ "Intel Goldmont processors">;
class Proc<string Name, list<SubtargetFeature> Features>
: ProcessorModel<Name, GenericModel, Features>;
@@ -430,6 +432,34 @@ class SilvermontProc<string Name> : ProcessorModel<Name, SLMModel, [
def : SilvermontProc<"silvermont">;
def : SilvermontProc<"slm">; // Legacy alias.
+class GoldmontProc<string Name> : ProcessorModel<Name, SLMModel, [
+ ProcIntelGLM,
+ FeatureX87,
+ FeatureMMX,
+ FeatureSSE42,
+ FeatureFXSR,
+ FeatureCMPXCHG16B,
+ FeatureMOVBE,
+ FeaturePOPCNT,
+ FeaturePCLMUL,
+ FeatureAES,
+ FeaturePRFCHW,
+ FeatureCallRegIndirect,
+ FeatureSlowLEA,
+ FeatureSlowIncDec,
+ FeatureSlowBTMem,
+ FeatureLAHFSAHF,
+ FeatureMPX,
+ FeatureSHA,
+ FeatureRDSEED,
+ FeatureXSAVE,
+ FeatureXSAVEOPT,
+ FeatureXSAVEC,
+ FeatureXSAVES,
+ FeatureCLFLUSHOPT
+]>;
+def : GoldmontProc<"goldmont">;
+
// "Arrandale" along with corei3 and corei5
class NehalemProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeatureX87,
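With this ProcessorModel registered, "goldmont" becomes a valid -mcpu value; it reuses the Silvermont scheduling model (SLMModel) while layering on the post-SLM ISA additions listed above (SHA, RDSEED, the XSAVE family, CLFLUSHOPT, MPX). A typical invocation, shown purely as an illustrative example:

    llc -mtriple=x86_64-unknown-linux-gnu -mcpu=goldmont foo.ll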
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index f777e5628988..b89914f8893e 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -5065,6 +5065,20 @@ static SDValue insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
return insertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
}
+// Return true if the instruction zeroes the unused upper part of the
+// destination register and accepts a mask.
+static bool isMaskedZeroUpperBitsvXi1(unsigned int Opcode) {
+ switch (Opcode) {
+ default:
+ return false;
+ case X86ISD::PCMPEQM:
+ case X86ISD::PCMPGTM:
+ case X86ISD::CMPM:
+ case X86ISD::CMPMU:
+ return true;
+ }
+}
+
/// Insert i1-subvector to i1-vector.
static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
@@ -5097,6 +5111,22 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
// 3. Subvector should be inserted in the middle (for example v2i1
// to v16i1, index 2)
+ // If this node widens - by concatenating zeroes - the type of the result
+ // of a node whose instruction zeroes all upper (irrelevant) bits of the
+ // output register, mark this node as legal so that instruction selection
+ // can replace it with the v8i1 version of the previous instruction.
+ // For example, the VPCMPEQDZ128rr instruction stores its v4i1 result in a
+ // k-reg while zeroing the remaining upper 60 bits of the register. If the
+ // result of such an instruction is inserted into an all-zero vector, we can
+ // safely remove the insert_vector (in instruction selection), as the cmp
+ // instruction has already zeroed the rest of the register.
+ if (ISD::isBuildVectorAllZeros(Vec.getNode()) && IdxVal == 0 &&
+ (isMaskedZeroUpperBitsvXi1(SubVec.getOpcode()) ||
+ (SubVec.getOpcode() == ISD::AND &&
+ (isMaskedZeroUpperBitsvXi1(SubVec.getOperand(0).getOpcode()) ||
+ isMaskedZeroUpperBitsvXi1(SubVec.getOperand(1).getOpcode())))))
+ return Op;
+
// extend to natively supported kshift
MVT MinVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
MVT WideOpVT = OpVT;
@@ -7919,6 +7949,60 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
return concat256BitVectors(V1, V2, ResVT, NumElems, DAG, dl);
}
+// Return true if all the operands of the given CONCAT_VECTORS node are zeros
+// except for the first one. (CONCAT_VECTORS Op, 0, 0,...,0)
+static bool isExpandWithZeros(const SDValue &Op) {
+ assert(Op.getOpcode() == ISD::CONCAT_VECTORS &&
+ "Expand with zeros only possible in CONCAT_VECTORS nodes!");
+
+ for (unsigned i = 1; i < Op.getNumOperands(); i++)
+ if (!ISD::isBuildVectorAllZeros(Op.getOperand(i).getNode()))
+ return false;
+
+ return true;
+}
+
+// If the given node is a type promotion (by concatenating i1 zeros) of the
+// result of a node that already zeroes all upper bits of a k-register, return
+// that node; otherwise return an empty SDValue.
+static SDValue isTypePromotionOfi1ZeroUpBits(SDValue Op) {
+ unsigned Opc = Op.getOpcode();
+
+ assert(Opc == ISD::CONCAT_VECTORS &&
+ Op.getSimpleValueType().getVectorElementType() == MVT::i1 &&
+ "Unexpected node to check for type promotion!");
+
+ // As long as we are concatenating zeros to the upper part of a previous
+ // node's result, climb up the tree until a node with a different opcode
+ // is encountered.
+ while (Opc == ISD::INSERT_SUBVECTOR || Opc == ISD::CONCAT_VECTORS) {
+ if (Opc == ISD::INSERT_SUBVECTOR) {
+ if (ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode()) &&
+ Op.getConstantOperandVal(2) == 0)
+ Op = Op.getOperand(1);
+ else
+ return SDValue();
+ } else { // Opc == ISD::CONCAT_VECTORS
+ if (isExpandWithZeros(Op))
+ Op = Op.getOperand(0);
+ else
+ return SDValue();
+ }
+ Opc = Op.getOpcode();
+ }
+
+ // Check if the first inserted node zeroes the upper bits, or is the 'and'
+ // of a node that zeroes the upper bits (i.e. its masked version).
+ if (isMaskedZeroUpperBitsvXi1(Op.getOpcode()) ||
+ (Op.getOpcode() == ISD::AND &&
+ (isMaskedZeroUpperBitsvXi1(Op.getOperand(0).getOpcode()) ||
+ isMaskedZeroUpperBitsvXi1(Op.getOperand(1).getOpcode())))) {
+ return Op;
+ }
+
+ return SDValue();
+}
+
static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
const X86Subtarget &Subtarget,
SelectionDAG & DAG) {
@@ -7929,6 +8013,17 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
assert(isPowerOf2_32(NumOfOperands) &&
"Unexpected number of operands in CONCAT_VECTORS");
+ // If this node promotes - by concatenating zeroes - the type of the result
+ // of a node whose instruction zeroes all upper (irrelevant) bits of the
+ // output register, mark it as legal and catch the pattern in instruction
+ // selection to avoid emitting extra instructions (for zeroing upper bits).
+ if (SDValue Promoted = isTypePromotionOfi1ZeroUpBits(Op)) {
+ SDValue ZeroC = DAG.getConstant(0, dl, MVT::i64);
+ SDValue AllZeros = DAG.getSplatBuildVector(ResVT, dl, ZeroC);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, AllZeros, Promoted,
+ ZeroC);
+ }
+
SDValue Undef = DAG.getUNDEF(ResVT);
if (NumOfOperands > 2) {
// Specialize the cases when all, or all but one, of the operands are undef.
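The legality shortcut above leans on an architectural guarantee: an AVX-512 compare such as VPCMPEQDZ128rr deposits its v4i1 result in a k-register with every remaining bit cleared, so promoting that result by concatenating zero vectors changes nothing in the underlying register. A scalar analogy, with the k-register modeled as a plain 16-bit mask (illustrative only, not the codegen path):

    #include <cstdint>

    // Analogy: a 4-lane compare already yields a mask whose bits 4..15
    // are zero, so "widening" v4i1 -> v16i1 with zeros is the identity
    // on the underlying k-register value.
    uint16_t compare4(const uint32_t a[4], const uint32_t b[4]) {
      uint16_t K = 0;
      for (int i = 0; i < 4; ++i)
        K |= uint16_t(a[i] == b[i]) << i;
      return K; // already a valid v16i1 payload
    }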
@@ -27012,6 +27107,9 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
unsigned &Shuffle, MVT &ShuffleVT,
unsigned &PermuteImm) {
unsigned NumMaskElts = Mask.size();
+ unsigned InputSizeInBits = MaskVT.getSizeInBits();
+ unsigned MaskScalarSizeInBits = InputSizeInBits / NumMaskElts;
+ MVT MaskEltVT = MVT::getIntegerVT(MaskScalarSizeInBits);
bool ContainsZeros = false;
APInt Zeroable(NumMaskElts, false);
@@ -27027,7 +27125,7 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
(MaskVT.is256BitVector() && Subtarget.hasAVX2()))) {
int ShiftAmt = matchVectorShuffleAsShift(ShuffleVT, Shuffle,
- MaskVT.getScalarSizeInBits(), Mask,
+ MaskScalarSizeInBits, Mask,
0, Zeroable, Subtarget);
if (0 < ShiftAmt) {
PermuteImm = (unsigned)ShiftAmt;
@@ -27043,10 +27141,6 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
return SM_SentinelUndef <= M && M < (int)NumMaskElts;
}) && "Expected unary shuffle");
- unsigned InputSizeInBits = MaskVT.getSizeInBits();
- unsigned MaskScalarSizeInBits = InputSizeInBits / Mask.size();
- MVT MaskEltVT = MVT::getIntegerVT(MaskScalarSizeInBits);
-
// Handle PSHUFLW/PSHUFHW repeated patterns.
if (MaskScalarSizeInBits == 16) {
SmallVector<int, 4> RepeatedMask;
@@ -35072,7 +35166,7 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG,
/// that is commonly recognized as an idiom (has no register dependency), so
/// that's better/smaller than loading a splat 1 constant.
static SDValue combineIncDecVector(SDNode *N, SelectionDAG &DAG) {
- assert(N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB &&
+ assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
"Unexpected opcode for increment/decrement transform");
// Pseudo-legality check: getOnesVector() expects one of these types, so bail
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 01a70323224c..cc5c09cbf0e5 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -185,6 +185,20 @@ def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
v2f64x_info>;
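+
+// Bundles a mask (k) register class with its write-mask variant and the
+// corresponding vXi1 value type, for the lowering patterns below.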
+class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
+ ValueType _vt> {
+ RegisterClass KRC = _krc;
+ RegisterClass KRCWM = _krcwm;
+ ValueType KVT = _vt;
+}
+
+def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
+def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
+def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
+def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
+def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
+def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
+
// This multiclass generates the masking variants from the non-masking
// variant. It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
@@ -1735,17 +1749,217 @@ defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
avx512vl_i64_info, HasAVX512>,
T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
-let Predicates = [HasAVX512, NoVLX] in {
-def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
- (COPY_TO_REGCLASS (VPCMPGTDZrr
- (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
- (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>;
-def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
- (COPY_TO_REGCLASS (VPCMPEQDZrr
- (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
- (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>;
-}
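+// Lower a compare whose vXi1 result is zero-extended - inserted at offset 0
+// into an all-zeros wider mask vector - to just the compare instruction: it
+// already zeroes the upper bits of the destination k-register.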
+multiclass avx512_icmp_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
+ SDNode OpNode, string InstrStr,
+ list<Predicate> Preds> {
+let Predicates = Preds in {
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rr) _.RC:$src1, _.RC:$src2),
+ NewInf.KRC)>;
+
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (OpNode (_.VT _.RC:$src1),
+ (_.VT (bitconvert (_.LdFrag addr:$src2))))),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rm) _.RC:$src1, addr:$src2),
+ NewInf.KRC)>;
+
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (and _.KRCWM:$mask,
+ (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrk) _.KRCWM:$mask,
+ _.RC:$src1, _.RC:$src2),
+ NewInf.KRC)>;
+
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (and (_.KVT _.KRCWM:$mask),
+ (_.KVT (OpNode (_.VT _.RC:$src1),
+ (_.VT (bitconvert
+ (_.LdFrag addr:$src2))))))),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmk) _.KRCWM:$mask,
+ _.RC:$src1, addr:$src2),
+ NewInf.KRC)>;
+}
+}
+
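+// As above, plus the embedded-broadcast (rmb/rmbk) memory forms.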
+multiclass avx512_icmp_packed_rmb_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
+ SDNode OpNode, string InstrStr,
+ list<Predicate> Preds>
+ : avx512_icmp_packed_lowering<_, NewInf, OpNode, InstrStr, Preds> {
+let Predicates = Preds in {
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (OpNode (_.VT _.RC:$src1),
+ (X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmb) _.RC:$src1, addr:$src2),
+ NewInf.KRC)>;
+
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (and (_.KVT _.KRCWM:$mask),
+ (_.KVT (OpNode (_.VT _.RC:$src1),
+ (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2)))))),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmbk) _.KRCWM:$mask,
+ _.RC:$src1, addr:$src2),
+ NewInf.KRC)>;
+}
+}
+
+// VPCMPEQB - i8
+defm : avx512_icmp_packed_lowering<v16i8x_info, v32i1_info, X86pcmpeqm,
+ "VPCMPEQBZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_packed_lowering<v16i8x_info, v64i1_info, X86pcmpeqm,
+ "VPCMPEQBZ128", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_packed_lowering<v32i8x_info, v64i1_info, X86pcmpeqm,
+ "VPCMPEQBZ256", [HasBWI, HasVLX]>;
+
+// VPCMPEQW - i16
+defm : avx512_icmp_packed_lowering<v8i16x_info, v16i1_info, X86pcmpeqm,
+ "VPCMPEQWZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_packed_lowering<v8i16x_info, v32i1_info, X86pcmpeqm,
+ "VPCMPEQWZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_packed_lowering<v8i16x_info, v64i1_info, X86pcmpeqm,
+ "VPCMPEQWZ128", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_packed_lowering<v16i16x_info, v32i1_info, X86pcmpeqm,
+ "VPCMPEQWZ256", [HasBWI, HasVLX]>;
+defm : avx512_icmp_packed_lowering<v16i16x_info, v64i1_info, X86pcmpeqm,
+ "VPCMPEQWZ256", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_packed_lowering<v32i16_info, v64i1_info, X86pcmpeqm,
+ "VPCMPEQWZ", [HasBWI]>;
+
+// VPCMPEQD - i32
+defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v8i1_info, X86pcmpeqm,
+ "VPCMPEQDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v16i1_info, X86pcmpeqm,
+ "VPCMPEQDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v32i1_info, X86pcmpeqm,
+ "VPCMPEQDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v64i1_info, X86pcmpeqm,
+ "VPCMPEQDZ128", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v16i1_info, X86pcmpeqm,
+ "VPCMPEQDZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v32i1_info, X86pcmpeqm,
+ "VPCMPEQDZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v64i1_info, X86pcmpeqm,
+ "VPCMPEQDZ256", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v32i1_info, X86pcmpeqm,
+ "VPCMPEQDZ", [HasAVX512]>;
+defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v64i1_info, X86pcmpeqm,
+ "VPCMPEQDZ", [HasAVX512]>;
+
+// VPCMPEQQ - i64
+defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v4i1_info, X86pcmpeqm,
+ "VPCMPEQQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v8i1_info, X86pcmpeqm,
+ "VPCMPEQQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v16i1_info, X86pcmpeqm,
+ "VPCMPEQQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v32i1_info, X86pcmpeqm,
+ "VPCMPEQQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v64i1_info, X86pcmpeqm,
+ "VPCMPEQQZ128", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v8i1_info, X86pcmpeqm,
+ "VPCMPEQQZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v16i1_info, X86pcmpeqm,
+ "VPCMPEQQZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v32i1_info, X86pcmpeqm,
+ "VPCMPEQQZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v64i1_info, X86pcmpeqm,
+ "VPCMPEQQZ256", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v16i1_info, X86pcmpeqm,
+ "VPCMPEQQZ", [HasAVX512]>;
+defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v32i1_info, X86pcmpeqm,
+ "VPCMPEQQZ", [HasAVX512]>;
+defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v64i1_info, X86pcmpeqm,
+ "VPCMPEQQZ", [HasAVX512]>;
+
+// VPCMPGTB - i8
+defm : avx512_icmp_packed_lowering<v16i8x_info, v32i1_info, X86pcmpgtm,
+ "VPCMPGTBZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_packed_lowering<v16i8x_info, v64i1_info, X86pcmpgtm,
+ "VPCMPGTBZ128", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_packed_lowering<v32i8x_info, v64i1_info, X86pcmpgtm,
+ "VPCMPGTBZ256", [HasBWI, HasVLX]>;
+
+// VPCMPGTW - i16
+defm : avx512_icmp_packed_lowering<v8i16x_info, v16i1_info, X86pcmpgtm,
+ "VPCMPGTWZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_packed_lowering<v8i16x_info, v32i1_info, X86pcmpgtm,
+ "VPCMPGTWZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_packed_lowering<v8i16x_info, v64i1_info, X86pcmpgtm,
+ "VPCMPGTWZ128", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_packed_lowering<v16i16x_info, v32i1_info, X86pcmpgtm,
+ "VPCMPGTWZ256", [HasBWI, HasVLX]>;
+defm : avx512_icmp_packed_lowering<v16i16x_info, v64i1_info, X86pcmpgtm,
+ "VPCMPGTWZ256", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_packed_lowering<v32i16_info, v64i1_info, X86pcmpgtm,
+ "VPCMPGTWZ", [HasBWI]>;
+
+// VPCMPGTD - i32
+defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v8i1_info, X86pcmpgtm,
+ "VPCMPGTDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v16i1_info, X86pcmpgtm,
+ "VPCMPGTDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v32i1_info, X86pcmpgtm,
+ "VPCMPGTDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v64i1_info, X86pcmpgtm,
+ "VPCMPGTDZ128", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v16i1_info, X86pcmpgtm,
+ "VPCMPGTDZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v32i1_info, X86pcmpgtm,
+ "VPCMPGTDZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v64i1_info, X86pcmpgtm,
+ "VPCMPGTDZ256", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v32i1_info, X86pcmpgtm,
+ "VPCMPGTDZ", [HasAVX512]>;
+defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v64i1_info, X86pcmpgtm,
+ "VPCMPGTDZ", [HasAVX512]>;
+
+// VPCMPGTQ - i64
+defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v4i1_info, X86pcmpgtm,
+ "VPCMPGTQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v8i1_info, X86pcmpgtm,
+ "VPCMPGTQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v16i1_info, X86pcmpgtm,
+ "VPCMPGTQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v32i1_info, X86pcmpgtm,
+ "VPCMPGTQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v64i1_info, X86pcmpgtm,
+ "VPCMPGTQZ128", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v8i1_info, X86pcmpgtm,
+ "VPCMPGTQZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v16i1_info, X86pcmpgtm,
+ "VPCMPGTQZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v32i1_info, X86pcmpgtm,
+ "VPCMPGTQZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v64i1_info, X86pcmpgtm,
+ "VPCMPGTQZ256", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v16i1_info, X86pcmpgtm,
+ "VPCMPGTQZ", [HasAVX512]>;
+defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v32i1_info, X86pcmpgtm,
+ "VPCMPGTQZ", [HasAVX512]>;
+defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v64i1_info, X86pcmpgtm,
+ "VPCMPGTQZ", [HasAVX512]>;
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
X86VectorVTInfo _> {
@@ -1908,6 +2122,237 @@ defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, avx512vl_i64_info,
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, avx512vl_i64_info,
HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
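+
+// Apply the same zero-extension lowering to the immediate-CC integer
+// compares (VPCMP*/VPCMPU*).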
+multiclass avx512_icmp_cc_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
+ SDNode OpNode, string InstrStr,
+ list<Predicate> Preds> {
+let Predicates = Preds in {
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (OpNode (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ imm:$cc)),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rri) _.RC:$src1,
+ _.RC:$src2,
+ imm:$cc),
+ NewInf.KRC)>;
+
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (OpNode (_.VT _.RC:$src1),
+ (_.VT (bitconvert (_.LdFrag addr:$src2))),
+ imm:$cc)),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmi) _.RC:$src1,
+ addr:$src2,
+ imm:$cc),
+ NewInf.KRC)>;
+
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (and _.KRCWM:$mask,
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ imm:$cc))),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrik) _.KRCWM:$mask,
+ _.RC:$src1,
+ _.RC:$src2,
+ imm:$cc),
+ NewInf.KRC)>;
+
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (and (_.KVT _.KRCWM:$mask),
+ (_.KVT (OpNode (_.VT _.RC:$src1),
+ (_.VT (bitconvert
+ (_.LdFrag addr:$src2))),
+ imm:$cc)))),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmik) _.KRCWM:$mask,
+ _.RC:$src1,
+ addr:$src2,
+ imm:$cc),
+ NewInf.KRC)>;
+}
+}
+
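+// As above, plus the embedded-broadcast (rmib/rmibk) memory forms.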
+multiclass avx512_icmp_cc_packed_rmb_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
+ SDNode OpNode, string InstrStr,
+ list<Predicate> Preds>
+ : avx512_icmp_cc_packed_lowering<_, NewInf, OpNode, InstrStr, Preds> {
+let Predicates = Preds in {
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (OpNode (_.VT _.RC:$src1),
+ (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
+ imm:$cc)),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmib) _.RC:$src1,
+ addr:$src2,
+ imm:$cc),
+ NewInf.KRC)>;
+
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (and (_.KVT _.KRCWM:$mask),
+ (_.KVT (OpNode (_.VT _.RC:$src1),
+ (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2)),
+ imm:$cc)))),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmibk) _.KRCWM:$mask,
+ _.RC:$src1,
+ addr:$src2,
+ imm:$cc),
+ NewInf.KRC)>;
+}
+}
+
+// VPCMPB - i8
+defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v32i1_info, X86cmpm,
+ "VPCMPBZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v64i1_info, X86cmpm,
+ "VPCMPBZ128", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_lowering<v32i8x_info, v64i1_info, X86cmpm,
+ "VPCMPBZ256", [HasBWI, HasVLX]>;
+
+// VPCMPW - i16
+defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v16i1_info, X86cmpm,
+ "VPCMPWZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v32i1_info, X86cmpm,
+ "VPCMPWZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v64i1_info, X86cmpm,
+ "VPCMPWZ128", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v32i1_info, X86cmpm,
+ "VPCMPWZ256", [HasBWI, HasVLX]>;
+defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v64i1_info, X86cmpm,
+ "VPCMPWZ256", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_lowering<v32i16_info, v64i1_info, X86cmpm,
+ "VPCMPWZ", [HasBWI]>;
+
+// VPCMPD - i32
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v8i1_info, X86cmpm,
+ "VPCMPDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v16i1_info, X86cmpm,
+ "VPCMPDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v32i1_info, X86cmpm,
+ "VPCMPDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v64i1_info, X86cmpm,
+ "VPCMPDZ128", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v16i1_info, X86cmpm,
+ "VPCMPDZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v32i1_info, X86cmpm,
+ "VPCMPDZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v64i1_info, X86cmpm,
+ "VPCMPDZ256", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v32i1_info, X86cmpm,
+ "VPCMPDZ", [HasAVX512]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v64i1_info, X86cmpm,
+ "VPCMPDZ", [HasAVX512]>;
+
+// VPCMPQ - i64
+defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v4i1_info, X86cmpm,
+ "VPCMPQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v8i1_info, X86cmpm,
+ "VPCMPQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v16i1_info, X86cmpm,
+ "VPCMPQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v32i1_info, X86cmpm,
+ "VPCMPQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v64i1_info, X86cmpm,
+ "VPCMPQZ128", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v8i1_info, X86cmpm,
+ "VPCMPQZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v16i1_info, X86cmpm,
+ "VPCMPQZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v32i1_info, X86cmpm,
+ "VPCMPQZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v64i1_info, X86cmpm,
+ "VPCMPQZ256", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v16i1_info, X86cmpm,
+ "VPCMPQZ", [HasAVX512]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v32i1_info, X86cmpm,
+ "VPCMPQZ", [HasAVX512]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v64i1_info, X86cmpm,
+ "VPCMPQZ", [HasAVX512]>;
+
+// VPCMPUB - i8
+defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v32i1_info, X86cmpmu,
+ "VPCMPUBZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v64i1_info, X86cmpmu,
+ "VPCMPUBZ128", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_lowering<v32i8x_info, v64i1_info, X86cmpmu,
+ "VPCMPUBZ256", [HasBWI, HasVLX]>;
+
+// VPCMPUW - i16
+defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v16i1_info, X86cmpmu,
+ "VPCMPUWZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v32i1_info, X86cmpmu,
+ "VPCMPUWZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v64i1_info, X86cmpmu,
+ "VPCMPUWZ128", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v32i1_info, X86cmpmu,
+ "VPCMPUWZ256", [HasBWI, HasVLX]>;
+defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v64i1_info, X86cmpmu,
+ "VPCMPUWZ256", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_lowering<v32i16_info, v64i1_info, X86cmpmu,
+ "VPCMPUWZ", [HasBWI]>;
+
+// VPCMPUD - i32
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v8i1_info, X86cmpmu,
+ "VPCMPUDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v16i1_info, X86cmpmu,
+ "VPCMPUDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v32i1_info, X86cmpmu,
+ "VPCMPUDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v64i1_info, X86cmpmu,
+ "VPCMPUDZ128", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v16i1_info, X86cmpmu,
+ "VPCMPUDZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v32i1_info, X86cmpmu,
+ "VPCMPUDZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v64i1_info, X86cmpmu,
+ "VPCMPUDZ256", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v32i1_info, X86cmpmu,
+ "VPCMPUDZ", [HasAVX512]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v64i1_info, X86cmpmu,
+ "VPCMPUDZ", [HasAVX512]>;
+
+// VPCMPUQ - i64
+defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v4i1_info, X86cmpmu,
+ "VPCMPUQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v8i1_info, X86cmpmu,
+ "VPCMPUQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v16i1_info, X86cmpmu,
+ "VPCMPUQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v32i1_info, X86cmpmu,
+ "VPCMPUQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v64i1_info, X86cmpmu,
+ "VPCMPUQZ128", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v8i1_info, X86cmpmu,
+ "VPCMPUQZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v16i1_info, X86cmpmu,
+ "VPCMPUQZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v32i1_info, X86cmpmu,
+ "VPCMPUQZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v64i1_info, X86cmpmu,
+ "VPCMPUQZ256", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v16i1_info, X86cmpmu,
+ "VPCMPUQZ", [HasAVX512]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v32i1_info, X86cmpmu,
+ "VPCMPUQZ", [HasAVX512]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v64i1_info, X86cmpmu,
+ "VPCMPUQZ", [HasAVX512]>;
+
multiclass avx512_vcmp_common<X86VectorVTInfo _> {
defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
@@ -1998,21 +2443,108 @@ defm VCMPPD : avx512_vcmp<avx512vl_f64_info>,
defm VCMPPS : avx512_vcmp<avx512vl_f32_info>,
AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
-def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)),
- (COPY_TO_REGCLASS (VCMPPSZrri
- (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
- (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
- imm:$cc), VK8)>;
-def : Pat<(v8i1 (X86cmpm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
- (COPY_TO_REGCLASS (VPCMPDZrri
- (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
- (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
- imm:$cc), VK8)>;
-def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
- (COPY_TO_REGCLASS (VPCMPUDZrri
- (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
- (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
- imm:$cc), VK8)>;
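+
+// Apply the same zero-extension lowering to the FP compares; the 512-bit
+// variants also match the SAE (suppress-all-exceptions) compare node.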
+multiclass avx512_fcmp_cc_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
+ string InstrStr, list<Predicate> Preds> {
+let Predicates = Preds in {
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (X86cmpm (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ imm:$cc)),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rri) _.RC:$src1,
+ _.RC:$src2,
+ imm:$cc),
+ NewInf.KRC)>;
+
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (X86cmpm (_.VT _.RC:$src1),
+ (_.VT (bitconvert (_.LdFrag addr:$src2))),
+ imm:$cc)),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmi) _.RC:$src1,
+ addr:$src2,
+ imm:$cc),
+ NewInf.KRC)>;
+
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (X86cmpm (_.VT _.RC:$src1),
+ (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
+ imm:$cc)),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmbi) _.RC:$src1,
+ addr:$src2,
+ imm:$cc),
+ NewInf.KRC)>;
+}
+}
+
+multiclass avx512_fcmp_cc_packed_sae_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
+ string InstrStr, list<Predicate> Preds>
+ : avx512_fcmp_cc_packed_lowering<_, NewInf, InstrStr, Preds> {
+
+let Predicates = Preds in
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (X86cmpmRnd (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ imm:$cc,
+ (i32 FROUND_NO_EXC))),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrib) _.RC:$src1,
+ _.RC:$src2,
+ imm:$cc),
+ NewInf.KRC)>;
+}
+
+
+// VCMPPS - f32
+defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v8i1_info, "VCMPPSZ128",
+ [HasAVX512, HasVLX]>;
+defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v16i1_info, "VCMPPSZ128",
+ [HasAVX512, HasVLX]>;
+defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v32i1_info, "VCMPPSZ128",
+ [HasAVX512, HasVLX]>;
+defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v64i1_info, "VCMPPSZ128",
+ [HasAVX512, HasVLX]>;
+
+defm : avx512_fcmp_cc_packed_lowering<v8f32x_info, v16i1_info, "VCMPPSZ256",
+ [HasAVX512, HasVLX]>;
+defm : avx512_fcmp_cc_packed_lowering<v8f32x_info, v32i1_info, "VCMPPSZ256",
+ [HasAVX512, HasVLX]>;
+defm : avx512_fcmp_cc_packed_lowering<v8f32x_info, v64i1_info, "VCMPPSZ256",
+ [HasAVX512, HasVLX]>;
+
+defm : avx512_fcmp_cc_packed_sae_lowering<v16f32_info, v32i1_info, "VCMPPSZ",
+ [HasAVX512]>;
+defm : avx512_fcmp_cc_packed_sae_lowering<v16f32_info, v64i1_info, "VCMPPSZ",
+ [HasAVX512]>;
+
+// VCMPPD - f64
+defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v4i1_info, "VCMPPDZ128",
+ [HasAVX512, HasVLX]>;
+defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v8i1_info, "VCMPPDZ128",
+ [HasAVX512, HasVLX]>;
+defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v16i1_info, "VCMPPDZ128",
+ [HasAVX512, HasVLX]>;
+defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v32i1_info, "VCMPPDZ128",
+ [HasAVX512, HasVLX]>;
+defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v64i1_info, "VCMPPDZ128",
+ [HasAVX512, HasVLX]>;
+
+defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v8i1_info, "VCMPPDZ256",
+ [HasAVX512, HasVLX]>;
+defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v16i1_info, "VCMPPDZ256",
+ [HasAVX512, HasVLX]>;
+defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v32i1_info, "VCMPPDZ256",
+ [HasAVX512, HasVLX]>;
+defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v64i1_info, "VCMPPDZ256",
+ [HasAVX512, HasVLX]>;
+
+defm : avx512_fcmp_cc_packed_sae_lowering<v8f64_info, v16i1_info, "VCMPPDZ",
+ [HasAVX512]>;
+defm : avx512_fcmp_cc_packed_sae_lowering<v8f64_info, v32i1_info, "VCMPPDZ",
+ [HasAVX512]>;
+defm : avx512_fcmp_cc_packed_sae_lowering<v8f64_info, v64i1_info, "VCMPPDZ",
+ [HasAVX512]>;
// ----------------------------------------------------------------
// FPClass
@@ -2498,6 +3030,69 @@ multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr>;
+multiclass avx512_icmp_packed_no_vlx_lowering<SDNode OpNode, string InstStr> {
+def : Pat<(v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrr)
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>;
+
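+// When the v8i1 result is zero-extended to v16i1, the widened 512-bit
+// compare leaves the upper 8 mask bits undefined, so clear them by shifting
+// left and then right by 8 with KSHIFTLW/KSHIFTRW.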
+def : Pat<(insert_subvector (v16i1 immAllZerosV),
+ (v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
+ (i64 0)),
+ (KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrr)
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
+ (i8 8)), (i8 8))>;
+
+def : Pat<(insert_subvector (v16i1 immAllZerosV),
+ (v8i1 (and VK8:$mask,
+ (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2)))),
+ (i64 0)),
+ (KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrrk)
+ (COPY_TO_REGCLASS VK8:$mask, VK16),
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
+ (i8 8)), (i8 8))>;
+}
+
+multiclass avx512_icmp_packed_cc_no_vlx_lowering<SDNode OpNode, string InstStr,
+ AVX512VLVectorVTInfo _> {
+def : Pat<(v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrri)
+ (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+ (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
+ imm:$cc), VK8)>;
+
+def : Pat<(insert_subvector (v16i1 immAllZerosV),
+ (v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc)),
+ (i64 0)),
+ (KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrri)
+ (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+ (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
+ imm:$cc),
+ (i8 8)), (i8 8))>;
+
+def : Pat<(insert_subvector (v16i1 immAllZerosV),
+ (v8i1 (and VK8:$mask,
+ (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc))),
+ (i64 0)),
+ (KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrrik)
+ (COPY_TO_REGCLASS VK8:$mask, VK16),
+ (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+ (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
+ imm:$cc),
+ (i8 8)), (i8 8))>;
+}
+
+let Predicates = [HasAVX512, NoVLX] in {
+ defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD">;
+ defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQD">;
+
+ defm : avx512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", avx512vl_f32_info>;
+ defm : avx512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPD", avx512vl_i32_info>;
+ defm : avx512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUD", avx512vl_i32_info>;
+}
+
// Mask setting all 0s or 1s
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
let Predicates = [HasAVX512] in
diff --git a/lib/Target/X86/X86InstructionSelector.cpp b/lib/Target/X86/X86InstructionSelector.cpp
index f98c2a7e802d..e34a90e975b8 100644
--- a/lib/Target/X86/X86InstructionSelector.cpp
+++ b/lib/Target/X86/X86InstructionSelector.cpp
@@ -75,6 +75,8 @@ private:
bool selectUadde(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI,
+ MachineFunction &MF) const;
bool selectInsert(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
bool selectExtract(MachineInstr &I, MachineRegisterInfo &MRI,
@@ -270,6 +272,8 @@ bool X86InstructionSelector::select(MachineInstr &I) const {
return true;
if (selectUadde(I, MRI, MF))
return true;
+ if (selectMergeValues(I, MRI, MF))
+ return true;
if (selectExtract(I, MRI, MF))
return true;
if (selectInsert(I, MRI, MF))
@@ -914,6 +918,55 @@ bool X86InstructionSelector::selectInsert(MachineInstr &I,
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
+bool X86InstructionSelector::selectMergeValues(MachineInstr &I,
+ MachineRegisterInfo &MRI,
+ MachineFunction &MF) const {
+ if (I.getOpcode() != TargetOpcode::G_MERGE_VALUES)
+ return false;
+
+ // Split the merge into a sequence of inserts.
+ unsigned DstReg = I.getOperand(0).getReg();
+ unsigned SrcReg0 = I.getOperand(1).getReg();
+
+ const LLT DstTy = MRI.getType(DstReg);
+ const LLT SrcTy = MRI.getType(SrcReg0);
+ unsigned SrcSize = SrcTy.getSizeInBits();
+
+ const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);
+
+ // For the first source, use emitInsertSubreg.
+ unsigned DefReg = MRI.createGenericVirtualRegister(DstTy);
+ MRI.setRegBank(DefReg, RegBank);
+ if (!emitInsertSubreg(DefReg, I.getOperand(1).getReg(), I, MRI, MF))
+ return false;
+
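+ // Chain each remaining source in with a G_INSERT at bit offset
+ // (Idx - 1) * SrcSize.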
+ for (unsigned Idx = 2; Idx < I.getNumOperands(); ++Idx) {
+
+ unsigned Tmp = MRI.createGenericVirtualRegister(DstTy);
+ MRI.setRegBank(Tmp, RegBank);
+
+ MachineInstr &InsertInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(TargetOpcode::G_INSERT), Tmp)
+ .addReg(DefReg)
+ .addReg(I.getOperand(Idx).getReg())
+ .addImm((Idx - 1) * SrcSize);
+
+ DefReg = Tmp;
+
+ if (!select(InsertInst))
+ return false;
+ }
+
+ MachineInstr &CopyInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(TargetOpcode::COPY), DstReg)
+ .addReg(DefReg);
+
+ if (!select(CopyInst))
+ return false;
+
+ I.eraseFromParent();
+ return true;
+}
InstructionSelector *
llvm::createX86InstructionSelector(const X86TargetMachine &TM,
X86Subtarget &Subtarget,
diff --git a/lib/Target/X86/X86LegalizerInfo.cpp b/lib/Target/X86/X86LegalizerInfo.cpp
index a584eabcc1b2..a5fa3340c3f1 100644
--- a/lib/Target/X86/X86LegalizerInfo.cpp
+++ b/lib/Target/X86/X86LegalizerInfo.cpp
@@ -56,7 +56,7 @@ void X86LegalizerInfo::setLegalizerInfo32bit() {
const LLT s32 = LLT::scalar(32);
const LLT s64 = LLT::scalar(64);
- for (unsigned BinOp : {G_ADD, G_SUB, G_MUL})
+ for (unsigned BinOp : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
for (auto Ty : {s8, s16, s32})
setAction({BinOp, Ty}, Legal);
@@ -117,7 +117,7 @@ void X86LegalizerInfo::setLegalizerInfo64bit() {
const LLT s32 = LLT::scalar(32);
const LLT s64 = LLT::scalar(64);
- for (unsigned BinOp : {G_ADD, G_SUB, G_MUL})
+ for (unsigned BinOp : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
for (auto Ty : {s8, s16, s32, s64})
setAction({BinOp, Ty}, Legal);
@@ -228,10 +228,14 @@ void X86LegalizerInfo::setLegalizerInfoAVX() {
for (auto Ty : {v8s32, v4s64})
setAction({MemOp, Ty}, Legal);
- for (auto Ty : {v32s8, v16s16, v8s32, v4s64})
+ for (auto Ty : {v32s8, v16s16, v8s32, v4s64}) {
setAction({G_INSERT, Ty}, Legal);
- for (auto Ty : {v16s8, v8s16, v4s32, v2s64})
+ setAction({G_EXTRACT, 1, Ty}, Legal);
+ }
+ for (auto Ty : {v16s8, v8s16, v4s32, v2s64}) {
setAction({G_INSERT, 1, Ty}, Legal);
+ setAction({G_EXTRACT, Ty}, Legal);
+ }
}
void X86LegalizerInfo::setLegalizerInfoAVX2() {
@@ -280,10 +284,14 @@ void X86LegalizerInfo::setLegalizerInfoAVX512() {
for (auto Ty : {v16s32, v8s64})
setAction({MemOp, Ty}, Legal);
- for (auto Ty : {v64s8, v32s16, v16s32, v8s64})
+ for (auto Ty : {v64s8, v32s16, v16s32, v8s64}) {
setAction({G_INSERT, Ty}, Legal);
- for (auto Ty : {v32s8, v16s16, v8s32, v4s64, v16s8, v8s16, v4s32, v2s64})
+ setAction({G_EXTRACT, 1, Ty}, Legal);
+ }
+ for (auto Ty : {v32s8, v16s16, v8s32, v4s64, v16s8, v8s16, v4s32, v2s64}) {
setAction({G_INSERT, 1, Ty}, Legal);
+ setAction({G_EXTRACT, Ty}, Legal);
+ }
/************ VLX *******************/
if (!Subtarget.hasVLX())
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index e36a47506ba0..24845beac22d 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -11,10 +11,23 @@
//
//===----------------------------------------------------------------------===//
+#include "X86.h"
+
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+#include "X86CallLowering.h"
+#include "X86LegalizerInfo.h"
+#include "X86RegisterBankInfo.h"
+#endif
#include "X86Subtarget.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/Triple.h"
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
+#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
+#endif
#include "llvm/IR/Attributes.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Function.h"
@@ -336,6 +349,35 @@ X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU,
return *this;
}
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+namespace {
+
+struct X86GISelActualAccessor : public GISelAccessor {
+ std::unique_ptr<CallLowering> CallLoweringInfo;
+ std::unique_ptr<LegalizerInfo> Legalizer;
+ std::unique_ptr<RegisterBankInfo> RegBankInfo;
+ std::unique_ptr<InstructionSelector> InstSelector;
+
+ const CallLowering *getCallLowering() const override {
+ return CallLoweringInfo.get();
+ }
+
+ const InstructionSelector *getInstructionSelector() const override {
+ return InstSelector.get();
+ }
+
+ const LegalizerInfo *getLegalizerInfo() const override {
+ return Legalizer.get();
+ }
+
+ const RegisterBankInfo *getRegBankInfo() const override {
+ return RegBankInfo.get();
+ }
+};
+
+} // end anonymous namespace
+#endif
+
X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
const X86TargetMachine &TM,
unsigned StackAlignOverride)
@@ -360,6 +402,19 @@ X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
setPICStyle(PICStyles::StubPIC);
else if (isTargetELF())
setPICStyle(PICStyles::GOT);
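+
+ // GlobalISel setup, moved here from X86TargetMachine::getSubtargetImpl
+ // (removed below): the subtarget now owns its GlobalISel components.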
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+ GISelAccessor *GISel = new GISelAccessor();
+#else
+ X86GISelActualAccessor *GISel = new X86GISelActualAccessor();
+
+ GISel->CallLoweringInfo.reset(new X86CallLowering(*getTargetLowering()));
+ GISel->Legalizer.reset(new X86LegalizerInfo(*this, TM));
+
+ auto *RBI = new X86RegisterBankInfo(*getRegisterInfo());
+ GISel->RegBankInfo.reset(RBI);
+ GISel->InstSelector.reset(createX86InstructionSelector(TM, *this, *RBI));
+#endif
+ setGISelAccessor(*GISel);
}
const CallLowering *X86Subtarget::getCallLowering() const {
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 550e95c39ab5..fa0afe29586b 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -58,7 +58,7 @@ protected:
};
enum X86ProcFamilyEnum {
- Others, IntelAtom, IntelSLM
+ Others, IntelAtom, IntelSLM, IntelGLM
};
/// X86 processor family: Intel Atom, and others
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index a9f42cacf788..8d891c983fab 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -15,9 +15,6 @@
#include "X86.h"
#include "X86CallLowering.h"
#include "X86LegalizerInfo.h"
-#ifdef LLVM_BUILD_GLOBAL_ISEL
-#include "X86RegisterBankInfo.h"
-#endif
#include "X86MacroFusion.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
@@ -31,7 +28,6 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/ExecutionDepsFix.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
-#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
@@ -212,35 +208,6 @@ X86TargetMachine::X86TargetMachine(const Target &T, const Triple &TT,
X86TargetMachine::~X86TargetMachine() = default;
-#ifdef LLVM_BUILD_GLOBAL_ISEL
-namespace {
-
-struct X86GISelActualAccessor : public GISelAccessor {
- std::unique_ptr<CallLowering> CallLoweringInfo;
- std::unique_ptr<LegalizerInfo> Legalizer;
- std::unique_ptr<RegisterBankInfo> RegBankInfo;
- std::unique_ptr<InstructionSelector> InstSelector;
-
- const CallLowering *getCallLowering() const override {
- return CallLoweringInfo.get();
- }
-
- const InstructionSelector *getInstructionSelector() const override {
- return InstSelector.get();
- }
-
- const LegalizerInfo *getLegalizerInfo() const override {
- return Legalizer.get();
- }
-
- const RegisterBankInfo *getRegBankInfo() const override {
- return RegBankInfo.get();
- }
-};
-
-} // end anonymous namespace
-#endif
-
const X86Subtarget *
X86TargetMachine::getSubtargetImpl(const Function &F) const {
Attribute CPUAttr = F.getFnAttribute("target-cpu");
@@ -280,20 +247,6 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const {
resetTargetOptions(F);
I = llvm::make_unique<X86Subtarget>(TargetTriple, CPU, FS, *this,
Options.StackAlignmentOverride);
-#ifndef LLVM_BUILD_GLOBAL_ISEL
- GISelAccessor *GISel = new GISelAccessor();
-#else
- X86GISelActualAccessor *GISel = new X86GISelActualAccessor();
-
- GISel->CallLoweringInfo.reset(new X86CallLowering(*I->getTargetLowering()));
- GISel->Legalizer.reset(new X86LegalizerInfo(*I, *this));
-
- auto *RBI = new X86RegisterBankInfo(*I->getRegisterInfo());
- GISel->RegBankInfo.reset(RBI);
- GISel->InstSelector.reset(createX86InstructionSelector(
- *this, *I, *RBI));
-#endif
- I->setGISelAccessor(*GISel);
}
return I.get();
}
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index 1bf267d34ec2..aaa6d58bd134 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -40,6 +40,7 @@ public:
~X86TargetMachine() override;
const X86Subtarget *getSubtargetImpl(const Function &F) const override;
+ const X86Subtarget *getSubtargetImpl() const = delete;
TargetIRAnalysis getTargetIRAnalysis() override;
diff --git a/lib/Transforms/Coroutines/CoroInstr.h b/lib/Transforms/Coroutines/CoroInstr.h
index 5c666bdfea1f..9a8cc5a2591c 100644
--- a/lib/Transforms/Coroutines/CoroInstr.h
+++ b/lib/Transforms/Coroutines/CoroInstr.h
@@ -58,10 +58,10 @@ public:
}
// Methods to support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::coro_subfn_addr;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
@@ -70,10 +70,10 @@ public:
class LLVM_LIBRARY_VISIBILITY CoroAllocInst : public IntrinsicInst {
public:
// Methods to support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::coro_alloc;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
@@ -175,10 +175,10 @@ public:
}
// Methods to support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::coro_id;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
@@ -187,10 +187,10 @@ public:
class LLVM_LIBRARY_VISIBILITY CoroFrameInst : public IntrinsicInst {
public:
// Methods to support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::coro_frame;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
@@ -203,10 +203,10 @@ public:
Value *getFrame() const { return getArgOperand(FrameArg); }
// Methods to support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::coro_free;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
@@ -221,10 +221,10 @@ public:
Value *getMem() const { return getArgOperand(MemArg); }
// Methods to support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::coro_begin;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
@@ -233,10 +233,10 @@ public:
class LLVM_LIBRARY_VISIBILITY CoroSaveInst : public IntrinsicInst {
public:
// Methods to support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::coro_save;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
@@ -254,10 +254,10 @@ public:
}
// Methods to support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::coro_promise;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
@@ -279,10 +279,10 @@ public:
}
// Methods to support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::coro_suspend;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
@@ -291,10 +291,10 @@ public:
class LLVM_LIBRARY_VISIBILITY CoroSizeInst : public IntrinsicInst {
public:
// Methods to support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::coro_size;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
@@ -310,10 +310,10 @@ public:
}
// Methods to support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
+ static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::coro_end;
}
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 087a8aa2c624..5b1b58b89c32 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -56,10 +56,6 @@ RunSLPVectorization("vectorize-slp", cl::Hidden,
cl::desc("Run the SLP vectorization passes"));
static cl::opt<bool>
-RunBBVectorization("vectorize-slp-aggressive", cl::Hidden,
- cl::desc("Run the BB vectorization passes"));
-
-static cl::opt<bool>
UseGVNAfterVectorization("use-gvn-after-vectorization",
cl::init(false), cl::Hidden,
cl::desc("Run GVN instead of Early CSE after vectorization passes"));
@@ -138,8 +134,8 @@ static cl::opt<int> PreInlineThreshold(
"(default = 75)"));
static cl::opt<bool> EnableEarlyCSEMemSSA(
- "enable-earlycse-memssa", cl::init(false), cl::Hidden,
- cl::desc("Enable the EarlyCSE w/ MemorySSA pass (default = off)"));
+ "enable-earlycse-memssa", cl::init(true), cl::Hidden,
+ cl::desc("Enable the EarlyCSE w/ MemorySSA pass (default = on)"));
static cl::opt<bool> EnableGVNHoist(
"enable-gvn-hoist", cl::init(false), cl::Hidden,
@@ -166,7 +162,6 @@ PassManagerBuilder::PassManagerBuilder() {
Inliner = nullptr;
DisableUnitAtATime = false;
DisableUnrollLoops = false;
- BBVectorize = RunBBVectorization;
SLPVectorize = RunSLPVectorization;
LoopVectorize = RunLoopVectorization;
RerollLoops = RunLoopRerolling;
@@ -263,11 +258,12 @@ void PassManagerBuilder::populateFunctionPassManager(
// Do PGO instrumentation generation or use pass as the option specified.
void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) {
- if (!EnablePGOInstrGen && PGOInstrUse.empty())
+ if (!EnablePGOInstrGen && PGOInstrUse.empty() && PGOSampleUse.empty())
return;
// Perform the preinline and cleanup passes for O1 and above.
// And avoid doing them if optimizing for size.
- if (OptLevel > 0 && SizeLevel == 0 && !DisablePreInliner) {
+ if (OptLevel > 0 && SizeLevel == 0 && !DisablePreInliner &&
+ PGOSampleUse.empty()) {
// Create preinline pass. We construct an InlineParams object and specify
// the threshold here to avoid the command line options of the regular
// inliner to influence pre-inlining. The only fields of InlineParams we
@@ -383,26 +379,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
if (RerollLoops)
MPM.add(createLoopRerollPass());
- if (!RunSLPAfterLoopVectorization) {
- if (SLPVectorize)
- MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
-
- if (BBVectorize) {
- MPM.add(createBBVectorizePass());
- addInstructionCombiningPass(MPM);
- addExtensionsToPM(EP_Peephole, MPM);
- if (OptLevel > 1 && UseGVNAfterVectorization)
- MPM.add(NewGVN
- ? createNewGVNPass()
- : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
- else
- MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
-
- // BBVectorize may have significantly shortened a loop body; unroll again.
- if (!DisableUnrollLoops)
- MPM.add(createLoopUnrollPass(OptLevel));
- }
- }
+ if (!RunSLPAfterLoopVectorization && SLPVectorize)
+ MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
MPM.add(createAggressiveDCEPass()); // Delete dead instructions
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
@@ -634,28 +612,10 @@ void PassManagerBuilder::populateModulePassManager(
addInstructionCombiningPass(MPM);
}
- if (RunSLPAfterLoopVectorization) {
- if (SLPVectorize) {
- MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
- if (OptLevel > 1 && ExtraVectorizerPasses) {
- MPM.add(createEarlyCSEPass());
- }
- }
-
- if (BBVectorize) {
- MPM.add(createBBVectorizePass());
- addInstructionCombiningPass(MPM);
- addExtensionsToPM(EP_Peephole, MPM);
- if (OptLevel > 1 && UseGVNAfterVectorization)
- MPM.add(NewGVN
- ? createNewGVNPass()
- : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
- else
- MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
-
- // BBVectorize may have significantly shortened a loop body; unroll again.
- if (!DisableUnrollLoops)
- MPM.add(createLoopUnrollPass(OptLevel));
+ if (RunSLPAfterLoopVectorization && SLPVectorize) {
+ MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
+ if (OptLevel > 1 && ExtraVectorizerPasses) {
+ MPM.add(createEarlyCSEPass());
}
}
diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp
index 656421ee58df..ac4765f96075 100644
--- a/lib/Transforms/IPO/SampleProfile.cpp
+++ b/lib/Transforms/IPO/SampleProfile.cpp
@@ -1484,7 +1484,8 @@ bool SampleProfileLoader::runOnFunction(Function &F) {
PreservedAnalyses SampleProfileLoaderPass::run(Module &M,
ModuleAnalysisManager &AM) {
- SampleProfileLoader SampleLoader(SampleProfileFile);
+ SampleProfileLoader SampleLoader(
+ ProfileFileName.empty() ? SampleProfileFile : ProfileFileName);
SampleLoader.doInitialization(M);
diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 802f470ffe1f..8d494fe9cde2 100644
--- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -371,6 +371,7 @@ void splitAndWriteThinLTOBitcode(
/*GenerateHash=*/true, &ModHash);
W.writeModule(MergedM.get(), /*ShouldPreserveUseListOrder=*/false,
&MergedMIndex);
+ W.writeSymtab();
W.writeStrtab();
OS << Buffer;
@@ -385,6 +386,7 @@ void splitAndWriteThinLTOBitcode(
/*GenerateHash=*/false, &ModHash);
W2.writeModule(MergedM.get(), /*ShouldPreserveUseListOrder=*/false,
&MergedMIndex);
+ W2.writeSymtab();
W2.writeStrtab();
*ThinLinkOS << Buffer;
}
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index d3d8cefe9735..db98be2c98f5 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2301,10 +2301,10 @@ static Instruction *foldXorToXor(BinaryOperator &I) {
// (~B | A) ^ (~A | B) -> A ^ B
// (~A | B) ^ (A | ~B) -> A ^ B
// (B | ~A) ^ (A | ~B) -> A ^ B
- if ((match(Op0, m_c_Or(m_Value(A), m_Not(m_Value(B)))) &&
- match(Op1, m_Or(m_Not(m_Specific(A)), m_Specific(B)))) ||
- (match(Op0, m_c_Or(m_Not(m_Value(A)), m_Value(B))) &&
- match(Op1, m_Or(m_Specific(A), m_Not(m_Specific(B)))))) {
+ if ((match(Op0, m_Or(m_Value(A), m_Not(m_Value(B)))) &&
+ match(Op1, m_c_Or(m_Not(m_Specific(A)), m_Specific(B)))) ||
+ (match(Op0, m_Or(m_Not(m_Value(A)), m_Value(B))) &&
+ match(Op1, m_c_Or(m_Specific(A), m_Not(m_Specific(B)))))) {
I.setOperand(0, A);
I.setOperand(1, B);
return &I;
@@ -2314,10 +2314,10 @@ static Instruction *foldXorToXor(BinaryOperator &I) {
// (~B & A) ^ (~A & B) -> A ^ B
// (~A & B) ^ (A & ~B) -> A ^ B
// (B & ~A) ^ (A & ~B) -> A ^ B
- if ((match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) &&
- match(Op1, m_And(m_Not(m_Specific(A)), m_Specific(B)))) ||
- (match(Op0, m_c_And(m_Not(m_Value(A)), m_Value(B))) &&
- match(Op1, m_And(m_Specific(A), m_Not(m_Specific(B)))))) {
+ if ((match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) &&
+ match(Op1, m_c_And(m_Not(m_Specific(A)), m_Specific(B)))) ||
+ (match(Op0, m_And(m_Not(m_Value(A)), m_Value(B))) &&
+ match(Op1, m_c_And(m_Specific(A), m_Not(m_Specific(B)))))) {
I.setOperand(0, A);
I.setOperand(1, B);
return &I;
@@ -2456,10 +2456,9 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
}
}
- // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B
+ // not (cmp A, B) = !cmp A, B
ICmpInst::Predicate Pred;
- if (match(Op0, m_OneUse(m_Cmp(Pred, m_Value(), m_Value()))) &&
- match(Op1, m_AllOnes())) {
+ if (match(&I, m_Not(m_OneUse(m_Cmp(Pred, m_Value(), m_Value()))))) {
cast<CmpInst>(Op0)->setPredicate(CmpInst::getInversePredicate(Pred));
return replaceInstUsesWith(I, Op0);
}
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index dbed7ad4eae8..3770021de100 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1985,7 +1985,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
Value *X = nullptr;
// bitreverse(bitreverse(x)) -> x
- if (match(IIOperand, m_Intrinsic<Intrinsic::bitreverse>(m_Value(X))))
+ if (match(IIOperand, m_BitReverse(m_Value(X))))
return replaceInstUsesWith(CI, X);
break;
}
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 6ad32490a328..58b8b2f52629 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -112,10 +112,10 @@ static bool subWithOverflow(Constant *&Result, Constant *In1,
/// Given an icmp instruction, return true if any use of this comparison is a
/// branch on sign bit comparison.
-static bool isBranchOnSignBitCheck(ICmpInst &I, bool isSignBit) {
+static bool hasBranchUse(ICmpInst &I) {
for (auto *U : I.users())
if (isa<BranchInst>(U))
- return isSignBit;
+ return true;
return false;
}
@@ -1448,12 +1448,13 @@ Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) {
// of a test and branch. So we avoid canonicalizing in such situations
// because test and branch instruction has better branch displacement
// than compare and branch instruction.
- if (!isBranchOnSignBitCheck(Cmp, IsSignBit) && !Cmp.isEquality()) {
- if (auto *AI = Intersection.getSingleElement())
- return new ICmpInst(ICmpInst::ICMP_EQ, X, Builder->getInt(*AI));
- if (auto *AD = Difference.getSingleElement())
- return new ICmpInst(ICmpInst::ICMP_NE, X, Builder->getInt(*AD));
- }
+ if (Cmp.isEquality() || (IsSignBit && hasBranchUse(Cmp)))
+ return nullptr;
+
+ if (auto *AI = Intersection.getSingleElement())
+ return new ICmpInst(ICmpInst::ICMP_EQ, X, Builder->getInt(*AI));
+ if (auto *AD = Difference.getSingleElement())
+ return new ICmpInst(ICmpInst::ICMP_NE, X, Builder->getInt(*AD));
}
return nullptr;
@@ -3301,12 +3302,12 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) {
return nullptr;
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ const CmpInst::Predicate Pred = I.getPredicate();
Value *A, *B, *C, *D;
if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
if (A == Op1 || B == Op1) { // (A^B) == A -> B == 0
Value *OtherVal = A == Op1 ? B : A;
- return new ICmpInst(I.getPredicate(), OtherVal,
- Constant::getNullValue(A->getType()));
+ return new ICmpInst(Pred, OtherVal, Constant::getNullValue(A->getType()));
}
if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) {
@@ -3316,26 +3317,25 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) {
Op1->hasOneUse()) {
Constant *NC = Builder->getInt(C1->getValue() ^ C2->getValue());
Value *Xor = Builder->CreateXor(C, NC);
- return new ICmpInst(I.getPredicate(), A, Xor);
+ return new ICmpInst(Pred, A, Xor);
}
// A^B == A^D -> B == D
if (A == C)
- return new ICmpInst(I.getPredicate(), B, D);
+ return new ICmpInst(Pred, B, D);
if (A == D)
- return new ICmpInst(I.getPredicate(), B, C);
+ return new ICmpInst(Pred, B, C);
if (B == C)
- return new ICmpInst(I.getPredicate(), A, D);
+ return new ICmpInst(Pred, A, D);
if (B == D)
- return new ICmpInst(I.getPredicate(), A, C);
+ return new ICmpInst(Pred, A, C);
}
}
if (match(Op1, m_Xor(m_Value(A), m_Value(B))) && (A == Op0 || B == Op0)) {
// A == (A^B) -> B == 0
Value *OtherVal = A == Op0 ? B : A;
- return new ICmpInst(I.getPredicate(), OtherVal,
- Constant::getNullValue(A->getType()));
+ return new ICmpInst(Pred, OtherVal, Constant::getNullValue(A->getType()));
}
// (X&Z) == (Y&Z) -> (X^Y) & Z == 0
@@ -3380,8 +3380,7 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) {
APInt Pow2 = Cst1->getValue() + 1;
if (Pow2.isPowerOf2() && isa<IntegerType>(A->getType()) &&
Pow2.logBase2() == cast<IntegerType>(A->getType())->getBitWidth())
- return new ICmpInst(I.getPredicate(), A,
- Builder->CreateTrunc(B, A->getType()));
+ return new ICmpInst(Pred, A, Builder->CreateTrunc(B, A->getType()));
}
// (A >> C) == (B >> C) --> (A^B) u< (1 << C)
@@ -3393,12 +3392,11 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) {
unsigned TypeBits = Cst1->getBitWidth();
unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits);
if (ShAmt < TypeBits && ShAmt != 0) {
- ICmpInst::Predicate Pred = I.getPredicate() == ICmpInst::ICMP_NE
- ? ICmpInst::ICMP_UGE
- : ICmpInst::ICMP_ULT;
+ ICmpInst::Predicate NewPred =
+ Pred == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted");
APInt CmpVal = APInt::getOneBitSet(TypeBits, ShAmt);
- return new ICmpInst(Pred, Xor, Builder->getInt(CmpVal));
+ return new ICmpInst(NewPred, Xor, Builder->getInt(CmpVal));
}
}
@@ -3412,8 +3410,7 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) {
APInt AndVal = APInt::getLowBitsSet(TypeBits, TypeBits - ShAmt);
Value *And = Builder->CreateAnd(Xor, Builder->getInt(AndVal),
I.getName() + ".mask");
- return new ICmpInst(I.getPredicate(), And,
- Constant::getNullValue(Cst1->getType()));
+ return new ICmpInst(Pred, And, Constant::getNullValue(Cst1->getType()));
}
}
@@ -3437,7 +3434,7 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) {
CmpV <<= ShAmt;
Value *Mask = Builder->CreateAnd(A, Builder->getInt(MaskV));
- return new ICmpInst(I.getPredicate(), Mask, Builder->getInt(CmpV));
+ return new ICmpInst(Pred, Mask, Builder->getInt(CmpV));
}
}
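The hunks above are refactoring only (caching the predicate once), but the documented identity behind the (A >> C) == (B >> C) fold is easy to spot-check. A standalone brute-force verification for small unsigned values (plain C++, not LLVM code):

    #include <cassert>
    #include <cstdint>

    // (A >> C) == (B >> C)  <=>  (A ^ B) u< (1 << C): the high bits agree
    // exactly when the XOR has only bits below C set.
    int main() {
      for (uint32_t A = 0; A < 256; ++A)
        for (uint32_t B = 0; B < 256; ++B)
          for (uint32_t C = 1; C < 8; ++C)
            assert(((A >> C) == (B >> C)) == ((A ^ B) < (1u << C)));
      return 0;
    }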
diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h
index 1b0fe84dd4dd..87f11467b95e 100644
--- a/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -131,11 +131,10 @@ static inline bool IsFreeToInvert(Value *V, bool WillInvertAllUses) {
return true;
// A vector of constant integers can be inverted easily.
- Constant *CV;
- if (V->getType()->isVectorTy() && match(V, PatternMatch::m_Constant(CV))) {
+ if (V->getType()->isVectorTy() && isa<Constant>(V)) {
unsigned NumElts = V->getType()->getVectorNumElements();
for (unsigned i = 0; i != NumElts; ++i) {
- Constant *Elt = CV->getAggregateElement(i);
+ Constant *Elt = cast<Constant>(V)->getAggregateElement(i);
if (!Elt)
return false;
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index ca370c73fca4..26bee204e5a4 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -661,6 +661,9 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
if (NumElements == 1) {
LoadInst *NewLoad = combineLoadToNewType(IC, LI, ST->getTypeAtIndex(0U),
".unpack");
+ AAMDNodes AAMD;
+ LI.getAAMetadata(AAMD);
+ NewLoad->setAAMetadata(AAMD);
return IC.replaceInstUsesWith(LI, IC.Builder->CreateInsertValue(
UndefValue::get(T), NewLoad, 0, Name));
}
@@ -690,6 +693,10 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
Name + ".elt");
auto EltAlign = MinAlign(Align, SL->getElementOffset(i));
auto *L = IC.Builder->CreateAlignedLoad(Ptr, EltAlign, Name + ".unpack");
+ // Propagate AA metadata. It'll still be valid on the narrowed load.
+ AAMDNodes AAMD;
+ LI.getAAMetadata(AAMD);
+ L->setAAMetadata(AAMD);
V = IC.Builder->CreateInsertValue(V, L, i);
}
@@ -702,6 +709,9 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
auto NumElements = AT->getNumElements();
if (NumElements == 1) {
LoadInst *NewLoad = combineLoadToNewType(IC, LI, ET, ".unpack");
+ AAMDNodes AAMD;
+ LI.getAAMetadata(AAMD);
+ NewLoad->setAAMetadata(AAMD);
return IC.replaceInstUsesWith(LI, IC.Builder->CreateInsertValue(
UndefValue::get(T), NewLoad, 0, Name));
}
@@ -734,6 +744,9 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
Name + ".elt");
auto *L = IC.Builder->CreateAlignedLoad(Ptr, MinAlign(Align, Offset),
Name + ".unpack");
+ AAMDNodes AAMD;
+ LI.getAAMetadata(AAMD);
+ L->setAAMetadata(AAMD);
V = IC.Builder->CreateInsertValue(V, L, i);
Offset += EltSize;
}
@@ -1192,7 +1205,11 @@ static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
AddrName);
auto *Val = IC.Builder->CreateExtractValue(V, i, EltName);
auto EltAlign = MinAlign(Align, SL->getElementOffset(i));
- IC.Builder->CreateAlignedStore(Val, Ptr, EltAlign);
+ llvm::Instruction *NS =
+ IC.Builder->CreateAlignedStore(Val, Ptr, EltAlign);
+ AAMDNodes AAMD;
+ SI.getAAMetadata(AAMD);
+ NS->setAAMetadata(AAMD);
}
return true;
@@ -1239,7 +1256,10 @@ static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
AddrName);
auto *Val = IC.Builder->CreateExtractValue(V, i, EltName);
auto EltAlign = MinAlign(Align, Offset);
- IC.Builder->CreateAlignedStore(Val, Ptr, EltAlign);
+ Instruction *NS = IC.Builder->CreateAlignedStore(Val, Ptr, EltAlign);
+ AAMDNodes AAMD;
+ SI.getAAMetadata(AAMD);
+ NS->setAAMetadata(AAMD);
Offset += EltSize;
}
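All six additions above repeat one pattern. A minimal helper capturing it might look like this (assumes LLVM headers of this vintage; the helper name is hypothetical, not part of the patch):

    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/Metadata.h"

    // Copy the original access's AA metadata (!tbaa, !alias.scope, !noalias)
    // onto the narrowed per-element access; it stays valid because each
    // element access lies within the original memory location.
    static void propagateAAMetadata(const llvm::Instruction &OldAccess,
                                    llvm::Instruction *NewAccess) {
      llvm::AAMDNodes AAMD;
      OldAccess.getAAMetadata(AAMD);
      NewAccess->setAAMetadata(AAMD);
    }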
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 365c4ba75154..579639a6194e 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -227,8 +227,8 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (I.hasNoUnsignedWrap())
Shl->setHasNoUnsignedWrap();
if (I.hasNoSignedWrap()) {
- uint64_t V;
- if (match(NewCst, m_ConstantInt(V)) && V != Width - 1)
+ const APInt *V;
+ if (match(NewCst, m_APInt(V)) && *V != Width - 1)
Shl->setHasNoSignedWrap();
}
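Two things change here: m_APInt lets the check handle integer constants wider than 64 bits, and the Width - 1 guard is kept because 1 << (Width - 1) is the minimum signed value, for which the nsw flag cannot be carried from the mul to the shl. An 8-bit standalone illustration (plain C++, not LLVM code):

    #include <cstdint>
    #include <cstdio>

    int main() {
      int8_t X = 2;
      // As an i8 constant, 1 << 7 is -128. "mul nsw X, -128" overflows for
      // X = 2, while "shl X, 7" silently yields 0, so no-signed-wrap must
      // not be transferred when the shift amount is Width - 1.
      printf("%d\n", (int8_t)(X << 7)); // prints 0
      return 0;
    }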
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 33951e66497a..80c6595904e1 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1167,6 +1167,23 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (Instruction *I = canonicalizeSelectToShuffle(SI))
return I;
+ // Canonicalize a one-use integer compare with a non-canonical predicate by
+ // inverting the predicate and swapping the select operands. This matches a
+ // compare canonicalization for conditional branches.
+ // TODO: Should we do the same for FP compares?
+ CmpInst::Predicate Pred;
+ if (match(CondVal, m_OneUse(m_ICmp(Pred, m_Value(), m_Value()))) &&
+ !isCanonicalPredicate(Pred)) {
+ // Swap true/false values and condition.
+ CmpInst *Cond = cast<CmpInst>(CondVal);
+ Cond->setPredicate(CmpInst::getInversePredicate(Pred));
+ SI.setOperand(1, FalseVal);
+ SI.setOperand(2, TrueVal);
+ SI.swapProfMetadata();
+ Worklist.Add(Cond);
+ return &SI;
+ }
+
if (SelType->getScalarType()->isIntegerTy(1) &&
TrueVal->getType() == CondVal->getType()) {
if (match(TrueVal, m_One())) {
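The effect of the new canonicalization at the source level (illustrative function, not from the patch): a select guarded by a non-canonical predicate such as sge is rewritten to the inverse canonical predicate slt with its arms swapped, and any branch-weight (!prof) metadata is swapped to match:

    int selectCanonical(int a, int b, int x, int y) {
      return (a < b) ? y : x; // canonical form of (a >= b) ? x : y
    }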
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 02fac4fb37a4..723414635d6f 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2425,9 +2425,15 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
Builder->SetInsertPoint(L);
Value *GEP = Builder->CreateInBoundsGEP(L->getType(),
L->getPointerOperand(), Indices);
+ Instruction *NL = Builder->CreateLoad(GEP);
+ // Whatever aliasing information we had for the original load must also
+ // hold for the smaller load, so propagate the annotations.
+ AAMDNodes Nodes;
+ L->getAAMetadata(Nodes);
+ NL->setAAMetadata(Nodes);
// Returning the load directly will cause the main loop to insert it in
// the wrong spot, so use replaceInstUsesWith().
- return replaceInstUsesWith(EV, Builder->CreateLoad(GEP));
+ return replaceInstUsesWith(EV, NL);
}
// We could simplify extracts from other values. Note that nested extracts may
// already be simplified implicitly by the above: extract (extract (insert) )
diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp
index c3810366bf22..a49c9b68c97d 100644
--- a/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -38,6 +38,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
@@ -340,6 +341,49 @@ void ConstantHoistingPass::collectConstantCandidates(
}
}
+
+/// \brief Check the operand of instruction Inst at index Idx.
+void ConstantHoistingPass::collectConstantCandidates(
+ ConstCandMapType &ConstCandMap, Instruction *Inst, unsigned Idx) {
+ Value *Opnd = Inst->getOperand(Idx);
+
+ // Visit constant integers.
+ if (auto ConstInt = dyn_cast<ConstantInt>(Opnd)) {
+ collectConstantCandidates(ConstCandMap, Inst, Idx, ConstInt);
+ return;
+ }
+
+ // Visit cast instructions that have constant integers.
+ if (auto CastInst = dyn_cast<Instruction>(Opnd)) {
+ // Only visit cast instructions, which have been skipped. All other
+ // instructions should have already been visited.
+ if (!CastInst->isCast())
+ return;
+
+ if (auto *ConstInt = dyn_cast<ConstantInt>(CastInst->getOperand(0))) {
+ // Pretend the constant is directly used by the instruction and ignore
+ // the cast instruction.
+ collectConstantCandidates(ConstCandMap, Inst, Idx, ConstInt);
+ return;
+ }
+ }
+
+ // Visit constant expressions that have constant integers.
+ if (auto ConstExpr = dyn_cast<ConstantExpr>(Opnd)) {
+ // Only visit constant cast expressions.
+ if (!ConstExpr->isCast())
+ return;
+
+ if (auto ConstInt = dyn_cast<ConstantInt>(ConstExpr->getOperand(0))) {
+ // Pretend the constant is directly used by the instruction and ignore
+ // the constant expression.
+ collectConstantCandidates(ConstCandMap, Inst, Idx, ConstInt);
+ return;
+ }
+ }
+}
+
+
/// \brief Scan the instruction for expensive integer constants and record them
/// in the constant candidate vector.
void ConstantHoistingPass::collectConstantCandidates(
@@ -365,44 +409,25 @@ void ConstantHoistingPass::collectConstantCandidates(
if (AI && AI->isStaticAlloca())
return;
- // Scan all operands.
- for (unsigned Idx = 0, E = Inst->getNumOperands(); Idx != E; ++Idx) {
- Value *Opnd = Inst->getOperand(Idx);
-
- // Visit constant integers.
- if (auto ConstInt = dyn_cast<ConstantInt>(Opnd)) {
- collectConstantCandidates(ConstCandMap, Inst, Idx, ConstInt);
- continue;
- }
-
- // Visit cast instructions that have constant integers.
- if (auto CastInst = dyn_cast<Instruction>(Opnd)) {
- // Only visit cast instructions, which have been skipped. All other
- // instructions should have already been visited.
- if (!CastInst->isCast())
- continue;
-
- if (auto *ConstInt = dyn_cast<ConstantInt>(CastInst->getOperand(0))) {
- // Pretend the constant is directly used by the instruction and ignore
- // the cast instruction.
- collectConstantCandidates(ConstCandMap, Inst, Idx, ConstInt);
- continue;
+ // Constants in GEPs that index into a struct type should not be hoisted.
+ if (isa<GetElementPtrInst>(Inst)) {
+ gep_type_iterator GTI = gep_type_begin(Inst);
+
+ // Collect constant for first operand.
+ collectConstantCandidates(ConstCandMap, Inst, 0);
+ // Scan the remaining operands.
+ for (unsigned Idx = 1, E = Inst->getNumOperands(); Idx != E; ++Idx, ++GTI) {
+ // Only collect constants that index into a non-struct type.
+ if (!GTI.isStruct()) {
+ collectConstantCandidates(ConstCandMap, Inst, Idx);
}
}
+ return;
+ }
- // Visit constant expressions that have constant integers.
- if (auto ConstExpr = dyn_cast<ConstantExpr>(Opnd)) {
- // Only visit constant cast expressions.
- if (!ConstExpr->isCast())
- continue;
-
- if (auto ConstInt = dyn_cast<ConstantInt>(ConstExpr->getOperand(0))) {
- // Pretend the constant is directly used by the instruction and ignore
- // the constant expression.
- collectConstantCandidates(ConstCandMap, Inst, Idx, ConstInt);
- continue;
- }
- }
+ // Scan all operands.
+ for (unsigned Idx = 0, E = Inst->getNumOperands(); Idx != E; ++Idx) {
+ collectConstantCandidates(ConstCandMap, Inst, Idx);
} // end of for all operands
}
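In source terms, the new GEP handling distinguishes two kinds of indices (illustrative example): a struct index is a field selector that must remain a ConstantInt, so it is skipped, while array and pointer indices are genuine address arithmetic and are scanned like any other operand:

    struct S { int A[100]; int B; };

    int f(S *P, long I) {
      // For P->A[I] the array index is scanned as usual; for P->B the
      // operand selecting field B indexes into a struct type and is not
      // collected as a hoisting candidate.
      return P->A[I] + P->B;
    }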
diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index 2f96c3064b86..a40c22c3fce9 100644
--- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -917,7 +917,6 @@ LoopConstrainer::calculateSubRanges() const {
// I think we can be more aggressive here and make this nuw / nsw if the
// addition that feeds into the icmp for the latch's terminating branch is nuw
// / nsw. In any case, a wrapping 2's complement addition is safe.
- ConstantInt *One = ConstantInt::get(Ty, 1);
const SCEV *Start = SE.getSCEV(MainLoopStructure.IndVarStart);
const SCEV *End = SE.getSCEV(MainLoopStructure.LoopExitAt);
@@ -948,8 +947,9 @@ LoopConstrainer::calculateSubRanges() const {
// will be an empty range. Returning an empty range is always safe.
//
- Smallest = SE.getAddExpr(End, SE.getSCEV(One));
- Greatest = SE.getAddExpr(Start, SE.getSCEV(One));
+ const SCEV *One = SE.getOne(Ty);
+ Smallest = SE.getAddExpr(End, One);
+ Greatest = SE.getAddExpr(Start, One);
}
auto Clamp = [this, Smallest, Greatest](const SCEV *S) {
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 62aa6ee48069..530a68424d5c 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -131,7 +131,7 @@ static const unsigned NoThreshold = UINT_MAX;
/// Gather the various unrolling parameters based on the defaults, compiler
/// flags, TTI overrides and user specified parameters.
static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
- Loop *L, const TargetTransformInfo &TTI, int OptLevel,
+ Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, int OptLevel,
Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
Optional<bool> UserUpperBound) {
@@ -158,7 +158,7 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
UP.AllowPeeling = true;
// Override with any target specific settings
- TTI.getUnrollingPreferences(L, UP);
+ TTI.getUnrollingPreferences(L, SE, UP);
// Apply size attributes
if (L->getHeader()->getParent()->optForSize()) {
@@ -699,7 +699,7 @@ static uint64_t getUnrolledLoopSize(
// Calculates unroll count and writes it to UP.Count.
static bool computeUnrollCount(
Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI,
- ScalarEvolution *SE, OptimizationRemarkEmitter *ORE, unsigned &TripCount,
+ ScalarEvolution &SE, OptimizationRemarkEmitter *ORE, unsigned &TripCount,
unsigned MaxTripCount, unsigned &TripMultiple, unsigned LoopSize,
TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) {
// Check for explicit Count.
@@ -770,7 +770,7 @@ static bool computeUnrollCount(
// helps to remove a significant number of instructions.
// To check that, run additional analysis on the loop.
if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
- L, FullUnrollTripCount, DT, *SE, TTI,
+ L, FullUnrollTripCount, DT, SE, TTI,
UP.Threshold * UP.MaxPercentThresholdBoost / 100)) {
unsigned Boost =
getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost);
@@ -836,6 +836,8 @@ static bool computeUnrollCount(
} else {
UP.Count = TripCount;
}
+ if (UP.Count > UP.MaxCount)
+ UP.Count = UP.MaxCount;
if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
UP.Count != TripCount)
ORE->emit(
@@ -926,7 +928,7 @@ static bool computeUnrollCount(
}
static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
- ScalarEvolution *SE, const TargetTransformInfo &TTI,
+ ScalarEvolution &SE, const TargetTransformInfo &TTI,
AssumptionCache &AC, OptimizationRemarkEmitter &ORE,
bool PreserveLCSSA, int OptLevel,
Optional<unsigned> ProvidedCount,
@@ -948,8 +950,8 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
bool NotDuplicatable;
bool Convergent;
TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
- L, TTI, OptLevel, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial,
- ProvidedRuntime, ProvidedUpperBound);
+ L, SE, TTI, OptLevel, ProvidedThreshold, ProvidedCount,
+ ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound);
// Exit early if unrolling is disabled.
if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0))
return false;
@@ -977,8 +979,8 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
ExitingBlock = L->getExitingBlock();
if (ExitingBlock) {
- TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
- TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
+ TripCount = SE.getSmallConstantTripCount(L, ExitingBlock);
+ TripMultiple = SE.getSmallConstantTripMultiple(L, ExitingBlock);
}
// If the loop contains a convergent operation, the prelude we'd add
@@ -1000,8 +1002,8 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
// count.
bool MaxOrZero = false;
if (!TripCount) {
- MaxTripCount = SE->getSmallConstantMaxTripCount(L);
- MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L);
+ MaxTripCount = SE.getSmallConstantMaxTripCount(L);
+ MaxOrZero = SE.isBackedgeTakenCountMaxOrZero(L);
// We can unroll by the upper bound amount if it's generally allowed or if
// we know that the loop is executed either the upper bound or zero times.
// (MaxOrZero unrolling keeps only the first loop test, so the number of
@@ -1030,7 +1032,7 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
// Unroll the loop.
if (!UnrollLoop(L, UP.Count, TripCount, UP.Force, UP.Runtime,
UP.AllowExpensiveTripCount, UseUpperBound, MaxOrZero,
- TripMultiple, UP.PeelCount, LI, SE, &DT, &AC, &ORE,
+ TripMultiple, UP.PeelCount, LI, &SE, &DT, &AC, &ORE,
PreserveLCSSA))
return false;
@@ -1073,7 +1075,7 @@ public:
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
const TargetTransformInfo &TTI =
getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
@@ -1157,7 +1159,7 @@ PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
if (!AllowPartialUnrolling)
AllowPartialParam = RuntimeParam = UpperBoundParam = false;
bool Changed = tryToUnrollLoop(
- &L, AR.DT, &AR.LI, &AR.SE, AR.TTI, AR.AC, *ORE,
+ &L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE,
/*PreserveLCSSA*/ true, OptLevel, /*Count*/ None,
/*Threshold*/ None, AllowPartialParam, RuntimeParam, UpperBoundParam);
if (!Changed)
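Most of this section threads ScalarEvolution by reference, but the computeUnrollCount hunk also adds a behavioral fix: the count derived from an exact trip count is now clamped to the target cap. In isolation it is just this (standalone equivalent, hypothetical name):

    #include <algorithm>

    // Even a count taken directly from the trip count must respect the
    // target-imposed MaxCount limit.
    unsigned clampUnrollCount(unsigned Count, unsigned MaxCount) {
      return std::min(Count, MaxCount);
    }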
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index 7a7624f77542..9cf01c6582b5 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -2423,8 +2423,7 @@ void NewGVN::addPhiOfOps(PHINode *Op, BasicBlock *BB,
AllTempInstructions.insert(Op);
PHIOfOpsPHIs[BB].push_back(Op);
TempToBlock[Op] = BB;
- if (ExistingValue)
- RealToTemp[ExistingValue] = Op;
+ RealToTemp[ExistingValue] = Op;
}
static bool okayForPHIOfOps(const Instruction *I) {
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 6da551bd7efd..cdba0062953f 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -1894,6 +1894,8 @@ void ReassociatePass::EraseInst(Instruction *I) {
Op = Op->user_back();
RedoInsts.insert(Op);
}
+
+ MadeChange = true;
}
// Canonicalize expressions of the following form:
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index a52739bb76f7..a73e9aec0617 100644
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -1954,7 +1954,7 @@ static void rematerializeLiveValues(CallSite CS,
// to identify the newly generated AlternateRootPhi (.base version of phi)
// and RootOfChain (the original phi node itself) are the same, so that we
// can rematerialize the gep and casts. This is a workaround for the
- // deficieny in the findBasePointer algorithm.
+ // deficiency in the findBasePointer algorithm.
if (!AreEquivalentPhiNodes(*OrigRootPhi, *AlternateRootPhi))
continue;
// Now that the phi nodes are proved to be the same, assert that
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 80fbbeb6829b..4729f4ef5956 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -2402,9 +2402,20 @@ private:
if (LI.isVolatile())
NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
+ // Any !nonnull metadata or !range metadata on the old load is also valid
+ // on the new load. This is true even in some cases when the loads are
+ // of different types, for example by mapping !nonnull metadata to
+ // !range metadata by modeling the null pointer constant converted to the
+ // integer type.
+ // FIXME: Add support for range metadata here. Currently the utilities
+ // for this don't propagate range metadata in trivial cases from one
+ // integer load to another, don't handle non-addrspace-0 null pointers
+ // correctly, and don't have any support for mapping ranges as the
+ // integer type becomes wider or narrower.
+ if (MDNode *N = LI.getMetadata(LLVMContext::MD_nonnull))
+ copyNonnullMetadata(LI, N, *NewLI);
+
// Try to preserve nonnull metadata
- if (TargetTy->isPointerTy())
- NewLI->copyMetadata(LI, LLVMContext::MD_nonnull);
V = NewLI;
// If this is an integer load past the end of the slice (which means the
@@ -3580,10 +3591,11 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
int Idx = 0, Size = Offsets.Splits.size();
for (;;) {
auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
- auto *PartPtrTy = PartTy->getPointerTo(LI->getPointerAddressSpace());
+ auto AS = LI->getPointerAddressSpace();
+ auto *PartPtrTy = PartTy->getPointerTo(AS);
LoadInst *PLoad = IRB.CreateAlignedLoad(
getAdjustedPtr(IRB, DL, BasePtr,
- APInt(DL.getPointerSizeInBits(), PartOffset),
+ APInt(DL.getPointerSizeInBits(AS), PartOffset),
PartPtrTy, BasePtr->getName() + "."),
getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
LI->getName());
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 5d57ed9718fb..30d8856cfbef 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -59,6 +59,33 @@ bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB) {
// Landing pads must be in the function where they were inserted for cleanup.
if (BB.isEHPad())
return false;
+ // Taking the address of a basic block moved to another function is illegal.
+ if (BB.hasAddressTaken())
+ return false;
+
+ // Don't hoist code that uses the address of another basic block, as it's
+ // likely to lead to unexpected behavior, like cross-function jumps.
+ SmallPtrSet<User const *, 16> Visited;
+ SmallVector<User const *, 16> ToVisit;
+
+ for (Instruction const &Inst : BB)
+ ToVisit.push_back(&Inst);
+
+ while (!ToVisit.empty()) {
+ User const *Curr = ToVisit.pop_back_val();
+ if (!Visited.insert(Curr).second)
+ continue;
+ if (isa<BlockAddress const>(Curr))
+ return false; // even a self-reference is likely to be incompatible
+
+ if (isa<Instruction>(Curr) && cast<Instruction>(Curr)->getParent() != &BB)
+ continue;
+
+ for (auto const &U : Curr->operands()) {
+ if (auto *UU = dyn_cast<User>(U))
+ ToVisit.push_back(UU);
+ }
+ }
// Don't hoist code containing allocas, invokes, or vastarts.
for (BasicBlock::const_iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
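The kind of code the new scan rejects, written with the GNU address-of-label extension, which lowers to blockaddress (illustrative):

    int dispatch(int X) {
      // Extracting either labeled block into a new function would leave
      // the blockaddress entries in Targets pointing across function
      // boundaries, i.e. a cross-function jump.
      static void *Targets[] = {&&A, &&B}; // GNU extension
      goto *Targets[X & 1];
    A:
      return 1;
    B:
      return 2;
    }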
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 5f85e17927fa..9ad2b707e6b2 100644
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -36,6 +36,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
#include <algorithm>
@@ -45,6 +46,10 @@ using namespace llvm;
STATISTIC(NumRuntimeUnrolled,
"Number of loops unrolled with run-time trip counts");
+static cl::opt<bool> UnrollRuntimeMultiExit(
+ "unroll-runtime-multi-exit", cl::init(false), cl::Hidden,
+ cl::desc("Allow runtime unrolling for loops with multiple exits, when "
+ "epilog is generated"));
/// Connect the unrolling prolog code to the original loop.
/// The unrolling prolog code contains code to execute the
@@ -285,15 +290,13 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
/// The cloned blocks should be inserted between InsertTop and InsertBot.
/// If loop structure is cloned InsertTop should be new preheader, InsertBot
/// new loop exit.
-///
-static void CloneLoopBlocks(Loop *L, Value *NewIter,
- const bool CreateRemainderLoop,
- const bool UseEpilogRemainder,
- BasicBlock *InsertTop, BasicBlock *InsertBot,
- BasicBlock *Preheader,
- std::vector<BasicBlock *> &NewBlocks,
- LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
- DominatorTree *DT, LoopInfo *LI) {
+/// Return the new cloned loop that is created when CreateRemainderLoop is true.
+static Loop *
+CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
+ const bool UseEpilogRemainder, BasicBlock *InsertTop,
+ BasicBlock *InsertBot, BasicBlock *Preheader,
+ std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
+ ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) {
StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
BasicBlock *Header = L->getHeader();
BasicBlock *Latch = L->getLoopLatch();
@@ -418,7 +421,10 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter,
// Set operand 0 to refer to the loop id itself.
NewLoopID->replaceOperandWith(0, NewLoopID);
NewLoop->setLoopID(NewLoopID);
+ return NewLoop;
}
+ else
+ return nullptr;
}
/// Insert code in the prolog/epilog code when unrolling a loop with a
@@ -465,29 +471,52 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
LoopInfo *LI, ScalarEvolution *SE,
DominatorTree *DT, bool PreserveLCSSA) {
// for now, only unroll loops that contain a single exit
- if (!L->getExitingBlock())
+ if (!UnrollRuntimeMultiExit && !L->getExitingBlock())
return false;
- // Make sure the loop is in canonical form, and there is a single
- // exit block only.
+ // Make sure the loop is in canonical form.
if (!L->isLoopSimplifyForm())
return false;
// Guaranteed by LoopSimplifyForm.
BasicBlock *Latch = L->getLoopLatch();
+ BasicBlock *Header = L->getHeader();
BasicBlock *LatchExit = L->getUniqueExitBlock(); // successor out of loop
- if (!LatchExit)
+ if (!LatchExit && !UnrollRuntimeMultiExit)
return false;
+ // These are exit blocks other than the target of the latch exiting block.
+ SmallVector<BasicBlock *, 4> OtherExits;
+ BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
+ unsigned int ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0;
// Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
- // targets of the Latch be the single exit block out of the loop. This needs
+ // targets of the Latch be an exit block out of the loop. This needs
// to be guaranteed by the callers of UnrollRuntimeLoopRemainder.
- BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
- assert((LatchBR->getSuccessor(0) == LatchExit ||
- LatchBR->getSuccessor(1) == LatchExit) &&
- "one of the loop latch successors should be "
- "the exit block!");
- (void)LatchBR;
+ assert(!L->contains(LatchBR->getSuccessor(ExitIndex)) &&
+ "one of the loop latch successors should be the exit block!");
+ // Support runtime unrolling for multiple exit blocks and multiple exiting
+ // blocks.
+ if (!LatchExit) {
+ assert(UseEpilogRemainder && "Multi exit unrolling is currently supported "
+ "with epilog remainder only!");
+ LatchExit = LatchBR->getSuccessor(ExitIndex);
+ // We rely on LCSSA form being preserved when the exit blocks are
+ // transformed.
+ if (!PreserveLCSSA)
+ return false;
+ // TODO: Support multiple exiting blocks jumping to the `LatchExit`. This
+ // will need updating the logic in connectEpilog.
+ if (!LatchExit->getSinglePredecessor())
+ return false;
+ SmallVector<BasicBlock *, 4> Exits;
+ L->getUniqueExitBlocks(Exits);
+ for (auto *BB : Exits)
+ if (BB != LatchExit)
+ OtherExits.push_back(BB);
+ }
+
+ assert(LatchExit && "Latch Exit should exist!");
+
// Use Scalar Evolution to compute the trip count. This allows more loops to
// be unrolled than relying on induction var simplification.
if (!SE)
@@ -495,7 +524,11 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
// Only unroll loops with a computable trip count, and the trip count needs
// to be an int value (allowing a pointer type is a TODO item).
- const SCEV *BECountSC = SE->getBackedgeTakenCount(L);
+ // We calculate the backedge count by using getExitCount on the Latch block,
+ // which is proven to be the only exiting block in this loop. This is the same as
+ // calculating getBackedgeTakenCount on the loop (which computes SCEV for all
+ // exiting blocks).
+ const SCEV *BECountSC = SE->getExitCount(L, Latch);
if (isa<SCEVCouldNotCompute>(BECountSC) ||
!BECountSC->getType()->isIntegerTy())
return false;
@@ -508,7 +541,6 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
if (isa<SCEVCouldNotCompute>(TripCountSC))
return false;
- BasicBlock *Header = L->getHeader();
BasicBlock *PreHeader = L->getLoopPreheader();
BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
const DataLayout &DL = Header->getModule()->getDataLayout();
@@ -650,8 +682,9 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
// iterations. This function adds the appropriate CFG connections.
BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
- CloneLoopBlocks(L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop,
- InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
+ Loop *remainderLoop = CloneLoopBlocks(
+ L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop, InsertBot,
+ NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
// Insert the cloned blocks into the function.
F->getBasicBlockList().splice(InsertBot->getIterator(),
@@ -659,6 +692,42 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
NewBlocks[0]->getIterator(),
F->end());
+ // Now the loop blocks are cloned and the other exiting blocks from the
+ // remainder are connected to the original Loop's exit blocks. The remaining
+ // work is to update the phi nodes in the original loop, and take in the
+ // values from the cloned region. Also update the dominator info for
+ // OtherExits, since we have new edges into OtherExits.
+ for (auto *BB : OtherExits) {
+ for (auto &II : *BB) {
+
+ // Given that we preserve LCSSA form, we know that the values used outside the
+ // loop will be used through these phi nodes at the exit blocks that are
+ // transformed below.
+ if (!isa<PHINode>(II))
+ break;
+ PHINode *Phi = cast<PHINode>(&II);
+ unsigned oldNumOperands = Phi->getNumIncomingValues();
+ // Add the incoming values from the remainder code to the end of the phi
+ // node.
+ for (unsigned i = 0; i < oldNumOperands; i++) {
+ Value *newVal = VMap[Phi->getIncomingValue(i)];
+ if (!newVal) {
+ assert(isa<Constant>(Phi->getIncomingValue(i)) &&
+ "VMap should exist for all values except constants!");
+ newVal = Phi->getIncomingValue(i);
+ }
+ Phi->addIncoming(newVal,
+ cast<BasicBlock>(VMap[Phi->getIncomingBlock(i)]));
+ }
+ }
+ // Update the dominator info because the immediate dominator is no longer the
+ // header of the original Loop. BB now has edges from both L and the remainder code.
+ // Since the preheader determines which loop is run (L or directly jump to
+ // the remainder code), we set the immediate dominator as the preheader.
+ if (DT)
+ DT->changeImmediateDominator(BB, PreHeader);
+ }
+
// Loop structure should be the following:
// Epilog Prolog
//
@@ -721,6 +790,19 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
if (Loop *ParentLoop = L->getParentLoop())
SE->forgetLoop(ParentLoop);
+ // Canonicalize both the original and remainder loops to LoopSimplifyForm. We
+ // cannot rely on the LoopUnrollPass to do this because it only does
+ // canonicalization for parent/subloops and not the sibling loops.
+ if (OtherExits.size() > 0) {
+ // Generate dedicated exit blocks for the original loop, to preserve
+ // LoopSimplifyForm.
+ formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA);
+ // Generate dedicated exit blocks for the remainder loop if one exists, to
+ // preserve LoopSimplifyForm.
+ if (remainderLoop)
+ formDedicatedExitBlocks(remainderLoop, DT, LI, PreserveLCSSA);
+ }
+
NumRuntimeUnrolled++;
return true;
}
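A loop shape that the new flag-guarded path (-unroll-runtime-multi-exit) can now runtime-unroll with an epilog remainder; previously any second exit disabled runtime unrolling entirely (illustrative source):

    // The failing latch test and the early return are distinct exit
    // blocks; the early exit's phi nodes receive extra incoming values
    // from the cloned remainder blocks, which is what the new phi-update
    // loop above performs.
    int firstNegative(const int *A, int N) {
      for (int i = 0; i < N; ++i)
        if (A[i] < 0)
          return i; // a second exit block, collected in OtherExits
      return -1;
    }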
diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp
index 0ed33945ef40..58b70be95d99 100644
--- a/lib/Transforms/Utils/LoopUtils.cpp
+++ b/lib/Transforms/Utils/LoopUtils.cpp
@@ -528,8 +528,9 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
return false;
}
-bool RecurrenceDescriptor::isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop,
- DominatorTree *DT) {
+bool RecurrenceDescriptor::isFirstOrderRecurrence(
+ PHINode *Phi, Loop *TheLoop,
+ DenseMap<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) {
// Ensure the phi node is in the loop header and has two incoming values.
if (Phi->getParent() != TheLoop->getHeader() ||
@@ -551,12 +552,24 @@ bool RecurrenceDescriptor::isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop,
// Get the previous value. The previous value comes from the latch edge while
// the initial value comes form the preheader edge.
auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
- if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous))
+ if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) ||
+ SinkAfter.count(Previous)) // Cannot rely on dominance due to motion.
return false;
// Ensure every user of the phi node is dominated by the previous value.
// The dominance requirement ensures the loop vectorizer will not need to
// vectorize the initial value prior to the first iteration of the loop.
+ // TODO: Consider extending this sinking to handle other kinds of instructions
+ // and expressions, beyond sinking a single cast past Previous.
+ if (Phi->hasOneUse()) {
+ auto *I = Phi->user_back();
+ if (I->isCast() && (I->getParent() == Phi->getParent()) && I->hasOneUse() &&
+ DT->dominates(Previous, I->user_back())) {
+ SinkAfter[I] = Previous;
+ return true;
+ }
+ }
+
for (User *U : Phi->users())
if (auto *I = dyn_cast<Instruction>(U)) {
if (!DT->dominates(Previous, I))
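The new single-use case accepts recurrences like the following (illustrative source; what matters is the order of the emitted IR): the phi's only user is a cast that appears before Previous, but the cast's own user is dominated by Previous, so recording the cast in SinkAfter lets the vectorizer sink it past Previous and restore the required dominance:

    int sumOfPrevious(const short *A, int N) {
      short Last = 0; // becomes the header phi (the recurrence)
      int Sum = 0;
      for (int i = 0; i < N; ++i) {
        int Widened = (int)Last; // the phi's single user: a cast
        Last = A[i];             // "Previous", carried into the next iteration
        Sum += Widened;          // the cast's user, dominated by Previous
      }
      return Sum;
    }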
diff --git a/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/lib/Transforms/Utils/LowerMemIntrinsics.cpp
index 0a51f9a0e4a2..1c2a60a6b8b2 100644
--- a/lib/Transforms/Utils/LowerMemIntrinsics.cpp
+++ b/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -27,7 +27,6 @@ void llvm::createMemCpyLoop(Instruction *InsertBefore,
BasicBlock *LoopBB = BasicBlock::Create(F->getContext(), "loadstoreloop",
F, NewBB);
- OrigBB->getTerminator()->setSuccessor(0, LoopBB);
IRBuilder<> Builder(OrigBB->getTerminator());
// SrcAddr and DstAddr are expected to be pointer types,
@@ -39,6 +38,11 @@ void llvm::createMemCpyLoop(Instruction *InsertBefore,
SrcAddr = Builder.CreateBitCast(SrcAddr, Builder.getInt8PtrTy(SrcAS));
DstAddr = Builder.CreateBitCast(DstAddr, Builder.getInt8PtrTy(DstAS));
+ Builder.CreateCondBr(
+ Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB,
+ LoopBB);
+ OrigBB->getTerminator()->eraseFromParent();
+
IRBuilder<> LoopBuilder(LoopBB);
PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB);
@@ -167,6 +171,7 @@ static void createMemMoveLoop(Instruction *InsertBefore,
static void createMemSetLoop(Instruction *InsertBefore,
Value *DstAddr, Value *CopyLen, Value *SetValue,
unsigned Align, bool IsVolatile) {
+ Type *TypeOfCopyLen = CopyLen->getType();
BasicBlock *OrigBB = InsertBefore->getParent();
Function *F = OrigBB->getParent();
BasicBlock *NewBB =
@@ -174,7 +179,6 @@ static void createMemSetLoop(Instruction *InsertBefore,
BasicBlock *LoopBB
= BasicBlock::Create(F->getContext(), "loadstoreloop", F, NewBB);
- OrigBB->getTerminator()->setSuccessor(0, LoopBB);
IRBuilder<> Builder(OrigBB->getTerminator());
// Cast pointer to the type of value getting stored
@@ -182,9 +186,14 @@ static void createMemSetLoop(Instruction *InsertBefore,
DstAddr = Builder.CreateBitCast(DstAddr,
PointerType::get(SetValue->getType(), dstAS));
+ Builder.CreateCondBr(
+ Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB,
+ LoopBB);
+ OrigBB->getTerminator()->eraseFromParent();
+
IRBuilder<> LoopBuilder(LoopBB);
- PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLen->getType(), 0);
- LoopIndex->addIncoming(ConstantInt::get(CopyLen->getType(), 0), OrigBB);
+ PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
+ LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB);
LoopBuilder.CreateStore(
SetValue,
@@ -192,7 +201,7 @@ static void createMemSetLoop(Instruction *InsertBefore,
IsVolatile);
Value *NewIndex =
- LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLen->getType(), 1));
+ LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
LoopIndex->addIncoming(NewIndex, LoopBB);
LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
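In source terms, the lowering's control flow changes as follows (illustrative): the old code rewired the terminator straight into the loop, so a zero-length memcpy/memset still executed one iteration; the new compare-and-branch skips the loop entirely:

    void memsetLoop(unsigned char *Dst, unsigned char Val, unsigned long Len) {
      if (Len != 0) {      // new guard: branch on CopyLen == 0
        unsigned long I = 0;
        do {               // "loadstoreloop"
          Dst[I] = Val;
          ++I;
        } while (I < Len); // backedge: icmp ult NewIndex, CopyLen
      }
    }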
diff --git a/lib/Transforms/Utils/OrderedInstructions.cpp b/lib/Transforms/Utils/OrderedInstructions.cpp
index 2e67e0def5b9..dc780542ce68 100644
--- a/lib/Transforms/Utils/OrderedInstructions.cpp
+++ b/lib/Transforms/Utils/OrderedInstructions.cpp
@@ -27,7 +27,6 @@ bool OrderedInstructions::dominates(const Instruction *InstA,
if (OBB == OBBMap.end())
OBB = OBBMap.insert({IBB, make_unique<OrderedBasicBlock>(IBB)}).first;
return OBB->second->dominates(InstA, InstB);
- } else {
- return DT->dominates(InstA->getParent(), InstB->getParent());
}
+ return DT->dominates(InstA->getParent(), InstB->getParent());
}
diff --git a/lib/Transforms/Utils/PredicateInfo.cpp b/lib/Transforms/Utils/PredicateInfo.cpp
index 1260e35e934d..d4cdaede6b86 100644
--- a/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/lib/Transforms/Utils/PredicateInfo.cpp
@@ -19,7 +19,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/OrderedBasicBlock.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
@@ -34,6 +33,7 @@
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/OrderedInstructions.h"
#include <algorithm>
#define DEBUG_TYPE "predicateinfo"
using namespace llvm;
@@ -106,14 +106,27 @@ struct ValueDFS {
bool EdgeOnly = false;
};
+// Perform a strict weak ordering on instructions and arguments.
+static bool valueComesBefore(OrderedInstructions &OI, const Value *A,
+ const Value *B) {
+ auto *ArgA = dyn_cast_or_null<Argument>(A);
+ auto *ArgB = dyn_cast_or_null<Argument>(B);
+ if (ArgA && !ArgB)
+ return true;
+ if (ArgB && !ArgA)
+ return false;
+ if (ArgA && ArgB)
+ return ArgA->getArgNo() < ArgB->getArgNo();
+ return OI.dominates(cast<Instruction>(A), cast<Instruction>(B));
+}
+
// This compares ValueDFS structures, creating OrderedBasicBlocks where
// necessary to compare uses/defs in the same block. Doing so allows us to walk
// the minimum number of instructions necessary to compute our def/use ordering.
struct ValueDFS_Compare {
- DenseMap<const BasicBlock *, std::unique_ptr<OrderedBasicBlock>> &OBBMap;
- ValueDFS_Compare(
- DenseMap<const BasicBlock *, std::unique_ptr<OrderedBasicBlock>> &OBBMap)
- : OBBMap(OBBMap) {}
+ OrderedInstructions &OI;
+ ValueDFS_Compare(OrderedInstructions &OI) : OI(OI) {}
+
bool operator()(const ValueDFS &A, const ValueDFS &B) const {
if (&A == &B)
return false;
@@ -196,23 +209,12 @@ struct ValueDFS_Compare {
auto *ArgA = dyn_cast_or_null<Argument>(ADef);
auto *ArgB = dyn_cast_or_null<Argument>(BDef);
- if (ArgA && !ArgB)
- return true;
- if (ArgB && !ArgA)
- return false;
- if (ArgA && ArgB)
- return ArgA->getArgNo() < ArgB->getArgNo();
+ if (ArgA || ArgB)
+ return valueComesBefore(OI, ArgA, ArgB);
auto *AInst = getDefOrUser(ADef, A.U);
auto *BInst = getDefOrUser(BDef, B.U);
-
- auto *BB = AInst->getParent();
- auto LookupResult = OBBMap.find(BB);
- if (LookupResult != OBBMap.end())
- return LookupResult->second->dominates(AInst, BInst);
-
- auto Result = OBBMap.insert({BB, make_unique<OrderedBasicBlock>(BB)});
- return Result.first->second->dominates(AInst, BInst);
+ return valueComesBefore(OI, AInst, BInst);
}
};
@@ -547,38 +549,11 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter,
void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpSet) {
// Sort OpsToRename since we are going to iterate it.
SmallVector<Value *, 8> OpsToRename(OpSet.begin(), OpSet.end());
- std::sort(OpsToRename.begin(), OpsToRename.end(), [&](const Value *A,
- const Value *B) {
- auto *ArgA = dyn_cast_or_null<Argument>(A);
- auto *ArgB = dyn_cast_or_null<Argument>(B);
-
- // If A and B are args, order them based on their arg no.
- if (ArgA && !ArgB)
- return true;
- if (ArgB && !ArgA)
- return false;
- if (ArgA && ArgB)
- return ArgA->getArgNo() < ArgB->getArgNo();
-
- // Else, A are B are instructions.
- // If they belong to different BBs, order them by the dominance of BBs.
- auto *AInst = cast<Instruction>(A);
- auto *BInst = cast<Instruction>(B);
- if (AInst->getParent() != BInst->getParent())
- return DT.dominates(AInst->getParent(), BInst->getParent());
-
- // Else, A and B belong to the same BB.
- // Order A and B by their dominance.
- auto *BB = AInst->getParent();
- auto LookupResult = OBBMap.find(BB);
- if (LookupResult != OBBMap.end())
- return LookupResult->second->dominates(AInst, BInst);
-
- auto Result = OBBMap.insert({BB, make_unique<OrderedBasicBlock>(BB)});
- return Result.first->second->dominates(AInst, BInst);
- });
-
- ValueDFS_Compare Compare(OBBMap);
+ auto Comparator = [&](const Value *A, const Value *B) {
+ return valueComesBefore(OI, A, B);
+ };
+ std::sort(OpsToRename.begin(), OpsToRename.end(), Comparator);
+ ValueDFS_Compare Compare(OI);
// Compute liveness, and rename in O(uses) per Op.
for (auto *Op : OpsToRename) {
unsigned Counter = 0;
@@ -715,7 +690,7 @@ PredicateInfo::getValueInfo(Value *Operand) const {
PredicateInfo::PredicateInfo(Function &F, DominatorTree &DT,
AssumptionCache &AC)
- : F(F), DT(DT), AC(AC) {
+ : F(F), DT(DT), AC(AC), OI(&DT) {
// Push an empty operand info so that we can detect 0 as not finding one
ValueInfos.resize(1);
buildPredicateInfo();
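The factored-out comparator must be a strict weak ordering for std::sort to be safe. A standalone mirror of its three cases with toy types (illustrative only):

    #include <algorithm>
    #include <vector>

    struct Val { bool IsArg; unsigned ArgNo; unsigned Pos; };

    // Arguments order before instructions; arguments order among
    // themselves by argument number; instructions fall back to a
    // position-based test, the role OrderedInstructions plays above.
    static bool comesBefore(const Val &A, const Val &B) {
      if (A.IsArg != B.IsArg)
        return A.IsArg;
      if (A.IsArg)
        return A.ArgNo < B.ArgNo;
      return A.Pos < B.Pos;
    }

    int main() {
      std::vector<Val> Ops = {{false, 0, 7}, {true, 1, 0}, {true, 0, 0}};
      std::sort(Ops.begin(), Ops.end(), comesBefore); // args 0, 1, then the instruction
      return 0;
    }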
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 0970c436e665..e724b0a28c32 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -4781,7 +4781,7 @@ public:
SwitchLookupTable(
Module &M, uint64_t TableSize, ConstantInt *Offset,
const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
- Constant *DefaultValue, const DataLayout &DL);
+ Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);
/// Build instructions with Builder to retrieve the value at
/// the position given by Index in the lookup table.
@@ -4835,7 +4835,7 @@ private:
SwitchLookupTable::SwitchLookupTable(
Module &M, uint64_t TableSize, ConstantInt *Offset,
const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
- Constant *DefaultValue, const DataLayout &DL)
+ Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName)
: SingleValue(nullptr), BitMap(nullptr), BitMapElementTy(nullptr),
LinearOffset(nullptr), LinearMultiplier(nullptr), Array(nullptr) {
assert(Values.size() && "Can't build lookup table without values!");
@@ -4943,7 +4943,7 @@ SwitchLookupTable::SwitchLookupTable(
Array = new GlobalVariable(M, ArrayTy, /*constant=*/true,
GlobalVariable::PrivateLinkage, Initializer,
- "switch.table");
+ "switch.table." + FuncName);
Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
Kind = ArrayKind;
}
@@ -5333,7 +5333,9 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// If using a bitmask, use any value to fill the lookup table holes.
Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI];
- SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultList, DV, DL);
+ StringRef FuncName = SI->getParent()->getParent()->getName();
+ SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultList, DV, DL,
+ FuncName);
Value *Result = Table.BuildLookup(TableIndex, Builder);
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index faa14046b1e3..ec8b0d426265 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -354,7 +354,7 @@ bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) {
typedef const SCEV *(ScalarEvolution::*OperationFunctionTy)(
const SCEV *, const SCEV *, SCEV::NoWrapFlags, unsigned);
typedef const SCEV *(ScalarEvolution::*ExtensionFunctionTy)(
- const SCEV *, Type *);
+ const SCEV *, Type *, unsigned);
OperationFunctionTy Operation;
ExtensionFunctionTy Extension;
@@ -406,11 +406,11 @@ bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) {
IntegerType::get(NarrowTy->getContext(), NarrowTy->getBitWidth() * 2);
const SCEV *A =
- (SE->*Extension)((SE->*Operation)(LHS, RHS, SCEV::FlagAnyWrap, 0u),
- WideTy);
+ (SE->*Extension)((SE->*Operation)(LHS, RHS, SCEV::FlagAnyWrap, 0),
+ WideTy, 0);
const SCEV *B =
- (SE->*Operation)((SE->*Extension)(LHS, WideTy),
- (SE->*Extension)(RHS, WideTy), SCEV::FlagAnyWrap, 0u);
+ (SE->*Operation)((SE->*Extension)(LHS, WideTy, 0),
+ (SE->*Extension)(RHS, WideTy, 0), SCEV::FlagAnyWrap, 0);
if (A != B)
return false;
diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp
deleted file mode 100644
index 78453aaa16ce..000000000000
--- a/lib/Transforms/Vectorize/BBVectorize.cpp
+++ /dev/null
@@ -1,3282 +0,0 @@
-//===- BBVectorize.cpp - A Basic-Block Vectorizer -------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a basic-block vectorization pass. The algorithm was
-// inspired by that used by the Vienna MAP Vectorizor by Franchetti and Kral,
-// et al. It works by looking for chains of pairable operations and then
-// pairing them.
-//
-//===----------------------------------------------------------------------===//
-
-#define BBV_NAME "bb-vectorize"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AliasSetTracker.h"
-#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/ValueHandle.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Vectorize.h"
-#include <algorithm>
-using namespace llvm;
-
-#define DEBUG_TYPE BBV_NAME
-
-static cl::opt<bool>
-IgnoreTargetInfo("bb-vectorize-ignore-target-info", cl::init(false),
- cl::Hidden, cl::desc("Ignore target information"));
-
-static cl::opt<unsigned>
-ReqChainDepth("bb-vectorize-req-chain-depth", cl::init(6), cl::Hidden,
- cl::desc("The required chain depth for vectorization"));
-
-static cl::opt<bool>
-UseChainDepthWithTI("bb-vectorize-use-chain-depth", cl::init(false),
- cl::Hidden, cl::desc("Use the chain depth requirement with"
- " target information"));
-
-static cl::opt<unsigned>
-SearchLimit("bb-vectorize-search-limit", cl::init(400), cl::Hidden,
- cl::desc("The maximum search distance for instruction pairs"));
-
-static cl::opt<bool>
-SplatBreaksChain("bb-vectorize-splat-breaks-chain", cl::init(false), cl::Hidden,
- cl::desc("Replicating one element to a pair breaks the chain"));
-
-static cl::opt<unsigned>
-VectorBits("bb-vectorize-vector-bits", cl::init(128), cl::Hidden,
- cl::desc("The size of the native vector registers"));
-
-static cl::opt<unsigned>
-MaxIter("bb-vectorize-max-iter", cl::init(0), cl::Hidden,
- cl::desc("The maximum number of pairing iterations"));
-
-static cl::opt<bool>
-Pow2LenOnly("bb-vectorize-pow2-len-only", cl::init(false), cl::Hidden,
- cl::desc("Don't try to form non-2^n-length vectors"));
-
-static cl::opt<unsigned>
-MaxInsts("bb-vectorize-max-instr-per-group", cl::init(500), cl::Hidden,
- cl::desc("The maximum number of pairable instructions per group"));
-
-static cl::opt<unsigned>
-MaxPairs("bb-vectorize-max-pairs-per-group", cl::init(3000), cl::Hidden,
- cl::desc("The maximum number of candidate instruction pairs per group"));
-
-static cl::opt<unsigned>
-MaxCandPairsForCycleCheck("bb-vectorize-max-cycle-check-pairs", cl::init(200),
- cl::Hidden, cl::desc("The maximum number of candidate pairs with which to use"
- " a full cycle check"));
-
-static cl::opt<bool>
-NoBools("bb-vectorize-no-bools", cl::init(false), cl::Hidden,
- cl::desc("Don't try to vectorize boolean (i1) values"));
-
-static cl::opt<bool>
-NoInts("bb-vectorize-no-ints", cl::init(false), cl::Hidden,
- cl::desc("Don't try to vectorize integer values"));
-
-static cl::opt<bool>
-NoFloats("bb-vectorize-no-floats", cl::init(false), cl::Hidden,
- cl::desc("Don't try to vectorize floating-point values"));
-
-// FIXME: This should default to false once pointer vector support works.
-static cl::opt<bool>
-NoPointers("bb-vectorize-no-pointers", cl::init(/*false*/ true), cl::Hidden,
- cl::desc("Don't try to vectorize pointer values"));
-
-static cl::opt<bool>
-NoCasts("bb-vectorize-no-casts", cl::init(false), cl::Hidden,
- cl::desc("Don't try to vectorize casting (conversion) operations"));
-
-static cl::opt<bool>
-NoMath("bb-vectorize-no-math", cl::init(false), cl::Hidden,
- cl::desc("Don't try to vectorize floating-point math intrinsics"));
-
-static cl::opt<bool>
- NoBitManipulation("bb-vectorize-no-bitmanip", cl::init(false), cl::Hidden,
- cl::desc("Don't try to vectorize BitManipulation intrinsics"));
-
-static cl::opt<bool>
-NoFMA("bb-vectorize-no-fma", cl::init(false), cl::Hidden,
- cl::desc("Don't try to vectorize the fused-multiply-add intrinsic"));
-
-static cl::opt<bool>
-NoSelect("bb-vectorize-no-select", cl::init(false), cl::Hidden,
- cl::desc("Don't try to vectorize select instructions"));
-
-static cl::opt<bool>
-NoCmp("bb-vectorize-no-cmp", cl::init(false), cl::Hidden,
- cl::desc("Don't try to vectorize comparison instructions"));
-
-static cl::opt<bool>
-NoGEP("bb-vectorize-no-gep", cl::init(false), cl::Hidden,
- cl::desc("Don't try to vectorize getelementptr instructions"));
-
-static cl::opt<bool>
-NoMemOps("bb-vectorize-no-mem-ops", cl::init(false), cl::Hidden,
- cl::desc("Don't try to vectorize loads and stores"));
-
-static cl::opt<bool>
-AlignedOnly("bb-vectorize-aligned-only", cl::init(false), cl::Hidden,
- cl::desc("Only generate aligned loads and stores"));
-
-static cl::opt<bool>
-NoMemOpBoost("bb-vectorize-no-mem-op-boost",
- cl::init(false), cl::Hidden,
- cl::desc("Don't boost the chain-depth contribution of loads and stores"));
-
-static cl::opt<bool>
-FastDep("bb-vectorize-fast-dep", cl::init(false), cl::Hidden,
- cl::desc("Use a fast instruction dependency analysis"));
-
-#ifndef NDEBUG
-static cl::opt<bool>
-DebugInstructionExamination("bb-vectorize-debug-instruction-examination",
- cl::init(false), cl::Hidden,
- cl::desc("When debugging is enabled, output information on the"
- " instruction-examination process"));
-static cl::opt<bool>
-DebugCandidateSelection("bb-vectorize-debug-candidate-selection",
- cl::init(false), cl::Hidden,
- cl::desc("When debugging is enabled, output information on the"
- " candidate-selection process"));
-static cl::opt<bool>
-DebugPairSelection("bb-vectorize-debug-pair-selection",
- cl::init(false), cl::Hidden,
- cl::desc("When debugging is enabled, output information on the"
- " pair-selection process"));
-static cl::opt<bool>
-DebugCycleCheck("bb-vectorize-debug-cycle-check",
- cl::init(false), cl::Hidden,
- cl::desc("When debugging is enabled, output information on the"
- " cycle-checking process"));
-
-static cl::opt<bool>
-PrintAfterEveryPair("bb-vectorize-debug-print-after-every-pair",
- cl::init(false), cl::Hidden,
- cl::desc("When debugging is enabled, dump the basic block after"
- " every pair is fused"));
-#endif
-
-STATISTIC(NumFusedOps, "Number of operations fused by bb-vectorize");
-
-namespace {
- struct BBVectorize : public BasicBlockPass {
- static char ID; // Pass identification, replacement for typeid
-
- const VectorizeConfig Config;
-
- BBVectorize(const VectorizeConfig &C = VectorizeConfig())
- : BasicBlockPass(ID), Config(C) {
- initializeBBVectorizePass(*PassRegistry::getPassRegistry());
- }
-
- BBVectorize(Pass *P, Function &F, const VectorizeConfig &C)
- : BasicBlockPass(ID), Config(C) {
- AA = &P->getAnalysis<AAResultsWrapperPass>().getAAResults();
- DT = &P->getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- SE = &P->getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- TLI = &P->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- TTI = IgnoreTargetInfo
- ? nullptr
- : &P->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- }
-
- typedef std::pair<Value *, Value *> ValuePair;
- typedef std::pair<ValuePair, int> ValuePairWithCost;
- typedef std::pair<ValuePair, size_t> ValuePairWithDepth;
- typedef std::pair<ValuePair, ValuePair> VPPair; // A ValuePair pair
- typedef std::pair<VPPair, unsigned> VPPairWithType;
-
- AliasAnalysis *AA;
- DominatorTree *DT;
- ScalarEvolution *SE;
- const TargetLibraryInfo *TLI;
- const TargetTransformInfo *TTI;
-
- // FIXME: const correct?
-
- bool vectorizePairs(BasicBlock &BB, bool NonPow2Len = false);
-
- bool getCandidatePairs(BasicBlock &BB,
- BasicBlock::iterator &Start,
- DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
- DenseSet<ValuePair> &FixedOrderPairs,
- DenseMap<ValuePair, int> &CandidatePairCostSavings,
- std::vector<Value *> &PairableInsts, bool NonPow2Len);
-
- // FIXME: The current implementation does not account for pairs that
- // are connected in multiple ways. For example:
- // C1 = A1 / A2; C2 = A2 / A1 (which may be both direct and a swap)
- enum PairConnectionType {
- PairConnectionDirect,
- PairConnectionSwap,
- PairConnectionSplat
- };
-
- void computeConnectedPairs(
- DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
- DenseSet<ValuePair> &CandidatePairsSet,
- std::vector<Value *> &PairableInsts,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes);
-
- void buildDepMap(BasicBlock &BB,
- DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
- std::vector<Value *> &PairableInsts,
- DenseSet<ValuePair> &PairableInstUsers);
-
- void choosePairs(DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
- DenseSet<ValuePair> &CandidatePairsSet,
- DenseMap<ValuePair, int> &CandidatePairCostSavings,
- std::vector<Value *> &PairableInsts,
- DenseSet<ValuePair> &FixedOrderPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps,
- DenseSet<ValuePair> &PairableInstUsers,
- DenseMap<Value *, Value *>& ChosenPairs);
-
- void fuseChosenPairs(BasicBlock &BB,
- std::vector<Value *> &PairableInsts,
- DenseMap<Value *, Value *>& ChosenPairs,
- DenseSet<ValuePair> &FixedOrderPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps);
-
-
- bool isInstVectorizable(Instruction *I, bool &IsSimpleLoadStore);
-
- bool areInstsCompatible(Instruction *I, Instruction *J,
- bool IsSimpleLoadStore, bool NonPow2Len,
- int &CostSavings, int &FixedOrder);
-
- bool trackUsesOfI(DenseSet<Value *> &Users,
- AliasSetTracker &WriteSet, Instruction *I,
- Instruction *J, bool UpdateUsers = true,
- DenseSet<ValuePair> *LoadMoveSetPairs = nullptr);
-
- void computePairsConnectedTo(
- DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
- DenseSet<ValuePair> &CandidatePairsSet,
- std::vector<Value *> &PairableInsts,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes,
- ValuePair P);
-
- bool pairsConflict(ValuePair P, ValuePair Q,
- DenseSet<ValuePair> &PairableInstUsers,
- DenseMap<ValuePair, std::vector<ValuePair> >
- *PairableInstUserMap = nullptr,
- DenseSet<VPPair> *PairableInstUserPairSet = nullptr);
-
- bool pairWillFormCycle(ValuePair P,
- DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUsers,
- DenseSet<ValuePair> &CurrentPairs);
-
- void pruneDAGFor(
- DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
- std::vector<Value *> &PairableInsts,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
- DenseSet<ValuePair> &PairableInstUsers,
- DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
- DenseSet<VPPair> &PairableInstUserPairSet,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseMap<ValuePair, size_t> &DAG,
- DenseSet<ValuePair> &PrunedDAG, ValuePair J,
- bool UseCycleCheck);
-
- void buildInitialDAGFor(
- DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
- DenseSet<ValuePair> &CandidatePairsSet,
- std::vector<Value *> &PairableInsts,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
- DenseSet<ValuePair> &PairableInstUsers,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseMap<ValuePair, size_t> &DAG, ValuePair J);
-
- void findBestDAGFor(
- DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
- DenseSet<ValuePair> &CandidatePairsSet,
- DenseMap<ValuePair, int> &CandidatePairCostSavings,
- std::vector<Value *> &PairableInsts,
- DenseSet<ValuePair> &FixedOrderPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps,
- DenseSet<ValuePair> &PairableInstUsers,
- DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
- DenseSet<VPPair> &PairableInstUserPairSet,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseSet<ValuePair> &BestDAG, size_t &BestMaxDepth,
- int &BestEffSize, Value *II, std::vector<Value *>&JJ,
- bool UseCycleCheck);
-
- Value *getReplacementPointerInput(LLVMContext& Context, Instruction *I,
- Instruction *J, unsigned o);
-
- void fillNewShuffleMask(LLVMContext& Context, Instruction *J,
- unsigned MaskOffset, unsigned NumInElem,
- unsigned NumInElem1, unsigned IdxOffset,
- std::vector<Constant*> &Mask);
-
- Value *getReplacementShuffleMask(LLVMContext& Context, Instruction *I,
- Instruction *J);
-
- bool expandIEChain(LLVMContext& Context, Instruction *I, Instruction *J,
- unsigned o, Value *&LOp, unsigned numElemL,
- Type *ArgTypeL, Type *ArgTypeR, bool IBeforeJ,
- unsigned IdxOff = 0);
-
- Value *getReplacementInput(LLVMContext& Context, Instruction *I,
- Instruction *J, unsigned o, bool IBeforeJ);
-
- void getReplacementInputsForPair(LLVMContext& Context, Instruction *I,
- Instruction *J, SmallVectorImpl<Value *> &ReplacedOperands,
- bool IBeforeJ);
-
- void replaceOutputsOfPair(LLVMContext& Context, Instruction *I,
- Instruction *J, Instruction *K,
- Instruction *&InsertionPt, Instruction *&K1,
- Instruction *&K2);
-
- void collectPairLoadMoveSet(BasicBlock &BB,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseMap<Value *, std::vector<Value *> > &LoadMoveSet,
- DenseSet<ValuePair> &LoadMoveSetPairs,
- Instruction *I);
-
- void collectLoadMoveSet(BasicBlock &BB,
- std::vector<Value *> &PairableInsts,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseMap<Value *, std::vector<Value *> > &LoadMoveSet,
- DenseSet<ValuePair> &LoadMoveSetPairs);
-
- bool canMoveUsesOfIAfterJ(BasicBlock &BB,
- DenseSet<ValuePair> &LoadMoveSetPairs,
- Instruction *I, Instruction *J);
-
- void moveUsesOfIAfterJ(BasicBlock &BB,
- DenseSet<ValuePair> &LoadMoveSetPairs,
- Instruction *&InsertionPt,
- Instruction *I, Instruction *J);
-
- bool vectorizeBB(BasicBlock &BB) {
- if (skipBasicBlock(BB))
- return false;
- if (!DT->isReachableFromEntry(&BB)) {
- DEBUG(dbgs() << "BBV: skipping unreachable " << BB.getName() <<
- " in " << BB.getParent()->getName() << "\n");
- return false;
- }
-
- DEBUG(if (TTI) dbgs() << "BBV: using target information\n");
-
- bool changed = false;
- // Iterate a sufficient number of times to merge types of size 1 bit,
- // then 2 bits, then 4, etc., up to half of the bit width of the
- // target's vector registers.
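- // For example (an illustrative sketch): with 256-bit vector registers
- // and no TTI, the loop below tries v = 2, 4, ..., 256, stopping early
- // once an iteration makes no change or Config.MaxIter is reached.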
- unsigned n = 1;
- for (unsigned v = 2;
- (TTI || v <= Config.VectorBits) &&
- (!Config.MaxIter || n <= Config.MaxIter);
- v *= 2, ++n) {
- DEBUG(dbgs() << "BBV: fusing loop #" << n <<
- " for " << BB.getName() << " in " <<
- BB.getParent()->getName() << "...\n");
- if (vectorizePairs(BB))
- changed = true;
- else
- break;
- }
-
- if (changed && !Pow2LenOnly) {
- ++n;
- for (; !Config.MaxIter || n <= Config.MaxIter; ++n) {
- DEBUG(dbgs() << "BBV: fusing for non-2^n-length vectors loop #: " <<
- n << " for " << BB.getName() << " in " <<
- BB.getParent()->getName() << "...\n");
- if (!vectorizePairs(BB, true)) break;
- }
- }
-
- DEBUG(dbgs() << "BBV: done!\n");
- return changed;
- }
-
- bool runOnBasicBlock(BasicBlock &BB) override {
- // OptimizeNone check deferred to vectorizeBB().
-
- AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- TTI = IgnoreTargetInfo
- ? nullptr
- : &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
- *BB.getParent());
-
- return vectorizeBB(BB);
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- BasicBlockPass::getAnalysisUsage(AU);
- AU.addRequired<AAResultsWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<ScalarEvolutionWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- AU.addPreserved<ScalarEvolutionWrapperPass>();
- AU.addPreserved<SCEVAAWrapperPass>();
- AU.setPreservesCFG();
- }
-
- static inline VectorType *getVecTypeForPair(Type *ElemTy, Type *Elem2Ty) {
- assert(ElemTy->getScalarType() == Elem2Ty->getScalarType() &&
- "Cannot form vector from incompatible scalar types");
- Type *STy = ElemTy->getScalarType();
-
- unsigned numElem;
- if (VectorType *VTy = dyn_cast<VectorType>(ElemTy)) {
- numElem = VTy->getNumElements();
- } else {
- numElem = 1;
- }
-
- if (VectorType *VTy = dyn_cast<VectorType>(Elem2Ty)) {
- numElem += VTy->getNumElements();
- } else {
- numElem += 1;
- }
-
- return VectorType::get(STy, numElem);
- }
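- // For illustration: pairing a <2 x float> with a scalar float yields
- // <3 x float>, and pairing two i32 scalars yields <2 x i32>.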
-
- static inline void getInstructionTypes(Instruction *I,
- Type *&T1, Type *&T2) {
- if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- // For stores, it is the value type, not the pointer type, that matters
- // because the value is what will come from a vector register.
-
- Value *IVal = SI->getValueOperand();
- T1 = IVal->getType();
- } else {
- T1 = I->getType();
- }
-
- if (CastInst *CI = dyn_cast<CastInst>(I))
- T2 = CI->getSrcTy();
- else
- T2 = T1;
-
- if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
- T2 = SI->getCondition()->getType();
- } else if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(I)) {
- T2 = SI->getOperand(0)->getType();
- } else if (CmpInst *CI = dyn_cast<CmpInst>(I)) {
- T2 = CI->getOperand(0)->getType();
- }
- }
-
- // Returns the weight associated with the provided value. A chain of
- // candidate pairs has a length given by the sum of the weights of its
- // members (one weight per pair; the weight of each member of the pair
- // is assumed to be the same). This length is then compared to the
- // chain-length threshold to determine if a given chain is significant
- // enough to be vectorized. The length is also used in comparing
- // candidate chains where longer chains are considered to be better.
- // Note: when this function returns 0, the resulting instructions are
- // not actually fused.
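- // A worked sketch, assuming the default required chain depth of 6:
- // a load -> add -> store chain of pairs has length 3 + 1 + 3 = 7
- // (loads and stores get ReqChainDepth/2 = 3 each, the add gets 1),
- // so it clears the threshold; a chain consisting only of insert- and
- // extractelement instructions has length 0 and is never selected.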
- inline size_t getDepthFactor(Value *V) {
- // InsertElement and ExtractElement have a depth factor of zero. This is
- // for two reasons: First, they cannot be usefully fused. Second, because
- // the pass generates a lot of these, they can confuse the simple metric
- // used to compare the dags in the next iteration. Thus, giving them a
- // weight of zero allows the pass to essentially ignore them in
- // subsequent iterations when looking for vectorization opportunities
- // while still tracking dependency chains that flow through those
- // instructions.
- if (isa<InsertElementInst>(V) || isa<ExtractElementInst>(V))
- return 0;
-
- // Give a load or store half of the required depth so that load/store
- // pairs will vectorize.
- if (!Config.NoMemOpBoost && (isa<LoadInst>(V) || isa<StoreInst>(V)))
- return Config.ReqChainDepth/2;
-
- return 1;
- }
-
- // Returns the cost of the provided instruction using TTI.
- // This does not handle loads and stores.
- unsigned getInstrCost(unsigned Opcode, Type *T1, Type *T2,
- TargetTransformInfo::OperandValueKind Op1VK =
- TargetTransformInfo::OK_AnyValue,
- TargetTransformInfo::OperandValueKind Op2VK =
- TargetTransformInfo::OK_AnyValue,
- const Instruction *I = nullptr) {
- switch (Opcode) {
- default: break;
- case Instruction::GetElementPtr:
- // We mark this instruction as zero-cost because scalar GEPs are
- // usually folded into the addressing mode of the memory instruction.
- // At the moment we don't generate vector GEPs.
- return 0;
- case Instruction::Br:
- return TTI->getCFInstrCost(Opcode);
- case Instruction::PHI:
- return 0;
- case Instruction::Add:
- case Instruction::FAdd:
- case Instruction::Sub:
- case Instruction::FSub:
- case Instruction::Mul:
- case Instruction::FMul:
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::FDiv:
- case Instruction::URem:
- case Instruction::SRem:
- case Instruction::FRem:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- return TTI->getArithmeticInstrCost(Opcode, T1, Op1VK, Op2VK);
- case Instruction::Select:
- case Instruction::ICmp:
- case Instruction::FCmp:
- return TTI->getCmpSelInstrCost(Opcode, T1, T2, I);
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::FPExt:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::SIToFP:
- case Instruction::UIToFP:
- case Instruction::Trunc:
- case Instruction::FPTrunc:
- case Instruction::BitCast:
- case Instruction::ShuffleVector:
- return TTI->getCastInstrCost(Opcode, T1, T2, I);
- }
-
- return 1;
- }
-
- // This determines the relative offset of two loads or stores, returning
- // true if the offset could be determined to be some constant value.
- // For example, if OffsetInElmts == 1, then J accesses the memory directly
- // after I; if OffsetInElmts == -1 then I accesses the memory
- // directly after J.
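- // For example: for two i32 loads from p and p+4 (in bytes), the SCEV
- // difference is the constant 4 and the element store size is 4, so
- // OffsetInElmts == 1 and J accesses the element directly after I.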
- bool getPairPtrInfo(Instruction *I, Instruction *J,
- Value *&IPtr, Value *&JPtr, unsigned &IAlignment, unsigned &JAlignment,
- unsigned &IAddressSpace, unsigned &JAddressSpace,
- int64_t &OffsetInElmts, bool ComputeOffset = true) {
- OffsetInElmts = 0;
- if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- LoadInst *LJ = cast<LoadInst>(J);
- IPtr = LI->getPointerOperand();
- JPtr = LJ->getPointerOperand();
- IAlignment = LI->getAlignment();
- JAlignment = LJ->getAlignment();
- IAddressSpace = LI->getPointerAddressSpace();
- JAddressSpace = LJ->getPointerAddressSpace();
- } else {
- StoreInst *SI = cast<StoreInst>(I), *SJ = cast<StoreInst>(J);
- IPtr = SI->getPointerOperand();
- JPtr = SJ->getPointerOperand();
- IAlignment = SI->getAlignment();
- JAlignment = SJ->getAlignment();
- IAddressSpace = SI->getPointerAddressSpace();
- JAddressSpace = SJ->getPointerAddressSpace();
- }
-
- if (!ComputeOffset)
- return true;
-
- const SCEV *IPtrSCEV = SE->getSCEV(IPtr);
- const SCEV *JPtrSCEV = SE->getSCEV(JPtr);
-
- // If this is a trivial offset, then we'll get something like
- // 1*sizeof(type). With target data, which we need anyway, this will get
- // constant folded into a number.
- const SCEV *OffsetSCEV = SE->getMinusSCEV(JPtrSCEV, IPtrSCEV);
- if (const SCEVConstant *ConstOffSCEV =
- dyn_cast<SCEVConstant>(OffsetSCEV)) {
- ConstantInt *IntOff = ConstOffSCEV->getValue();
- int64_t Offset = IntOff->getSExtValue();
- const DataLayout &DL = I->getModule()->getDataLayout();
- Type *VTy = IPtr->getType()->getPointerElementType();
- int64_t VTyTSS = (int64_t)DL.getTypeStoreSize(VTy);
-
- Type *VTy2 = JPtr->getType()->getPointerElementType();
- if (VTy != VTy2 && Offset < 0) {
- int64_t VTy2TSS = (int64_t)DL.getTypeStoreSize(VTy2);
- OffsetInElmts = Offset/VTy2TSS;
- return (std::abs(Offset) % VTy2TSS) == 0;
- }
-
- OffsetInElmts = Offset/VTyTSS;
- return (std::abs(Offset) % VTyTSS) == 0;
- }
-
- return false;
- }
-
- // Returns true if the provided CallInst represents an intrinsic that can
- // be vectorized.
- bool isVectorizableIntrinsic(CallInst* I) {
- Function *F = I->getCalledFunction();
- if (!F) return false;
-
- Intrinsic::ID IID = F->getIntrinsicID();
- if (!IID) return false;
-
- switch(IID) {
- default:
- return false;
- case Intrinsic::sqrt:
- case Intrinsic::powi:
- case Intrinsic::sin:
- case Intrinsic::cos:
- case Intrinsic::log:
- case Intrinsic::log2:
- case Intrinsic::log10:
- case Intrinsic::exp:
- case Intrinsic::exp2:
- case Intrinsic::pow:
- case Intrinsic::round:
- case Intrinsic::copysign:
- case Intrinsic::ceil:
- case Intrinsic::nearbyint:
- case Intrinsic::rint:
- case Intrinsic::trunc:
- case Intrinsic::floor:
- case Intrinsic::fabs:
- case Intrinsic::minnum:
- case Intrinsic::maxnum:
- return Config.VectorizeMath;
- case Intrinsic::bswap:
- case Intrinsic::ctpop:
- case Intrinsic::ctlz:
- case Intrinsic::cttz:
- return Config.VectorizeBitManipulations;
- case Intrinsic::fma:
- case Intrinsic::fmuladd:
- return Config.VectorizeFMA;
- }
- }
-
- bool isPureIEChain(InsertElementInst *IE) {
- InsertElementInst *IENext = IE;
- do {
- if (!isa<UndefValue>(IENext->getOperand(0)) &&
- !isa<InsertElementInst>(IENext->getOperand(0))) {
- return false;
- }
- } while ((IENext =
- dyn_cast<InsertElementInst>(IENext->getOperand(0))));
-
- return true;
- }
- };
-
- // This function implements one vectorization iteration on the provided
- // basic block. It returns true if the block is changed.
- bool BBVectorize::vectorizePairs(BasicBlock &BB, bool NonPow2Len) {
- bool ShouldContinue;
- BasicBlock::iterator Start = BB.getFirstInsertionPt();
-
- std::vector<Value *> AllPairableInsts;
- DenseMap<Value *, Value *> AllChosenPairs;
- DenseSet<ValuePair> AllFixedOrderPairs;
- DenseMap<VPPair, unsigned> AllPairConnectionTypes;
- DenseMap<ValuePair, std::vector<ValuePair> > AllConnectedPairs,
- AllConnectedPairDeps;
-
- do {
- std::vector<Value *> PairableInsts;
- DenseMap<Value *, std::vector<Value *> > CandidatePairs;
- DenseSet<ValuePair> FixedOrderPairs;
- DenseMap<ValuePair, int> CandidatePairCostSavings;
- ShouldContinue = getCandidatePairs(BB, Start, CandidatePairs,
- FixedOrderPairs,
- CandidatePairCostSavings,
- PairableInsts, NonPow2Len);
- if (PairableInsts.empty()) continue;
-
- // Build the candidate pair set for faster lookups.
- DenseSet<ValuePair> CandidatePairsSet;
- for (DenseMap<Value *, std::vector<Value *> >::iterator I =
- CandidatePairs.begin(), E = CandidatePairs.end(); I != E; ++I)
- for (std::vector<Value *>::iterator J = I->second.begin(),
- JE = I->second.end(); J != JE; ++J)
- CandidatePairsSet.insert(ValuePair(I->first, *J));
-
- // Now we have a map of all of the pairable instructions and we need to
- // select the best possible pairing. A good pairing is one such that the
- // users of the pair are also paired. This defines a (directed) forest
- // over the pairs such that two pairs are connected iff the second pair
- // uses the first.
-
- // Note that it only matters that both members of the second pair use some
- // element of the first pair (to allow for splatting).
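- // For example: if (A1,A2) is a candidate pair and another candidate
- // pair (C1,C2) has C1 using A1 and C2 using A2, then there is an edge
- // from (A1,A2) to (C1,C2) in this forest.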
-
- DenseMap<ValuePair, std::vector<ValuePair> > ConnectedPairs,
- ConnectedPairDeps;
- DenseMap<VPPair, unsigned> PairConnectionTypes;
- computeConnectedPairs(CandidatePairs, CandidatePairsSet,
- PairableInsts, ConnectedPairs, PairConnectionTypes);
- if (ConnectedPairs.empty()) continue;
-
- for (DenseMap<ValuePair, std::vector<ValuePair> >::iterator
- I = ConnectedPairs.begin(), IE = ConnectedPairs.end();
- I != IE; ++I)
- for (std::vector<ValuePair>::iterator J = I->second.begin(),
- JE = I->second.end(); J != JE; ++J)
- ConnectedPairDeps[*J].push_back(I->first);
-
- // Build the pairable-instruction dependency map
- DenseSet<ValuePair> PairableInstUsers;
- buildDepMap(BB, CandidatePairs, PairableInsts, PairableInstUsers);
-
- // There is now a graph of the connected pairs. For each variable, pick
- // the pairing with the largest dag meeting the depth requirement on at
- // least one branch. Then select all pairings that are part of that dag
- // and remove them from the list of available pairings and pairable
- // variables.
-
- DenseMap<Value *, Value *> ChosenPairs;
- choosePairs(CandidatePairs, CandidatePairsSet,
- CandidatePairCostSavings,
- PairableInsts, FixedOrderPairs, PairConnectionTypes,
- ConnectedPairs, ConnectedPairDeps,
- PairableInstUsers, ChosenPairs);
-
- if (ChosenPairs.empty()) continue;
- AllPairableInsts.insert(AllPairableInsts.end(), PairableInsts.begin(),
- PairableInsts.end());
- AllChosenPairs.insert(ChosenPairs.begin(), ChosenPairs.end());
-
- // Only for the chosen pairs, propagate information on fixed-order pairs,
- // pair connections, and their types to the data structures used by the
- // pair fusion procedures.
- for (DenseMap<Value *, Value *>::iterator I = ChosenPairs.begin(),
- IE = ChosenPairs.end(); I != IE; ++I) {
- if (FixedOrderPairs.count(*I))
- AllFixedOrderPairs.insert(*I);
- else if (FixedOrderPairs.count(ValuePair(I->second, I->first)))
- AllFixedOrderPairs.insert(ValuePair(I->second, I->first));
-
- for (DenseMap<Value *, Value *>::iterator J = ChosenPairs.begin();
- J != IE; ++J) {
- DenseMap<VPPair, unsigned>::iterator K =
- PairConnectionTypes.find(VPPair(*I, *J));
- if (K != PairConnectionTypes.end()) {
- AllPairConnectionTypes.insert(*K);
- } else {
- K = PairConnectionTypes.find(VPPair(*J, *I));
- if (K != PairConnectionTypes.end())
- AllPairConnectionTypes.insert(*K);
- }
- }
- }
-
- for (DenseMap<ValuePair, std::vector<ValuePair> >::iterator
- I = ConnectedPairs.begin(), IE = ConnectedPairs.end();
- I != IE; ++I)
- for (std::vector<ValuePair>::iterator J = I->second.begin(),
- JE = I->second.end(); J != JE; ++J)
- if (AllPairConnectionTypes.count(VPPair(I->first, *J))) {
- AllConnectedPairs[I->first].push_back(*J);
- AllConnectedPairDeps[*J].push_back(I->first);
- }
- } while (ShouldContinue);
-
- if (AllChosenPairs.empty()) return false;
- NumFusedOps += AllChosenPairs.size();
-
- // A set of pairs has now been selected. It is now necessary to replace the
- // paired instructions with vector instructions. For this procedure each
- // operand must be replaced with a vector operand. This vector is formed
- // by using build_vector on the old operands. The replaced values are then
- // replaced with a vector_extract on the result. Subsequent optimization
- // passes should coalesce the build/extract combinations.
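- // A hypothetical IR sketch of the rewrite (names illustrative only):
- //   %x = add i32 %a, %b
- //   %y = add i32 %c, %d
- // becomes a single <2 x i32> add whose operands are built from
- // {%a, %c} and {%b, %d}, with %x and %y replaced by extractelement
- // of lanes 0 and 1 of the result.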
-
- fuseChosenPairs(BB, AllPairableInsts, AllChosenPairs, AllFixedOrderPairs,
- AllPairConnectionTypes,
- AllConnectedPairs, AllConnectedPairDeps);
-
- // It is important to cleanup here so that future iterations of this
- // function have less work to do.
- (void)SimplifyInstructionsInBlock(&BB, TLI);
- return true;
- }
-
- // This function returns true if the provided instruction is capable of being
- // fused into a vector instruction. This determination is based only on the
- // type and other attributes of the instruction.
- bool BBVectorize::isInstVectorizable(Instruction *I,
- bool &IsSimpleLoadStore) {
- IsSimpleLoadStore = false;
-
- if (CallInst *C = dyn_cast<CallInst>(I)) {
- if (!isVectorizableIntrinsic(C))
- return false;
- } else if (LoadInst *L = dyn_cast<LoadInst>(I)) {
- // Vectorize simple loads if possible:
- IsSimpleLoadStore = L->isSimple();
- if (!IsSimpleLoadStore || !Config.VectorizeMemOps)
- return false;
- } else if (StoreInst *S = dyn_cast<StoreInst>(I)) {
- // Vectorize simple stores if possible:
- IsSimpleLoadStore = S->isSimple();
- if (!IsSimpleLoadStore || !Config.VectorizeMemOps)
- return false;
- } else if (CastInst *C = dyn_cast<CastInst>(I)) {
- // We can vectorize casts, but not casts of pointer types, etc.
- if (!Config.VectorizeCasts)
- return false;
-
- Type *SrcTy = C->getSrcTy();
- if (!SrcTy->isSingleValueType())
- return false;
-
- Type *DestTy = C->getDestTy();
- if (!DestTy->isSingleValueType())
- return false;
- } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
- if (!Config.VectorizeSelect)
- return false;
- // We can vectorize a select if either all operands are scalars,
- // or all operands are vectors. Trying to "widen" a select between
- // vectors that has a scalar condition results in a malformed select.
- // FIXME: We could probably be smarter about this by rewriting the select
- // with different types instead.
- return (SI->getCondition()->getType()->isVectorTy() ==
- SI->getTrueValue()->getType()->isVectorTy());
- } else if (isa<CmpInst>(I)) {
- if (!Config.VectorizeCmp)
- return false;
- } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(I)) {
- if (!Config.VectorizeGEP)
- return false;
-
- // Currently, vector GEPs exist only with one index.
- if (G->getNumIndices() != 1)
- return false;
- } else if (!(I->isBinaryOp() || isa<ShuffleVectorInst>(I) ||
- isa<ExtractElementInst>(I) || isa<InsertElementInst>(I))) {
- return false;
- }
-
- Type *T1, *T2;
- getInstructionTypes(I, T1, T2);
-
- // Not every type can be vectorized...
- if (!(VectorType::isValidElementType(T1) || T1->isVectorTy()) ||
- !(VectorType::isValidElementType(T2) || T2->isVectorTy()))
- return false;
-
- if (T1->getScalarSizeInBits() == 1) {
- if (!Config.VectorizeBools)
- return false;
- } else {
- if (!Config.VectorizeInts && T1->isIntOrIntVectorTy())
- return false;
- }
-
- if (T2->getScalarSizeInBits() == 1) {
- if (!Config.VectorizeBools)
- return false;
- } else {
- if (!Config.VectorizeInts && T2->isIntOrIntVectorTy())
- return false;
- }
-
- if (!Config.VectorizeFloats
- && (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy()))
- return false;
-
- // Don't vectorize target-specific types.
- if (T1->isX86_FP80Ty() || T1->isPPC_FP128Ty() || T1->isX86_MMXTy())
- return false;
- if (T2->isX86_FP80Ty() || T2->isPPC_FP128Ty() || T2->isX86_MMXTy())
- return false;
-
- if (!Config.VectorizePointers && (T1->getScalarType()->isPointerTy() ||
- T2->getScalarType()->isPointerTy()))
- return false;
-
- if (!TTI && (T1->getPrimitiveSizeInBits() >= Config.VectorBits ||
- T2->getPrimitiveSizeInBits() >= Config.VectorBits))
- return false;
-
- return true;
- }
-
- // This function returns true if the two provided instructions are compatible
- // (meaning that they can be fused into a vector instruction). This assumes
- // that I has already been determined to be vectorizable and that J is not
- // in the use dag of I.
- bool BBVectorize::areInstsCompatible(Instruction *I, Instruction *J,
- bool IsSimpleLoadStore, bool NonPow2Len,
- int &CostSavings, int &FixedOrder) {
- DEBUG(if (DebugInstructionExamination) dbgs() << "BBV: looking at " << *I <<
- " <-> " << *J << "\n");
-
- CostSavings = 0;
- FixedOrder = 0;
-
- // Loads and stores can be merged if they have different alignments,
- // but are otherwise the same.
- if (!J->isSameOperationAs(I, Instruction::CompareIgnoringAlignment |
- (NonPow2Len ? Instruction::CompareUsingScalarTypes : 0)))
- return false;
-
- Type *IT1, *IT2, *JT1, *JT2;
- getInstructionTypes(I, IT1, IT2);
- getInstructionTypes(J, JT1, JT2);
- unsigned MaxTypeBits = std::max(
- IT1->getPrimitiveSizeInBits() + JT1->getPrimitiveSizeInBits(),
- IT2->getPrimitiveSizeInBits() + JT2->getPrimitiveSizeInBits());
- if (!TTI && MaxTypeBits > Config.VectorBits)
- return false;
-
- // FIXME: handle addsub-type operations!
-
- if (IsSimpleLoadStore) {
- Value *IPtr, *JPtr;
- unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace;
- int64_t OffsetInElmts = 0;
- if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
- IAddressSpace, JAddressSpace, OffsetInElmts) &&
- std::abs(OffsetInElmts) == 1) {
- FixedOrder = (int) OffsetInElmts;
- unsigned BottomAlignment = IAlignment;
- if (OffsetInElmts < 0) BottomAlignment = JAlignment;
-
- Type *aTypeI = isa<StoreInst>(I) ?
- cast<StoreInst>(I)->getValueOperand()->getType() : I->getType();
- Type *aTypeJ = isa<StoreInst>(J) ?
- cast<StoreInst>(J)->getValueOperand()->getType() : J->getType();
- Type *VType = getVecTypeForPair(aTypeI, aTypeJ);
-
- if (Config.AlignedOnly) {
- // An aligned load or store is possible only if the instruction
- // with the lower offset has an alignment suitable for the
- // vector type.
- const DataLayout &DL = I->getModule()->getDataLayout();
- unsigned VecAlignment = DL.getPrefTypeAlignment(VType);
- if (BottomAlignment < VecAlignment)
- return false;
- }
-
- if (TTI) {
- unsigned ICost = TTI->getMemoryOpCost(I->getOpcode(), aTypeI,
- IAlignment, IAddressSpace);
- unsigned JCost = TTI->getMemoryOpCost(J->getOpcode(), aTypeJ,
- JAlignment, JAddressSpace);
- unsigned VCost = TTI->getMemoryOpCost(I->getOpcode(), VType,
- BottomAlignment,
- IAddressSpace);
-
- ICost += TTI->getAddressComputationCost(aTypeI);
- JCost += TTI->getAddressComputationCost(aTypeJ);
- VCost += TTI->getAddressComputationCost(VType);
-
- if (VCost > ICost + JCost)
- return false;
-
- // We don't want to fuse to a type that will be split, even
- // if the two input types will also be split and there is no other
- // associated cost.
- unsigned VParts = TTI->getNumberOfParts(VType);
- if (VParts > 1)
- return false;
- else if (!VParts && VCost == ICost + JCost)
- return false;
-
- CostSavings = ICost + JCost - VCost;
- }
- } else {
- return false;
- }
- } else if (TTI) {
- TargetTransformInfo::OperandValueKind Op1VK =
- TargetTransformInfo::OK_AnyValue;
- TargetTransformInfo::OperandValueKind Op2VK =
- TargetTransformInfo::OK_AnyValue;
- unsigned ICost = getInstrCost(I->getOpcode(), IT1, IT2, Op1VK, Op2VK, I);
- unsigned JCost = getInstrCost(J->getOpcode(), JT1, JT2, Op1VK, Op2VK, J);
- Type *VT1 = getVecTypeForPair(IT1, JT1),
- *VT2 = getVecTypeForPair(IT2, JT2);
-
- // On some targets (X86, for example) the cost of a vector shift may
- // vary depending on whether the second operand is a uniform or
- // non-uniform constant.
- switch (I->getOpcode()) {
- default : break;
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
-
- // If both I and J are scalar shifts by constant, then the
- // merged vector shift count would be either a constant splat value
- // or a non-uniform vector of constants.
- if (ConstantInt *CII = dyn_cast<ConstantInt>(I->getOperand(1))) {
- if (ConstantInt *CIJ = dyn_cast<ConstantInt>(J->getOperand(1)))
- Op2VK = CII == CIJ ? TargetTransformInfo::OK_UniformConstantValue :
- TargetTransformInfo::OK_NonUniformConstantValue;
- } else {
- // Check for a splat of a constant or for a non-uniform vector
- // of constants.
- Value *IOp = I->getOperand(1);
- Value *JOp = J->getOperand(1);
- if ((isa<ConstantVector>(IOp) || isa<ConstantDataVector>(IOp)) &&
- (isa<ConstantVector>(JOp) || isa<ConstantDataVector>(JOp))) {
- Op2VK = TargetTransformInfo::OK_NonUniformConstantValue;
- Constant *SplatValue = cast<Constant>(IOp)->getSplatValue();
- if (SplatValue != nullptr &&
- SplatValue == cast<Constant>(JOp)->getSplatValue())
- Op2VK = TargetTransformInfo::OK_UniformConstantValue;
- }
- }
- }
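- // For example: pairing "shl i32 %x, 4" with "shl i32 %y, 4" gives a
- // uniform constant splat shift amount, while pairing shifts by 4 and
- // by 5 gives a non-uniform vector of constants.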
-
- // Note that this procedure is incorrect for insert and extract element
- // instructions (because combining these often results in a shuffle),
- // but this cost is ignored (because insert and extract element
- // instructions are assigned a zero depth factor and are not really
- // fused in general).
- unsigned VCost = getInstrCost(I->getOpcode(), VT1, VT2, Op1VK, Op2VK, I);
-
- if (VCost > ICost + JCost)
- return false;
-
- // We don't want to fuse to a type that will be split, even
- // if the two input types will also be split and there is no other
- // associated cost.
- unsigned VParts1 = TTI->getNumberOfParts(VT1),
- VParts2 = TTI->getNumberOfParts(VT2);
- if (VParts1 > 1 || VParts2 > 1)
- return false;
- else if ((!VParts1 || !VParts2) && VCost == ICost + JCost)
- return false;
-
- CostSavings = ICost + JCost - VCost;
- }
-
- // The powi, ctlz, and cttz intrinsics are special because only the
- // first argument is vectorized; the second arguments must be equal.
- CallInst *CI = dyn_cast<CallInst>(I);
- Function *FI;
- if (CI && (FI = CI->getCalledFunction())) {
- Intrinsic::ID IID = FI->getIntrinsicID();
- if (IID == Intrinsic::powi || IID == Intrinsic::ctlz ||
- IID == Intrinsic::cttz) {
- Value *A1I = CI->getArgOperand(1),
- *A1J = cast<CallInst>(J)->getArgOperand(1);
- const SCEV *A1ISCEV = SE->getSCEV(A1I),
- *A1JSCEV = SE->getSCEV(A1J);
- return (A1ISCEV == A1JSCEV);
- }
-
- if (IID && TTI) {
- FastMathFlags FMFCI;
- if (auto *FPMOCI = dyn_cast<FPMathOperator>(CI))
- FMFCI = FPMOCI->getFastMathFlags();
- SmallVector<Value *, 4> IArgs(CI->arg_operands());
- unsigned ICost = TTI->getIntrinsicInstrCost(IID, IT1, IArgs, FMFCI);
-
- CallInst *CJ = cast<CallInst>(J);
-
- FastMathFlags FMFCJ;
- if (auto *FPMOCJ = dyn_cast<FPMathOperator>(CJ))
- FMFCJ = FPMOCJ->getFastMathFlags();
-
- SmallVector<Value *, 4> JArgs(CJ->arg_operands());
- unsigned JCost = TTI->getIntrinsicInstrCost(IID, JT1, JArgs, FMFCJ);
-
- assert(CI->getNumArgOperands() == CJ->getNumArgOperands() &&
- "Intrinsic argument counts differ");
- SmallVector<Type*, 4> Tys;
- SmallVector<Value *, 4> VecArgs;
- for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
- if ((IID == Intrinsic::powi || IID == Intrinsic::ctlz ||
- IID == Intrinsic::cttz) && i == 1) {
- Tys.push_back(CI->getArgOperand(i)->getType());
- VecArgs.push_back(CI->getArgOperand(i));
- }
- else {
- Tys.push_back(getVecTypeForPair(CI->getArgOperand(i)->getType(),
- CJ->getArgOperand(i)->getType()));
- // Add both operands, and then count their scalarization overhead
- // with VF 1.
- VecArgs.push_back(CI->getArgOperand(i));
- VecArgs.push_back(CJ->getArgOperand(i));
- }
- }
-
- // Compute the scalarization cost here with the original operands (to
- // check for uniqueness, etc.), and then call getIntrinsicInstrCost()
- // with the constructed vector types.
- Type *RetTy = getVecTypeForPair(IT1, JT1);
- unsigned ScalarizationCost = 0;
- if (!RetTy->isVoidTy())
- ScalarizationCost += TTI->getScalarizationOverhead(RetTy, true, false);
- ScalarizationCost += TTI->getOperandsScalarizationOverhead(VecArgs, 1);
-
- FastMathFlags FMFV = FMFCI;
- FMFV &= FMFCJ;
- unsigned VCost = TTI->getIntrinsicInstrCost(IID, RetTy, Tys, FMFV,
- ScalarizationCost);
-
- if (VCost > ICost + JCost)
- return false;
-
- // We don't want to fuse to a type that will be split, even
- // if the two input types will also be split and there is no other
- // associated cost.
- unsigned RetParts = TTI->getNumberOfParts(RetTy);
- if (RetParts > 1)
- return false;
- else if (!RetParts && VCost == ICost + JCost)
- return false;
-
- for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
- if (!Tys[i]->isVectorTy())
- continue;
-
- unsigned NumParts = TTI->getNumberOfParts(Tys[i]);
- if (NumParts > 1)
- return false;
- else if (!NumParts && VCost == ICost + JCost)
- return false;
- }
-
- CostSavings = ICost + JCost - VCost;
- }
- }
-
- return true;
- }
-
- // Figure out whether or not J uses I and update the users and write-set
- // structures associated with I. Specifically, Users represents the set of
- // instructions that depend on I. WriteSet represents the set
- // of memory locations that are dependent on I. If UpdateUsers is true,
- // and J uses I, then Users is updated to contain J and WriteSet is updated
- // to contain any memory locations to which J writes. The function returns
- // true if J uses I. By default, alias analysis is used to determine
- // whether J reads from memory that overlaps with a location in WriteSet.
- // If LoadMoveSetPairs is not null, then it is a previously-computed
- // set of pairs whose first element is the memory-based user
- // instruction and whose second is the instruction to be compared with
- // I. So, if LoadMoveSetPairs is provided, alias analysis is not
- // used. This is necessary because this
- // function is called during the process of moving instructions during
- // vectorization and the results of the alias analysis are not stable during
- // that process.
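- // For example: if I is a store through a pointer p and J is a later
- // load that may alias *p, the alias query marks J as a user of I even
- // though J never references I's value directly.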
- bool BBVectorize::trackUsesOfI(DenseSet<Value *> &Users,
- AliasSetTracker &WriteSet, Instruction *I,
- Instruction *J, bool UpdateUsers,
- DenseSet<ValuePair> *LoadMoveSetPairs) {
- bool UsesI = false;
-
- // This instruction may already be marked as a user due, for example, to
- // being a member of a selected pair.
- if (Users.count(J))
- UsesI = true;
-
- if (!UsesI)
- for (User::op_iterator JU = J->op_begin(), JE = J->op_end();
- JU != JE; ++JU) {
- Value *V = *JU;
- if (I == V || Users.count(V)) {
- UsesI = true;
- break;
- }
- }
- if (!UsesI && J->mayReadFromMemory()) {
- if (LoadMoveSetPairs) {
- UsesI = LoadMoveSetPairs->count(ValuePair(J, I));
- } else {
- for (AliasSetTracker::iterator W = WriteSet.begin(),
- WE = WriteSet.end(); W != WE; ++W) {
- if (W->aliasesUnknownInst(J, *AA)) {
- UsesI = true;
- break;
- }
- }
- }
- }
-
- if (UsesI && UpdateUsers) {
- if (J->mayWriteToMemory()) WriteSet.add(J);
- Users.insert(J);
- }
-
- return UsesI;
- }
-
- // This function iterates over all instruction pairs in the provided
- // basic block and collects all candidate pairs for vectorization.
- bool BBVectorize::getCandidatePairs(BasicBlock &BB,
- BasicBlock::iterator &Start,
- DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
- DenseSet<ValuePair> &FixedOrderPairs,
- DenseMap<ValuePair, int> &CandidatePairCostSavings,
- std::vector<Value *> &PairableInsts, bool NonPow2Len) {
- size_t TotalPairs = 0;
- BasicBlock::iterator E = BB.end();
- if (Start == E) return false;
-
- bool ShouldContinue = false, IAfterStart = false;
- for (BasicBlock::iterator I = Start++; I != E; ++I) {
- if (I == Start) IAfterStart = true;
-
- bool IsSimpleLoadStore;
- if (!isInstVectorizable(&*I, IsSimpleLoadStore))
- continue;
-
- // Look for an instruction with which to pair instruction *I...
- DenseSet<Value *> Users;
- AliasSetTracker WriteSet(*AA);
- if (I->mayWriteToMemory())
- WriteSet.add(&*I);
-
- bool JAfterStart = IAfterStart;
- BasicBlock::iterator J = std::next(I);
- for (unsigned ss = 0; J != E && ss <= Config.SearchLimit; ++J, ++ss) {
- if (J == Start)
- JAfterStart = true;
-
- // Determine if J uses I, if so, exit the loop.
- bool UsesI = trackUsesOfI(Users, WriteSet, &*I, &*J, !Config.FastDep);
- if (Config.FastDep) {
- // Note: For this heuristic to be effective, independent operations
- // must tend to be intermixed. This is likely to hold after some kinds
- // of grouped loop unrolling (but not after the generic LLVM unrolling
- // pass), and otherwise may require some kind of reordering pass.
-
- // When using fast dependency analysis,
- // stop searching after first use:
- if (UsesI) break;
- } else {
- if (UsesI) continue;
- }
-
- // J does not use I, and comes before the first use of I, so it can be
- // merged with I if the instructions are compatible.
- int CostSavings, FixedOrder;
- if (!areInstsCompatible(&*I, &*J, IsSimpleLoadStore, NonPow2Len,
- CostSavings, FixedOrder))
- continue;
-
- // J is a candidate for merging with I.
- if (PairableInsts.empty() ||
- PairableInsts[PairableInsts.size() - 1] != &*I) {
- PairableInsts.push_back(&*I);
- }
-
- CandidatePairs[&*I].push_back(&*J);
- ++TotalPairs;
- if (TTI)
- CandidatePairCostSavings.insert(
- ValuePairWithCost(ValuePair(&*I, &*J), CostSavings));
-
- if (FixedOrder == 1)
- FixedOrderPairs.insert(ValuePair(&*I, &*J));
- else if (FixedOrder == -1)
- FixedOrderPairs.insert(ValuePair(&*J, &*I));
-
- // The next call to this function must start after the last instruction
- // selected during this invocation.
- if (JAfterStart) {
- Start = std::next(J);
- IAfterStart = JAfterStart = false;
- }
-
- DEBUG(if (DebugCandidateSelection) dbgs() << "BBV: candidate pair "
- << *I << " <-> " << *J << " (cost savings: " <<
- CostSavings << ")\n");
-
- // If we have already found too many pairs, break here and this function
- // will be called again starting after the last instruction selected
- // during this invocation.
- if (PairableInsts.size() >= Config.MaxInsts ||
- TotalPairs >= Config.MaxPairs) {
- ShouldContinue = true;
- break;
- }
- }
-
- if (ShouldContinue)
- break;
- }
-
- DEBUG(dbgs() << "BBV: found " << PairableInsts.size()
- << " instructions with candidate pairs\n");
-
- return ShouldContinue;
- }
-
- // Finds candidate pairs connected to the pair P = <PI, PJ>. This means that
- // it looks for pairs such that both members have an input which is an
- // output of PI or PJ.
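- // A sketch of the connection types recorded below, for P = (A1,A2)
- // and a candidate pair (C1,C2): C1 uses A1 and C2 uses A2 (direct);
- // C1 uses A2 and C2 uses A1 (swap); C1 and C2 both use the same
- // member of P (splat).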
- void BBVectorize::computePairsConnectedTo(
- DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
- DenseSet<ValuePair> &CandidatePairsSet,
- std::vector<Value *> &PairableInsts,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes,
- ValuePair P) {
- StoreInst *SI, *SJ;
-
- // For each possible pairing for this variable, look at the uses of
- // the first value...
- for (Value::user_iterator I = P.first->user_begin(),
- E = P.first->user_end();
- I != E; ++I) {
- User *UI = *I;
- if (isa<LoadInst>(UI)) {
- // A pair cannot be connected to a load because the load only takes
- // one operand (the address), and the address remains a scalar even
- // after vectorization.
- } else if ((SI = dyn_cast<StoreInst>(UI)) &&
- P.first == SI->getPointerOperand()) {
- // Similarly, a pair cannot be connected to a store through its
- // pointer operand.
- continue;
- }
-
- // For each use of the first variable, look for uses of the second
- // variable...
- for (User *UJ : P.second->users()) {
- if ((SJ = dyn_cast<StoreInst>(UJ)) &&
- P.second == SJ->getPointerOperand())
- continue;
-
- // Look for <I, J>:
- if (CandidatePairsSet.count(ValuePair(UI, UJ))) {
- VPPair VP(P, ValuePair(UI, UJ));
- ConnectedPairs[VP.first].push_back(VP.second);
- PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionDirect));
- }
-
- // Look for <J, I>:
- if (CandidatePairsSet.count(ValuePair(UJ, UI))) {
- VPPair VP(P, ValuePair(UJ, UI));
- ConnectedPairs[VP.first].push_back(VP.second);
- PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSwap));
- }
- }
-
- if (Config.SplatBreaksChain) continue;
- // Look for cases where just the first value in the pair is used by
- // both members of another pair (splatting).
- for (Value::user_iterator J = P.first->user_begin(); J != E; ++J) {
- User *UJ = *J;
- if ((SJ = dyn_cast<StoreInst>(UJ)) &&
- P.first == SJ->getPointerOperand())
- continue;
-
- if (CandidatePairsSet.count(ValuePair(UI, UJ))) {
- VPPair VP(P, ValuePair(UI, UJ));
- ConnectedPairs[VP.first].push_back(VP.second);
- PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSplat));
- }
- }
- }
-
- if (Config.SplatBreaksChain) return;
- // Look for cases where just the second value in the pair is used by
- // both members of another pair (splatting).
- for (Value::user_iterator I = P.second->user_begin(),
- E = P.second->user_end();
- I != E; ++I) {
- User *UI = *I;
- if (isa<LoadInst>(UI))
- continue;
- else if ((SI = dyn_cast<StoreInst>(UI)) &&
- P.second == SI->getPointerOperand())
- continue;
-
- for (Value::user_iterator J = P.second->user_begin(); J != E; ++J) {
- User *UJ = *J;
- if ((SJ = dyn_cast<StoreInst>(UJ)) &&
- P.second == SJ->getPointerOperand())
- continue;
-
- if (CandidatePairsSet.count(ValuePair(UI, UJ))) {
- VPPair VP(P, ValuePair(UI, UJ));
- ConnectedPairs[VP.first].push_back(VP.second);
- PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSplat));
- }
- }
- }
- }
-
- // This function figures out which pairs are connected. Two pairs are
- // connected if some output of the first pair forms an input to both members
- // of the second pair.
- void BBVectorize::computeConnectedPairs(
- DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
- DenseSet<ValuePair> &CandidatePairsSet,
- std::vector<Value *> &PairableInsts,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes) {
- for (std::vector<Value *>::iterator PI = PairableInsts.begin(),
- PE = PairableInsts.end(); PI != PE; ++PI) {
- DenseMap<Value *, std::vector<Value *> >::iterator PP =
- CandidatePairs.find(*PI);
- if (PP == CandidatePairs.end())
- continue;
-
- for (std::vector<Value *>::iterator P = PP->second.begin(),
- E = PP->second.end(); P != E; ++P)
- computePairsConnectedTo(CandidatePairs, CandidatePairsSet,
- PairableInsts, ConnectedPairs,
- PairConnectionTypes, ValuePair(*PI, *P));
- }
-
- DEBUG(size_t TotalPairs = 0;
- for (DenseMap<ValuePair, std::vector<ValuePair> >::iterator I =
- ConnectedPairs.begin(), IE = ConnectedPairs.end(); I != IE; ++I)
- TotalPairs += I->second.size();
- dbgs() << "BBV: found " << TotalPairs
- << " pair connections.\n");
- }
-
- // This function builds a set of use tuples such that <A, B> is in the set
- // if B is in the use dag of A. If B is in the use dag of A, then B
- // depends on the output of A.
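- // For example: if B uses A's result, directly or transitively through
- // other users of A, the tuple <A, B> is recorded below.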
- void BBVectorize::buildDepMap(
- BasicBlock &BB,
- DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
- std::vector<Value *> &PairableInsts,
- DenseSet<ValuePair> &PairableInstUsers) {
- DenseSet<Value *> IsInPair;
- for (DenseMap<Value *, std::vector<Value *> >::iterator C =
- CandidatePairs.begin(), E = CandidatePairs.end(); C != E; ++C) {
- IsInPair.insert(C->first);
- IsInPair.insert(C->second.begin(), C->second.end());
- }
-
- // Iterate through the basic block, recording all users of each
- // pairable instruction.
-
- BasicBlock::iterator E = BB.end(), EL =
- BasicBlock::iterator(cast<Instruction>(PairableInsts.back()));
- for (BasicBlock::iterator I = BB.getFirstInsertionPt(); I != E; ++I) {
- if (IsInPair.find(&*I) == IsInPair.end())
- continue;
-
- DenseSet<Value *> Users;
- AliasSetTracker WriteSet(*AA);
- if (I->mayWriteToMemory())
- WriteSet.add(&*I);
-
- for (BasicBlock::iterator J = std::next(I); J != E; ++J) {
- (void)trackUsesOfI(Users, WriteSet, &*I, &*J);
-
- if (J == EL)
- break;
- }
-
- for (DenseSet<Value *>::iterator U = Users.begin(), E = Users.end();
- U != E; ++U) {
- if (IsInPair.find(*U) == IsInPair.end()) continue;
- PairableInstUsers.insert(ValuePair(&*I, *U));
- }
-
- if (I == EL)
- break;
- }
- }
-
- // Returns true if an input to pair P is an output of pair Q and also an
- // input of pair Q is an output of pair P. If this is the case, then these
- // two pairs cannot be simultaneously fused.
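- // For example: with P = (a,b) and Q = (x,y), if x uses the result of
- // a while b uses the result of y, then each pair depends on the
- // other, and the two cannot both be fused.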
- bool BBVectorize::pairsConflict(ValuePair P, ValuePair Q,
- DenseSet<ValuePair> &PairableInstUsers,
- DenseMap<ValuePair, std::vector<ValuePair> > *PairableInstUserMap,
- DenseSet<VPPair> *PairableInstUserPairSet) {
- // Two pairs are in conflict if they are mutual users of each other.
- bool QUsesP = PairableInstUsers.count(ValuePair(P.first, Q.first)) ||
- PairableInstUsers.count(ValuePair(P.first, Q.second)) ||
- PairableInstUsers.count(ValuePair(P.second, Q.first)) ||
- PairableInstUsers.count(ValuePair(P.second, Q.second));
- bool PUsesQ = PairableInstUsers.count(ValuePair(Q.first, P.first)) ||
- PairableInstUsers.count(ValuePair(Q.first, P.second)) ||
- PairableInstUsers.count(ValuePair(Q.second, P.first)) ||
- PairableInstUsers.count(ValuePair(Q.second, P.second));
- if (PairableInstUserMap) {
- // FIXME: The expensive part of the cycle check is not so much the cycle
- // check itself but this edge insertion procedure. This needs some
- // profiling and probably a different data structure.
- if (PUsesQ) {
- if (PairableInstUserPairSet->insert(VPPair(Q, P)).second)
- (*PairableInstUserMap)[Q].push_back(P);
- }
- if (QUsesP) {
- if (PairableInstUserPairSet->insert(VPPair(P, Q)).second)
- (*PairableInstUserMap)[P].push_back(Q);
- }
- }
-
- return (QUsesP && PUsesQ);
- }
-
- // This function walks the use graph of current pairs to see if, starting
- // from P, the walk returns to P.
- bool BBVectorize::pairWillFormCycle(ValuePair P,
- DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
- DenseSet<ValuePair> &CurrentPairs) {
- DEBUG(if (DebugCycleCheck)
- dbgs() << "BBV: starting cycle check for : " << *P.first << " <-> "
- << *P.second << "\n");
- // A lookup table of visited pairs is kept because the
- // PairableInstUserMap contains non-direct associations.
- DenseSet<ValuePair> Visited;
- SmallVector<ValuePair, 32> Q;
- // General depth-first post-order traversal:
- Q.push_back(P);
- do {
- ValuePair QTop = Q.pop_back_val();
- Visited.insert(QTop);
-
- DEBUG(if (DebugCycleCheck)
- dbgs() << "BBV: cycle check visiting: " << *QTop.first << " <-> "
- << *QTop.second << "\n");
- DenseMap<ValuePair, std::vector<ValuePair> >::iterator QQ =
- PairableInstUserMap.find(QTop);
- if (QQ == PairableInstUserMap.end())
- continue;
-
- for (std::vector<ValuePair>::iterator C = QQ->second.begin(),
- CE = QQ->second.end(); C != CE; ++C) {
- if (*C == P) {
- DEBUG(dbgs()
- << "BBV: rejected to prevent non-trivial cycle formation: "
- << *QTop.first << " <-> " << *C->second << "\n");
- return true;
- }
-
- if (CurrentPairs.count(*C) && !Visited.count(*C))
- Q.push_back(*C);
- }
- } while (!Q.empty());
-
- return false;
- }
-
- // This function builds the initial dag of connected pairs with the
- // pair J at the root.
- void BBVectorize::buildInitialDAGFor(
- DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
- DenseSet<ValuePair> &CandidatePairsSet,
- std::vector<Value *> &PairableInsts,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
- DenseSet<ValuePair> &PairableInstUsers,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseMap<ValuePair, size_t> &DAG, ValuePair J) {
- // Each of these pairs is viewed as the root node of a DAG. The DAG
- // is then walked (depth-first). As this happens, we keep track of
- // the pairs that compose the DAG and the maximum depth of the DAG.
- SmallVector<ValuePairWithDepth, 32> Q;
- // General depth-first post-order traversal:
- Q.push_back(ValuePairWithDepth(J, getDepthFactor(J.first)));
- do {
- ValuePairWithDepth QTop = Q.back();
-
- // Push each child onto the queue:
- bool MoreChildren = false;
- size_t MaxChildDepth = QTop.second;
- DenseMap<ValuePair, std::vector<ValuePair> >::iterator QQ =
- ConnectedPairs.find(QTop.first);
- if (QQ != ConnectedPairs.end())
- for (std::vector<ValuePair>::iterator k = QQ->second.begin(),
- ke = QQ->second.end(); k != ke; ++k) {
- // Make sure that this child pair is still a candidate:
- if (CandidatePairsSet.count(*k)) {
- DenseMap<ValuePair, size_t>::iterator C = DAG.find(*k);
- if (C == DAG.end()) {
- size_t d = getDepthFactor(k->first);
- Q.push_back(ValuePairWithDepth(*k, QTop.second+d));
- MoreChildren = true;
- } else {
- MaxChildDepth = std::max(MaxChildDepth, C->second);
- }
- }
- }
-
- if (!MoreChildren) {
- // Record the current pair as part of the DAG:
- DAG.insert(ValuePairWithDepth(QTop.first, MaxChildDepth));
- Q.pop_back();
- }
- } while (!Q.empty());
- }
-
- // Given some initial dag, prune it by removing conflicting pairs (pairs
- // that cannot be simultaneously chosen for vectorization).
- void BBVectorize::pruneDAGFor(
- DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
- std::vector<Value *> &PairableInsts,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
- DenseSet<ValuePair> &PairableInstUsers,
- DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
- DenseSet<VPPair> &PairableInstUserPairSet,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseMap<ValuePair, size_t> &DAG,
- DenseSet<ValuePair> &PrunedDAG, ValuePair J,
- bool UseCycleCheck) {
- SmallVector<ValuePairWithDepth, 32> Q;
- // General depth-first post-order traversal:
- Q.push_back(ValuePairWithDepth(J, getDepthFactor(J.first)));
- do {
- ValuePairWithDepth QTop = Q.pop_back_val();
- PrunedDAG.insert(QTop.first);
-
- // Visit each child, pruning as necessary...
- SmallVector<ValuePairWithDepth, 8> BestChildren;
- DenseMap<ValuePair, std::vector<ValuePair> >::iterator QQ =
- ConnectedPairs.find(QTop.first);
- if (QQ == ConnectedPairs.end())
- continue;
-
- for (std::vector<ValuePair>::iterator K = QQ->second.begin(),
- KE = QQ->second.end(); K != KE; ++K) {
- DenseMap<ValuePair, size_t>::iterator C = DAG.find(*K);
- if (C == DAG.end()) continue;
-
- // This child is in the DAG, now we need to make sure it is the
- // best of any conflicting children. There could be multiple
- // conflicting children, so first, determine if we're keeping
- // this child, then delete conflicting children as necessary.
-
- // It is also necessary to guard against pairing-induced
- // dependencies. Consider instructions a .. x .. y .. b
- // such that (a,b) are to be fused and (x,y) are to be fused
- // but a is an input to x and b is an output from y. This
- // means that y cannot be moved after b but x must be moved
- // after b for (a,b) to be fused. In other words, after
- // fusing (a,b) we have y .. a/b .. x where y is an input
- // to a/b and x is an output of a/b: x and y can no longer
- // be legally fused. To prevent this condition, we must
- // make sure that a child pair added to the DAG is not
- // both an input and output of an already-selected pair.
-
- // Pairing-induced dependencies can also form from more complicated
- // cycles. The pair vs. pair conflicts are easy to check, and so
- // that is done explicitly for "fast rejection", and because for
- // child vs. child conflicts, we may prefer to keep the current
- // pair in preference to the already-selected child.
- DenseSet<ValuePair> CurrentPairs;
-
- bool CanAdd = true;
- for (SmallVectorImpl<ValuePairWithDepth>::iterator C2
- = BestChildren.begin(), E2 = BestChildren.end();
- C2 != E2; ++C2) {
- if (C2->first.first == C->first.first ||
- C2->first.first == C->first.second ||
- C2->first.second == C->first.first ||
- C2->first.second == C->first.second ||
- pairsConflict(C2->first, C->first, PairableInstUsers,
- UseCycleCheck ? &PairableInstUserMap : nullptr,
- UseCycleCheck ? &PairableInstUserPairSet
- : nullptr)) {
- if (C2->second >= C->second) {
- CanAdd = false;
- break;
- }
-
- CurrentPairs.insert(C2->first);
- }
- }
- if (!CanAdd) continue;
-
- // Even worse, this child could conflict with another node already
- // selected for the DAG. If that is the case, ignore this child.
- for (DenseSet<ValuePair>::iterator T = PrunedDAG.begin(),
- E2 = PrunedDAG.end(); T != E2; ++T) {
- if (T->first == C->first.first ||
- T->first == C->first.second ||
- T->second == C->first.first ||
- T->second == C->first.second ||
- pairsConflict(*T, C->first, PairableInstUsers,
- UseCycleCheck ? &PairableInstUserMap : nullptr,
- UseCycleCheck ? &PairableInstUserPairSet
- : nullptr)) {
- CanAdd = false;
- break;
- }
-
- CurrentPairs.insert(*T);
- }
- if (!CanAdd) continue;
-
- // And check the queue too...
- for (SmallVectorImpl<ValuePairWithDepth>::iterator C2 = Q.begin(),
- E2 = Q.end(); C2 != E2; ++C2) {
- if (C2->first.first == C->first.first ||
- C2->first.first == C->first.second ||
- C2->first.second == C->first.first ||
- C2->first.second == C->first.second ||
- pairsConflict(C2->first, C->first, PairableInstUsers,
- UseCycleCheck ? &PairableInstUserMap : nullptr,
- UseCycleCheck ? &PairableInstUserPairSet
- : nullptr)) {
- CanAdd = false;
- break;
- }
-
- CurrentPairs.insert(C2->first);
- }
- if (!CanAdd) continue;
-
- // Last but not least, check for a conflict with any of the
- // already-chosen pairs.
- for (DenseMap<Value *, Value *>::iterator C2 =
- ChosenPairs.begin(), E2 = ChosenPairs.end();
- C2 != E2; ++C2) {
- if (pairsConflict(*C2, C->first, PairableInstUsers,
- UseCycleCheck ? &PairableInstUserMap : nullptr,
- UseCycleCheck ? &PairableInstUserPairSet
- : nullptr)) {
- CanAdd = false;
- break;
- }
-
- CurrentPairs.insert(*C2);
- }
- if (!CanAdd) continue;
-
- // To check for non-trivial cycles formed by the addition of the
- // current pair we've formed a list of all relevant pairs, now use a
- // graph walk to check for a cycle. We start from the current pair and
- // walk the use dag to see if we again reach the current pair. If we
- // do, then the current pair is rejected.
-
- // FIXME: It may be more efficient to use a topological-ordering
- // algorithm to improve the cycle check. This should be investigated.
- if (UseCycleCheck &&
- pairWillFormCycle(C->first, PairableInstUserMap, CurrentPairs))
- continue;
-
- // This child can be added, but we may have chosen it in preference
- // to an already-selected child. Check for this here, and if a
- // conflict is found, then remove the previously-selected child
- // before adding this one in its place.
- for (SmallVectorImpl<ValuePairWithDepth>::iterator C2
- = BestChildren.begin(); C2 != BestChildren.end();) {
- if (C2->first.first == C->first.first ||
- C2->first.first == C->first.second ||
- C2->first.second == C->first.first ||
- C2->first.second == C->first.second ||
- pairsConflict(C2->first, C->first, PairableInstUsers))
- C2 = BestChildren.erase(C2);
- else
- ++C2;
- }
-
- BestChildren.push_back(ValuePairWithDepth(C->first, C->second));
- }
-
- for (SmallVectorImpl<ValuePairWithDepth>::iterator C
- = BestChildren.begin(), E2 = BestChildren.end();
- C != E2; ++C) {
- size_t DepthF = getDepthFactor(C->first.first);
- Q.push_back(ValuePairWithDepth(C->first, QTop.second+DepthF));
- }
- } while (!Q.empty());
- }
-
- // This function finds the best DAG of mutually-compatible connected
- // pairs, given the choice of root pairs as an iterator range.
- void BBVectorize::findBestDAGFor(
- DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
- DenseSet<ValuePair> &CandidatePairsSet,
- DenseMap<ValuePair, int> &CandidatePairCostSavings,
- std::vector<Value *> &PairableInsts,
- DenseSet<ValuePair> &FixedOrderPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps,
- DenseSet<ValuePair> &PairableInstUsers,
- DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
- DenseSet<VPPair> &PairableInstUserPairSet,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseSet<ValuePair> &BestDAG, size_t &BestMaxDepth,
- int &BestEffSize, Value *II, std::vector<Value *>&JJ,
- bool UseCycleCheck) {
- for (std::vector<Value *>::iterator J = JJ.begin(), JE = JJ.end();
- J != JE; ++J) {
- ValuePair IJ(II, *J);
- if (!CandidatePairsSet.count(IJ))
- continue;
-
- // Before going any further, make sure that this pair does not
- // conflict with any already-selected pairs (see comment below
- // near the DAG pruning for more details).
- DenseSet<ValuePair> ChosenPairSet;
- bool DoesConflict = false;
- for (DenseMap<Value *, Value *>::iterator C = ChosenPairs.begin(),
- E = ChosenPairs.end(); C != E; ++C) {
- if (pairsConflict(*C, IJ, PairableInstUsers,
- UseCycleCheck ? &PairableInstUserMap : nullptr,
- UseCycleCheck ? &PairableInstUserPairSet : nullptr)) {
- DoesConflict = true;
- break;
- }
-
- ChosenPairSet.insert(*C);
- }
- if (DoesConflict) continue;
-
- if (UseCycleCheck &&
- pairWillFormCycle(IJ, PairableInstUserMap, ChosenPairSet))
- continue;
-
- DenseMap<ValuePair, size_t> DAG;
- buildInitialDAGFor(CandidatePairs, CandidatePairsSet,
- PairableInsts, ConnectedPairs,
- PairableInstUsers, ChosenPairs, DAG, IJ);
-
- // Because pruning keeps the child with the largest depth, the maximum
- // depth of the pruned DAG equals that of the unpruned DAG.
- size_t MaxDepth = DAG.lookup(IJ);
-
- DEBUG(if (DebugPairSelection) dbgs() << "BBV: found DAG for pair {"
- << *IJ.first << " <-> " << *IJ.second << "} of depth " <<
- MaxDepth << " and size " << DAG.size() << "\n");
-
- // At this point the DAG has been constructed, but may contain
- // contradictory children (meaning that different children of some DAG
- // node may be attempting to fuse the same instruction). So now we
- // walk the DAG again and, in the case of a conflict, keep only the
- // child with the largest depth. To break a tie, favor the first
- // child.
-
- DenseSet<ValuePair> PrunedDAG;
- pruneDAGFor(CandidatePairs, PairableInsts, ConnectedPairs,
- PairableInstUsers, PairableInstUserMap,
- PairableInstUserPairSet,
- ChosenPairs, DAG, PrunedDAG, IJ, UseCycleCheck);
-
- int EffSize = 0;
- if (TTI) {
- DenseSet<Value *> PrunedDAGInstrs;
- for (DenseSet<ValuePair>::iterator S = PrunedDAG.begin(),
- E = PrunedDAG.end(); S != E; ++S) {
- PrunedDAGInstrs.insert(S->first);
- PrunedDAGInstrs.insert(S->second);
- }
-
- // The set of pairs that have already contributed to the total cost.
- DenseSet<ValuePair> IncomingPairs;
-
- // If the cost model were perfect, this might not be necessary; but we
- // need to make sure that we don't get stuck vectorizing our own
- // shuffle chains.
- bool HasNontrivialInsts = false;
-
- // The node weights represent the cost savings associated with
- // fusing the pair of instructions.
- for (DenseSet<ValuePair>::iterator S = PrunedDAG.begin(),
- E = PrunedDAG.end(); S != E; ++S) {
- if (!isa<ShuffleVectorInst>(S->first) &&
- !isa<InsertElementInst>(S->first) &&
- !isa<ExtractElementInst>(S->first))
- HasNontrivialInsts = true;
-
- bool FlipOrder = false;
-
- if (getDepthFactor(S->first)) {
- int ESContrib = CandidatePairCostSavings.find(*S)->second;
- DEBUG(if (DebugPairSelection) dbgs() << "\tweight {"
- << *S->first << " <-> " << *S->second << "} = " <<
- ESContrib << "\n");
- EffSize += ESContrib;
- }
-
- // The edge weights contribute in a negative sense: they represent
- // the cost of shuffles.
- DenseMap<ValuePair, std::vector<ValuePair> >::iterator SS =
- ConnectedPairDeps.find(*S);
- if (SS != ConnectedPairDeps.end()) {
- unsigned NumDepsDirect = 0, NumDepsSwap = 0;
- for (std::vector<ValuePair>::iterator T = SS->second.begin(),
- TE = SS->second.end(); T != TE; ++T) {
- VPPair Q(*S, *T);
- if (!PrunedDAG.count(Q.second))
- continue;
- DenseMap<VPPair, unsigned>::iterator R =
- PairConnectionTypes.find(VPPair(Q.second, Q.first));
- assert(R != PairConnectionTypes.end() &&
- "Cannot find pair connection type");
- if (R->second == PairConnectionDirect)
- ++NumDepsDirect;
- else if (R->second == PairConnectionSwap)
- ++NumDepsSwap;
- }
-
- // If there are more swaps than direct connections, then
- // the pair order will be flipped during fusion. So the real
- // number of swaps is the smaller of the two counts.
- FlipOrder = !FixedOrderPairs.count(*S) &&
- ((NumDepsSwap > NumDepsDirect) ||
- FixedOrderPairs.count(ValuePair(S->second, S->first)));
-
- for (std::vector<ValuePair>::iterator T = SS->second.begin(),
- TE = SS->second.end(); T != TE; ++T) {
- VPPair Q(*S, *T);
- if (!PrunedDAG.count(Q.second))
- continue;
- DenseMap<VPPair, unsigned>::iterator R =
- PairConnectionTypes.find(VPPair(Q.second, Q.first));
- assert(R != PairConnectionTypes.end() &&
- "Cannot find pair connection type");
- Type *Ty1 = Q.second.first->getType(),
- *Ty2 = Q.second.second->getType();
- Type *VTy = getVecTypeForPair(Ty1, Ty2);
- if ((R->second == PairConnectionDirect && FlipOrder) ||
- (R->second == PairConnectionSwap && !FlipOrder) ||
- R->second == PairConnectionSplat) {
- int ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
- VTy, VTy);
-
- if (VTy->getVectorNumElements() == 2) {
- if (R->second == PairConnectionSplat)
- ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost(
- TargetTransformInfo::SK_Broadcast, VTy));
- else
- ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost(
- TargetTransformInfo::SK_Reverse, VTy));
- }
-
- DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" <<
- *Q.second.first << " <-> " << *Q.second.second <<
- "} -> {" <<
- *S->first << " <-> " << *S->second << "} = " <<
- ESContrib << "\n");
- EffSize -= ESContrib;
- }
- }
- }
-
- // Compute the cost of outgoing edges. We assume that edges outgoing
- // to shuffles, inserts or extracts can be merged, and so contribute
- // no additional cost.
- if (!S->first->getType()->isVoidTy()) {
- Type *Ty1 = S->first->getType(),
- *Ty2 = S->second->getType();
- Type *VTy = getVecTypeForPair(Ty1, Ty2);
-
- bool NeedsExtraction = false;
- for (User *U : S->first->users()) {
- if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(U)) {
- // Shuffle can be folded if it has no other input
- if (isa<UndefValue>(SI->getOperand(1)))
- continue;
- }
- if (isa<ExtractElementInst>(U))
- continue;
- if (PrunedDAGInstrs.count(U))
- continue;
- NeedsExtraction = true;
- break;
- }
-
- if (NeedsExtraction) {
- int ESContrib;
- if (Ty1->isVectorTy()) {
- ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
- Ty1, VTy);
- ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost(
- TargetTransformInfo::SK_ExtractSubvector, VTy, 0, Ty1));
- } else
- ESContrib = (int) TTI->getVectorInstrCost(
- Instruction::ExtractElement, VTy, 0);
-
- DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" <<
- *S->first << "} = " << ESContrib << "\n");
- EffSize -= ESContrib;
- }
-
- NeedsExtraction = false;
- for (User *U : S->second->users()) {
- if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(U)) {
- // Shuffle can be folded if it has no other input
- if (isa<UndefValue>(SI->getOperand(1)))
- continue;
- }
- if (isa<ExtractElementInst>(U))
- continue;
- if (PrunedDAGInstrs.count(U))
- continue;
- NeedsExtraction = true;
- break;
- }
-
- if (NeedsExtraction) {
- int ESContrib;
- if (Ty2->isVectorTy()) {
- ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
- Ty2, VTy);
- ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost(
- TargetTransformInfo::SK_ExtractSubvector, VTy,
- Ty1->isVectorTy() ? Ty1->getVectorNumElements() : 1, Ty2));
- } else
- ESContrib = (int) TTI->getVectorInstrCost(
- Instruction::ExtractElement, VTy, 1);
- DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" <<
- *S->second << "} = " << ESContrib << "\n");
- EffSize -= ESContrib;
- }
- }
-
- // Compute the cost of incoming edges.
- if (!isa<LoadInst>(S->first) && !isa<StoreInst>(S->first)) {
- Instruction *S1 = cast<Instruction>(S->first),
- *S2 = cast<Instruction>(S->second);
- for (unsigned o = 0; o < S1->getNumOperands(); ++o) {
- Value *O1 = S1->getOperand(o), *O2 = S2->getOperand(o);
-
- // Combining constants into vector constants (or small vector
- // constants into larger ones) is assumed free.
- if (isa<Constant>(O1) && isa<Constant>(O2))
- continue;
-
- if (FlipOrder)
- std::swap(O1, O2);
-
- ValuePair VP = ValuePair(O1, O2);
- ValuePair VPR = ValuePair(O2, O1);
-
- // Internal edges are not handled here.
- if (PrunedDAG.count(VP) || PrunedDAG.count(VPR))
- continue;
-
- Type *Ty1 = O1->getType(),
- *Ty2 = O2->getType();
- Type *VTy = getVecTypeForPair(Ty1, Ty2);
-
- // Combining vector operations of the same type is also assumed
- // to be foldable with other operations.
- if (Ty1 == Ty2) {
- // If both are insert elements, then both can be widened.
- InsertElementInst *IEO1 = dyn_cast<InsertElementInst>(O1),
- *IEO2 = dyn_cast<InsertElementInst>(O2);
- if (IEO1 && IEO2 && isPureIEChain(IEO1) && isPureIEChain(IEO2))
- continue;
- // If both are extract elements, and both have the same input
- // type, then they can be replaced with a shuffle
- ExtractElementInst *EIO1 = dyn_cast<ExtractElementInst>(O1),
- *EIO2 = dyn_cast<ExtractElementInst>(O2);
- if (EIO1 && EIO2 &&
- EIO1->getOperand(0)->getType() ==
- EIO2->getOperand(0)->getType())
- continue;
- // If both are a shuffle with equal operand types and only two
- // unique operands, then they can be replaced with a single
- // shuffle.
- ShuffleVectorInst *SIO1 = dyn_cast<ShuffleVectorInst>(O1),
- *SIO2 = dyn_cast<ShuffleVectorInst>(O2);
- if (SIO1 && SIO2 &&
- SIO1->getOperand(0)->getType() ==
- SIO2->getOperand(0)->getType()) {
- SmallSet<Value *, 4> SIOps;
- SIOps.insert(SIO1->getOperand(0));
- SIOps.insert(SIO1->getOperand(1));
- SIOps.insert(SIO2->getOperand(0));
- SIOps.insert(SIO2->getOperand(1));
- if (SIOps.size() <= 2)
- continue;
- }
- }
-
- int ESContrib;
- // This pair has already been formed.
- if (IncomingPairs.count(VP)) {
- continue;
- } else if (IncomingPairs.count(VPR)) {
- ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
- VTy, VTy);
-
- if (VTy->getVectorNumElements() == 2)
- ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost(
- TargetTransformInfo::SK_Reverse, VTy));
- } else if (!Ty1->isVectorTy() && !Ty2->isVectorTy()) {
- ESContrib = (int) TTI->getVectorInstrCost(
- Instruction::InsertElement, VTy, 0);
- ESContrib += (int) TTI->getVectorInstrCost(
- Instruction::InsertElement, VTy, 1);
- } else if (!Ty1->isVectorTy()) {
- // O1 needs to be inserted into a vector of size O2, and then
- // both need to be shuffled together.
- ESContrib = (int) TTI->getVectorInstrCost(
- Instruction::InsertElement, Ty2, 0);
- ESContrib += (int) getInstrCost(Instruction::ShuffleVector,
- VTy, Ty2);
- } else if (!Ty2->isVectorTy()) {
- // O2 needs to be inserted into a vector of size O1, and then
- // both need to be shuffled together.
- ESContrib = (int) TTI->getVectorInstrCost(
- Instruction::InsertElement, Ty1, 0);
- ESContrib += (int) getInstrCost(Instruction::ShuffleVector,
- VTy, Ty1);
- } else {
- Type *TyBig = Ty1, *TySmall = Ty2;
- if (Ty2->getVectorNumElements() > Ty1->getVectorNumElements())
- std::swap(TyBig, TySmall);
-
- ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
- VTy, TyBig);
- if (TyBig != TySmall)
- ESContrib += (int) getInstrCost(Instruction::ShuffleVector,
- TyBig, TySmall);
- }
-
- DEBUG(if (DebugPairSelection) dbgs() << "\tcost {"
- << *O1 << " <-> " << *O2 << "} = " <<
- ESContrib << "\n");
- EffSize -= ESContrib;
- IncomingPairs.insert(VP);
- }
- }
- }
-
- if (!HasNontrivialInsts) {
- DEBUG(if (DebugPairSelection) dbgs() <<
- "\tNo non-trivial instructions in DAG;"
- " override to zero effective size\n");
- EffSize = 0;
- }
- } else {
- for (DenseSet<ValuePair>::iterator S = PrunedDAG.begin(),
- E = PrunedDAG.end(); S != E; ++S)
- EffSize += (int) getDepthFactor(S->first);
- }
-
- DEBUG(if (DebugPairSelection)
- dbgs() << "BBV: found pruned DAG for pair {"
- << *IJ.first << " <-> " << *IJ.second << "} of depth " <<
- MaxDepth << " and size " << PrunedDAG.size() <<
- " (effective size: " << EffSize << ")\n");
- if (((TTI && !UseChainDepthWithTI) ||
- MaxDepth >= Config.ReqChainDepth) &&
- EffSize > 0 && EffSize > BestEffSize) {
- BestMaxDepth = MaxDepth;
- BestEffSize = EffSize;
- BestDAG = PrunedDAG;
- }
- }
- }
-
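The selection criterion in the loop above reduces to a small predicate: keep
the candidate DAG with the largest positive effective size, and require the
minimum chain depth only when the target cost model is not consulted. A
standalone sketch using plain C++ stand-ins for the pass's types (HaveTTI
abbreviates the "TTI && !UseChainDepthWithTI" condition):

  #include <cstddef>
  #include <vector>

  struct CandidateDAG {
    std::size_t MaxDepth; // depth of the pruned DAG rooted at this pair
    int EffSize;          // node savings minus shuffle/extract/insert costs
  };

  int pickBest(const std::vector<CandidateDAG> &Cands, bool HaveTTI,
               std::size_t ReqChainDepth) {
    int BestEffSize = 0, BestIdx = -1;
    for (std::size_t i = 0; i < Cands.size(); ++i) {
      const CandidateDAG &C = Cands[i];
      if ((HaveTTI || C.MaxDepth >= ReqChainDepth) && C.EffSize > 0 &&
          C.EffSize > BestEffSize) {
        BestEffSize = C.EffSize;
        BestIdx = static_cast<int>(i);
      }
    }
    return BestIdx; // -1 when no candidate is profitable
  }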
- // Given the list of candidate pairs, this function selects those
- // that will be fused into vector instructions.
- void BBVectorize::choosePairs(
- DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
- DenseSet<ValuePair> &CandidatePairsSet,
- DenseMap<ValuePair, int> &CandidatePairCostSavings,
- std::vector<Value *> &PairableInsts,
- DenseSet<ValuePair> &FixedOrderPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps,
- DenseSet<ValuePair> &PairableInstUsers,
- DenseMap<Value *, Value *>& ChosenPairs) {
- bool UseCycleCheck =
- CandidatePairsSet.size() <= Config.MaxCandPairsForCycleCheck;
-
- DenseMap<Value *, std::vector<Value *> > CandidatePairs2;
- for (DenseSet<ValuePair>::iterator I = CandidatePairsSet.begin(),
- E = CandidatePairsSet.end(); I != E; ++I) {
- std::vector<Value *> &JJ = CandidatePairs2[I->second];
- if (JJ.empty()) JJ.reserve(32);
- JJ.push_back(I->first);
- }
-
- DenseMap<ValuePair, std::vector<ValuePair> > PairableInstUserMap;
- DenseSet<VPPair> PairableInstUserPairSet;
- for (std::vector<Value *>::iterator I = PairableInsts.begin(),
- E = PairableInsts.end(); I != E; ++I) {
- // The number of possible pairings for this variable:
- size_t NumChoices = CandidatePairs.lookup(*I).size();
- if (!NumChoices) continue;
-
- std::vector<Value *> &JJ = CandidatePairs[*I];
-
- // The best pair to choose and its dag:
- size_t BestMaxDepth = 0;
- int BestEffSize = 0;
- DenseSet<ValuePair> BestDAG;
- findBestDAGFor(CandidatePairs, CandidatePairsSet,
- CandidatePairCostSavings,
- PairableInsts, FixedOrderPairs, PairConnectionTypes,
- ConnectedPairs, ConnectedPairDeps,
- PairableInstUsers, PairableInstUserMap,
- PairableInstUserPairSet, ChosenPairs,
- BestDAG, BestMaxDepth, BestEffSize, *I, JJ,
- UseCycleCheck);
-
- if (BestDAG.empty())
- continue;
-
- // A DAG has now been chosen. Had no DAG been chosen, this
- // instruction, I, could not be paired (and would no longer be
- // considered).
-
- DEBUG(dbgs() << "BBV: selected pairs in the best DAG for: "
- << *cast<Instruction>(*I) << "\n");
-
- for (DenseSet<ValuePair>::iterator S = BestDAG.begin(),
- SE2 = BestDAG.end(); S != SE2; ++S) {
- // Insert the members of this dag into the list of chosen pairs.
- ChosenPairs.insert(ValuePair(S->first, S->second));
- DEBUG(dbgs() << "BBV: selected pair: " << *S->first << " <-> " <<
- *S->second << "\n");
-
- // Remove all candidate pairs that have values in the chosen dag.
- std::vector<Value *> &KK = CandidatePairs[S->first];
- for (std::vector<Value *>::iterator K = KK.begin(), KE = KK.end();
- K != KE; ++K) {
- if (*K == S->second)
- continue;
-
- CandidatePairsSet.erase(ValuePair(S->first, *K));
- }
-
- std::vector<Value *> &LL = CandidatePairs2[S->second];
- for (std::vector<Value *>::iterator L = LL.begin(), LE = LL.end();
- L != LE; ++L) {
- if (*L == S->first)
- continue;
-
- CandidatePairsSet.erase(ValuePair(*L, S->second));
- }
-
- std::vector<Value *> &MM = CandidatePairs[S->second];
- for (std::vector<Value *>::iterator M = MM.begin(), ME = MM.end();
- M != ME; ++M) {
- assert(*M != S->first && "Flipped pair in candidate list?");
- CandidatePairsSet.erase(ValuePair(S->second, *M));
- }
-
- std::vector<Value *> &NN = CandidatePairs2[S->first];
- for (std::vector<Value *>::iterator N = NN.begin(), NE = NN.end();
- N != NE; ++N) {
- assert(*N != S->second && "Flipped pair in candidate list?");
- CandidatePairsSet.erase(ValuePair(*N, S->first));
- }
- }
- }
-
- DEBUG(dbgs() << "BBV: selected " << ChosenPairs.size() << " pairs.\n");
- }
-
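Once a pair is chosen, every remaining candidate that shares a value with it
must be dropped; the four erase loops above do this with per-value index
vectors to avoid a full scan. A simplified O(n) sketch of the same
invalidation, with ints standing in for Value pointers:

  #include <iterator>
  #include <set>
  #include <utility>

  using ValuePair = std::pair<int, int>;

  void invalidateOverlaps(std::set<ValuePair> &Candidates,
                          const ValuePair &Chosen) {
    for (auto It = Candidates.begin(); It != Candidates.end();) {
      bool Overlaps =
          It->first == Chosen.first || It->first == Chosen.second ||
          It->second == Chosen.first || It->second == Chosen.second;
      It = Overlaps ? Candidates.erase(It) : std::next(It);
    }
  }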
- std::string getReplacementName(Instruction *I, bool IsInput, unsigned o,
- unsigned n = 0) {
- if (!I->hasName())
- return "";
-
- return (I->getName() + (IsInput ? ".v.i" : ".v.r") + utostr(o) +
- (n > 0 ? "." + utostr(n) : "")).str();
- }
-
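The naming scheme is easiest to see in isolation: an input replacement for
operand 1, chain element 2, of a value named "x" is "x.v.i1.2", while an
output (result) replacement for operand 0 is "x.v.r0". A self-contained
equivalent of the helper above, in plain standard C++:

  #include <string>

  std::string replacementName(const std::string &Name, bool IsInput,
                              unsigned o, unsigned n = 0) {
    if (Name.empty())
      return "";
    return Name + (IsInput ? ".v.i" : ".v.r") + std::to_string(o) +
           (n > 0 ? "." + std::to_string(n) : std::string());
  }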
- // Returns the value that is to be used as the pointer input to the vector
- // instruction that fuses I with J.
- Value *BBVectorize::getReplacementPointerInput(LLVMContext& Context,
- Instruction *I, Instruction *J, unsigned o) {
- Value *IPtr, *JPtr;
- unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace;
- int64_t OffsetInElmts;
-
- // Note: the analysis might fail here; that is why the pair order has
- // been precomputed (OffsetInElmts must be unused here).
- (void) getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
- IAddressSpace, JAddressSpace,
- OffsetInElmts, false);
-
- // The pointer value is taken to be the one with the lowest offset.
- Value *VPtr = IPtr;
-
- Type *ArgTypeI = IPtr->getType()->getPointerElementType();
- Type *ArgTypeJ = JPtr->getType()->getPointerElementType();
- Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
- Type *VArgPtrType
- = PointerType::get(VArgType,
- IPtr->getType()->getPointerAddressSpace());
- return new BitCastInst(VPtr, VArgPtrType, getReplacementName(I, true, o),
- /* insert before */ I);
- }
-
- void BBVectorize::fillNewShuffleMask(LLVMContext& Context, Instruction *J,
- unsigned MaskOffset, unsigned NumInElem,
- unsigned NumInElem1, unsigned IdxOffset,
- std::vector<Constant*> &Mask) {
- unsigned NumElem1 = J->getType()->getVectorNumElements();
- for (unsigned v = 0; v < NumElem1; ++v) {
- int m = cast<ShuffleVectorInst>(J)->getMaskValue(v);
- if (m < 0) {
- Mask[v+MaskOffset] = UndefValue::get(Type::getInt32Ty(Context));
- } else {
- unsigned mm = m + (int) IdxOffset;
- if (m >= (int) NumInElem1)
- mm += (int) NumInElem;
-
- Mask[v+MaskOffset] =
- ConstantInt::get(Type::getInt32Ty(Context), mm);
- }
- }
- }
-
- // Returns the value that is to be used as the vector-shuffle mask to the
- // vector instruction that fuses I with J.
- Value *BBVectorize::getReplacementShuffleMask(LLVMContext& Context,
- Instruction *I, Instruction *J) {
- // This is the shuffle mask. We need to append the second
- // mask to the first, and the numbers need to be adjusted.
-
- Type *ArgTypeI = I->getType();
- Type *ArgTypeJ = J->getType();
- Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
-
- unsigned NumElemI = ArgTypeI->getVectorNumElements();
-
- // Get the total number of elements in the fused vector type.
- // By definition, this must equal the number of elements in
- // the final mask.
- unsigned NumElem = VArgType->getVectorNumElements();
- std::vector<Constant*> Mask(NumElem);
-
- Type *OpTypeI = I->getOperand(0)->getType();
- unsigned NumInElemI = OpTypeI->getVectorNumElements();
- Type *OpTypeJ = J->getOperand(0)->getType();
- unsigned NumInElemJ = OpTypeJ->getVectorNumElements();
-
- // The fused vector will be:
- // -----------------------------------------------------
- // | NumInElemI | NumInElemJ | NumInElemI | NumInElemJ |
- // -----------------------------------------------------
- // from which we'll extract NumElem total elements (where the first NumElemI
- // of them come from the mask in I and the remainder come from the mask
- // in J).
-
- // For the mask from the first pair...
- fillNewShuffleMask(Context, I, 0, NumInElemJ, NumInElemI,
- 0, Mask);
-
- // For the mask from the second pair...
- fillNewShuffleMask(Context, J, NumElemI, NumInElemI, NumInElemJ,
- NumInElemI, Mask);
-
- return ConstantVector::get(Mask);
- }
-
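The index arithmetic in fillNewShuffleMask follows directly from the fused
operand layout documented above, [ I.op0 | J.op0 | I.op1 | J.op1 ] with
NI = NumInElemI and NJ = NumInElemJ. A standalone model of the arithmetic
(a sketch, not the LLVM code; -1 stands in for an undef mask element):

  #include <vector>

  std::vector<int> fuseShuffleMasks(const std::vector<int> &MaskI,
                                    const std::vector<int> &MaskJ,
                                    int NI, int NJ) {
    std::vector<int> Fused;
    for (int m : MaskI) // I's entries skip over J.op0 once they reach I.op1
      Fused.push_back(m < 0 ? -1 : (m < NI ? m : m + NJ));
    for (int m : MaskJ) // J's entries shift past I.op0, and past I.op1 too
      Fused.push_back(m < 0 ? -1 : (m < NJ ? m + NI : m + 2 * NI));
    return Fused;
  }

For example, with NI = NJ = 2, MaskI = {0, 2} and MaskJ = {1, 3} fuse to
{0, 4, 3, 7}: entry 2 of MaskI addresses I.op1, now at offset NI + NJ = 4,
and entry 3 of MaskJ addresses J.op1, now at offset 2*NI + NJ = 6.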
- bool BBVectorize::expandIEChain(LLVMContext& Context, Instruction *I,
- Instruction *J, unsigned o, Value *&LOp,
- unsigned numElemL,
- Type *ArgTypeL, Type *ArgTypeH,
- bool IBeforeJ, unsigned IdxOff) {
- bool ExpandedIEChain = false;
- if (InsertElementInst *LIE = dyn_cast<InsertElementInst>(LOp)) {
- // If we have a pure insertelement chain, then this can be rewritten
- // into a chain that directly builds the larger type.
- if (isPureIEChain(LIE)) {
- SmallVector<Value *, 8> VectElemts(numElemL,
- UndefValue::get(ArgTypeL->getScalarType()));
- InsertElementInst *LIENext = LIE;
- do {
- unsigned Idx =
- cast<ConstantInt>(LIENext->getOperand(2))->getSExtValue();
- VectElemts[Idx] = LIENext->getOperand(1);
- } while ((LIENext =
- dyn_cast<InsertElementInst>(LIENext->getOperand(0))));
-
- LIENext = nullptr;
- Value *LIEPrev = UndefValue::get(ArgTypeH);
- for (unsigned i = 0; i < numElemL; ++i) {
- if (isa<UndefValue>(VectElemts[i])) continue;
- LIENext = InsertElementInst::Create(LIEPrev, VectElemts[i],
- ConstantInt::get(Type::getInt32Ty(Context),
- i + IdxOff),
- getReplacementName(IBeforeJ ? I : J,
- true, o, i+1));
- LIENext->insertBefore(IBeforeJ ? J : I);
- LIEPrev = LIENext;
- }
-
- LOp = LIENext ? (Value*) LIENext : UndefValue::get(ArgTypeH);
- ExpandedIEChain = true;
- }
- }
-
- return ExpandedIEChain;
- }
-
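Expanding a pure insertelement chain amounts to recording the value written
to each lane and re-emitting one insert per defined lane at the wider type.
A toy model of the flattening step (ints for values, -1 for undef; the
guard keeps a lane's latest write if a chain should write a lane twice):

  #include <vector>

  struct IE { int Prev, Lane, Val; }; // Prev: chain index, -1 = undef base

  std::vector<int> flattenIEChain(const std::vector<IE> &Chain, int Tail,
                                  int NumLanes) {
    std::vector<int> Lanes(NumLanes, -1); // -1 ~ undef
    for (int i = Tail; i != -1; i = Chain[i].Prev)
      if (Lanes[Chain[i].Lane] == -1) // walking tail-first: first seen wins
        Lanes[Chain[i].Lane] = Chain[i].Val;
    return Lanes;
  }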
- static unsigned getNumScalarElements(Type *Ty) {
- if (VectorType *VecTy = dyn_cast<VectorType>(Ty))
- return VecTy->getNumElements();
- return 1;
- }
-
- // Returns the value to be used as the specified operand of the vector
- // instruction that fuses I with J.
- Value *BBVectorize::getReplacementInput(LLVMContext& Context, Instruction *I,
- Instruction *J, unsigned o, bool IBeforeJ) {
- Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0);
- Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), 1);
-
- // Compute the fused vector type for this operand
- Type *ArgTypeI = I->getOperand(o)->getType();
- Type *ArgTypeJ = J->getOperand(o)->getType();
- VectorType *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
-
- Instruction *L = I, *H = J;
- Type *ArgTypeL = ArgTypeI, *ArgTypeH = ArgTypeJ;
-
- unsigned numElemL = getNumScalarElements(ArgTypeL);
- unsigned numElemH = getNumScalarElements(ArgTypeH);
-
- Value *LOp = L->getOperand(o);
- Value *HOp = H->getOperand(o);
- unsigned numElem = VArgType->getNumElements();
-
- // First, we check if we can reuse the "original" vector outputs (if these
- // exist). We might need a shuffle.
- ExtractElementInst *LEE = dyn_cast<ExtractElementInst>(LOp);
- ExtractElementInst *HEE = dyn_cast<ExtractElementInst>(HOp);
- ShuffleVectorInst *LSV = dyn_cast<ShuffleVectorInst>(LOp);
- ShuffleVectorInst *HSV = dyn_cast<ShuffleVectorInst>(HOp);
-
- // FIXME: If we're fusing shuffle instructions, then we can't apply this
- // optimization. The input vectors to the shuffle might be a different
- // length from the shuffle outputs. Unfortunately, the replacement
- // shuffle mask has already been formed, and the mask entries are sensitive
- // to the sizes of the inputs.
- bool IsSizeChangeShuffle =
- isa<ShuffleVectorInst>(L) &&
- (LOp->getType() != L->getType() || HOp->getType() != H->getType());
-
- if ((LEE || LSV) && (HEE || HSV) && !IsSizeChangeShuffle) {
- // We can have at most two unique vector inputs.
- bool CanUseInputs = true;
- Value *I1, *I2 = nullptr;
- if (LEE) {
- I1 = LEE->getOperand(0);
- } else {
- I1 = LSV->getOperand(0);
- I2 = LSV->getOperand(1);
- if (I2 == I1 || isa<UndefValue>(I2))
- I2 = nullptr;
- }
-
- if (HEE) {
- Value *I3 = HEE->getOperand(0);
- if (!I2 && I3 != I1)
- I2 = I3;
- else if (I3 != I1 && I3 != I2)
- CanUseInputs = false;
- } else {
- Value *I3 = HSV->getOperand(0);
- if (!I2 && I3 != I1)
- I2 = I3;
- else if (I3 != I1 && I3 != I2)
- CanUseInputs = false;
-
- if (CanUseInputs) {
- Value *I4 = HSV->getOperand(1);
- if (!isa<UndefValue>(I4)) {
- if (!I2 && I4 != I1)
- I2 = I4;
- else if (I4 != I1 && I4 != I2)
- CanUseInputs = false;
- }
- }
- }
-
- if (CanUseInputs) {
- unsigned LOpElem =
- cast<Instruction>(LOp)->getOperand(0)->getType()
- ->getVectorNumElements();
-
- unsigned HOpElem =
- cast<Instruction>(HOp)->getOperand(0)->getType()
- ->getVectorNumElements();
-
- // We have one or two input vectors. We need to map each index of the
- // operands to the index of the original vector.
- SmallVector<std::pair<int, int>, 8> II(numElem);
- for (unsigned i = 0; i < numElemL; ++i) {
- int Idx, INum;
- if (LEE) {
- Idx =
- cast<ConstantInt>(LEE->getOperand(1))->getSExtValue();
- INum = LEE->getOperand(0) == I1 ? 0 : 1;
- } else {
- Idx = LSV->getMaskValue(i);
- if (Idx < (int) LOpElem) {
- INum = LSV->getOperand(0) == I1 ? 0 : 1;
- } else {
- Idx -= LOpElem;
- INum = LSV->getOperand(1) == I1 ? 0 : 1;
- }
- }
-
- II[i] = std::pair<int, int>(Idx, INum);
- }
- for (unsigned i = 0; i < numElemH; ++i) {
- int Idx, INum;
- if (HEE) {
- Idx =
- cast<ConstantInt>(HEE->getOperand(1))->getSExtValue();
- INum = HEE->getOperand(0) == I1 ? 0 : 1;
- } else {
- Idx = HSV->getMaskValue(i);
- if (Idx < (int) HOpElem) {
- INum = HSV->getOperand(0) == I1 ? 0 : 1;
- } else {
- Idx -= HOpElem;
- INum = HSV->getOperand(1) == I1 ? 0 : 1;
- }
- }
-
- II[i + numElemL] = std::pair<int, int>(Idx, INum);
- }
-
- // We now have an array which tells us from which index of which
- // input vector each element of the operand comes.
- VectorType *I1T = cast<VectorType>(I1->getType());
- unsigned I1Elem = I1T->getNumElements();
-
- if (!I2) {
- // In this case there is only one underlying vector input. Check for
- // the trivial case where we can use the input directly.
- if (I1Elem == numElem) {
- bool ElemInOrder = true;
- for (unsigned i = 0; i < numElem; ++i) {
- if (II[i].first != (int) i && II[i].first != -1) {
- ElemInOrder = false;
- break;
- }
- }
-
- if (ElemInOrder)
- return I1;
- }
-
- // A shuffle is needed.
- std::vector<Constant *> Mask(numElem);
- for (unsigned i = 0; i < numElem; ++i) {
- int Idx = II[i].first;
- if (Idx == -1)
- Mask[i] = UndefValue::get(Type::getInt32Ty(Context));
- else
- Mask[i] = ConstantInt::get(Type::getInt32Ty(Context), Idx);
- }
-
- Instruction *S =
- new ShuffleVectorInst(I1, UndefValue::get(I1T),
- ConstantVector::get(Mask),
- getReplacementName(IBeforeJ ? I : J,
- true, o));
- S->insertBefore(IBeforeJ ? J : I);
- return S;
- }
-
- VectorType *I2T = cast<VectorType>(I2->getType());
- unsigned I2Elem = I2T->getNumElements();
-
- // This input comes from two distinct vectors. The first step is to
- // make sure that both vectors are the same length. If not, the
- // smaller one will need to grow before they can be shuffled together.
- if (I1Elem < I2Elem) {
- std::vector<Constant *> Mask(I2Elem);
- unsigned v = 0;
- for (; v < I1Elem; ++v)
- Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
- for (; v < I2Elem; ++v)
- Mask[v] = UndefValue::get(Type::getInt32Ty(Context));
-
- Instruction *NewI1 =
- new ShuffleVectorInst(I1, UndefValue::get(I1T),
- ConstantVector::get(Mask),
- getReplacementName(IBeforeJ ? I : J,
- true, o, 1));
- NewI1->insertBefore(IBeforeJ ? J : I);
- I1 = NewI1;
- I1Elem = I2Elem;
- } else if (I1Elem > I2Elem) {
- std::vector<Constant *> Mask(I1Elem);
- unsigned v = 0;
- for (; v < I2Elem; ++v)
- Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
- for (; v < I1Elem; ++v)
- Mask[v] = UndefValue::get(Type::getInt32Ty(Context));
-
- Instruction *NewI2 =
- new ShuffleVectorInst(I2, UndefValue::get(I2T),
- ConstantVector::get(Mask),
- getReplacementName(IBeforeJ ? I : J,
- true, o, 1));
- NewI2->insertBefore(IBeforeJ ? J : I);
- I2 = NewI2;
- }
-
- // Now that both I1 and I2 are the same length we can shuffle them
- // together (and use the result).
- std::vector<Constant *> Mask(numElem);
- for (unsigned v = 0; v < numElem; ++v) {
- if (II[v].first == -1) {
- Mask[v] = UndefValue::get(Type::getInt32Ty(Context));
- } else {
- int Idx = II[v].first + II[v].second * I1Elem;
- Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), Idx);
- }
- }
-
- Instruction *NewOp =
- new ShuffleVectorInst(I1, I2, ConstantVector::get(Mask),
- getReplacementName(IBeforeJ ? I : J, true, o));
- NewOp->insertBefore(IBeforeJ ? J : I);
- return NewOp;
- }
- }
-
- Type *ArgType = ArgTypeL;
- if (numElemL < numElemH) {
- if (numElemL == 1 && expandIEChain(Context, I, J, o, HOp, numElemH,
- ArgTypeL, VArgType, IBeforeJ, 1)) {
- // This is another short-circuit case: we're combining a scalar into
- // a vector that is formed by an IE chain. We've just expanded the IE
- // chain; now insert the scalar and we're done.
-
- Instruction *S = InsertElementInst::Create(HOp, LOp, CV0,
- getReplacementName(IBeforeJ ? I : J, true, o));
- S->insertBefore(IBeforeJ ? J : I);
- return S;
- } else if (!expandIEChain(Context, I, J, o, LOp, numElemL, ArgTypeL,
- ArgTypeH, IBeforeJ)) {
- // The two vector inputs to the shuffle must be the same length,
- // so extend the smaller vector to be the same length as the larger one.
- Instruction *NLOp;
- if (numElemL > 1) {
-
- std::vector<Constant *> Mask(numElemH);
- unsigned v = 0;
- for (; v < numElemL; ++v)
- Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
- for (; v < numElemH; ++v)
- Mask[v] = UndefValue::get(Type::getInt32Ty(Context));
-
- NLOp = new ShuffleVectorInst(LOp, UndefValue::get(ArgTypeL),
- ConstantVector::get(Mask),
- getReplacementName(IBeforeJ ? I : J,
- true, o, 1));
- } else {
- NLOp = InsertElementInst::Create(UndefValue::get(ArgTypeH), LOp, CV0,
- getReplacementName(IBeforeJ ? I : J,
- true, o, 1));
- }
-
- NLOp->insertBefore(IBeforeJ ? J : I);
- LOp = NLOp;
- }
-
- ArgType = ArgTypeH;
- } else if (numElemL > numElemH) {
- if (numElemH == 1 && expandIEChain(Context, I, J, o, LOp, numElemL,
- ArgTypeH, VArgType, IBeforeJ)) {
- Instruction *S =
- InsertElementInst::Create(LOp, HOp,
- ConstantInt::get(Type::getInt32Ty(Context),
- numElemL),
- getReplacementName(IBeforeJ ? I : J,
- true, o));
- S->insertBefore(IBeforeJ ? J : I);
- return S;
- } else if (!expandIEChain(Context, I, J, o, HOp, numElemH, ArgTypeH,
- ArgTypeL, IBeforeJ)) {
- Instruction *NHOp;
- if (numElemH > 1) {
- std::vector<Constant *> Mask(numElemL);
- unsigned v = 0;
- for (; v < numElemH; ++v)
- Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
- for (; v < numElemL; ++v)
- Mask[v] = UndefValue::get(Type::getInt32Ty(Context));
-
- NHOp = new ShuffleVectorInst(HOp, UndefValue::get(ArgTypeH),
- ConstantVector::get(Mask),
- getReplacementName(IBeforeJ ? I : J,
- true, o, 1));
- } else {
- NHOp = InsertElementInst::Create(UndefValue::get(ArgTypeL), HOp, CV0,
- getReplacementName(IBeforeJ ? I : J,
- true, o, 1));
- }
-
- NHOp->insertBefore(IBeforeJ ? J : I);
- HOp = NHOp;
- }
- }
-
- if (ArgType->isVectorTy()) {
- unsigned numElem = VArgType->getVectorNumElements();
- std::vector<Constant*> Mask(numElem);
- for (unsigned v = 0; v < numElem; ++v) {
- unsigned Idx = v;
- // If the low vector was expanded, we need to skip the extra
- // undefined entries.
- if (v >= numElemL && numElemH > numElemL)
- Idx += (numElemH - numElemL);
- Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), Idx);
- }
-
- Instruction *BV = new ShuffleVectorInst(LOp, HOp,
- ConstantVector::get(Mask),
- getReplacementName(IBeforeJ ? I : J, true, o));
- BV->insertBefore(IBeforeJ ? J : I);
- return BV;
- }
-
- Instruction *BV1 = InsertElementInst::Create(
- UndefValue::get(VArgType), LOp, CV0,
- getReplacementName(IBeforeJ ? I : J,
- true, o, 1));
- BV1->insertBefore(IBeforeJ ? J : I);
- Instruction *BV2 = InsertElementInst::Create(BV1, HOp, CV1,
- getReplacementName(IBeforeJ ? I : J,
- true, o, 2));
- BV2->insertBefore(IBeforeJ ? J : I);
- return BV2;
- }
-
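The widening path above is easiest to check with concrete sizes. Below is a
standalone model of the final mask construction when the low operand had to
be padded (a sketch; Pad is the number of undef lanes appended to the low
operand to match the high operand's width):

  #include <vector>

  std::vector<int> concatAfterPadding(int NumElemL, int NumElemH) {
    int Pad = NumElemH > NumElemL ? NumElemH - NumElemL : 0;
    std::vector<int> Mask(NumElemL + NumElemH);
    for (int v = 0; v < NumElemL + NumElemH; ++v)
      Mask[v] = v < NumElemL ? v : v + Pad; // skip the undef padding lanes
    return Mask;
  }

concatAfterPadding(2, 4) yields {0, 1, 4, 5, 6, 7}: lanes 0-1 come from the
low operand (padded to width 4), and lanes 4-7 select the high operand in
the concatenated 8-lane shuffle input.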
- // This function creates an array of values that will be used as the inputs
- // to the vector instruction that fuses I with J.
- void BBVectorize::getReplacementInputsForPair(LLVMContext& Context,
- Instruction *I, Instruction *J,
- SmallVectorImpl<Value *> &ReplacedOperands,
- bool IBeforeJ) {
- unsigned NumOperands = I->getNumOperands();
-
- for (unsigned p = 0, o = NumOperands-1; p < NumOperands; ++p, --o) {
- // Iterate backward so that we look at the store pointer
- // first and know whether or not we need to flip the inputs.
-
- if (isa<LoadInst>(I) || (o == 1 && isa<StoreInst>(I))) {
- // This is the pointer for a load/store instruction.
- ReplacedOperands[o] = getReplacementPointerInput(Context, I, J, o);
- continue;
- } else if (isa<CallInst>(I)) {
- Function *F = cast<CallInst>(I)->getCalledFunction();
- Intrinsic::ID IID = F->getIntrinsicID();
- if (o == NumOperands-1) {
- BasicBlock &BB = *I->getParent();
-
- Module *M = BB.getParent()->getParent();
- Type *ArgTypeI = I->getType();
- Type *ArgTypeJ = J->getType();
- Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
-
- ReplacedOperands[o] = Intrinsic::getDeclaration(M, IID, VArgType);
- continue;
- } else if ((IID == Intrinsic::powi || IID == Intrinsic::ctlz ||
- IID == Intrinsic::cttz) && o == 1) {
- // The second argument of powi/ctlz/cttz is a single integer/constant
- // and we've already checked that both arguments are equal.
- // As a result, we just keep I's second argument.
- ReplacedOperands[o] = I->getOperand(o);
- continue;
- }
- } else if (isa<ShuffleVectorInst>(I) && o == NumOperands-1) {
- ReplacedOperands[o] = getReplacementShuffleMask(Context, I, J);
- continue;
- }
-
- ReplacedOperands[o] = getReplacementInput(Context, I, J, o, IBeforeJ);
- }
- }
-
- // This function creates two values that represent the outputs of the
- // original I and J instructions. These are generally vector shuffles
- // or extracts. In many cases, these will end up being unused and, thus,
- // eliminated by later passes.
- void BBVectorize::replaceOutputsOfPair(LLVMContext& Context, Instruction *I,
- Instruction *J, Instruction *K,
- Instruction *&InsertionPt,
- Instruction *&K1, Instruction *&K2) {
- if (isa<StoreInst>(I))
- return;
-
- Type *IType = I->getType();
- Type *JType = J->getType();
-
- VectorType *VType = getVecTypeForPair(IType, JType);
- unsigned numElem = VType->getNumElements();
-
- unsigned numElemI = getNumScalarElements(IType);
- unsigned numElemJ = getNumScalarElements(JType);
-
- if (IType->isVectorTy()) {
- std::vector<Constant *> Mask1(numElemI), Mask2(numElemI);
- for (unsigned v = 0; v < numElemI; ++v) {
- Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
- Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemJ + v);
- }
-
- K1 = new ShuffleVectorInst(K, UndefValue::get(VType),
- ConstantVector::get(Mask1),
- getReplacementName(K, false, 1));
- } else {
- Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0);
- K1 = ExtractElementInst::Create(K, CV0, getReplacementName(K, false, 1));
- }
-
- if (JType->isVectorTy()) {
- std::vector<Constant *> Mask1(numElemJ), Mask2(numElemJ);
- for (unsigned v = 0; v < numElemJ; ++v) {
- Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
- Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemI + v);
- }
-
- K2 = new ShuffleVectorInst(K, UndefValue::get(VType),
- ConstantVector::get(Mask2),
- getReplacementName(K, false, 2));
- } else {
- Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), numElem - 1);
- K2 = ExtractElementInst::Create(K, CV1, getReplacementName(K, false, 2));
- }
-
- K1->insertAfter(K);
- K2->insertAfter(K1);
- InsertionPt = K2;
- }
-
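The two output masks built above simply re-extract each original
instruction's lanes from the fused result: K1 takes lanes [0, numElemI) and
K2 takes lanes [numElemI, numElemI + numElemJ). As a compact sketch:

  #include <utility>
  #include <vector>

  std::pair<std::vector<int>, std::vector<int>>
  outputMasks(int NumElemI, int NumElemJ) {
    std::vector<int> M1(NumElemI), M2(NumElemJ);
    for (int v = 0; v < NumElemI; ++v) M1[v] = v;            // I's lanes
    for (int v = 0; v < NumElemJ; ++v) M2[v] = NumElemI + v; // J's lanes
    return {M1, M2};
  }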
- // Determine if all uses of the instruction I (including pairing-induced
- // uses) can be moved after J.
- bool BBVectorize::canMoveUsesOfIAfterJ(BasicBlock &BB,
- DenseSet<ValuePair> &LoadMoveSetPairs,
- Instruction *I, Instruction *J) {
- // Skip to the first instruction past I.
- BasicBlock::iterator L = std::next(BasicBlock::iterator(I));
-
- DenseSet<Value *> Users;
- AliasSetTracker WriteSet(*AA);
- if (I->mayWriteToMemory()) WriteSet.add(I);
-
- for (; cast<Instruction>(L) != J; ++L)
- (void)trackUsesOfI(Users, WriteSet, I, &*L, true, &LoadMoveSetPairs);
-
- assert(cast<Instruction>(L) == J &&
- "Tracking has not proceeded far enough to check for dependencies");
- // If J is now in the use set of I, then trackUsesOfI will return true
- // and we have a dependency cycle (and the fusing operation must abort).
- return !trackUsesOfI(Users, WriteSet, I, J, true, &LoadMoveSetPairs);
- }
-
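Stripped of the memory bookkeeping (the real scan also feeds stores into an
AliasSetTracker), the legality check above is a transitive-use scan between
the two positions. An abstract model over toy instructions:

  #include <set>
  #include <vector>

  struct Inst { int Id; std::vector<int> Ops; };

  bool canFuse(const std::vector<Inst> &Block, int IPos, int JPos) {
    std::set<int> Users{Block[IPos].Id};
    for (int P = IPos + 1; P <= JPos; ++P) {
      bool UsesI = false;
      for (int Op : Block[P].Ops)
        UsesI |= Users.count(Op) != 0;
      if (!UsesI)
        continue;
      if (P == JPos)
        return false; // J depends on I: non-trivial cycle, abort fusion
      Users.insert(Block[P].Id);
    }
    return true;
  }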
- // Move all uses of the instruction I (including pairing-induced uses)
- // after J.
- void BBVectorize::moveUsesOfIAfterJ(BasicBlock &BB,
- DenseSet<ValuePair> &LoadMoveSetPairs,
- Instruction *&InsertionPt,
- Instruction *I, Instruction *J) {
- // Skip to the first instruction past I.
- BasicBlock::iterator L = std::next(BasicBlock::iterator(I));
-
- DenseSet<Value *> Users;
- AliasSetTracker WriteSet(*AA);
- if (I->mayWriteToMemory()) WriteSet.add(I);
-
- for (; cast<Instruction>(L) != J;) {
- if (trackUsesOfI(Users, WriteSet, I, &*L, true, &LoadMoveSetPairs)) {
- // Move this instruction
- Instruction *InstToMove = &*L++;
-
- DEBUG(dbgs() << "BBV: moving: " << *InstToMove <<
- " to after " << *InsertionPt << "\n");
- InstToMove->removeFromParent();
- InstToMove->insertAfter(InsertionPt);
- InsertionPt = InstToMove;
- } else {
- ++L;
- }
- }
- }
-
- // Collect all load instructions that are in the move set of a given first
- // pair member. These loads depend on the first instruction, I, and so need
- // to be moved after J (the second instruction) when the pair is fused.
- void BBVectorize::collectPairLoadMoveSet(BasicBlock &BB,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseMap<Value *, std::vector<Value *> > &LoadMoveSet,
- DenseSet<ValuePair> &LoadMoveSetPairs,
- Instruction *I) {
- // Skip to the first instruction past I.
- BasicBlock::iterator L = std::next(BasicBlock::iterator(I));
-
- DenseSet<Value *> Users;
- AliasSetTracker WriteSet(*AA);
- if (I->mayWriteToMemory()) WriteSet.add(I);
-
- // Note: We cannot end the loop when we reach J because J could be moved
- // farther down the use chain by another instruction pairing. Also, J
- // could be before I if this is an inverted input.
- for (BasicBlock::iterator E = BB.end(); L != E; ++L) {
- if (trackUsesOfI(Users, WriteSet, I, &*L)) {
- if (L->mayReadFromMemory()) {
- LoadMoveSet[&*L].push_back(I);
- LoadMoveSetPairs.insert(ValuePair(&*L, I));
- }
- }
- }
- }
-
- // In cases where both load/stores and the computation of their pointers
- // are chosen for vectorization, we can end up in a situation where the
- // aliasing analysis starts returning different query results as the
- // process of fusing instruction pairs continues. Because the algorithm
- // relies on finding the same use dags here as were found earlier, we'll
- // need to precompute the necessary aliasing information here and then
- // manually update it during the fusion process.
- void BBVectorize::collectLoadMoveSet(BasicBlock &BB,
- std::vector<Value *> &PairableInsts,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseMap<Value *, std::vector<Value *> > &LoadMoveSet,
- DenseSet<ValuePair> &LoadMoveSetPairs) {
- for (std::vector<Value *>::iterator PI = PairableInsts.begin(),
- PIE = PairableInsts.end(); PI != PIE; ++PI) {
- DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(*PI);
- if (P == ChosenPairs.end()) continue;
-
- Instruction *I = cast<Instruction>(P->first);
- collectPairLoadMoveSet(BB, ChosenPairs, LoadMoveSet,
- LoadMoveSetPairs, I);
- }
- }
-
- // This function fuses the chosen instruction pairs into vector instructions,
- // taking care to preserve any needed scalar outputs, and then reorders the
- // remaining instructions as needed (users of the first member of the pair
- // need to be moved to after the location of the second member of the pair
- // because the vector instruction is inserted at the location of the pair's
- // second member).
- void BBVectorize::fuseChosenPairs(BasicBlock &BB,
- std::vector<Value *> &PairableInsts,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseSet<ValuePair> &FixedOrderPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
- DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps) {
- LLVMContext& Context = BB.getContext();
-
- // During the vectorization process, the order of the pairs to be fused
- // could be flipped. So we'll add each pair, flipped, into the ChosenPairs
- // list. After a pair is fused, the flipped pair is removed from the list.
- DenseSet<ValuePair> FlippedPairs;
- for (DenseMap<Value *, Value *>::iterator P = ChosenPairs.begin(),
- E = ChosenPairs.end(); P != E; ++P)
- FlippedPairs.insert(ValuePair(P->second, P->first));
- for (DenseSet<ValuePair>::iterator P = FlippedPairs.begin(),
- E = FlippedPairs.end(); P != E; ++P)
- ChosenPairs.insert(*P);
-
- DenseMap<Value *, std::vector<Value *> > LoadMoveSet;
- DenseSet<ValuePair> LoadMoveSetPairs;
- collectLoadMoveSet(BB, PairableInsts, ChosenPairs,
- LoadMoveSet, LoadMoveSetPairs);
-
- DEBUG(dbgs() << "BBV: initial: \n" << BB << "\n");
-
- for (BasicBlock::iterator PI = BB.getFirstInsertionPt(); PI != BB.end();) {
- DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(&*PI);
- if (P == ChosenPairs.end()) {
- ++PI;
- continue;
- }
-
- if (getDepthFactor(P->first) == 0) {
- // These instructions are not really fused, but are tracked as though
- // they are. Any case in which it would be interesting to fuse them
- // will be taken care of by InstCombine.
- --NumFusedOps;
- ++PI;
- continue;
- }
-
- Instruction *I = cast<Instruction>(P->first),
- *J = cast<Instruction>(P->second);
-
- DEBUG(dbgs() << "BBV: fusing: " << *I <<
- " <-> " << *J << "\n");
-
- // Remove the pair and flipped pair from the list.
- DenseMap<Value *, Value *>::iterator FP = ChosenPairs.find(P->second);
- assert(FP != ChosenPairs.end() && "Flipped pair not found in list");
- ChosenPairs.erase(FP);
- ChosenPairs.erase(P);
-
- if (!canMoveUsesOfIAfterJ(BB, LoadMoveSetPairs, I, J)) {
- DEBUG(dbgs() << "BBV: fusion of: " << *I <<
- " <-> " << *J <<
- " aborted because of non-trivial dependency cycle\n");
- --NumFusedOps;
- ++PI;
- continue;
- }
-
- // If the pair must have the other order, then flip it.
- bool FlipPairOrder = FixedOrderPairs.count(ValuePair(J, I));
- if (!FlipPairOrder && !FixedOrderPairs.count(ValuePair(I, J))) {
- // This pair does not have a fixed order, and so we might want to
- // flip it if that will yield fewer shuffles. We count the number
- // of dependencies connected via swaps, and those directly connected,
- // and flip the order if the number of swaps is greater.
- bool OrigOrder = true;
- DenseMap<ValuePair, std::vector<ValuePair> >::iterator IJ =
- ConnectedPairDeps.find(ValuePair(I, J));
- if (IJ == ConnectedPairDeps.end()) {
- IJ = ConnectedPairDeps.find(ValuePair(J, I));
- OrigOrder = false;
- }
-
- if (IJ != ConnectedPairDeps.end()) {
- unsigned NumDepsDirect = 0, NumDepsSwap = 0;
- for (std::vector<ValuePair>::iterator T = IJ->second.begin(),
- TE = IJ->second.end(); T != TE; ++T) {
- VPPair Q(IJ->first, *T);
- DenseMap<VPPair, unsigned>::iterator R =
- PairConnectionTypes.find(VPPair(Q.second, Q.first));
- assert(R != PairConnectionTypes.end() &&
- "Cannot find pair connection type");
- if (R->second == PairConnectionDirect)
- ++NumDepsDirect;
- else if (R->second == PairConnectionSwap)
- ++NumDepsSwap;
- }
-
- if (!OrigOrder)
- std::swap(NumDepsDirect, NumDepsSwap);
-
- if (NumDepsSwap > NumDepsDirect) {
- FlipPairOrder = true;
- DEBUG(dbgs() << "BBV: reordering pair: " << *I <<
- " <-> " << *J << "\n");
- }
- }
- }
-
- Instruction *L = I, *H = J;
- if (FlipPairOrder)
- std::swap(H, L);
-
- // If the pair being fused uses the opposite order from that in the pair
- // connection map, then we need to flip the types.
- DenseMap<ValuePair, std::vector<ValuePair> >::iterator HL =
- ConnectedPairs.find(ValuePair(H, L));
- if (HL != ConnectedPairs.end())
- for (std::vector<ValuePair>::iterator T = HL->second.begin(),
- TE = HL->second.end(); T != TE; ++T) {
- VPPair Q(HL->first, *T);
- DenseMap<VPPair, unsigned>::iterator R = PairConnectionTypes.find(Q);
- assert(R != PairConnectionTypes.end() &&
- "Cannot find pair connection type");
- if (R->second == PairConnectionDirect)
- R->second = PairConnectionSwap;
- else if (R->second == PairConnectionSwap)
- R->second = PairConnectionDirect;
- }
-
- bool LBeforeH = !FlipPairOrder;
- unsigned NumOperands = I->getNumOperands();
- SmallVector<Value *, 3> ReplacedOperands(NumOperands);
- getReplacementInputsForPair(Context, L, H, ReplacedOperands,
- LBeforeH);
-
- // Make a copy of the original operation, change its type to the vector
- // type and replace its operands with the vector operands.
- Instruction *K = L->clone();
- if (L->hasName())
- K->takeName(L);
- else if (H->hasName())
- K->takeName(H);
-
- if (auto CS = CallSite(K)) {
- SmallVector<Type *, 3> Tys;
- FunctionType *Old = CS.getFunctionType();
- unsigned NumOld = Old->getNumParams();
- assert(NumOld <= ReplacedOperands.size());
- for (unsigned i = 0; i != NumOld; ++i)
- Tys.push_back(ReplacedOperands[i]->getType());
- CS.mutateFunctionType(
- FunctionType::get(getVecTypeForPair(L->getType(), H->getType()),
- Tys, Old->isVarArg()));
- } else if (!isa<StoreInst>(K))
- K->mutateType(getVecTypeForPair(L->getType(), H->getType()));
-
- unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
- LLVMContext::MD_invariant_group};
- combineMetadata(K, H, KnownIDs);
- K->andIRFlags(H);
-
- for (unsigned o = 0; o < NumOperands; ++o)
- K->setOperand(o, ReplacedOperands[o]);
-
- K->insertAfter(J);
-
- // Instruction insertion point:
- Instruction *InsertionPt = K;
- Instruction *K1 = nullptr, *K2 = nullptr;
- replaceOutputsOfPair(Context, L, H, K, InsertionPt, K1, K2);
-
- // The use dag of the first original instruction must be moved to after
- // the location of the second instruction. The entire use dag of the
- // first instruction is disjoint from the input dag of the second
- // (by definition), and so commutes with it.
-
- moveUsesOfIAfterJ(BB, LoadMoveSetPairs, InsertionPt, I, J);
-
- if (!isa<StoreInst>(I)) {
- L->replaceAllUsesWith(K1);
- H->replaceAllUsesWith(K2);
- }
-
- // Instructions that may read from memory may be in the load move set.
- // Once an instruction is fused, we no longer need its move set, and so
- // the values of the map never need to be updated. However, when a load
- // is fused, we need to merge the entries from both instructions in the
- // pair in case those instructions were in the move set of some other
- // yet-to-be-fused pair. The loads in question are the keys of the map.
- if (I->mayReadFromMemory()) {
- std::vector<ValuePair> NewSetMembers;
- DenseMap<Value *, std::vector<Value *> >::iterator II =
- LoadMoveSet.find(I);
- if (II != LoadMoveSet.end())
- for (std::vector<Value *>::iterator N = II->second.begin(),
- NE = II->second.end(); N != NE; ++N)
- NewSetMembers.push_back(ValuePair(K, *N));
- DenseMap<Value *, std::vector<Value *> >::iterator JJ =
- LoadMoveSet.find(J);
- if (JJ != LoadMoveSet.end())
- for (std::vector<Value *>::iterator N = JJ->second.begin(),
- NE = JJ->second.end(); N != NE; ++N)
- NewSetMembers.push_back(ValuePair(K, *N));
- for (std::vector<ValuePair>::iterator A = NewSetMembers.begin(),
- AE = NewSetMembers.end(); A != AE; ++A) {
- LoadMoveSet[A->first].push_back(A->second);
- LoadMoveSetPairs.insert(*A);
- }
- }
-
- // Before removing I, set the iterator to the next instruction.
- PI = std::next(BasicBlock::iterator(I));
- if (cast<Instruction>(PI) == J)
- ++PI;
-
- SE->forgetValue(I);
- SE->forgetValue(J);
- I->eraseFromParent();
- J->eraseFromParent();
-
- DEBUG(if (PrintAfterEveryPair) dbgs() << "BBV: block is now: \n" <<
- BB << "\n");
- }
-
- DEBUG(dbgs() << "BBV: final: \n" << BB << "\n");
- }
-}
-
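One detail worth restating is the order-flip rule, which appears both in the
cost model and in fuseChosenPairs above: an unconstrained pair is flipped
when more of its dependencies arrive through swap connections than through
direct ones, so fewer correction shuffles are emitted. As a standalone
predicate (a sketch of the shared logic, not a function in the pass):

  bool shouldFlipPairOrder(bool ForwardIsFixed, bool ReverseIsFixed,
                           unsigned NumDepsDirect, unsigned NumDepsSwap) {
    if (ForwardIsFixed)
      return false;
    return ReverseIsFixed || NumDepsSwap > NumDepsDirect;
  }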
-char BBVectorize::ID = 0;
-static const char bb_vectorize_name[] = "Basic-Block Vectorization";
-INITIALIZE_PASS_BEGIN(BBVectorize, BBV_NAME, bb_vectorize_name, false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
-INITIALIZE_PASS_END(BBVectorize, BBV_NAME, bb_vectorize_name, false, false)
-
-BasicBlockPass *llvm::createBBVectorizePass(const VectorizeConfig &C) {
- return new BBVectorize(C);
-}
-
-bool
-llvm::vectorizeBasicBlock(Pass *P, BasicBlock &BB, const VectorizeConfig &C) {
- BBVectorize BBVectorizer(P, *BB.getParent(), C);
- return BBVectorizer.vectorizeBB(BB);
-}
-
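A hypothetical out-of-tree caller of these entry points might look like the
following (an illustration only: it assumes it runs inside some pass P that
provides the analyses BBVectorize requires, and the two field assignments
are arbitrary tuning choices, not recommendations):

  #include "llvm/IR/BasicBlock.h"
  #include "llvm/Pass.h"
  #include "llvm/Transforms/Vectorize.h"
  using namespace llvm;

  static bool vectorizeWithShortChains(Pass *P, BasicBlock &BB) {
    VectorizeConfig C;    // defaults come from the cl::opt flags below
    C.ReqChainDepth = 2;  // accept shorter dependency chains
    C.Pow2LenOnly = true; // only produce power-of-two length vectors
    return vectorizeBasicBlock(P, BB, C);
  }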
-//===----------------------------------------------------------------------===//
-VectorizeConfig::VectorizeConfig() {
- VectorBits = ::VectorBits;
- VectorizeBools = !::NoBools;
- VectorizeInts = !::NoInts;
- VectorizeFloats = !::NoFloats;
- VectorizePointers = !::NoPointers;
- VectorizeCasts = !::NoCasts;
- VectorizeMath = !::NoMath;
- VectorizeBitManipulations = !::NoBitManipulation;
- VectorizeFMA = !::NoFMA;
- VectorizeSelect = !::NoSelect;
- VectorizeCmp = !::NoCmp;
- VectorizeGEP = !::NoGEP;
- VectorizeMemOps = !::NoMemOps;
- AlignedOnly = ::AlignedOnly;
- ReqChainDepth = ::ReqChainDepth;
- SearchLimit = ::SearchLimit;
- MaxCandPairsForCycleCheck = ::MaxCandPairsForCycleCheck;
- SplatBreaksChain = ::SplatBreaksChain;
- MaxInsts = ::MaxInsts;
- MaxPairs = ::MaxPairs;
- MaxIter = ::MaxIter;
- Pow2LenOnly = ::Pow2LenOnly;
- NoMemOpBoost = ::NoMemOpBoost;
- FastDep = ::FastDep;
-}
diff --git a/lib/Transforms/Vectorize/CMakeLists.txt b/lib/Transforms/Vectorize/CMakeLists.txt
index 395f440bda47..1aea73cd4a32 100644
--- a/lib/Transforms/Vectorize/CMakeLists.txt
+++ b/lib/Transforms/Vectorize/CMakeLists.txt
@@ -1,5 +1,4 @@
add_llvm_library(LLVMVectorize
- BBVectorize.cpp
LoadStoreVectorizer.cpp
LoopVectorize.cpp
SLPVectorizer.cpp
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index eac2867233bc..193cc4d13787 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -114,12 +114,13 @@ static cl::opt<bool>
EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
cl::desc("Enable if-conversion during vectorization."));
-/// We don't vectorize loops with a known constant trip count below this number.
+/// Loops with a known constant trip count below this number are vectorized only
+/// if no scalar iteration overheads are incurred.
static cl::opt<unsigned> TinyTripCountVectorThreshold(
"vectorizer-min-trip-count", cl::init(16), cl::Hidden,
- cl::desc("Don't vectorize loops with a constant "
- "trip count that is smaller than this "
- "value."));
+ cl::desc("Loops with a constant trip count that is smaller than this "
+ "value are vectorized only if no scalar iteration overheads "
+ "are incurred."));
static cl::opt<bool> MaximizeBandwidth(
"vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden,
@@ -532,21 +533,34 @@ protected:
/// Returns true if we should generate a scalar version of \p IV.
bool needsScalarInduction(Instruction *IV) const;
- /// Return a constant reference to the VectorParts corresponding to \p V from
- /// the original loop. If the value has already been vectorized, the
- /// corresponding vector entry in VectorLoopValueMap is returned. If,
+ /// getOrCreateVectorValue and getOrCreateScalarValue coordinate to generate a
+ /// vector or scalar value on-demand if one is not yet available. When
+ /// vectorizing a loop, we visit the definition of an instruction before its
+ /// uses. When visiting the definition, we either vectorize or scalarize the
+ /// instruction, creating an entry for it in the corresponding map. (In some
+ /// cases, such as induction variables, we will create both vector and scalar
+ /// entries.) Then, as we encounter uses of the definition, we derive values
+ /// for each scalar or vector use unless such a value is already available.
+ /// For example, if we scalarize a definition and one of its uses is vector,
+ /// we build the required vector on-demand with an insertelement sequence
+ /// when visiting the use. Otherwise, if the use is scalar, we can use the
+ /// existing scalar definition.
+ ///
+ /// Return a value in the new loop corresponding to \p V from the original
+ /// loop at unroll index \p Part. If the value has already been vectorized,
+ /// the corresponding vector entry in VectorLoopValueMap is returned. If,
/// however, the value has a scalar entry in VectorLoopValueMap, we construct
- /// new vector values on-demand by inserting the scalar values into vectors
+ /// a new vector value on-demand by inserting the scalar values into a vector
/// with an insertelement sequence. If the value has been neither vectorized
/// nor scalarized, it must be loop invariant, so we simply broadcast the
- /// value into vectors.
- const VectorParts &getVectorValue(Value *V);
+ /// value into a vector.
+ Value *getOrCreateVectorValue(Value *V, unsigned Part);
/// Return a value in the new loop corresponding to \p V from the original
/// loop at unroll index \p Part and vector index \p Lane. If the value has
/// been vectorized but not scalarized, the necessary extractelement
/// instruction will be generated.
- Value *getScalarValue(Value *V, unsigned Part, unsigned Lane);
+ Value *getOrCreateScalarValue(Value *V, unsigned Part, unsigned Lane);
/// Try to vectorize the interleaved access group that \p Instr belongs to.
void vectorizeInterleaveGroup(Instruction *Instr);
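The on-demand protocol described in the new comment reduces to a simple data
structure: each key maps to one value per unroll part, created lazily by
whichever form (broadcast, insertelement sequence, or existing definition)
the use requires. A toy sketch of that shape:

  #include <functional>
  #include <map>
  #include <vector>

  struct Val {}; // stand-in for llvm::Value

  struct OnDemandMap {
    unsigned UF; // unroll factor: number of parts per key
    std::map<int, std::vector<Val *>> Parts;

    Val *getOrCreate(int Key, unsigned Part,
                     const std::function<Val *()> &Materialize) {
      auto &Entry = Parts.try_emplace(Key, UF, nullptr).first->second;
      if (!Entry[Part])
        Entry[Part] = Materialize(); // e.g. broadcast or insert sequence
      return Entry[Part];
    }
  };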
@@ -601,90 +615,103 @@ protected:
/// UF x VF scalar values in the new loop. UF and VF are the unroll and
/// vectorization factors, respectively.
///
- /// Entries can be added to either map with initVector and initScalar, which
- /// initialize and return a constant reference to the new entry. If a
- /// non-constant reference to a vector entry is required, getVector can be
- /// used to retrieve a mutable entry. We currently directly modify the mapped
- /// values during "fix-up" operations that occur once the first phase of
- /// widening is complete. These operations include type truncation and the
- /// second phase of recurrence widening.
+ /// Entries can be added to either map with setVectorValue and setScalarValue,
+ /// which assert that an entry was not already added before. If an entry is to
+ /// replace an existing one, call resetVectorValue. This is currently needed
+ /// to modify the mapped values during "fix-up" operations that occur once the
+ /// first phase of widening is complete. These operations include type
+ /// truncation and the second phase of recurrence widening.
///
- /// Otherwise, entries from either map should be accessed using the
- /// getVectorValue or getScalarValue functions from InnerLoopVectorizer.
- /// getVectorValue and getScalarValue coordinate to generate a vector or
- /// scalar value on-demand if one is not yet available. When vectorizing a
- /// loop, we visit the definition of an instruction before its uses. When
- /// visiting the definition, we either vectorize or scalarize the
- /// instruction, creating an entry for it in the corresponding map. (In some
- /// cases, such as induction variables, we will create both vector and scalar
- /// entries.) Then, as we encounter uses of the definition, we derive values
- /// for each scalar or vector use unless such a value is already available.
- /// For example, if we scalarize a definition and one of its uses is vector,
- /// we build the required vector on-demand with an insertelement sequence
- /// when visiting the use. Otherwise, if the use is scalar, we can use the
- /// existing scalar definition.
+ /// Entries from either map can be retrieved using the getVectorValue and
+ /// getScalarValue functions, which assert that the desired value exists.
+
struct ValueMap {
/// Construct an empty map with the given unroll and vectorization factors.
- ValueMap(unsigned UnrollFactor, unsigned VecWidth)
- : UF(UnrollFactor), VF(VecWidth) {
- // The unroll and vectorization factors are only used in asserts builds
- // to verify map entries are sized appropriately.
- (void)UF;
- (void)VF;
+ ValueMap(unsigned UF, unsigned VF) : UF(UF), VF(VF) {}
+
+ /// \return True if the map has any vector entry for \p Key.
+ bool hasAnyVectorValue(Value *Key) const {
+ return VectorMapStorage.count(Key);
+ }
+
+ /// \return True if the map has a vector entry for \p Key and \p Part.
+ bool hasVectorValue(Value *Key, unsigned Part) const {
+ assert(Part < UF && "Queried Vector Part is too large.");
+ if (!hasAnyVectorValue(Key))
+ return false;
+ const VectorParts &Entry = VectorMapStorage.find(Key)->second;
+ assert(Entry.size() == UF && "VectorParts has wrong dimensions.");
+ return Entry[Part] != nullptr;
}
- /// \return True if the map has a vector entry for \p Key.
- bool hasVector(Value *Key) const { return VectorMapStorage.count(Key); }
-
- /// \return True if the map has a scalar entry for \p Key.
- bool hasScalar(Value *Key) const { return ScalarMapStorage.count(Key); }
-
- /// \brief Map \p Key to the given VectorParts \p Entry, and return a
- /// constant reference to the new vector map entry. The given key should
- /// not already be in the map, and the given VectorParts should be
- /// correctly sized for the current unroll factor.
- const VectorParts &initVector(Value *Key, const VectorParts &Entry) {
- assert(!hasVector(Key) && "Vector entry already initialized");
- assert(Entry.size() == UF && "VectorParts has wrong dimensions");
- VectorMapStorage[Key] = Entry;
- return VectorMapStorage[Key];
+ /// \return True if the map has any scalar entry for \p Key.
+ bool hasAnyScalarValue(Value *Key) const {
+ return ScalarMapStorage.count(Key);
}
- /// \brief Map \p Key to the given ScalarParts \p Entry, and return a
- /// constant reference to the new scalar map entry. The given key should
- /// not already be in the map, and the given ScalarParts should be
- /// correctly sized for the current unroll and vectorization factors.
- const ScalarParts &initScalar(Value *Key, const ScalarParts &Entry) {
- assert(!hasScalar(Key) && "Scalar entry already initialized");
- assert(Entry.size() == UF &&
- all_of(make_range(Entry.begin(), Entry.end()),
- [&](const SmallVectorImpl<Value *> &Values) -> bool {
- return Values.size() == VF;
- }) &&
- "ScalarParts has wrong dimensions");
- ScalarMapStorage[Key] = Entry;
- return ScalarMapStorage[Key];
+ /// \return True if the map has a scalar entry for \p Key, \p Part and
+ /// \p Lane.
+ bool hasScalarValue(Value *Key, unsigned Part, unsigned Lane) const {
+ assert(Part < UF && "Queried Scalar Part is too large.");
+ assert(Lane < VF && "Queried Scalar Lane is too large.");
+ if (!hasAnyScalarValue(Key))
+ return false;
+ const ScalarParts &Entry = ScalarMapStorage.find(Key)->second;
+ assert(Entry.size() == UF && "ScalarParts has wrong dimensions.");
+ assert(Entry[Part].size() == VF && "ScalarParts has wrong dimensions.");
+ return Entry[Part][Lane] != nullptr;
}
- /// \return A reference to the vector map entry corresponding to \p Key.
- /// The key should already be in the map. This function should only be used
- /// when it's necessary to update values that have already been vectorized.
- /// This is the case for "fix-up" operations including type truncation and
- /// the second phase of recurrence vectorization. If a non-const reference
- /// isn't required, getVectorValue should be used instead.
- VectorParts &getVector(Value *Key) {
- assert(hasVector(Key) && "Vector entry not initialized");
- return VectorMapStorage.find(Key)->second;
+ /// Retrieve the existing vector value that corresponds to \p Key and
+ /// \p Part.
+ Value *getVectorValue(Value *Key, unsigned Part) {
+ assert(hasVectorValue(Key, Part) && "Getting non-existent value.");
+ return VectorMapStorage[Key][Part];
}
- /// Retrieve an entry from the vector or scalar maps. The preferred way to
- /// access an existing mapped entry is with getVectorValue or
- /// getScalarValue from InnerLoopVectorizer. Until those functions can be
- /// moved inside ValueMap, we have to declare them as friends.
- friend const VectorParts &InnerLoopVectorizer::getVectorValue(Value *V);
- friend Value *InnerLoopVectorizer::getScalarValue(Value *V, unsigned Part,
- unsigned Lane);
+ /// Retrieve the existing scalar value that corresponds to \p Key, \p Part
+ /// and \p Lane.
+ Value *getScalarValue(Value *Key, unsigned Part, unsigned Lane) {
+ assert(hasScalarValue(Key, Part, Lane) && "Getting non-existent value.");
+ return ScalarMapStorage[Key][Part][Lane];
+ }
+
+ /// Set a vector value associated with \p Key and \p Part. Assumes such a
+ /// value is not already set. If it is, use resetVectorValue() instead.
+ void setVectorValue(Value *Key, unsigned Part, Value *Vector) {
+ assert(!hasVectorValue(Key, Part) && "Vector value already set for part");
+ if (!VectorMapStorage.count(Key)) {
+ VectorParts Entry(UF);
+ VectorMapStorage[Key] = Entry;
+ }
+ VectorMapStorage[Key][Part] = Vector;
+ }
+
+ /// Set a scalar value associated with \p Key for \p Part and \p Lane.
+ /// Assumes such a value is not already set.
+ void setScalarValue(Value *Key, unsigned Part, unsigned Lane,
+ Value *Scalar) {
+ assert(!hasScalarValue(Key, Part, Lane) && "Scalar value already set");
+ if (!ScalarMapStorage.count(Key)) {
+ ScalarParts Entry(UF);
+ for (unsigned Part = 0; Part < UF; ++Part)
+ Entry[Part].resize(VF, nullptr);
+ // TODO: Consider storing uniform values only per-part, as they occupy
+ // lane 0 only, keeping the other VF-1 redundant entries null.
+ ScalarMapStorage[Key] = Entry;
+ }
+ ScalarMapStorage[Key][Part][Lane] = Scalar;
+ }
+
+ /// Reset the vector value associated with \p Key for the given \p Part.
+ /// This function can be used to update values that have already been
+ /// vectorized. This is the case for "fix-up" operations including type
+ /// truncation and the second phase of recurrence vectorization.
+ void resetVectorValue(Value *Key, unsigned Part, Value *Vector) {
+ assert(hasVectorValue(Key, Part) && "Vector value not set for part");
+ VectorMapStorage[Key][Part] = Vector;
+ }
private:
/// The unroll factor. Each entry in the vector map contains UF vector
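
// A minimal, self-contained C++ analogue of the reworked ValueMap above
// (a sketch using a placeholder Val type and std::map, not the LLVM class):
// entries are created lazily one part at a time with setVectorValue, read
// back with the asserting getVectorValue, and overwritten only through
// resetVectorValue.
#include <cassert>
#include <map>
#include <vector>

struct Val {}; // placeholder for llvm::Value

struct PerPartMap {
  unsigned UF;
  std::map<Val *, std::vector<Val *>> VectorMapStorage; // Key -> UF parts

  bool hasVectorValue(Val *Key, unsigned Part) const {
    auto It = VectorMapStorage.find(Key);
    return It != VectorMapStorage.end() && It->second[Part] != nullptr;
  }
  void setVectorValue(Val *Key, unsigned Part, Val *V) {
    assert(!hasVectorValue(Key, Part) && "use resetVectorValue instead");
    auto &Entry = VectorMapStorage[Key];
    if (Entry.empty())
      Entry.assign(UF, nullptr); // allocate all UF slots on first touch
    Entry[Part] = V;
  }
  void resetVectorValue(Val *Key, unsigned Part, Val *V) {
    assert(hasVectorValue(Key, Part) && "nothing to reset");
    VectorMapStorage[Key][Part] = V; // fix-up path, e.g. type truncation
  }
  Val *getVectorValue(Val *Key, unsigned Part) {
    assert(hasVectorValue(Key, Part) && "getting non-existent value");
    return VectorMapStorage[Key][Part];
  }
};

int main() {
  PerPartMap M{/*UF=*/2, {}};
  Val A, P0, P1, Fixed;
  M.setVectorValue(&A, 0, &P0); // parts can now be set independently
  M.setVectorValue(&A, 1, &P1);
  M.resetVectorValue(&A, 0, &Fixed);
  return M.getVectorValue(&A, 0) == &Fixed ? 0 : 1;
}
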
@@ -1577,6 +1604,9 @@ public:
/// Return the first-order recurrences found in the loop.
RecurrenceSet *getFirstOrderRecurrences() { return &FirstOrderRecurrences; }
+ /// Return the set of instructions to sink to handle first-order recurrences.
+ DenseMap<Instruction *, Instruction *> &getSinkAfter() { return SinkAfter; }
+
/// Returns the widest induction type.
Type *getWidestInductionType() { return WidestIndTy; }
@@ -1779,6 +1809,9 @@ private:
InductionList Inductions;
/// Holds the phi nodes that are first-order recurrences.
RecurrenceSet FirstOrderRecurrences;
+ /// Holds instructions that need to sink past other instructions to handle
+ /// first-order recurrences.
+ DenseMap<Instruction *, Instruction *> SinkAfter;
/// Holds the widest induction type encountered.
Type *WidestIndTy;
@@ -2417,15 +2450,13 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind",
&*LoopVectorBody->getFirstInsertionPt());
Instruction *LastInduction = VecInd;
- VectorParts Entry(UF);
for (unsigned Part = 0; Part < UF; ++Part) {
- Entry[Part] = LastInduction;
+ VectorLoopValueMap.setVectorValue(EntryVal, Part, LastInduction);
+ if (isa<TruncInst>(EntryVal))
+ addMetadata(LastInduction, EntryVal);
LastInduction = cast<Instruction>(addFastMathFlag(
Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add")));
}
- VectorLoopValueMap.initVector(EntryVal, Entry);
- if (isa<TruncInst>(EntryVal))
- addMetadata(Entry, EntryVal);
// Move the last step to the end of the latch block. This ensures consistent
// placement of all induction updates.
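
// For a concrete picture of the loop above (values assumed for
// illustration): an integer induction starting at 0 with step 1, VF = 4,
// UF = 2. Each part is registered via setVectorValue as it is built, and
// the next part is derived by the splat-VF "step.add".
#include <array>
#include <cstdio>

int main() {
  constexpr unsigned VF = 4, UF = 2;
  std::array<int, VF> VecInd = {0, 1, 2, 3}; // initial vec.ind
  for (unsigned Part = 0; Part < UF; ++Part) {
    std::printf("part %u: <%d, %d, %d, %d>\n", Part, VecInd[0], VecInd[1],
                VecInd[2], VecInd[3]);
    for (int &Lane : VecInd)
      Lane += VF; // step.add = vec.ind + SplatVF
  }
  return 0;
}
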
@@ -2531,13 +2562,13 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) {
// induction variable, and build the necessary step vectors.
if (!VectorizedIV) {
Value *Broadcasted = getBroadcastInstrs(ScalarIV);
- VectorParts Entry(UF);
- for (unsigned Part = 0; Part < UF; ++Part)
- Entry[Part] =
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ Value *EntryPart =
getStepVector(Broadcasted, VF * Part, Step, ID.getInductionOpcode());
- VectorLoopValueMap.initVector(EntryVal, Entry);
- if (Trunc)
- addMetadata(Entry, Trunc);
+ VectorLoopValueMap.setVectorValue(EntryVal, Part, EntryPart);
+ if (Trunc)
+ addMetadata(EntryPart, Trunc);
+ }
}
// If an induction variable is only used for counting loop iterations or
@@ -2637,17 +2668,14 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
Cost->isUniformAfterVectorization(cast<Instruction>(EntryVal), VF) ? 1 : VF;
// Compute the scalar steps and save the results in VectorLoopValueMap.
- ScalarParts Entry(UF);
for (unsigned Part = 0; Part < UF; ++Part) {
- Entry[Part].resize(VF);
for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
auto *StartIdx = getSignedIntOrFpConstant(ScalarIVTy, VF * Part + Lane);
auto *Mul = addFastMathFlag(Builder.CreateBinOp(MulOp, StartIdx, Step));
auto *Add = addFastMathFlag(Builder.CreateBinOp(AddOp, ScalarIV, Mul));
- Entry[Part][Lane] = Add;
+ VectorLoopValueMap.setScalarValue(EntryVal, Part, Lane, Add);
}
}
- VectorLoopValueMap.initScalar(EntryVal, Entry);
}
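
// The per-(Part, Lane) arithmetic above is ScalarIV + (VF * Part + Lane)
// * Step, with only lane 0 emitted for values that are uniform after
// vectorization. A self-contained sketch with assumed example numbers:
#include <cstdio>

int main() {
  const int IV = 100, Step = 2;   // assumed starting value and step
  const unsigned VF = 4, UF = 2;
  const bool Uniform = false;     // uniform-after-vectorization?
  const unsigned Lanes = Uniform ? 1 : VF;
  for (unsigned Part = 0; Part < UF; ++Part)
    for (unsigned Lane = 0; Lane < Lanes; ++Lane)
      std::printf("Part %u Lane %u -> %d\n", Part, Lane,
                  IV + int(VF * Part + Lane) * Step);
  return 0;
}
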
int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
@@ -2665,8 +2693,7 @@ bool LoopVectorizationLegality::isUniform(Value *V) {
return LAI->isUniform(V);
}
-const InnerLoopVectorizer::VectorParts &
-InnerLoopVectorizer::getVectorValue(Value *V) {
+Value *InnerLoopVectorizer::getOrCreateVectorValue(Value *V, unsigned Part) {
assert(V != Induction && "The new induction variable should not be used.");
assert(!V->getType()->isVectorTy() && "Can't widen a vector");
assert(!V->getType()->isVoidTy() && "Type does not produce a value");
@@ -2675,17 +2702,16 @@ InnerLoopVectorizer::getVectorValue(Value *V) {
if (Legal->hasStride(V))
V = ConstantInt::get(V->getType(), 1);
- // If we have this scalar in the map, return it.
- if (VectorLoopValueMap.hasVector(V))
- return VectorLoopValueMap.VectorMapStorage[V];
+ // If we have a vector mapped to this value, return it.
+ if (VectorLoopValueMap.hasVectorValue(V, Part))
+ return VectorLoopValueMap.getVectorValue(V, Part);
// If the value has not been vectorized, check if it has been scalarized
// instead. If it has been scalarized, and we actually need the value in
// vector form, we will construct the vector values on demand.
- if (VectorLoopValueMap.hasScalar(V)) {
+ if (VectorLoopValueMap.hasAnyScalarValue(V)) {
- // Initialize a new vector map entry.
- VectorParts Entry(UF);
+ Value *ScalarValue = VectorLoopValueMap.getScalarValue(V, Part, 0);
// If we've scalarized a value, that value should be an instruction.
auto *I = cast<Instruction>(V);
@@ -2693,17 +2719,17 @@ InnerLoopVectorizer::getVectorValue(Value *V) {
// If we aren't vectorizing, we can just copy the scalar map values over to
// the vector map.
if (VF == 1) {
- for (unsigned Part = 0; Part < UF; ++Part)
- Entry[Part] = getScalarValue(V, Part, 0);
- return VectorLoopValueMap.initVector(V, Entry);
+ VectorLoopValueMap.setVectorValue(V, Part, ScalarValue);
+ return ScalarValue;
}
- // Get the last scalar instruction we generated for V. If the value is
- // known to be uniform after vectorization, this corresponds to lane zero
- // of the last unroll iteration. Otherwise, the last instruction is the one
- // we created for the last vector lane of the last unroll iteration.
+ // Get the last scalar instruction we generated for V and Part. If the value
+ // is known to be uniform after vectorization, this corresponds to lane zero
+ // of unroll iteration Part. Otherwise, the last instruction is the one we
+ // created for the last vector lane of unroll iteration Part.
unsigned LastLane = Cost->isUniformAfterVectorization(I, VF) ? 0 : VF - 1;
- auto *LastInst = cast<Instruction>(getScalarValue(V, UF - 1, LastLane));
+ auto *LastInst =
+ cast<Instruction>(VectorLoopValueMap.getScalarValue(V, Part, LastLane));
// Set the insert point after the last scalarized instruction. This ensures
// the insertelement sequence will directly follow the scalar definitions.
@@ -2717,52 +2743,50 @@ InnerLoopVectorizer::getVectorValue(Value *V) {
// iteration. Otherwise, we construct the vector values using insertelement
// instructions. Since the resulting vectors are stored in
// VectorLoopValueMap, we will only generate the insertelements once.
- for (unsigned Part = 0; Part < UF; ++Part) {
- Value *VectorValue = nullptr;
- if (Cost->isUniformAfterVectorization(I, VF)) {
- VectorValue = getBroadcastInstrs(getScalarValue(V, Part, 0));
- } else {
- VectorValue = UndefValue::get(VectorType::get(V->getType(), VF));
- for (unsigned Lane = 0; Lane < VF; ++Lane)
- VectorValue = Builder.CreateInsertElement(
- VectorValue, getScalarValue(V, Part, Lane),
- Builder.getInt32(Lane));
- }
- Entry[Part] = VectorValue;
+ Value *VectorValue = nullptr;
+ if (Cost->isUniformAfterVectorization(I, VF)) {
+ VectorValue = getBroadcastInstrs(ScalarValue);
+ } else {
+ VectorValue = UndefValue::get(VectorType::get(V->getType(), VF));
+ for (unsigned Lane = 0; Lane < VF; ++Lane)
+ VectorValue = Builder.CreateInsertElement(
+ VectorValue, getOrCreateScalarValue(V, Part, Lane),
+ Builder.getInt32(Lane));
}
+ VectorLoopValueMap.setVectorValue(V, Part, VectorValue);
Builder.restoreIP(OldIP);
- return VectorLoopValueMap.initVector(V, Entry);
+ return VectorValue;
}
// If this scalar is unknown, assume that it is a constant or that it is
// loop invariant. Broadcast V and save the value for future uses.
Value *B = getBroadcastInstrs(V);
- return VectorLoopValueMap.initVector(V, VectorParts(UF, B));
+ VectorLoopValueMap.setVectorValue(V, Part, B);
+ return B;
}
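
// The two materialization strategies above, reduced to a stand-alone
// sketch (plain ints in place of Value*, illustrative only): uniform
// values broadcast lane 0; everything else is assembled lane by lane,
// the way the insertelement chain does.
#include <cstdio>
#include <vector>

std::vector<int> materializeVectorPart(const std::vector<int> &Lanes,
                                       bool Uniform) {
  if (Uniform) // getBroadcastInstrs on lane 0
    return std::vector<int>(Lanes.size(), Lanes[0]);
  std::vector<int> V(Lanes.size());
  for (size_t Lane = 0; Lane < Lanes.size(); ++Lane)
    V[Lane] = Lanes[Lane]; // CreateInsertElement per lane
  return V;                // the caller caches it with setVectorValue
}

int main() {
  for (int X : materializeVectorPart({7, 8, 9, 10}, /*Uniform=*/false))
    std::printf("%d ", X);
  std::printf("\n");
  return 0;
}
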
-Value *InnerLoopVectorizer::getScalarValue(Value *V, unsigned Part,
- unsigned Lane) {
+Value *InnerLoopVectorizer::getOrCreateScalarValue(Value *V, unsigned Part,
+ unsigned Lane) {
// If the value is not an instruction contained in the loop, it should
// already be scalar.
if (OrigLoop->isLoopInvariant(V))
return V;
- assert(Lane > 0 ?
- !Cost->isUniformAfterVectorization(cast<Instruction>(V), VF)
- : true && "Uniform values only have lane zero");
+ assert(Lane > 0 ? !Cost->isUniformAfterVectorization(cast<Instruction>(V), VF)
+ : true && "Uniform values only have lane zero");
// If the value from the original loop has not been vectorized, it is
// represented by UF x VF scalar values in the new loop. Return the requested
// scalar value.
- if (VectorLoopValueMap.hasScalar(V))
- return VectorLoopValueMap.ScalarMapStorage[V][Part][Lane];
+ if (VectorLoopValueMap.hasScalarValue(V, Part, Lane))
+ return VectorLoopValueMap.getScalarValue(V, Part, Lane);
// If the value has not been scalarized, get its entry in VectorLoopValueMap
// for the given unroll part. If this entry is not a vector type (i.e., the
// vectorization factor is one), there is no need to generate an
// extractelement instruction.
- auto *U = getVectorValue(V)[Part];
+ auto *U = getOrCreateVectorValue(V, Part);
if (!U->getType()->isVectorTy()) {
assert(VF == 1 && "Value not scalarized has non-vector type");
return U;
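
// The scalar path mirrors this in reverse. A condensed, self-contained
// sketch of its final step (int vectors standing in for IR values): when
// VF == 1 the cached "vector" part already is the scalar; otherwise an
// extractelement on the requested lane is emitted.
#include <cassert>
#include <vector>

int scalarFromVectorPart(const std::vector<int> &Part, unsigned Lane,
                         unsigned VF) {
  if (VF == 1) {
    assert(Part.size() == 1 && "value not scalarized has non-vector type");
    return Part[0]; // no extractelement needed
  }
  return Part[Lane]; // Builder.CreateExtractElement(U, Lane)
}

int main() { return scalarFromVectorPart({5, 6, 7, 8}, 2, 4) == 7 ? 0 : 1; }
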
@@ -2844,7 +2868,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
Index += (VF - 1) * Group->getFactor();
for (unsigned Part = 0; Part < UF; Part++) {
- Value *NewPtr = getScalarValue(Ptr, Part, 0);
+ Value *NewPtr = getOrCreateScalarValue(Ptr, Part, 0);
// Notice current instruction could be any index. Need to adjust the address
// to the member of index 0.
@@ -2887,7 +2911,6 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
if (!Member)
continue;
- VectorParts Entry(UF);
Constant *StrideMask = createStrideMask(Builder, I, InterleaveFactor, VF);
for (unsigned Part = 0; Part < UF; Part++) {
Value *StridedVec = Builder.CreateShuffleVector(
@@ -2899,10 +2922,11 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
StridedVec = Builder.CreateBitOrPointerCast(StridedVec, OtherVTy);
}
- Entry[Part] =
- Group->isReverse() ? reverseVector(StridedVec) : StridedVec;
+ if (Group->isReverse())
+ StridedVec = reverseVector(StridedVec);
+
+ VectorLoopValueMap.setVectorValue(Member, Part, StridedVec);
}
- VectorLoopValueMap.initVector(Member, Entry);
}
return;
}
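
// The StrideMask used above de-interleaves member I of a factor-F group
// out of the wide load: <I, I+F, I+2F, ..., I+(VF-1)*F>. A stand-alone
// sketch computing it, with parameter values assumed for illustration:
#include <cstdio>
#include <vector>

std::vector<unsigned> strideMask(unsigned Member, unsigned Factor,
                                 unsigned VF) {
  std::vector<unsigned> Mask(VF);
  for (unsigned Lane = 0; Lane < VF; ++Lane)
    Mask[Lane] = Member + Lane * Factor;
  return Mask;
}

int main() {
  for (unsigned M : strideMask(/*Member=*/1, /*Factor=*/2, /*VF=*/4))
    std::printf("%u ", M); // prints: 1 3 5 7
  std::printf("\n");
  return 0;
}
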
@@ -2919,8 +2943,8 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
Instruction *Member = Group->getMember(i);
assert(Member && "Fail to get a member from an interleaved store group");
- Value *StoredVec =
- getVectorValue(cast<StoreInst>(Member)->getValueOperand())[Part];
+ Value *StoredVec = getOrCreateVectorValue(
+ cast<StoreInst>(Member)->getValueOperand(), Part);
if (Group->isReverse())
StoredVec = reverseVector(StoredVec);
@@ -2981,16 +3005,14 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
bool CreateGatherScatter =
(Decision == LoopVectorizationCostModel::CM_GatherScatter);
- VectorParts VectorGep;
+ // Either Ptr feeds a vector load/store, or a vector GEP should feed a vector
+ // gather/scatter. Otherwise, the Decision should have been CM_Scalarize.
+ assert((ConsecutiveStride || CreateGatherScatter) &&
+ "The instruction should be scalarized");
// Handle consecutive loads/stores.
- if (ConsecutiveStride) {
- Ptr = getScalarValue(Ptr, 0, 0);
- } else {
- // At this point we should vector version of GEP for Gather or Scatter
- assert(CreateGatherScatter && "The instruction should be scalarized");
- VectorGep = getVectorValue(Ptr);
- }
+ if (ConsecutiveStride)
+ Ptr = getOrCreateScalarValue(Ptr, 0, 0);
VectorParts Mask = createBlockInMask(Instr->getParent());
// Handle Stores:
@@ -2998,16 +3020,15 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
assert(!Legal->isUniform(SI->getPointerOperand()) &&
"We do not allow storing to uniform addresses");
setDebugLocFromInst(Builder, SI);
- // We don't want to update the value in the map as it might be used in
- // another expression. So don't use a reference type for "StoredVal".
- VectorParts StoredVal = getVectorValue(SI->getValueOperand());
for (unsigned Part = 0; Part < UF; ++Part) {
Instruction *NewSI = nullptr;
+ Value *StoredVal = getOrCreateVectorValue(SI->getValueOperand(), Part);
if (CreateGatherScatter) {
Value *MaskPart = Legal->isMaskRequired(SI) ? Mask[Part] : nullptr;
- NewSI = Builder.CreateMaskedScatter(StoredVal[Part], VectorGep[Part],
- Alignment, MaskPart);
+ Value *VectorGep = getOrCreateVectorValue(Ptr, Part);
+ NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment,
+ MaskPart);
} else {
// Calculate the pointer for the specific unroll-part.
Value *PartPtr =
@@ -3016,7 +3037,10 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
if (Reverse) {
// If we store to reverse consecutive memory locations, then we need
// to reverse the order of elements in the stored value.
- StoredVal[Part] = reverseVector(StoredVal[Part]);
+ StoredVal = reverseVector(StoredVal);
+ // We don't want to update the value in the map as it might be used in
+ // another expression. So don't call resetVectorValue(StoredVal).
+
// If the address is consecutive but reversed, then the
// wide store needs to start at the last vector element.
PartPtr =
@@ -3030,11 +3054,10 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace));
if (Legal->isMaskRequired(SI))
- NewSI = Builder.CreateMaskedStore(StoredVal[Part], VecPtr, Alignment,
+ NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
Mask[Part]);
else
- NewSI =
- Builder.CreateAlignedStore(StoredVal[Part], VecPtr, Alignment);
+ NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
}
addMetadata(NewSI, SI);
}
@@ -3044,14 +3067,14 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
// Handle loads.
assert(LI && "Must have a load instruction");
setDebugLocFromInst(Builder, LI);
- VectorParts Entry(UF);
for (unsigned Part = 0; Part < UF; ++Part) {
- Instruction *NewLI;
+ Value *NewLI;
if (CreateGatherScatter) {
Value *MaskPart = Legal->isMaskRequired(LI) ? Mask[Part] : nullptr;
- NewLI = Builder.CreateMaskedGather(VectorGep[Part], Alignment, MaskPart,
+ Value *VectorGep = getOrCreateVectorValue(Ptr, Part);
+ NewLI = Builder.CreateMaskedGather(VectorGep, Alignment, MaskPart,
nullptr, "wide.masked.gather");
- Entry[Part] = NewLI;
+ addMetadata(NewLI, LI);
} else {
// Calculate the pointer for the specific unroll-part.
Value *PartPtr =
@@ -3073,11 +3096,14 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
"wide.masked.load");
else
NewLI = Builder.CreateAlignedLoad(VecPtr, Alignment, "wide.load");
- Entry[Part] = Reverse ? reverseVector(NewLI) : NewLI;
+
+ // Add metadata to the load, but hand the reverse shuffle to setVectorValue.
+ addMetadata(NewLI, LI);
+ if (Reverse)
+ NewLI = reverseVector(NewLI);
}
- addMetadata(NewLI, LI);
+ VectorLoopValueMap.setVectorValue(Instr, Part, NewLI);
}
- VectorLoopValueMap.initVector(Instr, Entry);
}
void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
@@ -3094,9 +3120,6 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
// Does this instruction return a value ?
bool IsVoidRetTy = Instr->getType()->isVoidTy();
- // Initialize a new scalar map entry.
- ScalarParts Entry(UF);
-
VectorParts Cond;
if (IfPredicateInstr)
Cond = createBlockInMask(Instr->getParent());
@@ -3108,7 +3131,6 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
// For each vector unroll 'part':
for (unsigned Part = 0; Part < UF; ++Part) {
- Entry[Part].resize(VF);
// For each scalar that we create:
for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
@@ -3129,7 +3151,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
// Replace the operands of the cloned instructions with their scalar
// equivalents in the new loop.
for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
- auto *NewOp = getScalarValue(Instr->getOperand(op), Part, Lane);
+ auto *NewOp = getOrCreateScalarValue(Instr->getOperand(op), Part, Lane);
Cloned->setOperand(op, NewOp);
}
addNewMetadata(Cloned, Instr);
@@ -3138,7 +3160,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
Builder.Insert(Cloned);
// Add the cloned scalar to the scalar map entry.
- Entry[Part][Lane] = Cloned;
+ VectorLoopValueMap.setScalarValue(Instr, Part, Lane, Cloned);
// If we just cloned a new assumption, add it the assumption cache.
if (auto *II = dyn_cast<IntrinsicInst>(Cloned))
@@ -3150,7 +3172,6 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
PredicatedInstructions.push_back(std::make_pair(Cloned, Cmp));
}
}
- VectorLoopValueMap.initScalar(Instr, Entry);
}
PHINode *InnerLoopVectorizer::createInductionVariable(Loop *L, Value *Start,
@@ -3786,10 +3807,10 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
// If the value wasn't vectorized, we must maintain the original scalar
// type. The absence of the value from VectorLoopValueMap indicates that it
// wasn't vectorized.
- if (!VectorLoopValueMap.hasVector(KV.first))
+ if (!VectorLoopValueMap.hasAnyVectorValue(KV.first))
continue;
- VectorParts &Parts = VectorLoopValueMap.getVector(KV.first);
- for (Value *&I : Parts) {
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ Value *I = getOrCreateVectorValue(KV.first, Part);
if (Erased.count(I) || I->use_empty() || !isa<Instruction>(I))
continue;
Type *OriginalTy = I->getType();
@@ -3878,7 +3899,7 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
I->replaceAllUsesWith(Res);
cast<Instruction>(I)->eraseFromParent();
Erased.insert(I);
- I = Res;
+ VectorLoopValueMap.resetVectorValue(KV.first, Part, Res);
}
}
@@ -3887,15 +3908,15 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
// If the value wasn't vectorized, we must maintain the original scalar
// type. The absence of the value from VectorLoopValueMap indicates that it
// wasn't vectorized.
- if (!VectorLoopValueMap.hasVector(KV.first))
+ if (!VectorLoopValueMap.hasAnyVectorValue(KV.first))
continue;
- VectorParts &Parts = VectorLoopValueMap.getVector(KV.first);
- for (Value *&I : Parts) {
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ Value *I = getOrCreateVectorValue(KV.first, Part);
ZExtInst *Inst = dyn_cast<ZExtInst>(I);
if (Inst && Inst->use_empty()) {
Value *NewI = Inst->getOperand(0);
Inst->eraseFromParent();
- I = NewI;
+ VectorLoopValueMap.resetVectorValue(KV.first, Part, NewI);
}
}
}
@@ -4025,28 +4046,29 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
// We constructed a temporary phi node in the first phase of vectorization.
// This phi node will eventually be deleted.
- VectorParts &PhiParts = VectorLoopValueMap.getVector(Phi);
- Builder.SetInsertPoint(cast<Instruction>(PhiParts[0]));
+ Builder.SetInsertPoint(
+ cast<Instruction>(VectorLoopValueMap.getVectorValue(Phi, 0)));
// Create a phi node for the new recurrence. The current value will either be
// the initial value inserted into a vector or loop-varying vector value.
auto *VecPhi = Builder.CreatePHI(VectorInit->getType(), 2, "vector.recur");
VecPhi->addIncoming(VectorInit, LoopVectorPreHeader);
- // Get the vectorized previous value.
- auto &PreviousParts = getVectorValue(Previous);
+ // Get the vectorized previous value for the last unroll part, UF - 1. It is
+ // generated last among all unrolled parts, due to their construction order.
+ Value *PreviousLastPart = getOrCreateVectorValue(Previous, UF - 1);
// Set the insertion point after the previous value if it is an instruction.
// Note that the previous value may have been constant-folded so it is not
// guaranteed to be an instruction in the vector loop. Also, if the previous
// value is a phi node, we should insert after all the phi nodes to avoid
// breaking basic block verification.
- if (LI->getLoopFor(LoopVectorBody)->isLoopInvariant(PreviousParts[UF - 1]) ||
- isa<PHINode>(PreviousParts[UF - 1]))
+ if (LI->getLoopFor(LoopVectorBody)->isLoopInvariant(PreviousLastPart) ||
+ isa<PHINode>(PreviousLastPart))
Builder.SetInsertPoint(&*LoopVectorBody->getFirstInsertionPt());
else
Builder.SetInsertPoint(
- &*++BasicBlock::iterator(cast<Instruction>(PreviousParts[UF - 1])));
+ &*++BasicBlock::iterator(cast<Instruction>(PreviousLastPart)));
// We will construct a vector for the recurrence by combining the values for
// the current and previous iterations. This is the required shuffle mask.
@@ -4061,15 +4083,16 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
// Shuffle the current and previous vector and update the vector parts.
for (unsigned Part = 0; Part < UF; ++Part) {
+ Value *PreviousPart = getOrCreateVectorValue(Previous, Part);
+ Value *PhiPart = VectorLoopValueMap.getVectorValue(Phi, Part);
auto *Shuffle =
- VF > 1
- ? Builder.CreateShuffleVector(Incoming, PreviousParts[Part],
- ConstantVector::get(ShuffleMask))
- : Incoming;
- PhiParts[Part]->replaceAllUsesWith(Shuffle);
- cast<Instruction>(PhiParts[Part])->eraseFromParent();
- PhiParts[Part] = Shuffle;
- Incoming = PreviousParts[Part];
+ VF > 1 ? Builder.CreateShuffleVector(Incoming, PreviousPart,
+ ConstantVector::get(ShuffleMask))
+ : Incoming;
+ PhiPart->replaceAllUsesWith(Shuffle);
+ cast<Instruction>(PhiPart)->eraseFromParent();
+ VectorLoopValueMap.resetVectorValue(Phi, Part, Shuffle);
+ Incoming = PreviousPart;
}
// Fix the latch value of the new recurrence in the vector loop.
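
// The ShuffleMask driving the loop above is <VF-1, VF, ..., 2*VF-2>: it
// keeps the last element of Incoming followed by the first VF-1 elements
// of the current part. A self-contained model of those shufflevector
// semantics on ints:
#include <cstdio>
#include <vector>

std::vector<int> recurrenceShuffle(const std::vector<int> &Incoming,
                                   const std::vector<int> &Current) {
  const unsigned VF = static_cast<unsigned>(Incoming.size());
  std::vector<int> Out(VF);
  for (unsigned I = 0; I < VF; ++I) {
    unsigned M = VF - 1 + I; // mask element; < VF selects from Incoming
    Out[I] = M < VF ? Incoming[M] : Current[M - VF];
  }
  return Out;
}

int main() {
  for (int X : recurrenceShuffle({0, 1, 2, 3}, {4, 5, 6, 7}))
    std::printf("%d ", X); // prints: 3 4 5 6
  std::printf("\n");
  return 0;
}
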
@@ -4097,7 +4120,7 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
// `Incoming`. This is analogous to the vectorized case above: extracting the
// second last element when VF > 1.
else if (UF > 1)
- ExtractForPhiUsedOutsideLoop = PreviousParts[UF - 2];
+ ExtractForPhiUsedOutsideLoop = getOrCreateVectorValue(Previous, UF - 2);
// Fix the initial value of the original recurrence in the scalar loop.
Builder.SetInsertPoint(&*LoopScalarPreHeader->begin());
@@ -4148,8 +4171,7 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
Builder.SetInsertPoint(LoopBypassBlocks[1]->getTerminator());
// This is the vector-clone of the value that leaves the loop.
- const VectorParts &VectorExit = getVectorValue(LoopExitInst);
- Type *VecTy = VectorExit[0]->getType();
+ Type *VecTy = getOrCreateVectorValue(LoopExitInst, 0)->getType();
// Find the reduction identity variable. Zero for addition, or, xor,
// one for multiplication, -1 for And.
@@ -4187,18 +4209,17 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
// Reductions do not have to start at zero. They can start with
// any loop invariant values.
- const VectorParts &VecRdxPhi = getVectorValue(Phi);
BasicBlock *Latch = OrigLoop->getLoopLatch();
Value *LoopVal = Phi->getIncomingValueForBlock(Latch);
- const VectorParts &Val = getVectorValue(LoopVal);
- for (unsigned part = 0; part < UF; ++part) {
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ Value *VecRdxPhi = getOrCreateVectorValue(Phi, Part);
+ Value *Val = getOrCreateVectorValue(LoopVal, Part);
// Make sure to add the reduction stat value only to the
// first unroll part.
- Value *StartVal = (part == 0) ? VectorStart : Identity;
- cast<PHINode>(VecRdxPhi[part])
- ->addIncoming(StartVal, LoopVectorPreHeader);
- cast<PHINode>(VecRdxPhi[part])
- ->addIncoming(Val[part], LI->getLoopFor(LoopVectorBody)->getLoopLatch());
+ Value *StartVal = (Part == 0) ? VectorStart : Identity;
+ cast<PHINode>(VecRdxPhi)->addIncoming(StartVal, LoopVectorPreHeader);
+ cast<PHINode>(VecRdxPhi)
+ ->addIncoming(Val, LI->getLoopFor(LoopVectorBody)->getLoopLatch());
}
// Before each round, move the insertion point right between
@@ -4207,7 +4228,6 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
// instructions.
Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt());
- VectorParts &RdxParts = VectorLoopValueMap.getVector(LoopExitInst);
setDebugLocFromInst(Builder, LoopExitInst);
// If the vector reduction can be performed in a smaller type, we truncate
@@ -4216,37 +4236,42 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
if (VF > 1 && Phi->getType() != RdxDesc.getRecurrenceType()) {
Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF);
Builder.SetInsertPoint(LoopVectorBody->getTerminator());
- for (unsigned part = 0; part < UF; ++part) {
- Value *Trunc = Builder.CreateTrunc(RdxParts[part], RdxVecTy);
+ VectorParts RdxParts(UF);
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ RdxParts[Part] = VectorLoopValueMap.getVectorValue(LoopExitInst, Part);
+ Value *Trunc = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
Value *Extnd = RdxDesc.isSigned() ? Builder.CreateSExt(Trunc, VecTy)
- : Builder.CreateZExt(Trunc, VecTy);
- for (Value::user_iterator UI = RdxParts[part]->user_begin();
- UI != RdxParts[part]->user_end();)
+ : Builder.CreateZExt(Trunc, VecTy);
+ for (Value::user_iterator UI = RdxParts[Part]->user_begin();
+ UI != RdxParts[Part]->user_end();)
if (*UI != Trunc) {
- (*UI++)->replaceUsesOfWith(RdxParts[part], Extnd);
- RdxParts[part] = Extnd;
+ (*UI++)->replaceUsesOfWith(RdxParts[Part], Extnd);
+ RdxParts[Part] = Extnd;
} else {
++UI;
}
}
Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt());
- for (unsigned part = 0; part < UF; ++part)
- RdxParts[part] = Builder.CreateTrunc(RdxParts[part], RdxVecTy);
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
+ VectorLoopValueMap.resetVectorValue(LoopExitInst, Part, RdxParts[Part]);
+ }
}
// Reduce all of the unrolled parts into a single vector.
- Value *ReducedPartRdx = RdxParts[0];
+ Value *ReducedPartRdx = VectorLoopValueMap.getVectorValue(LoopExitInst, 0);
unsigned Op = RecurrenceDescriptor::getRecurrenceBinOp(RK);
setDebugLocFromInst(Builder, ReducedPartRdx);
- for (unsigned part = 1; part < UF; ++part) {
+ for (unsigned Part = 1; Part < UF; ++Part) {
+ Value *RdxPart = VectorLoopValueMap.getVectorValue(LoopExitInst, Part);
if (Op != Instruction::ICmp && Op != Instruction::FCmp)
// Floating point operations had to be 'fast' to enable the reduction.
ReducedPartRdx = addFastMathFlag(
- Builder.CreateBinOp((Instruction::BinaryOps)Op, RdxParts[part],
+ Builder.CreateBinOp((Instruction::BinaryOps)Op, RdxPart,
ReducedPartRdx, "bin.rdx"));
else
ReducedPartRdx = RecurrenceDescriptor::createMinMaxOp(
- Builder, MinMaxKind, ReducedPartRdx, RdxParts[part]);
+ Builder, MinMaxKind, ReducedPartRdx, RdxPart);
}
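
// The loop above folds the UF partial reductions into a single value;
// the VF > 1 branch below then performs the horizontal shuffle-reduce.
// A scalar stand-in with an integer add assumed as the reduction op:
#include <cstdio>

int main() {
  const unsigned UF = 4;
  int RdxParts[UF] = {10, 20, 30, 40}; // assumed per-part partial sums
  int Reduced = RdxParts[0];
  for (unsigned Part = 1; Part < UF; ++Part)
    Reduced = RdxParts[Part] + Reduced; // CreateBinOp(Op, RdxPart, Reduced)
  std::printf("%d\n", Reduced);         // prints: 100
  return 0;
}
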
if (VF > 1) {
@@ -4518,14 +4543,16 @@ InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
assert(BI && "Unexpected terminator found");
if (BI->isConditional()) {
- VectorParts EdgeMask = getVectorValue(BI->getCondition());
- if (BI->getSuccessor(0) != Dst)
- for (unsigned part = 0; part < UF; ++part)
- EdgeMask[part] = Builder.CreateNot(EdgeMask[part]);
+ VectorParts EdgeMask(UF);
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ auto *EdgeMaskPart = getOrCreateVectorValue(BI->getCondition(), Part);
+ if (BI->getSuccessor(0) != Dst)
+ EdgeMaskPart = Builder.CreateNot(EdgeMaskPart);
- for (unsigned part = 0; part < UF; ++part)
- EdgeMask[part] = Builder.CreateAnd(EdgeMask[part], SrcMask[part]);
+ EdgeMaskPart = Builder.CreateAnd(EdgeMaskPart, SrcMask[Part]);
+ EdgeMask[Part] = EdgeMaskPart;
+ }
EdgeMaskCache[Edge] = EdgeMask;
return EdgeMask;
@@ -4544,23 +4571,27 @@ InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) {
if (BCEntryIt != BlockMaskCache.end())
return BCEntryIt->second;
+ VectorParts BlockMask(UF);
+
// Loop incoming mask is all-one.
if (OrigLoop->getHeader() == BB) {
Value *C = ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 1);
- const VectorParts &BlockMask = getVectorValue(C);
+ for (unsigned Part = 0; Part < UF; ++Part)
+ BlockMask[Part] = getOrCreateVectorValue(C, Part);
BlockMaskCache[BB] = BlockMask;
return BlockMask;
}
// This is the block mask. We OR all incoming edges, and with zero.
Value *Zero = ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 0);
- VectorParts BlockMask = getVectorValue(Zero);
+ for (unsigned Part = 0; Part < UF; ++Part)
+ BlockMask[Part] = getOrCreateVectorValue(Zero, Part);
// For each pred:
- for (pred_iterator it = pred_begin(BB), e = pred_end(BB); it != e; ++it) {
- VectorParts EM = createEdgeMask(*it, BB);
- for (unsigned part = 0; part < UF; ++part)
- BlockMask[part] = Builder.CreateOr(BlockMask[part], EM[part]);
+ for (pred_iterator It = pred_begin(BB), E = pred_end(BB); It != E; ++It) {
+ VectorParts EM = createEdgeMask(*It, BB);
+ for (unsigned Part = 0; Part < UF; ++Part)
+ BlockMask[Part] = Builder.CreateOr(BlockMask[Part], EM[Part]);
}
BlockMaskCache[BB] = BlockMask;
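
// The mask algebra of createEdgeMask/createBlockInMask, modeled on a
// single boolean lane (a sketch, not the IR-building code): an edge mask
// is the branch condition, negated when Dst is the false successor,
// ANDed with the source block's mask; a block mask ORs all incoming edge
// masks over a zero seed.
#include <cstdio>

int main() {
  bool SrcMask = true;        // block mask of the predecessor (assumed)
  bool Cond = false;          // the branch condition (assumed)
  bool DstIsFalseSucc = true; // Dst != successor(0), so negate
  bool EdgeMask = (DstIsFalseSucc ? !Cond : Cond) && SrcMask;
  bool BlockMask = false;     // seeded with zero
  BlockMask = BlockMask || EdgeMask; // repeated for each predecessor
  std::printf("%d\n", BlockMask ? 1 : 0); // prints: 1
  return 0;
}
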
@@ -4575,15 +4606,14 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,
// stage #1: We create a new vector PHI node with no incoming edges. We'll use
// this value when we vectorize all of the instructions that use the PHI.
if (Legal->isReductionVariable(P) || Legal->isFirstOrderRecurrence(P)) {
- VectorParts Entry(UF);
- for (unsigned part = 0; part < UF; ++part) {
+ for (unsigned Part = 0; Part < UF; ++Part) {
// This is phase one of vectorizing PHIs.
Type *VecTy =
(VF == 1) ? PN->getType() : VectorType::get(PN->getType(), VF);
- Entry[part] = PHINode::Create(
+ Value *EntryPart = PHINode::Create(
VecTy, 2, "vec.phi", &*LoopVectorBody->getFirstInsertionPt());
+ VectorLoopValueMap.setVectorValue(P, Part, EntryPart);
}
- VectorLoopValueMap.initVector(P, Entry);
return;
}
@@ -4607,21 +4637,22 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,
for (unsigned In = 0; In < NumIncoming; In++) {
VectorParts Cond =
createEdgeMask(P->getIncomingBlock(In), P->getParent());
- const VectorParts &In0 = getVectorValue(P->getIncomingValue(In));
- for (unsigned part = 0; part < UF; ++part) {
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ Value *In0 = getOrCreateVectorValue(P->getIncomingValue(In), Part);
// We might have single edge PHIs (blocks) - use an identity
// 'select' for the first PHI operand.
if (In == 0)
- Entry[part] = Builder.CreateSelect(Cond[part], In0[part], In0[part]);
+ Entry[Part] = Builder.CreateSelect(Cond[Part], In0, In0);
else
// Select between the current value and the previous incoming edge
// based on the incoming mask.
- Entry[part] = Builder.CreateSelect(Cond[part], In0[part], Entry[part],
+ Entry[Part] = Builder.CreateSelect(Cond[Part], In0, Entry[Part],
"predphi");
}
}
- VectorLoopValueMap.initVector(P, Entry);
+ for (unsigned Part = 0; Part < UF; ++Part)
+ VectorLoopValueMap.setVectorValue(P, Part, Entry[Part]);
return;
}
@@ -4652,18 +4683,15 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,
unsigned Lanes = Cost->isUniformAfterVectorization(P, VF) ? 1 : VF;
// These are the scalar results. Notice that we don't generate vector GEPs
// because scalar GEPs result in better code.
- ScalarParts Entry(UF);
for (unsigned Part = 0; Part < UF; ++Part) {
- Entry[Part].resize(VF);
for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
Constant *Idx = ConstantInt::get(PtrInd->getType(), Lane + Part * VF);
Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx);
Value *SclrGep = II.transform(Builder, GlobalIdx, PSE.getSE(), DL);
SclrGep->setName("next.gep");
- Entry[Part][Lane] = SclrGep;
+ VectorLoopValueMap.setScalarValue(P, Part, Lane, SclrGep);
}
}
- VectorLoopValueMap.initScalar(P, Entry);
return;
}
}
@@ -4713,7 +4741,6 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) {
// is vector-typed. Thus, to keep the representation compact, we only use
// vector-typed operands for loop-varying values.
auto *GEP = cast<GetElementPtrInst>(&I);
- VectorParts Entry(UF);
if (VF > 1 && OrigLoop->hasLoopInvariantOperands(GEP)) {
// If we are vectorizing, but the GEP has only loop-invariant operands,
@@ -4729,8 +4756,11 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) {
// collectLoopScalars() and teach getVectorValue() to broadcast
// the lane-zero scalar value.
auto *Clone = Builder.Insert(GEP->clone());
- for (unsigned Part = 0; Part < UF; ++Part)
- Entry[Part] = Builder.CreateVectorSplat(VF, Clone);
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ Value *EntryPart = Builder.CreateVectorSplat(VF, Clone);
+ VectorLoopValueMap.setVectorValue(&I, Part, EntryPart);
+ addMetadata(EntryPart, GEP);
+ }
} else {
// If the GEP has at least one loop-varying operand, we are sure to
// produce a vector of pointers. But if we are only unrolling, we want
@@ -4743,9 +4773,10 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) {
// The pointer operand of the new GEP. If it's loop-invariant, we
// won't broadcast it.
- auto *Ptr = OrigLoop->isLoopInvariant(GEP->getPointerOperand())
- ? GEP->getPointerOperand()
- : getVectorValue(GEP->getPointerOperand())[Part];
+ auto *Ptr =
+ OrigLoop->isLoopInvariant(GEP->getPointerOperand())
+ ? GEP->getPointerOperand()
+ : getOrCreateVectorValue(GEP->getPointerOperand(), Part);
// Collect all the indices for the new GEP. If any index is
// loop-invariant, we won't broadcast it.
@@ -4754,7 +4785,7 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) {
if (OrigLoop->isLoopInvariant(U.get()))
Indices.push_back(U.get());
else
- Indices.push_back(getVectorValue(U.get())[Part]);
+ Indices.push_back(getOrCreateVectorValue(U.get(), Part));
}
// Create the new GEP. Note that this GEP may be a scalar if VF == 1,
@@ -4764,12 +4795,11 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) {
: Builder.CreateGEP(Ptr, Indices);
assert((VF == 1 || NewGEP->getType()->isVectorTy()) &&
"NewGEP is not a pointer vector");
- Entry[Part] = NewGEP;
+ VectorLoopValueMap.setVectorValue(&I, Part, NewGEP);
+ addMetadata(NewGEP, GEP);
}
}
- VectorLoopValueMap.initVector(&I, Entry);
- addMetadata(Entry, GEP);
break;
}
case Instruction::UDiv:
@@ -4800,22 +4830,20 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) {
// Just widen binops.
auto *BinOp = cast<BinaryOperator>(&I);
setDebugLocFromInst(Builder, BinOp);
- const VectorParts &A = getVectorValue(BinOp->getOperand(0));
- const VectorParts &B = getVectorValue(BinOp->getOperand(1));
- // Use this vector value for all users of the original instruction.
- VectorParts Entry(UF);
for (unsigned Part = 0; Part < UF; ++Part) {
- Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A[Part], B[Part]);
+ Value *A = getOrCreateVectorValue(BinOp->getOperand(0), Part);
+ Value *B = getOrCreateVectorValue(BinOp->getOperand(1), Part);
+ Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A, B);
if (BinaryOperator *VecOp = dyn_cast<BinaryOperator>(V))
VecOp->copyIRFlags(BinOp);
- Entry[Part] = V;
+ // Use this vector value for all users of the original instruction.
+ VectorLoopValueMap.setVectorValue(&I, Part, V);
+ addMetadata(V, BinOp);
}
- VectorLoopValueMap.initVector(&I, Entry);
- addMetadata(Entry, BinOp);
break;
}
case Instruction::Select: {
@@ -4831,20 +4859,19 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) {
// loop. This means that we can't just use the original 'cond' value.
// We have to take the 'vectorized' value and pick the first lane.
// Instcombine will make this a no-op.
- const VectorParts &Cond = getVectorValue(I.getOperand(0));
- const VectorParts &Op0 = getVectorValue(I.getOperand(1));
- const VectorParts &Op1 = getVectorValue(I.getOperand(2));
- auto *ScalarCond = getScalarValue(I.getOperand(0), 0, 0);
+ auto *ScalarCond = getOrCreateScalarValue(I.getOperand(0), 0, 0);
- VectorParts Entry(UF);
for (unsigned Part = 0; Part < UF; ++Part) {
- Entry[Part] = Builder.CreateSelect(
- InvariantCond ? ScalarCond : Cond[Part], Op0[Part], Op1[Part]);
+ Value *Cond = getOrCreateVectorValue(I.getOperand(0), Part);
+ Value *Op0 = getOrCreateVectorValue(I.getOperand(1), Part);
+ Value *Op1 = getOrCreateVectorValue(I.getOperand(2), Part);
+ Value *Sel =
+ Builder.CreateSelect(InvariantCond ? ScalarCond : Cond, Op0, Op1);
+ VectorLoopValueMap.setVectorValue(&I, Part, Sel);
+ addMetadata(Sel, &I);
}
- VectorLoopValueMap.initVector(&I, Entry);
- addMetadata(Entry, &I);
break;
}
@@ -4854,22 +4881,20 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) {
bool FCmp = (I.getOpcode() == Instruction::FCmp);
auto *Cmp = dyn_cast<CmpInst>(&I);
setDebugLocFromInst(Builder, Cmp);
- const VectorParts &A = getVectorValue(Cmp->getOperand(0));
- const VectorParts &B = getVectorValue(Cmp->getOperand(1));
- VectorParts Entry(UF);
for (unsigned Part = 0; Part < UF; ++Part) {
+ Value *A = getOrCreateVectorValue(Cmp->getOperand(0), Part);
+ Value *B = getOrCreateVectorValue(Cmp->getOperand(1), Part);
Value *C = nullptr;
if (FCmp) {
- C = Builder.CreateFCmp(Cmp->getPredicate(), A[Part], B[Part]);
+ C = Builder.CreateFCmp(Cmp->getPredicate(), A, B);
cast<FCmpInst>(C)->copyFastMathFlags(Cmp);
} else {
- C = Builder.CreateICmp(Cmp->getPredicate(), A[Part], B[Part]);
+ C = Builder.CreateICmp(Cmp->getPredicate(), A, B);
}
- Entry[Part] = C;
+ VectorLoopValueMap.setVectorValue(&I, Part, C);
+ addMetadata(C, &I);
}
- VectorLoopValueMap.initVector(&I, Entry);
- addMetadata(Entry, &I);
break;
}
@@ -4906,12 +4931,12 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) {
Type *DestTy =
(VF == 1) ? CI->getType() : VectorType::get(CI->getType(), VF);
- const VectorParts &A = getVectorValue(CI->getOperand(0));
- VectorParts Entry(UF);
- for (unsigned Part = 0; Part < UF; ++Part)
- Entry[Part] = Builder.CreateCast(CI->getOpcode(), A[Part], DestTy);
- VectorLoopValueMap.initVector(&I, Entry);
- addMetadata(Entry, &I);
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ Value *A = getOrCreateVectorValue(CI->getOperand(0), Part);
+ Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy);
+ VectorLoopValueMap.setVectorValue(&I, Part, Cast);
+ addMetadata(Cast, &I);
+ }
break;
}
@@ -4949,17 +4974,14 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) {
break;
}
- VectorParts Entry(UF);
for (unsigned Part = 0; Part < UF; ++Part) {
SmallVector<Value *, 4> Args;
for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
Value *Arg = CI->getArgOperand(i);
// Some intrinsics have a scalar argument - don't replace it with a
// vector.
- if (!UseVectorIntrinsic || !hasVectorInstrinsicScalarOpd(ID, i)) {
- const VectorParts &VectorArg = getVectorValue(CI->getArgOperand(i));
- Arg = VectorArg[Part];
- }
+ if (!UseVectorIntrinsic || !hasVectorInstrinsicScalarOpd(ID, i))
+ Arg = getOrCreateVectorValue(CI->getArgOperand(i), Part);
Args.push_back(Arg);
}
@@ -4992,11 +5014,10 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) {
if (isa<FPMathOperator>(V))
V->copyFastMathFlags(CI);
- Entry[Part] = V;
+ VectorLoopValueMap.setVectorValue(&I, Part, V);
+ addMetadata(V, &I);
}
- VectorLoopValueMap.initVector(&I, Entry);
- addMetadata(Entry, &I);
break;
}
@@ -5363,7 +5384,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
continue;
}
- if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop, DT)) {
+ if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop,
+ SinkAfter, DT)) {
FirstOrderRecurrences.insert(Phi);
continue;
}
@@ -7636,6 +7658,15 @@ void LoopVectorizationPlanner::executePlan(InnerLoopVectorizer &ILV) {
// 2. Copy and widen instructions from the old loop into the new loop.
+ // Move instructions to handle first-order recurrences.
+ DenseMap<Instruction *, Instruction *> SinkAfter = Legal->getSinkAfter();
+ for (auto &Entry : SinkAfter) {
+ Entry.first->removeFromParent();
+ Entry.first->insertAfter(Entry.second);
+ DEBUG(dbgs() << "Sinking" << *Entry.first << " after" << *Entry.second
+ << " to vectorize a 1st order recurrence.\n");
+ }
+
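
// An illustrative, hypothetical source loop of the kind this bookkeeping
// serves: X is a first-order recurrence, and the user of the phi value
// (T) is defined before the next recurrence value (A[I]); it must be
// sunk after that definition before the recurrence shuffle can be built.
int recurrence(const int *A, int *B, int N, int C) {
  int X = 0;
  for (int I = 0; I < N; ++I) {
    int T = X - C; // user of the recurrence phi; sunk after the next line
    X = A[I];      // the recurrence's next value
    B[I] = T;
  }
  return X;
}
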
// Collect instructions from the original loop that will become trivially dead
// in the vectorized loop. We don't need to vectorize these instructions. For
// example, original induction update instructions can become dead because we
@@ -7787,8 +7818,25 @@ bool LoopVectorizePass::processLoop(Loop *L) {
return false;
}
- // Check the loop for a trip count threshold:
- // do not vectorize loops with a tiny trip count.
+ PredicatedScalarEvolution PSE(*SE, *L);
+
+ // Check if it is legal to vectorize the loop.
+ LoopVectorizationRequirements Requirements(*ORE);
+ LoopVectorizationLegality LVL(L, PSE, DT, TLI, AA, F, TTI, GetLAA, LI, ORE,
+ &Requirements, &Hints);
+ if (!LVL.canVectorize()) {
+ DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
+ emitMissedWarning(F, L, Hints, ORE);
+ return false;
+ }
+
+ // Check the function attributes to find out if this function should be
+ // optimized for size.
+ bool OptForSize =
+ Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->optForSize();
+
+ // Check the loop for a trip count threshold: vectorize loops with a tiny trip
+ // count by optimizing for size, to minimize overheads.
unsigned ExpectedTC = SE->getSmallConstantMaxTripCount(L);
bool HasExpectedTC = (ExpectedTC > 0);
@@ -7802,36 +7850,19 @@ bool LoopVectorizePass::processLoop(Loop *L) {
if (HasExpectedTC && ExpectedTC < TinyTripCountVectorThreshold) {
DEBUG(dbgs() << "LV: Found a loop with a very small trip count. "
- << "This loop is not worth vectorizing.");
+ << "This loop is worth vectorizing only if no scalar "
+ << "iteration overheads are incurred.");
if (Hints.getForce() == LoopVectorizeHints::FK_Enabled)
DEBUG(dbgs() << " But vectorizing was explicitly forced.\n");
else {
DEBUG(dbgs() << "\n");
- ORE->emit(createMissedAnalysis(Hints.vectorizeAnalysisPassName(),
- "NotBeneficial", L)
- << "vectorization is not beneficial "
- "and is not explicitly forced");
- return false;
+ // Loops with a very small trip count are considered for vectorization
+ // under OptForSize, which keeps the cost of their loop body dominant and
+ // free of runtime guards and scalar iteration overheads.
+ OptForSize = true;
}
}
- PredicatedScalarEvolution PSE(*SE, *L);
-
- // Check if it is legal to vectorize the loop.
- LoopVectorizationRequirements Requirements(*ORE);
- LoopVectorizationLegality LVL(L, PSE, DT, TLI, AA, F, TTI, GetLAA, LI, ORE,
- &Requirements, &Hints);
- if (!LVL.canVectorize()) {
- DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
- emitMissedWarning(F, L, Hints, ORE);
- return false;
- }
-
- // Check the function attributes to find out if this function should be
- // optimized for size.
- bool OptForSize =
- Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->optForSize();
-
// Check the function attributes to see if implicit floats are allowed.
// FIXME: This check doesn't seem possibly correct -- what if the loop is
// an integer loop and the vector instructions selected are purely integer
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b267230d3185..b494526369d6 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -173,6 +173,11 @@ static unsigned getAltOpcode(unsigned Op) {
}
}
+/// \returns true if \p Value is odd, false otherwise.
+static bool isOdd(unsigned Value) {
+ return Value & 1;
+}
+
///\returns bool representing if Opcode \p Op can be part
/// of an alternate sequence which can later be merged as
/// a ShuffleVector instruction.
@@ -190,7 +195,7 @@ static unsigned isAltInst(ArrayRef<Value *> VL) {
unsigned AltOpcode = getAltOpcode(Opcode);
for (int i = 1, e = VL.size(); i < e; i++) {
Instruction *I = dyn_cast<Instruction>(VL[i]);
- if (!I || I->getOpcode() != ((i & 1) ? AltOpcode : Opcode))
+ if (!I || I->getOpcode() != (isOdd(i) ? AltOpcode : Opcode))
return 0;
}
return Instruction::ShuffleVector;
@@ -504,7 +509,7 @@ private:
Last->NeedToGather = !Vectorized;
if (Vectorized) {
for (int i = 0, e = VL.size(); i != e; ++i) {
- assert(!ScalarToTreeEntry.count(VL[i]) && "Scalar already in tree!");
+ assert(!getTreeEntry(VL[i]) && "Scalar already in tree!");
ScalarToTreeEntry[VL[i]] = idx;
}
} else {
@@ -521,6 +526,20 @@ private:
/// Holds all of the tree entries.
std::vector<TreeEntry> VectorizableTree;
+ TreeEntry *getTreeEntry(Value *V) {
+ auto I = ScalarToTreeEntry.find(V);
+ if (I != ScalarToTreeEntry.end())
+ return &VectorizableTree[I->second];
+ return nullptr;
+ }
+
+ const TreeEntry *getTreeEntry(Value *V) const {
+ auto I = ScalarToTreeEntry.find(V);
+ if (I != ScalarToTreeEntry.end())
+ return &VectorizableTree[I->second];
+ return nullptr;
+ }
+
/// Maps a specific scalar to its tree entry.
SmallDenseMap<Value*, int> ScalarToTreeEntry;
@@ -1048,14 +1067,14 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
for (TreeEntry &EIdx : VectorizableTree) {
TreeEntry *Entry = &EIdx;
+ // No need to handle users of gathered values.
+ if (Entry->NeedToGather)
+ continue;
+
// For each lane:
for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) {
Value *Scalar = Entry->Scalars[Lane];
- // No need to handle users of gathered values.
- if (Entry->NeedToGather)
- continue;
-
// Check if the scalar is externally used as an extra arg.
auto ExtI = ExternallyUsedValues.find(Scalar);
if (ExtI != ExternallyUsedValues.end()) {
@@ -1072,9 +1091,7 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
continue;
// Skip in-tree scalars that become vectors
- if (ScalarToTreeEntry.count(U)) {
- int Idx = ScalarToTreeEntry[U];
- TreeEntry *UseEntry = &VectorizableTree[Idx];
+ if (TreeEntry *UseEntry = getTreeEntry(U)) {
Value *UseScalar = UseEntry->Scalars[0];
// Some in-tree scalars will remain as scalar in vectorized
// instructions. If that is the case, the one in Lane 0 will
@@ -1083,7 +1100,7 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
!InTreeUserNeedToExtract(Scalar, UserInst, TLI)) {
DEBUG(dbgs() << "SLP: \tInternal user will be removed:" << *U
<< ".\n");
- assert(!VectorizableTree[Idx].NeedToGather && "Bad state");
+ assert(!UseEntry->NeedToGather && "Bad state");
continue;
}
}
@@ -1156,9 +1173,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
// Check if this is a duplicate of another entry.
- if (ScalarToTreeEntry.count(VL[0])) {
- int Idx = ScalarToTreeEntry[VL[0]];
- TreeEntry *E = &VectorizableTree[Idx];
+ if (TreeEntry *E = getTreeEntry(VL[0])) {
for (unsigned i = 0, e = VL.size(); i != e; ++i) {
DEBUG(dbgs() << "SLP: \tChecking bundle: " << *VL[i] << ".\n");
if (E->Scalars[i] != VL[i]) {
@@ -1997,7 +2012,7 @@ int BoUpSLP::getSpillCost() {
// Update LiveValues.
LiveValues.erase(PrevInst);
for (auto &J : PrevInst->operands()) {
- if (isa<Instruction>(&*J) && ScalarToTreeEntry.count(&*J))
+ if (isa<Instruction>(&*J) && getTreeEntry(&*J))
LiveValues.insert(cast<Instruction>(&*J));
}
@@ -2393,9 +2408,7 @@ Value *BoUpSLP::Gather(ArrayRef<Value *> VL, VectorType *Ty) {
CSEBlocks.insert(Insrt->getParent());
// Add to our 'need-to-extract' list.
- if (ScalarToTreeEntry.count(VL[i])) {
- int Idx = ScalarToTreeEntry[VL[i]];
- TreeEntry *E = &VectorizableTree[Idx];
+ if (TreeEntry *E = getTreeEntry(VL[i])) {
// Find which lane we need to extract.
int FoundLane = -1;
for (unsigned Lane = 0, LE = VL.size(); Lane != LE; ++Lane) {
@@ -2415,11 +2428,7 @@ Value *BoUpSLP::Gather(ArrayRef<Value *> VL, VectorType *Ty) {
}
Value *BoUpSLP::alreadyVectorized(ArrayRef<Value *> VL) const {
- SmallDenseMap<Value*, int>::const_iterator Entry
- = ScalarToTreeEntry.find(VL[0]);
- if (Entry != ScalarToTreeEntry.end()) {
- int Idx = Entry->second;
- const TreeEntry *En = &VectorizableTree[Idx];
+ if (const TreeEntry *En = getTreeEntry(VL[0])) {
if (En->isSame(VL) && En->VectorizedValue)
return En->VectorizedValue;
}
@@ -2427,12 +2436,9 @@ Value *BoUpSLP::alreadyVectorized(ArrayRef<Value *> VL) const {
}
Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
- if (ScalarToTreeEntry.count(VL[0])) {
- int Idx = ScalarToTreeEntry[VL[0]];
- TreeEntry *E = &VectorizableTree[Idx];
+ if (TreeEntry *E = getTreeEntry(VL[0]))
if (E->isSame(VL))
return vectorizeTree(E);
- }
Type *ScalarTy = VL[0]->getType();
if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
@@ -2667,9 +2673,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
// The pointer operand uses an in-tree scalar so we add the new BitCast to
// ExternalUses list to make sure that an extract will be generated in the
// future.
- if (ScalarToTreeEntry.count(LI->getPointerOperand()))
- ExternalUses.push_back(
- ExternalUser(LI->getPointerOperand(), cast<User>(VecPtr), 0));
+ Value *PO = LI->getPointerOperand();
+ if (getTreeEntry(PO))
+ ExternalUses.push_back(ExternalUser(PO, cast<User>(VecPtr), 0));
unsigned Alignment = LI->getAlignment();
LI = Builder.CreateLoad(VecPtr);
@@ -2700,9 +2706,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
// The pointer operand uses an in-tree scalar so we add the new BitCast to
// ExternalUses list to make sure that an extract will be generated in the
// future.
- if (ScalarToTreeEntry.count(SI->getPointerOperand()))
- ExternalUses.push_back(
- ExternalUser(SI->getPointerOperand(), cast<User>(VecPtr), 0));
+ Value *PO = SI->getPointerOperand();
+ if (getTreeEntry(PO))
+ ExternalUses.push_back(ExternalUser(PO, cast<User>(VecPtr), 0));
if (!Alignment) {
Alignment = DL->getABITypeAlignment(SI->getValueOperand()->getType());
@@ -2783,7 +2789,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
// The scalar argument uses an in-tree scalar so we add the new vectorized
// call to ExternalUses list to make sure that an extract will be
// generated in the future.
- if (ScalarArg && ScalarToTreeEntry.count(ScalarArg))
+ if (ScalarArg && getTreeEntry(ScalarArg))
ExternalUses.push_back(ExternalUser(ScalarArg, cast<User>(V), 0));
E->VectorizedValue = V;
@@ -2819,7 +2825,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
unsigned e = E->Scalars.size();
SmallVector<Constant *, 8> Mask(e);
for (unsigned i = 0; i < e; ++i) {
- if (i & 1) {
+ if (isOdd(i)) {
Mask[i] = Builder.getInt32(e + i);
OddScalars.push_back(E->Scalars[i]);
} else {
@@ -2897,10 +2903,8 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
// has multiple uses of the same value.
if (User && !is_contained(Scalar->users(), User))
continue;
- assert(ScalarToTreeEntry.count(Scalar) && "Invalid scalar");
-
- int Idx = ScalarToTreeEntry[Scalar];
- TreeEntry *E = &VectorizableTree[Idx];
+ TreeEntry *E = getTreeEntry(Scalar);
+ assert(E && "Invalid scalar");
assert(!E->NeedToGather && "Extracting from a gather list");
Value *Vec = E->VectorizedValue;
@@ -2986,7 +2990,7 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
for (User *U : Scalar->users()) {
DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n");
- assert((ScalarToTreeEntry.count(U) ||
+ assert((getTreeEntry(U) ||
// It is legal to replace users in the ignorelist by undef.
is_contained(UserIgnoreList, U)) &&
"Replacing out-of-tree value with undef");
@@ -3449,7 +3453,7 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
I = I->getNextNode()) {
ScheduleData *SD = BS->getScheduleData(I);
assert(
- SD->isPartOfBundle() == (ScalarToTreeEntry.count(SD->Inst) != 0) &&
+ SD->isPartOfBundle() == (getTreeEntry(SD->Inst) != nullptr) &&
"scheduler and vectorizer have different opinion on what is a bundle");
SD->FirstInBundle->SchedulingPriority = Idx++;
if (SD->isSchedulingEntity()) {
diff --git a/lib/Transforms/Vectorize/Vectorize.cpp b/lib/Transforms/Vectorize/Vectorize.cpp
index a21928317888..fb2f509dcbaa 100644
--- a/lib/Transforms/Vectorize/Vectorize.cpp
+++ b/lib/Transforms/Vectorize/Vectorize.cpp
@@ -26,7 +26,6 @@ using namespace llvm;
/// initializeVectorizationPasses - Initialize all passes linked into the
/// Vectorization library.
void llvm::initializeVectorization(PassRegistry &Registry) {
- initializeBBVectorizePass(Registry);
initializeLoopVectorizePass(Registry);
initializeSLPVectorizerPass(Registry);
initializeLoadStoreVectorizerPass(Registry);
@@ -36,8 +35,8 @@ void LLVMInitializeVectorization(LLVMPassRegistryRef R) {
initializeVectorization(*unwrap(R));
}
+// DEPRECATED: Remove after the LLVM 5 release.
void LLVMAddBBVectorizePass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createBBVectorizePass());
}
void LLVMAddLoopVectorizePass(LLVMPassManagerRef PM) {
diff --git a/test/Analysis/Dominators/2006-10-02-BreakCritEdges.ll b/test/Analysis/Dominators/2006-10-02-BreakCritEdges.ll
index a8013176977d..c036fe22ab87 100644
--- a/test/Analysis/Dominators/2006-10-02-BreakCritEdges.ll
+++ b/test/Analysis/Dominators/2006-10-02-BreakCritEdges.ll
@@ -2,7 +2,7 @@
; RUN: opt < %s -passes='require<domtree>,break-crit-edges,print<domtree>' -disable-output 2>&1| FileCheck %s
; PR932
-; CHECK: [3] %brtrue {1,2}
+; CHECK: [3] %brtrue {{{[0-9]+}},{{[0-9]+}}}
declare void @use1(i32)
diff --git a/test/Analysis/ScalarEvolution/limit-depth.ll b/test/Analysis/ScalarEvolution/limit-depth.ll
index 5a35bfefd20a..f4154130233b 100644
--- a/test/Analysis/ScalarEvolution/limit-depth.ll
+++ b/test/Analysis/ScalarEvolution/limit-depth.ll
@@ -1,4 +1,4 @@
-; RUN: opt -scalar-evolution-max-arith-depth=0 -analyze -scalar-evolution < %s | FileCheck %s
+; RUN: opt -scalar-evolution-max-arith-depth=0 -scalar-evolution-max-ext-depth=0 -analyze -scalar-evolution < %s | FileCheck %s
; Check that depth set to 0 prevents getAddExpr and getMulExpr from making
; transformations in SCEV. We expect the result to be very straightforward.
@@ -42,3 +42,59 @@ define void @test_mul(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) {
%s2 = mul i32 %s1, %p3
ret void
}
+
+define void @test_sext(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) {
+; CHECK-LABEL: @test_sext
+; CHECK: %se2 = sext i64 %iv2.inc to i128
+; CHECK-NEXT: --> {(1 + (sext i64 {(sext i32 (1 + %a) to i64),+,1}<nsw><%loop> to i128))<nsw>,+,1}<nsw><%loop2>
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ %a, %entry ], [ %iv.inc, %loop ]
+ %iv.inc = add nsw i32 %iv, 1
+ %cond = icmp sle i32 %iv.inc, 50
+ br i1 %cond, label %loop, label %between
+
+between:
+ %se = sext i32 %iv.inc to i64
+ br label %loop2
+
+loop2:
+ %iv2 = phi i64 [ %se, %between ], [ %iv2.inc, %loop2 ]
+ %iv2.inc = add nsw i64 %iv2, 1
+ %cond2 = icmp sle i64 %iv2.inc, 50
+ br i1 %cond2, label %loop2, label %exit
+
+exit:
+ %se2 = sext i64 %iv2.inc to i128
+ ret void
+}
+
+define void @test_zext(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) {
+; CHECK-LABEL: @test_zext
+; CHECK: %ze2 = zext i64 %iv2.inc to i128
+; CHECK-NEXT: --> {(1 + (zext i64 {7,+,1}<nuw><nsw><%loop> to i128))<nuw><nsw>,+,1}<nuw><%loop2>
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 6, %entry ], [ %iv.inc, %loop ]
+ %iv.inc = add nsw i32 %iv, 1
+ %cond = icmp sle i32 %iv.inc, 50
+ br i1 %cond, label %loop, label %between
+
+between:
+ %ze = zext i32 %iv.inc to i64
+ br label %loop2
+
+loop2:
+ %iv2 = phi i64 [ %ze, %between ], [ %iv2.inc, %loop2 ]
+ %iv2.inc = add nuw i64 %iv2, 1
+ %cond2 = icmp sle i64 %iv2.inc, 50
+ br i1 %cond2, label %loop2, label %exit
+
+exit:
+ %ze2 = zext i64 %iv2.inc to i128
+ ret void
+}
diff --git a/test/Bitcode/thinlto-alias.ll b/test/Bitcode/thinlto-alias.ll
index 2c235f0620ec..81fbb767ba94 100644
--- a/test/Bitcode/thinlto-alias.ll
+++ b/test/Bitcode/thinlto-alias.ll
@@ -18,7 +18,7 @@
; CHECK-NEXT: </GLOBALVAL_SUMMARY_BLOCK>
; CHECK: <STRTAB_BLOCK
-; CHECK-NEXT: blob data = 'mainanalias'
+; CHECK-NEXT: blob data = 'mainanalias{{.*}}'
; COMBINED: <GLOBALVAL_SUMMARY_BLOCK
; COMBINED-NEXT: <VERSION
diff --git a/test/Bitcode/thinlto-function-summary-callgraph-pgo.ll b/test/Bitcode/thinlto-function-summary-callgraph-pgo.ll
index 7f9d6d95f506..ef5fb36d8e05 100644
--- a/test/Bitcode/thinlto-function-summary-callgraph-pgo.ll
+++ b/test/Bitcode/thinlto-function-summary-callgraph-pgo.ll
@@ -20,7 +20,7 @@
; CHECK-NEXT: <PERMODULE_PROFILE {{.*}} op4=1 op5=2/>
; CHECK-NEXT: </GLOBALVAL_SUMMARY_BLOCK>
; CHECK: <STRTAB_BLOCK
-; CHECK-NEXT: blob data = 'mainfunc'
+; CHECK-NEXT: blob data = 'mainfunc{{.*}}'
; COMBINED: <GLOBALVAL_SUMMARY_BLOCK
; COMBINED-NEXT: <VERSION
diff --git a/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll b/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll
index b64d5bd52bfc..a4d259add609 100644
--- a/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll
+++ b/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll
@@ -33,7 +33,7 @@
; CHECK-NEXT: </GLOBALVAL_SUMMARY_BLOCK>
; CHECK: <STRTAB_BLOCK
-; CHECK-NEXT: blob data = 'hot_functionhot1hot2hot3hot4coldnone1none2none3'
+; CHECK-NEXT: blob data = 'hot_functionhot1hot2hot3hot4coldnone1none2none3{{.*}}'
; COMBINED: <GLOBALVAL_SUMMARY_BLOCK
; COMBINED-NEXT: <VERSION
diff --git a/test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll b/test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll
index 875f397646a6..b62090efe20b 100644
--- a/test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll
+++ b/test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll
@@ -33,7 +33,7 @@
; CHECK-NEXT: </GLOBALVAL_SUMMARY_BLOCK>
; CHECK: <STRTAB_BLOCK
-; CHECK-NEXT: blob data = 'hot_functionhot1hot2hot3hot4coldnone1none2none3'
+; CHECK-NEXT: blob data = 'hot_functionhot1hot2hot3hot4coldnone1none2none3{{.*}}'
; COMBINED: <GLOBALVAL_SUMMARY_BLOCK
; COMBINED-NEXT: <VERSION
diff --git a/test/Bitcode/thinlto-function-summary-callgraph.ll b/test/Bitcode/thinlto-function-summary-callgraph.ll
index 566f3a077e7b..749909badd95 100644
--- a/test/Bitcode/thinlto-function-summary-callgraph.ll
+++ b/test/Bitcode/thinlto-function-summary-callgraph.ll
@@ -21,7 +21,7 @@
; CHECK-NEXT: <PERMODULE {{.*}} op3=1
; CHECK-NEXT: </GLOBALVAL_SUMMARY_BLOCK>
; CHECK: <STRTAB_BLOCK
-; CHECK-NEXT: blob data = 'undefinedglobmainfunc'
+; CHECK-NEXT: blob data = 'undefinedglobmainfunc{{.*}}'
; COMBINED: <GLOBALVAL_SUMMARY_BLOCK
diff --git a/test/Bitcode/thinlto-function-summary-refgraph.ll b/test/Bitcode/thinlto-function-summary-refgraph.ll
index b52fce791791..47a44b789223 100644
--- a/test/Bitcode/thinlto-function-summary-refgraph.ll
+++ b/test/Bitcode/thinlto-function-summary-refgraph.ll
@@ -62,7 +62,7 @@
; CHECK: </GLOBALVAL_SUMMARY_BLOCK>
; CHECK: <STRTAB_BLOCK
-; CHECK-NEXT: blob data = 'barglobalvarfuncfunc2foofunc3WXYZllvm.ctpop.i8main'
+; CHECK-NEXT: blob data = 'barglobalvarfuncfunc2foofunc3WXYZllvm.ctpop.i8main{{.*}}'
; ModuleID = 'thinlto-function-summary-refgraph.ll'
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/test/Bitcode/thinlto-function-summary.ll b/test/Bitcode/thinlto-function-summary.ll
index 6b8bfbb292cd..5922a8b3c4d0 100644
--- a/test/Bitcode/thinlto-function-summary.ll
+++ b/test/Bitcode/thinlto-function-summary.ll
@@ -24,7 +24,7 @@
; BC-NEXT: <ALIAS {{.*}} op0=5 op1=0 op2=3
; BC-NEXT: </GLOBALVAL_SUMMARY_BLOCK
; BC: <STRTAB_BLOCK
-; BC-NEXT: blob data = 'hfoobaranon.{{................................}}.0variadicf'
+; BC-NEXT: blob data = 'hfoobaranon.{{................................}}.0variadicf{{.*}}'
; RUN: opt -name-anon-globals -module-summary < %s | llvm-dis | FileCheck %s
diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
index 0298315a5510..48f500eb36b5 100644
--- a/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
+++ b/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
@@ -158,15 +158,30 @@ define fp128 @test_quad_dump() {
; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %vreg0<def>(p0) = G_EXTRACT_VECTOR_ELT %vreg1, %vreg2; (in function: vector_of_pointers_extractelement)
; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for vector_of_pointers_extractelement
; FALLBACK-WITH-REPORT-OUT-LABEL: vector_of_pointers_extractelement:
+@var = global <2 x i16*> zeroinitializer
define void @vector_of_pointers_extractelement() {
- %dummy = extractelement <2 x i16*> undef, i32 0
+ br label %end
+
+block:
+ %dummy = extractelement <2 x i16*> %vec, i32 0
ret void
+
+end:
+ %vec = load <2 x i16*>, <2 x i16*>* undef
+ br label %block
}
; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %vreg0<def>(<2 x p0>) = G_INSERT_VECTOR_ELT %vreg1, %vreg2, %vreg3; (in function: vector_of_pointers_insertelement
; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for vector_of_pointers_insertelement
; FALLBACK-WITH-REPORT-OUT-LABEL: vector_of_pointers_insertelement:
define void @vector_of_pointers_insertelement() {
- %dummy = insertelement <2 x i16*> undef, i16* null, i32 0
+ br label %end
+
+block:
+ %dummy = insertelement <2 x i16*> %vec, i16* null, i32 0
ret void
+
+end:
+ %vec = load <2 x i16*>, <2 x i16*>* undef
+ br label %block
}
diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
index 81b42d064810..50ad83feed85 100644
--- a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
+++ b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
@@ -577,7 +577,7 @@ define i32 @constant_int_start() {
}
; CHECK-LABEL: name: test_undef
-; CHECK: [[UNDEF:%[0-9]+]](s32) = IMPLICIT_DEF
+; CHECK: [[UNDEF:%[0-9]+]](s32) = G_IMPLICIT_DEF
; CHECK: %w0 = COPY [[UNDEF]]
define i32 @test_undef() {
ret i32 undef
@@ -807,7 +807,7 @@ define float @test_frem(float %arg1, float %arg2) {
; CHECK: [[RHS:%[0-9]+]](s32) = COPY %w1
; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x2
; CHECK: [[VAL:%[0-9]+]](s32), [[OVERFLOW:%[0-9]+]](s1) = G_SADDO [[LHS]], [[RHS]]
-; CHECK: [[TMP:%[0-9]+]](s64) = IMPLICIT_DEF
+; CHECK: [[TMP:%[0-9]+]](s64) = G_IMPLICIT_DEF
; CHECK: [[TMP1:%[0-9]+]](s64) = G_INSERT [[TMP]], [[VAL]](s32), 0
; CHECK: [[RES:%[0-9]+]](s64) = G_INSERT [[TMP1]], [[OVERFLOW]](s1), 32
; CHECK: G_STORE [[RES]](s64), [[ADDR]](p0)
@@ -824,7 +824,7 @@ define void @test_sadd_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) {
; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x2
; CHECK: [[ZERO:%[0-9]+]](s1) = G_CONSTANT i1 false
; CHECK: [[VAL:%[0-9]+]](s32), [[OVERFLOW:%[0-9]+]](s1) = G_UADDE [[LHS]], [[RHS]], [[ZERO]]
-; CHECK: [[TMP:%[0-9]+]](s64) = IMPLICIT_DEF
+; CHECK: [[TMP:%[0-9]+]](s64) = G_IMPLICIT_DEF
; CHECK: [[TMP1:%[0-9]+]](s64) = G_INSERT [[TMP]], [[VAL]](s32), 0
; CHECK: [[RES:%[0-9]+]](s64) = G_INSERT [[TMP1]], [[OVERFLOW]](s1), 32
; CHECK: G_STORE [[RES]](s64), [[ADDR]](p0)
@@ -840,7 +840,7 @@ define void @test_uadd_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) {
; CHECK: [[RHS:%[0-9]+]](s32) = COPY %w1
; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x2
; CHECK: [[VAL:%[0-9]+]](s32), [[OVERFLOW:%[0-9]+]](s1) = G_SSUBO [[LHS]], [[RHS]]
-; CHECK: [[TMP:%[0-9]+]](s64) = IMPLICIT_DEF
+; CHECK: [[TMP:%[0-9]+]](s64) = G_IMPLICIT_DEF
; CHECK: [[TMP1:%[0-9]+]](s64) = G_INSERT [[TMP]], [[VAL]](s32), 0
; CHECK: [[RES:%[0-9]+]](s64) = G_INSERT [[TMP1]], [[OVERFLOW]](s1), 32
; CHECK: G_STORE [[RES]](s64), [[ADDR]](p0)
@@ -857,7 +857,7 @@ define void @test_ssub_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %subr) {
; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x2
; CHECK: [[ZERO:%[0-9]+]](s1) = G_CONSTANT i1 false
; CHECK: [[VAL:%[0-9]+]](s32), [[OVERFLOW:%[0-9]+]](s1) = G_USUBE [[LHS]], [[RHS]], [[ZERO]]
-; CHECK: [[TMP:%[0-9]+]](s64) = IMPLICIT_DEF
+; CHECK: [[TMP:%[0-9]+]](s64) = G_IMPLICIT_DEF
; CHECK: [[TMP1:%[0-9]+]](s64) = G_INSERT [[TMP]], [[VAL]](s32), 0
; CHECK: [[RES:%[0-9]+]](s64) = G_INSERT [[TMP1]], [[OVERFLOW]](s1), 32
; CHECK: G_STORE [[RES]](s64), [[ADDR]](p0)
@@ -873,7 +873,7 @@ define void @test_usub_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %subr) {
; CHECK: [[RHS:%[0-9]+]](s32) = COPY %w1
; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x2
; CHECK: [[VAL:%[0-9]+]](s32), [[OVERFLOW:%[0-9]+]](s1) = G_SMULO [[LHS]], [[RHS]]
-; CHECK: [[TMP:%[0-9]+]](s64) = IMPLICIT_DEF
+; CHECK: [[TMP:%[0-9]+]](s64) = G_IMPLICIT_DEF
; CHECK: [[TMP1:%[0-9]+]](s64) = G_INSERT [[TMP]], [[VAL]](s32), 0
; CHECK: [[RES:%[0-9]+]](s64) = G_INSERT [[TMP1]], [[OVERFLOW]](s1), 32
; CHECK: G_STORE [[RES]](s64), [[ADDR]](p0)
@@ -889,7 +889,7 @@ define void @test_smul_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) {
; CHECK: [[RHS:%[0-9]+]](s32) = COPY %w1
; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x2
; CHECK: [[VAL:%[0-9]+]](s32), [[OVERFLOW:%[0-9]+]](s1) = G_UMULO [[LHS]], [[RHS]]
-; CHECK: [[TMP:%[0-9]+]](s64) = IMPLICIT_DEF
+; CHECK: [[TMP:%[0-9]+]](s64) = G_IMPLICIT_DEF
; CHECK: [[TMP1:%[0-9]+]](s64) = G_INSERT [[TMP]], [[VAL]](s32), 0
; CHECK: [[RES:%[0-9]+]](s64) = G_INSERT [[TMP1]], [[OVERFLOW]](s1), 32
; CHECK: G_STORE [[RES]](s64), [[ADDR]](p0)
@@ -1271,6 +1271,45 @@ define float @test_fma_intrin(float %a, float %b, float %c) {
ret float %res
}
+declare float @llvm.exp.f32(float)
+define float @test_exp_intrin(float %a) {
+; CHECK-LABEL: name: test_exp_intrin
+; CHECK: [[A:%[0-9]+]](s32) = COPY %s0
+; CHECK: [[RES:%[0-9]+]](s32) = G_FEXP [[A]]
+; CHECK: %s0 = COPY [[RES]]
+ %res = call float @llvm.exp.f32(float %a)
+ ret float %res
+}
+
+declare float @llvm.exp2.f32(float)
+define float @test_exp2_intrin(float %a) {
+; CHECK-LABEL: name: test_exp2_intrin
+; CHECK: [[A:%[0-9]+]](s32) = COPY %s0
+; CHECK: [[RES:%[0-9]+]](s32) = G_FEXP2 [[A]]
+; CHECK: %s0 = COPY [[RES]]
+ %res = call float @llvm.exp2.f32(float %a)
+ ret float %res
+}
+
+declare float @llvm.log.f32(float)
+define float @test_log_intrin(float %a) {
+; CHECK-LABEL: name: test_log_intrin
+; CHECK: [[A:%[0-9]+]](s32) = COPY %s0
+; CHECK: [[RES:%[0-9]+]](s32) = G_FLOG [[A]]
+; CHECK: %s0 = COPY [[RES]]
+ %res = call float @llvm.log.f32(float %a)
+ ret float %res
+}
+
+declare float @llvm.log2.f32(float)
+define float @test_log2_intrin(float %a) {
+; CHECK-LABEL: name: test_log2_intrin
+; CHECK: [[A:%[0-9]+]](s32) = COPY %s0
+; CHECK: [[RES:%[0-9]+]](s32) = G_FLOG2 [[A]]
+; CHECK: %s0 = COPY [[RES]]
+ %res = call float @llvm.log2.f32(float %a)
+ ret float %res
+}
declare void @llvm.lifetime.start.p0i8(i64, i8*)
declare void @llvm.lifetime.end.p0i8(i64, i8*)
define void @test_lifetime_intrin() {
@@ -1464,7 +1503,7 @@ define float @test_different_call_conv_target(float %x) {
define <2 x i32> @test_shufflevector_s32_v2s32(i32 %arg) {
; CHECK-LABEL: name: test_shufflevector_s32_v2s32
; CHECK: [[ARG:%[0-9]+]](s32) = COPY %w0
-; CHECK-DAG: [[UNDEF:%[0-9]+]](s32) = IMPLICIT_DEF
+; CHECK-DAG: [[UNDEF:%[0-9]+]](s32) = G_IMPLICIT_DEF
; CHECK-DAG: [[C0:%[0-9]+]](s32) = G_CONSTANT i32 0
; CHECK-DAG: [[MASK:%[0-9]+]](<2 x s32>) = G_MERGE_VALUES [[C0]](s32), [[C0]](s32)
; CHECK: [[VEC:%[0-9]+]](<2 x s32>) = G_SHUFFLE_VECTOR [[ARG]](s32), [[UNDEF]], [[MASK]](<2 x s32>)
@@ -1477,7 +1516,7 @@ define <2 x i32> @test_shufflevector_s32_v2s32(i32 %arg) {
define i32 @test_shufflevector_v2s32_s32(<2 x i32> %arg) {
; CHECK-LABEL: name: test_shufflevector_v2s32_s32
; CHECK: [[ARG:%[0-9]+]](<2 x s32>) = COPY %d0
-; CHECK-DAG: [[UNDEF:%[0-9]+]](<2 x s32>) = IMPLICIT_DEF
+; CHECK-DAG: [[UNDEF:%[0-9]+]](<2 x s32>) = G_IMPLICIT_DEF
; CHECK-DAG: [[C1:%[0-9]+]](s32) = G_CONSTANT i32 1
; CHECK: [[RES:%[0-9]+]](s32) = G_SHUFFLE_VECTOR [[ARG]](<2 x s32>), [[UNDEF]], [[C1]](s32)
; CHECK: %w0 = COPY [[RES]](s32)
@@ -1489,7 +1528,7 @@ define i32 @test_shufflevector_v2s32_s32(<2 x i32> %arg) {
define <2 x i32> @test_shufflevector_v2s32_v2s32(<2 x i32> %arg) {
; CHECK-LABEL: name: test_shufflevector_v2s32_v2s32
; CHECK: [[ARG:%[0-9]+]](<2 x s32>) = COPY %d0
-; CHECK-DAG: [[UNDEF:%[0-9]+]](<2 x s32>) = IMPLICIT_DEF
+; CHECK-DAG: [[UNDEF:%[0-9]+]](<2 x s32>) = G_IMPLICIT_DEF
; CHECK-DAG: [[C1:%[0-9]+]](s32) = G_CONSTANT i32 1
; CHECK-DAG: [[C0:%[0-9]+]](s32) = G_CONSTANT i32 0
; CHECK-DAG: [[MASK:%[0-9]+]](<2 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C0]](s32)
@@ -1502,7 +1541,7 @@ define <2 x i32> @test_shufflevector_v2s32_v2s32(<2 x i32> %arg) {
define i32 @test_shufflevector_v2s32_v3s32(<2 x i32> %arg) {
; CHECK-LABEL: name: test_shufflevector_v2s32_v3s32
; CHECK: [[ARG:%[0-9]+]](<2 x s32>) = COPY %d0
-; CHECK-DAG: [[UNDEF:%[0-9]+]](<2 x s32>) = IMPLICIT_DEF
+; CHECK-DAG: [[UNDEF:%[0-9]+]](<2 x s32>) = G_IMPLICIT_DEF
; CHECK-DAG: [[C1:%[0-9]+]](s32) = G_CONSTANT i32 1
; CHECK-DAG: [[C0:%[0-9]+]](s32) = G_CONSTANT i32 0
; CHECK-DAG: [[MASK:%[0-9]+]](<3 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C0]](s32), [[C1]](s32)
@@ -1531,7 +1570,7 @@ define <4 x i32> @test_shufflevector_v2s32_v4s32(<2 x i32> %arg1, <2 x i32> %arg
define <2 x i32> @test_shufflevector_v4s32_v2s32(<4 x i32> %arg) {
; CHECK-LABEL: name: test_shufflevector_v4s32_v2s32
; CHECK: [[ARG:%[0-9]+]](<4 x s32>) = COPY %q0
-; CHECK-DAG: [[UNDEF:%[0-9]+]](<4 x s32>) = IMPLICIT_DEF
+; CHECK-DAG: [[UNDEF:%[0-9]+]](<4 x s32>) = G_IMPLICIT_DEF
; CHECK-DAG: [[C1:%[0-9]+]](s32) = G_CONSTANT i32 1
; CHECK-DAG: [[C3:%[0-9]+]](s32) = G_CONSTANT i32 3
; CHECK-DAG: [[MASK:%[0-9]+]](<2 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C3]](s32)
@@ -1570,7 +1609,7 @@ define <16 x i8> @test_shufflevector_v8s8_v16s8(<8 x i8> %arg1, <8 x i8> %arg2)
}
; CHECK-LABEL: test_constant_vector
-; CHECK: [[UNDEF:%[0-9]+]](s16) = IMPLICIT_DEF
+; CHECK: [[UNDEF:%[0-9]+]](s16) = G_IMPLICIT_DEF
; CHECK: [[F:%[0-9]+]](s16) = G_FCONSTANT half 0xH3C00
; CHECK: [[M:%[0-9]+]](<4 x s16>) = G_MERGE_VALUES [[UNDEF]](s16), [[UNDEF]](s16), [[UNDEF]](s16), [[F]](s16)
; CHECK: %d0 = COPY [[M]](<4 x s16>)
diff --git a/test/CodeGen/AArch64/GlobalISel/call-translator.ll b/test/CodeGen/AArch64/GlobalISel/call-translator.ll
index 0e593fdb7b85..8fba8e09f9ff 100644
--- a/test/CodeGen/AArch64/GlobalISel/call-translator.ll
+++ b/test/CodeGen/AArch64/GlobalISel/call-translator.ll
@@ -64,7 +64,7 @@ define void @test_multiple_args(i64 %in) {
; CHECK: [[I8:%[0-9]+]](s8) = COPY %w1
; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x2
-; CHECK: [[UNDEF:%[0-9]+]](s192) = IMPLICIT_DEF
+; CHECK: [[UNDEF:%[0-9]+]](s192) = G_IMPLICIT_DEF
; CHECK: [[ARG0:%[0-9]+]](s192) = G_INSERT [[UNDEF]], [[DBL]](s64), 0
; CHECK: [[ARG1:%[0-9]+]](s192) = G_INSERT [[ARG0]], [[I64]](s64), 64
; CHECK: [[ARG2:%[0-9]+]](s192) = G_INSERT [[ARG1]], [[I8]](s8), 128
diff --git a/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll b/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll
index ef4445111d7b..d9fec0ec7d46 100644
--- a/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll
+++ b/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll
@@ -19,7 +19,7 @@ declare i32 @llvm.eh.typeid.for(i8*)
; CHECK: [[BAD]] (landing-pad):
; CHECK: EH_LABEL
-; CHECK: [[UNDEF:%[0-9]+]](s128) = IMPLICIT_DEF
+; CHECK: [[UNDEF:%[0-9]+]](s128) = G_IMPLICIT_DEF
; CHECK: [[PTR:%[0-9]+]](p0) = COPY %x0
; CHECK: [[VAL_WITH_PTR:%[0-9]+]](s128) = G_INSERT [[UNDEF]], [[PTR]](p0), 0
; CHECK: [[SEL_PTR:%[0-9]+]](p0) = COPY %x1
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-combines.mir b/test/CodeGen/AArch64/GlobalISel/legalize-combines.mir
index e3e0175d39ac..fbacc28d7434 100644
--- a/test/CodeGen/AArch64/GlobalISel/legalize-combines.mir
+++ b/test/CodeGen/AArch64/GlobalISel/legalize-combines.mir
@@ -57,11 +57,11 @@ body: |
%0:_(s64) = COPY %x0
; CHECK-LABEL: name: test_combines_4
- ; CHECK: %2(<2 x s32>) = G_EXTRACT %1(s128), 0
- ; CHECK: %3(<2 x s32>) = G_ADD %2, %2
+ ; CHECK: %2(s64) = COPY %0(s64)
+ ; CHECK: %3(s64) = G_ADD %2, %2
%1:_(s128) = G_MERGE_VALUES %0, %0
- %2:_(<2 x s32>) = G_EXTRACT %1, 0
- %3:_(<2 x s32>) = G_ADD %2, %2
+ %2:_(s64) = G_EXTRACT %1, 0
+ %3:_(s64) = G_ADD %2, %2
...
---
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-exceptions.ll b/test/CodeGen/AArch64/GlobalISel/legalize-exceptions.ll
index 23e7d5163e5a..42ca367e122b 100644
--- a/test/CodeGen/AArch64/GlobalISel/legalize-exceptions.ll
+++ b/test/CodeGen/AArch64/GlobalISel/legalize-exceptions.ll
@@ -22,12 +22,11 @@ declare void @_Unwind_Resume(i8*)
; CHECK: [[SEL:%[0-9]+]](s32) = G_PTRTOINT [[SEL_PTR]]
; CHECK: [[STRUCT_SEL:%[0-9]+]](s64) = G_INSERT {{%[0-9]+}}, [[SEL]](s32), 0
-; CHECK: [[STRUCT:%[0-9]+]](s128) = G_MERGE_VALUES [[STRUCT_PTR]](s64), [[STRUCT_SEL]]
-
-; CHECK: [[PTR:%[0-9]+]](p0) = G_EXTRACT [[STRUCT]](s128), 0
+; CHECK: [[PTR:%[0-9]+]](p0) = G_INTTOPTR [[STRUCT_PTR]](s64)
; CHECK: G_STORE [[PTR]](p0), {{%[0-9]+}}(p0)
-; CHECK: [[SEL:%[0-9]+]](s32) = G_EXTRACT [[STRUCT]](s128), 64
+; CHECK: [[SEL_TMP:%[0-9]+]](s32) = G_EXTRACT [[STRUCT_SEL]](s64), 0
+; CHECK: [[SEL:%[0-9]+]](s32) = COPY [[SEL_TMP]]
; CHECK: G_STORE [[SEL]](s32), {{%[0-9]+}}(p0)
define void @bar() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir b/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir
new file mode 100644
index 000000000000..dc6b59b24a9a
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir
@@ -0,0 +1,85 @@
+# RUN: llc -mtriple=aarch64-linux-gnu -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s
+
+---
+name: test_extracts_1
+body: |
+ bb.0:
+ liveins: %w0
+
+ ; The low part of the extraction covers the whole low register, so the
+ ; value stored is forwarded directly from the first load.
+
+ ; CHECK-LABEL: name: test_extracts_1
+ ; CHECK: [[LO:%[0-9]+]](s64) = G_LOAD
+ ; CHECK: {{%[0-9]+}}(s64) = G_LOAD
+ ; CHECK: [[VAL:%[0-9]+]](s64) = COPY [[LO]]
+ ; CHECK: G_STORE [[VAL]]
+ %0:_(s64) = COPY %x0
+ %1:_(s32) = COPY %w1
+ %2:_(p0) = COPY %x2
+ %3:_(s128) = G_LOAD %2(p0) :: (load 16)
+ %4:_(s64) = G_EXTRACT %3(s128), 0
+ G_STORE %4(s64), %2(p0) :: (store 8)
+ RET_ReallyLR
+...
+
+---
+name: test_extracts_2
+body: |
+ bb.0:
+ liveins: %w0
+
+ ; The low extraction takes the whole low register. The high extraction is real.
+ ; CHECK-LABEL: name: test_extracts_2
+ ; CHECK: [[LO_TMP:%[0-9]+]](s64) = G_LOAD
+ ; CHECK: [[HI:%[0-9]+]](s64) = G_LOAD
+ ; CHECK: [[LO:%[0-9]+]](s64) = COPY [[LO_TMP]]
+ ; CHECK: [[NEWHI_TMP:%[0-9]+]](s32) = G_EXTRACT [[HI]](s64), 0
+ ; CHECK: [[NEWHI:%[0-9]+]](s32) = COPY [[NEWHI_TMP]]
+ ; CHECK: G_STORE [[LO]]
+ ; CHECK: G_STORE [[NEWHI]]
+ %0:_(s64) = COPY %x0
+ %1:_(s32) = COPY %w1
+ %2:_(p0) = COPY %x2
+ %3:_(s128) = G_LOAD %2(p0) :: (load 16)
+ %4:_(s64) = G_EXTRACT %3(s128), 0
+ %5:_(s32) = G_EXTRACT %3(s128), 64
+ G_STORE %4(s64), %2(p0) :: (store 8)
+ G_STORE %5(s32), %2(p0) :: (store 4)
+ RET_ReallyLR
+...
+
+---
+name: test_extracts_3
+body: |
+ bb.0:
+ liveins: %x0, %x1, %x2
+
+
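+ ; A 64-bit extract at offset 32 straddles both 64-bit halves of the merge,
+ ; so it is rebuilt from the high 32 bits of %0 and the low 32 bits of %1.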
+ ; CHECK-LABEL: name: test_extracts_3
+ ; CHECK: [[LO:%[0-9]+]](s32) = G_EXTRACT %0(s64), 32
+ ; CHECK: [[HI:%[0-9]+]](s32) = G_EXTRACT %1(s64), 0
+ ; CHECK: %3(s64) = G_MERGE_VALUES [[LO]](s32), [[HI]](s32)
+ %0:_(s64) = COPY %x0
+ %1:_(s64) = COPY %x1
+ %2:_(s128) = G_MERGE_VALUES %0, %1
+ %3:_(s64) = G_EXTRACT %2, 32
+ RET_ReallyLR
+...
+
+---
+name: test_extracts_4
+body: |
+ bb.0:
+ liveins: %x0, %x1, %x2
+
+
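+ ; A 32-bit extract at offset 32 lines up exactly with the top half of %0,
+ ; so no merge is needed and the value is simply copied out.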
+ ; CHECK-LABEL: name: test_extracts_4
+ ; CHECK: [[LO_TMP:%[0-9]+]](s32) = G_EXTRACT %0(s64), 32
+ ; CHECK: %3(s32) = COPY [[LO_TMP]]
+ %0:_(s64) = COPY %x0
+ %1:_(s64) = COPY %x1
+ %2:_(s128) = G_MERGE_VALUES %0, %1
+ %3:_(s32) = G_EXTRACT %2, 32
+ RET_ReallyLR
+...
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-undef.mir b/test/CodeGen/AArch64/GlobalISel/legalize-undef.mir
new file mode 100644
index 000000000000..e7cf59b3394e
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/legalize-undef.mir
@@ -0,0 +1,15 @@
+# RUN: llc -mtriple=aarch64-linux-gnu -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s
+
+---
+name: test_implicit_def
+registers:
+body: |
+ bb.0.entry:
+ liveins:
+ ; CHECK-LABEL: name: test_implicit_def
+ ; CHECK: [[LO:%[0-9]+]](s64) = G_IMPLICIT_DEF
+ ; CHECK: [[HI:%[0-9]+]](s64) = G_IMPLICIT_DEF
+ ; CHECK: %0(s128) = G_MERGE_VALUES [[LO]](s64), [[HI]](s64)
+
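+ ; An s128 is wider than any legal AArch64 scalar, so the implicit def is
+ ; narrowed to two s64 defs that are merged back together.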
+ %0:_(s128) = G_IMPLICIT_DEF
+...
diff --git a/test/CodeGen/AArch64/GlobalISel/select-trunc.mir b/test/CodeGen/AArch64/GlobalISel/select-trunc.mir
index 5559e2d3a0d1..f43a9ab34ffd 100644
--- a/test/CodeGen/AArch64/GlobalISel/select-trunc.mir
+++ b/test/CodeGen/AArch64/GlobalISel/select-trunc.mir
@@ -15,8 +15,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32sp, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
diff --git a/test/CodeGen/AArch64/arm64-ccmp.ll b/test/CodeGen/AArch64/arm64-ccmp.ll
index 2682fa7dcce1..a910585e7f5d 100644
--- a/test/CodeGen/AArch64/arm64-ccmp.ll
+++ b/test/CodeGen/AArch64/arm64-ccmp.ll
@@ -108,9 +108,9 @@ if.end: ; preds = %if.then, %lor.lhs.f
; CHECK: cmp w0, #1
; CHECK: sdiv [[DIVRES:w[0-9]+]], w1, w0
; CHECK: ccmp [[DIVRES]], #16, #0, ge
-; CHECK: b.gt [[BLOCK:LBB[0-9_]+]]
-; CHECK: bl _foo
+; CHECK: b.le [[BLOCK:LBB[0-9_]+]]
; CHECK: [[BLOCK]]:
+; CHECK: bl _foo
; CHECK: orr w0, wzr, #0x7
define i32 @speculate_division(i32 %a, i32 %b) nounwind ssp {
entry:
@@ -135,7 +135,7 @@ if.end:
; CHECK: cmp
; CHECK-NOT: b.
; CHECK: fccmp {{.*}}, #8, ge
-; CHECK: b.lt
+; CHECK: b.ge
define i32 @single_fcmp(i32 %a, float %b) nounwind ssp {
entry:
%cmp = icmp sgt i32 %a, 0
diff --git a/test/CodeGen/AArch64/arm64-spill-remarks.ll b/test/CodeGen/AArch64/arm64-spill-remarks.ll
index bc9340352d75..cfebeb496e18 100644
--- a/test/CodeGen/AArch64/arm64-spill-remarks.ll
+++ b/test/CodeGen/AArch64/arm64-spill-remarks.ll
@@ -3,6 +3,15 @@
; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -aarch64-neon-syntax=apple 2>&1 | FileCheck -check-prefix=NO_REMARK %s
; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -aarch64-neon-syntax=apple -pass-remarks-output=%t.yaml -pass-remarks-with-hotness 2>&1 | FileCheck -check-prefix=NO_REMARK %s
; RUN: cat %t.yaml | FileCheck -check-prefix=YAML %s
+;
+; Verify that remarks below the hotness threshold are not output.
+; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -aarch64-neon-syntax=apple -pass-remarks-missed=regalloc \
+; RUN: -pass-remarks-with-hotness -pass-remarks-hotness-threshold=500 \
+; RUN: 2>&1 | FileCheck -check-prefix=THRESHOLD %s
+; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -aarch64-neon-syntax=apple -pass-remarks-output=%t.threshold.yaml \
+; RUN: -pass-remarks-with-hotness -pass-remarks-hotness-threshold=500 \
+; RUN: 2>&1 | FileCheck -check-prefix=NO_REMARK %s
+; RUN: cat %t.threshold.yaml | FileCheck -check-prefix=THRESHOLD_YAML %s
; This has two nested loops, each with one value that has to be spilled and
; then reloaded.
@@ -23,6 +32,9 @@
; NO_REMARK-NOT: remark
+; THRESHOLD-NOT: (hotness: 300)
+; THRESHOLD: remark: /tmp/kk.c:2:20: 1 spills 1 reloads generated in loop (hotness: 30000)
+
; YAML: --- !Missed
; YAML: Pass: regalloc
; YAML: Name: LoopSpillReload
@@ -63,6 +75,21 @@
; YAML: - String: generated in loop
; YAML: ...
+; THRESHOLD_YAML-NOT: Hotness: 300{{$}}
+; THRESHOLD_YAML: --- !Missed
+; THRESHOLD_YAML: Pass: regalloc
+; THRESHOLD_YAML: Name: LoopSpillReload
+; THRESHOLD_YAML: DebugLoc: { File: /tmp/kk.c, Line: 2, Column: 20 }
+; THRESHOLD_YAML: Function: fpr128
+; THRESHOLD_YAML: Hotness: 30000
+; THRESHOLD_YAML: Args:
+; THRESHOLD_YAML: - NumSpills: '1'
+; THRESHOLD_YAML: - String: ' spills '
+; THRESHOLD_YAML: - NumReloads: '1'
+; THRESHOLD_YAML: - String: ' reloads '
+; THRESHOLD_YAML: - String: generated in loop
+; THRESHOLD_YAML: ...
+
define void @fpr128(<4 x float>* %p) nounwind ssp !prof !11 {
entry:
br label %loop, !dbg !8
diff --git a/test/CodeGen/AArch64/ccmp-successor-probs.mir b/test/CodeGen/AArch64/ccmp-successor-probs.mir
new file mode 100644
index 000000000000..8e81c419841b
--- /dev/null
+++ b/test/CodeGen/AArch64/ccmp-successor-probs.mir
@@ -0,0 +1,46 @@
+# RUN: llc -o - %s -mtriple=aarch64--linux-gnu -mcpu=falkor -run-pass=aarch64-ccmp | FileCheck %s
+---
+# This test checks that successor probabilities are properly updated after a
+# ccmp-conversion.
+#
+# CHECK-LABEL: name: aarch64-ccmp-successor-probs
+# CHECK: bb.0:
+# CHECK-NEXT: successors: %bb.2(0x04000000), %bb.3(0x7c000000)
+# CHECK: CCMPXr %5, %4, 0, 10, implicit-def %nzcv, implicit %nzcv
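+#
+# For reference, the merged block's weights combine both branches:
+#   P(bb.2) = 0x02000000 + 0x7e000000 * (0x02082082 / 0x80000000) ~= 0x04000000
+#   P(bb.3) = 0x7e000000 * (0x7df7df7e / 0x80000000) ~= 0x7c000000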
+#
+name: aarch64-ccmp-successor-probs
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: gpr64 }
+ - { id: 3, class: gpr64 }
+ - { id: 4, class: gpr64 }
+ - { id: 5, class: gpr64 }
+ - { id: 6, class: gpr64 }
+ - { id: 7, class: gpr64 }
+body : |
+ bb.0:
+ successors: %bb.1(0x7e000000), %bb.2(0x02000000)
+
+ %0 = LDRXui killed %x0, 69
+ %1 = COPY %xzr
+ %2 = SUBSXrr %1, %0, implicit-def dead %nzcv
+ %3 = SUBSXri %x1, 1, 0, implicit-def dead %nzcv
+ %4 = COPY %0
+ %5 = COPY %3
+ %6 = SUBSXrr %x1, killed %2, implicit-def %nzcv
+ Bcc 11, %bb.2, implicit %nzcv
+ B %bb.1
+
+ bb.1:
+ successors: %bb.2(0x02082082), %bb.3(0x7df7df7e)
+
+ %7 = SUBSXrr %5, %4, implicit-def %nzcv
+ Bcc 12, %bb.2, implicit %nzcv
+ B %bb.3
+
+ bb.2:
+ successors: %bb.3(0x80000000)
+
+ bb.3:
+...
diff --git a/test/CodeGen/AArch64/cond-br-tuning.ll b/test/CodeGen/AArch64/cond-br-tuning.ll
index 628d89e34a01..d966acbebfdd 100644
--- a/test/CodeGen/AArch64/cond-br-tuning.ll
+++ b/test/CodeGen/AArch64/cond-br-tuning.ll
@@ -83,7 +83,7 @@ L2:
; CHECK-LABEL: test_add_tbz:
; CHECK: adds
-; CHECK: b.ge
+; CHECK: b.pl
; CHECK: ret
define void @test_add_tbz(i32 %a, i32 %b, i32* %ptr) {
entry:
@@ -99,7 +99,7 @@ L2:
; CHECK-LABEL: test_subs_tbz:
; CHECK: subs
-; CHECK: b.ge
+; CHECK: b.pl
; CHECK: ret
define void @test_subs_tbz(i32 %a, i32 %b, i32* %ptr) {
entry:
@@ -115,7 +115,7 @@ L2:
; CHECK-LABEL: test_add_tbnz
; CHECK: adds
-; CHECK: b.lt
+; CHECK: b.mi
; CHECK: ret
define void @test_add_tbnz(i32 %a, i32 %b, i32* %ptr) {
entry:
@@ -131,7 +131,7 @@ L2:
; CHECK-LABEL: test_subs_tbnz
; CHECK: subs
-; CHECK: b.lt
+; CHECK: b.mi
; CHECK: ret
define void @test_subs_tbnz(i32 %a, i32 %b, i32* %ptr) {
entry:
diff --git a/test/CodeGen/AMDGPU/alignbit-pat.ll b/test/CodeGen/AMDGPU/alignbit-pat.ll
new file mode 100644
index 000000000000..ff5c8960fad3
--- /dev/null
+++ b/test/CodeGen/AMDGPU/alignbit-pat.ll
@@ -0,0 +1,100 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}alignbit_shr_pat:
+; GCN-DAG: s_load_dword s[[SHR:[0-9]+]]
+; GCN-DAG: load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
+; GCN: v_alignbit_b32 v{{[0-9]+}}, v[[HI]], v[[LO]], s[[SHR]]
+
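+; v_alignbit_b32 d, s0, s1, s2 returns (({s0,s1} as i64) >> (s2 & 31)) & 0xffffffff,
+; so an i64 lshr whose shift amount is pre-masked with 'and 31' maps onto a
+; single alignbit.
+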
+define amdgpu_kernel void @alignbit_shr_pat(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) {
+bb:
+ %tmp = load i64, i64 addrspace(1)* %arg, align 8
+ %tmp3 = and i32 %arg2, 31
+ %tmp4 = zext i32 %tmp3 to i64
+ %tmp5 = lshr i64 %tmp, %tmp4
+ %tmp6 = trunc i64 %tmp5 to i32
+ store i32 %tmp6, i32 addrspace(1)* %arg1, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}alignbit_shr_pat_v:
+; GCN-DAG: load_dword v[[SHR:[0-9]+]],
+; GCN-DAG: load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
+; GCN: v_alignbit_b32 v{{[0-9]+}}, v[[HI]], v[[LO]], v[[SHR]]
+
+define amdgpu_kernel void @alignbit_shr_pat_v(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1) {
+bb:
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %gep1 = getelementptr inbounds i64, i64 addrspace(1)* %arg, i32 %tid
+ %tmp = load i64, i64 addrspace(1)* %gep1, align 8
+ %gep2 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i32 %tid
+ %amt = load i32, i32 addrspace(1)* %gep2, align 4
+ %tmp3 = and i32 %amt, 31
+ %tmp4 = zext i32 %tmp3 to i64
+ %tmp5 = lshr i64 %tmp, %tmp4
+ %tmp6 = trunc i64 %tmp5 to i32
+ store i32 %tmp6, i32 addrspace(1)* %gep2, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}alignbit_shr_pat_wrong_and30:
+; Negative test, wrong constant
+; GCN: v_lshr_b64
+; GCN-NOT: v_alignbit_b32
+
+define amdgpu_kernel void @alignbit_shr_pat_wrong_and30(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) {
+bb:
+ %tmp = load i64, i64 addrspace(1)* %arg, align 8
+ %tmp3 = and i32 %arg2, 30
+ %tmp4 = zext i32 %tmp3 to i64
+ %tmp5 = lshr i64 %tmp, %tmp4
+ %tmp6 = trunc i64 %tmp5 to i32
+ store i32 %tmp6, i32 addrspace(1)* %arg1, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}alignbit_shr_pat_wrong_and63:
+; Negative test, wrong constant
+; GCN: v_lshr_b64
+; GCN-NOT: v_alignbit_b32
+
+define amdgpu_kernel void @alignbit_shr_pat_wrong_and63(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) {
+bb:
+ %tmp = load i64, i64 addrspace(1)* %arg, align 8
+ %tmp3 = and i32 %arg2, 63
+ %tmp4 = zext i32 %tmp3 to i64
+ %tmp5 = lshr i64 %tmp, %tmp4
+ %tmp6 = trunc i64 %tmp5 to i32
+ store i32 %tmp6, i32 addrspace(1)* %arg1, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}alignbit_shr_pat_const30:
+; GCN: load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
+; GCN: v_alignbit_b32 v{{[0-9]+}}, v[[HI]], v[[LO]], 30
+
+define amdgpu_kernel void @alignbit_shr_pat_const30(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1) {
+bb:
+ %tmp = load i64, i64 addrspace(1)* %arg, align 8
+ %tmp5 = lshr i64 %tmp, 30
+ %tmp6 = trunc i64 %tmp5 to i32
+ store i32 %tmp6, i32 addrspace(1)* %arg1, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}alignbit_shr_pat_wrong_const33:
+; Negative test, shift amount more than 31
+; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
+; GCN-NOT: v_alignbit_b32
+
+define amdgpu_kernel void @alignbit_shr_pat_wrong_const33(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1) {
+bb:
+ %tmp = load i64, i64 addrspace(1)* %arg, align 8
+ %tmp5 = lshr i64 %tmp, 33
+ %tmp6 = trunc i64 %tmp5 to i32
+ store i32 %tmp6, i32 addrspace(1)* %arg1, align 4
+ ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+
+attributes #0 = { nounwind readnone speculatable }
diff --git a/test/CodeGen/AMDGPU/bug-vopc-commute.ll b/test/CodeGen/AMDGPU/bug-vopc-commute.ll
index 7c02d8385462..e951b5e08927 100644
--- a/test/CodeGen/AMDGPU/bug-vopc-commute.ll
+++ b/test/CodeGen/AMDGPU/bug-vopc-commute.ll
@@ -8,8 +8,8 @@
; of which were in SGPRs.
define amdgpu_vs float @main(i32 %v) {
main_body:
- %d1 = call float @llvm.SI.load.const(<16 x i8> undef, i32 960)
- %d2 = call float @llvm.SI.load.const(<16 x i8> undef, i32 976)
+ %d1 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 960)
+ %d2 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 976)
br i1 undef, label %ENDIF56, label %IF57
IF57: ; preds = %ENDIF
@@ -41,7 +41,7 @@ ENDIF62: ; preds = %ENDIF59
}
; Function Attrs: nounwind readnone
-declare float @llvm.SI.load.const(<16 x i8>, i32) #0
+declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #0
attributes #0 = { nounwind readnone }
attributes #1 = { readnone }
diff --git a/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll b/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll
index 53adf09026ec..04ad3bcccd3f 100644
--- a/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll
+++ b/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll
@@ -176,14 +176,13 @@ ret:
; OPT: ret
; GCN-LABEL: {{^}}sink_ubfe_i64_span_midpoint:
-; GCN: s_cbranch_scc1 BB3_2
-; GCN: s_lshr_b64 s{{\[}}[[LO:[0-9]+]]:{{[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}, 30
-; GCN: s_and_b32 s{{[0-9]+}}, s[[LO]], 0xff
+; GCN: v_alignbit_b32 v[[LO:[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}, 30
+; GCN: s_cbranch_scc1 BB3_2
+; GCN: v_and_b32_e32 v{{[0-9]+}}, 0xff, v[[LO]]
; GCN: BB3_2:
-; GCN: s_lshr_b64 s{{\[}}[[LO:[0-9]+]]:{{[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}, 30
-; GCN: s_and_b32 s{{[0-9]+}}, s[[LO]], 0x7f
+; GCN: v_and_b32_e32 v{{[0-9]+}}, 0x7f, v[[LO]]
; GCN: BB3_3:
; GCN: buffer_store_dwordx2
diff --git a/test/CodeGen/AMDGPU/code-object-metadata-from-llvm-ir-full.ll b/test/CodeGen/AMDGPU/code-object-metadata-from-llvm-ir-full.ll
index a68ddabd9560..37fd08242fba 100644
--- a/test/CodeGen/AMDGPU/code-object-metadata-from-llvm-ir-full.ll
+++ b/test/CodeGen/AMDGPU/code-object-metadata-from-llvm-ir-full.ll
@@ -16,7 +16,9 @@
; CHECK: ---
; CHECK: Version: [ 1, 0 ]
-; CHECK: Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ]
+; CHECK: Printf:
+; CHECK: - '1:1:4:%d\n'
+; CHECK: - '2:1:8:%g\n'
; CHECK: Kernels:
; CHECK: - Name: test_char
@@ -1253,8 +1255,8 @@ define amdgpu_kernel void @test_pointee_align(i64 addrspace(1)* %a,
; NOTES-NEXT: Owner Data size Description
; NOTES-NEXT: AMD 0x00000008 Unknown note type: (0x00000001)
; NOTES-NEXT: AMD 0x0000001b Unknown note type: (0x00000003)
-; GFX700: AMD 0x00008b06 Unknown note type: (0x0000000a)
-; GFX800: AMD 0x00008e6a Unknown note type: (0x0000000a)
-; GFX900: AMD 0x00008b06 Unknown note type: (0x0000000a)
+; GFX700: AMD 0x00008b0a Unknown note type: (0x0000000a)
+; GFX800: AMD 0x00008e6e Unknown note type: (0x0000000a)
+; GFX900: AMD 0x00008b0a Unknown note type: (0x0000000a)
; PARSER: AMDGPU Code Object Metadata Parser Test: PASS
diff --git a/test/CodeGen/AMDGPU/combine-and-sext-bool.ll b/test/CodeGen/AMDGPU/combine-and-sext-bool.ll
new file mode 100644
index 000000000000..cd4ac4d58ad3
--- /dev/null
+++ b/test/CodeGen/AMDGPU/combine-and-sext-bool.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}and_i1_sext_bool:
+; GCN: v_cmp_{{gt|le}}_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
+; GCN: v_cndmask_b32_e{{32|64}} [[VAL:v[0-9]+]], 0, v{{[0-9]+}}, [[CC]]
+; GCN: store_dword {{.*}}[[VAL]]
+; GCN-NOT: v_cndmask_b32_e64 v{{[0-9]+}}, {{0|-1}}, {{0|-1}}
+; GCN-NOT: v_and_b32_e32
+
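+; sext of an i1 is all-ones or all-zeros, so 'and %v, (sext %cmp)' is just
+; 'select %cmp, %v, 0': a single v_cndmask, with no mask ever materialized.
+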
+define amdgpu_kernel void @and_i1_sext_bool(i32 addrspace(1)* nocapture %arg) {
+bb:
+ %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %y = tail call i32 @llvm.amdgcn.workitem.id.y()
+ %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
+ %v = load i32, i32 addrspace(1)* %gep, align 4
+ %cmp = icmp ugt i32 %x, %y
+ %ext = sext i1 %cmp to i32
+ %and = and i32 %v, %ext
+ store i32 %and, i32 addrspace(1)* %gep, align 4
+ ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+
+declare i32 @llvm.amdgcn.workitem.id.y() #0
+
+attributes #0 = { nounwind readnone speculatable }
diff --git a/test/CodeGen/AMDGPU/combine-cond-add-sub.ll b/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
index 187fb24dfb66..9e47c7d3449c 100644
--- a/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
+++ b/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
@@ -150,6 +150,26 @@ bb:
ret void
}
+; GCN-LABEL: {{^}}add_and:
+; GCN: s_and_b64 [[CC:[^,]+]],
+; GCN: v_addc_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, 0, v{{[0-9]+}}, [[CC]]
+; GCN-NOT: v_cndmask
+
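+; zext of an i1 is 0 or 1, so adding it is an add-with-carry whose carry-in
+; is the condition mask itself; no v_cndmask is needed to produce the 0/1.
+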
+define amdgpu_kernel void @add_and(i32 addrspace(1)* nocapture %arg) {
+bb:
+ %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %y = tail call i32 @llvm.amdgcn.workitem.id.y()
+ %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
+ %v = load i32, i32 addrspace(1)* %gep, align 4
+ %cmp1 = icmp ugt i32 %x, %y
+ %cmp2 = icmp ugt i32 %x, 1
+ %cmp = and i1 %cmp1, %cmp2
+ %ext = zext i1 %cmp to i32
+ %add = add i32 %v, %ext
+ store i32 %add, i32 addrspace(1)* %gep, align 4
+ ret void
+}
+
declare i1 @llvm.amdgcn.class.f32(float, i32) #0
declare i32 @llvm.amdgcn.workitem.id.x() #0
diff --git a/test/CodeGen/AMDGPU/fold-fmul-to-neg-abs.ll b/test/CodeGen/AMDGPU/fold-fmul-to-neg-abs.ll
new file mode 100644
index 000000000000..3637722d004d
--- /dev/null
+++ b/test/CodeGen/AMDGPU/fold-fmul-to-neg-abs.ll
@@ -0,0 +1,37 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}fold_mul_neg:
+; GCN: load_dword [[V:v[0-9]+]]
+; GCN: v_or_b32_e32 [[NEG:v[0-9]]], 0x80000000, [[V]]
+; GCN: store_dword [[NEG]]
+
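+; The select multiplies by -1.0 exactly when %v is positive, so the result is
+; always -|v|; with fast math that is just the sign bit OR'ed into the value.
+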
+define amdgpu_kernel void @fold_mul_neg(float addrspace(1)* %arg) {
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %tid
+ %v = load float, float addrspace(1)* %gep, align 4
+ %cmp = fcmp fast ogt float %v, 0.000000e+00
+ %sel = select i1 %cmp, float -1.000000e+00, float 1.000000e+00
+ %mul = fmul fast float %v, %sel
+ store float %mul, float addrspace(1)* %gep, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}fold_mul_abs:
+; GCN: load_dword [[V:v[0-9]+]]
+; GCN: v_and_b32_e32 [[ABS:v[0-9]]], 0x7fffffff, [[V]]
+; GCN: store_dword [[ABS]]
+
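+; Here the sign is flipped exactly when %v is negative, yielding |v|, i.e.
+; the sign bit cleared with an AND.
+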
+define amdgpu_kernel void @fold_mul_abs(float addrspace(1)* %arg) {
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %tid
+ %v = load float, float addrspace(1)* %gep, align 4
+ %cmp = fcmp fast olt float %v, 0.000000e+00
+ %sel = select i1 %cmp, float -1.000000e+00, float 1.000000e+00
+ %mul = fmul fast float %v, %sel
+ store float %mul, float addrspace(1)* %gep, align 4
+ ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+
+attributes #0 = { nounwind readnone speculatable }
diff --git a/test/CodeGen/AMDGPU/llvm.SI.load.dword.ll b/test/CodeGen/AMDGPU/llvm.SI.load.dword.ll
index 51f564d96909..564d2b32964f 100644
--- a/test/CodeGen/AMDGPU/llvm.SI.load.dword.ll
+++ b/test/CodeGen/AMDGPU/llvm.SI.load.dword.ll
@@ -14,24 +14,24 @@
; CHECK: s_movk_i32 [[K:s[0-9]+]], 0x4d2 ; encoding
; CHECK: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, [[K]] idxen offen offset:65535 glc slc
-define amdgpu_vs void @main([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <32 x i8>] addrspace(2)* byval %arg2, [2 x <16 x i8>] addrspace(2)* byval %arg3, [17 x <16 x i8>] addrspace(2)* inreg %arg4, [17 x <16 x i8>] addrspace(2)* inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9) {
+define amdgpu_vs void @main([17 x <4 x i32>] addrspace(2)* byval %arg, [32 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <32 x i8>] addrspace(2)* byval %arg2, [2 x <4 x i32>] addrspace(2)* byval %arg3, [17 x <4 x i32>] addrspace(2)* inreg %arg4, [17 x <4 x i32>] addrspace(2)* inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9) {
main_body:
- %tmp = getelementptr [2 x <16 x i8>], [2 x <16 x i8>] addrspace(2)* %arg3, i64 0, i32 1
- %tmp10 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
+ %tmp = getelementptr [2 x <4 x i32>], [2 x <4 x i32>] addrspace(2)* %arg3, i64 0, i32 1
+ %tmp10 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
%tmp11 = shl i32 %arg6, 2
- %tmp12 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0)
+ %tmp12 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<4 x i32> %tmp10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0)
%tmp13 = bitcast i32 %tmp12 to float
- %tmp14 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp10, i32 %tmp11, i32 0, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0)
+ %tmp14 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<4 x i32> %tmp10, i32 %tmp11, i32 0, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0)
%tmp15 = bitcast i32 %tmp14 to float
- %tmp16 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp10, i32 %tmp11, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 0)
+ %tmp16 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<4 x i32> %tmp10, i32 %tmp11, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 0)
%tmp17 = bitcast i32 %tmp16 to float
- %tmp18 = call i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8> %tmp10, <2 x i32> zeroinitializer, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 0)
+ %tmp18 = call i32 @llvm.SI.buffer.load.dword.i32.v2i32(<4 x i32> %tmp10, <2 x i32> zeroinitializer, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 0)
%tmp19 = bitcast i32 %tmp18 to float
- %tmp20 = call i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8> %tmp10, <2 x i32> zeroinitializer, i32 0, i32 123, i32 1, i32 1, i32 1, i32 1, i32 0)
+ %tmp20 = call i32 @llvm.SI.buffer.load.dword.i32.v2i32(<4 x i32> %tmp10, <2 x i32> zeroinitializer, i32 0, i32 123, i32 1, i32 1, i32 1, i32 1, i32 0)
%tmp21 = bitcast i32 %tmp20 to float
- %tmp22 = call i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8> %tmp10, <2 x i32> zeroinitializer, i32 1234, i32 65535, i32 1, i32 1, i32 1, i32 1, i32 0)
+ %tmp22 = call i32 @llvm.SI.buffer.load.dword.i32.v2i32(<4 x i32> %tmp10, <2 x i32> zeroinitializer, i32 1234, i32 65535, i32 1, i32 1, i32 1, i32 1, i32 0)
%tmp23 = bitcast i32 %tmp22 to float
call void @llvm.amdgcn.exp.f32(i32 15, i32 12, float %tmp13, float %tmp15, float %tmp17, float %tmp19, i1 false, i1 false)
@@ -40,10 +40,10 @@ main_body:
}
; Function Attrs: nounwind readonly
-declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare i32 @llvm.SI.buffer.load.dword.i32.i32(<4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
; Function Attrs: nounwind readonly
-declare i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8>, <2 x i32>, i32, i32, i32, i32, i32, i32, i32) #0
+declare i32 @llvm.SI.buffer.load.dword.i32.v2i32(<4 x i32>, <2 x i32>, i32, i32, i32, i32, i32, i32, i32) #0
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
diff --git a/test/CodeGen/AMDGPU/llvm.SI.tbuffer.store.ll b/test/CodeGen/AMDGPU/llvm.SI.tbuffer.store.ll
index cd9c082ed941..01b76422c03f 100644
--- a/test/CodeGen/AMDGPU/llvm.SI.tbuffer.store.ll
+++ b/test/CodeGen/AMDGPU/llvm.SI.tbuffer.store.ll
@@ -5,7 +5,7 @@
;CHECK: tbuffer_store_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 offen offset:32 glc slc
define amdgpu_vs void @test1(i32 %a1, i32 %vaddr) {
%vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
- call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
+ call void @llvm.SI.tbuffer.store.v4i32(<4 x i32> undef, <4 x i32> %vdata,
i32 4, i32 %vaddr, i32 0, i32 32, i32 14, i32 4, i32 1, i32 0, i32 1,
i32 1, i32 0)
ret void
@@ -15,7 +15,7 @@ define amdgpu_vs void @test1(i32 %a1, i32 %vaddr) {
;CHECK: tbuffer_store_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 idxen offset:32 glc slc
define amdgpu_vs void @test1_idx(i32 %a1, i32 %vaddr) {
%vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
- call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
+ call void @llvm.SI.tbuffer.store.v4i32(<4 x i32> undef, <4 x i32> %vdata,
i32 4, i32 %vaddr, i32 0, i32 32, i32 14, i32 4, i32 0, i32 1, i32 1,
i32 1, i32 0)
ret void
@@ -25,7 +25,7 @@ define amdgpu_vs void @test1_idx(i32 %a1, i32 %vaddr) {
;CHECK: tbuffer_store_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, {{s[0-9]+}} idxen offset:32 glc slc
define amdgpu_vs void @test1_scalar_offset(i32 %a1, i32 %vaddr, i32 inreg %soffset) {
%vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
- call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
+ call void @llvm.SI.tbuffer.store.v4i32(<4 x i32> undef, <4 x i32> %vdata,
i32 4, i32 %vaddr, i32 %soffset, i32 32, i32 14, i32 4, i32 0, i32 1, i32 1,
i32 1, i32 0)
ret void
@@ -35,7 +35,7 @@ define amdgpu_vs void @test1_scalar_offset(i32 %a1, i32 %vaddr, i32 inreg %soffs
;CHECK: tbuffer_store_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 offen offset:32
define amdgpu_vs void @test1_no_glc_slc(i32 %a1, i32 %vaddr) {
%vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
- call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
+ call void @llvm.SI.tbuffer.store.v4i32(<4 x i32> undef, <4 x i32> %vdata,
i32 4, i32 %vaddr, i32 0, i32 32, i32 14, i32 4, i32 1, i32 0, i32 0,
i32 0, i32 0)
ret void
@@ -45,7 +45,7 @@ define amdgpu_vs void @test1_no_glc_slc(i32 %a1, i32 %vaddr) {
;CHECK: tbuffer_store_format_xyz {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:13, nfmt:4, 0 offen offset:24 glc slc
define amdgpu_vs void @test2(i32 %a1, i32 %vaddr) {
%vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
- call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
+ call void @llvm.SI.tbuffer.store.v4i32(<4 x i32> undef, <4 x i32> %vdata,
i32 3, i32 %vaddr, i32 0, i32 24, i32 13, i32 4, i32 1, i32 0, i32 1,
i32 1, i32 0)
ret void
@@ -55,7 +55,7 @@ define amdgpu_vs void @test2(i32 %a1, i32 %vaddr) {
;CHECK: tbuffer_store_format_xy {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:11, nfmt:4, 0 offen offset:16 glc slc
define amdgpu_vs void @test3(i32 %a1, i32 %vaddr) {
%vdata = insertelement <2 x i32> undef, i32 %a1, i32 0
- call void @llvm.SI.tbuffer.store.v2i32(<16 x i8> undef, <2 x i32> %vdata,
+ call void @llvm.SI.tbuffer.store.v2i32(<4 x i32> undef, <2 x i32> %vdata,
i32 2, i32 %vaddr, i32 0, i32 16, i32 11, i32 4, i32 1, i32 0, i32 1,
i32 1, i32 0)
ret void
@@ -64,12 +64,12 @@ define amdgpu_vs void @test3(i32 %a1, i32 %vaddr) {
;CHECK-LABEL: {{^}}test4:
;CHECK: tbuffer_store_format_x {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:4, nfmt:4, 0 offen offset:8 glc slc
define amdgpu_vs void @test4(i32 %vdata, i32 %vaddr) {
- call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 %vdata,
+ call void @llvm.SI.tbuffer.store.i32(<4 x i32> undef, i32 %vdata,
i32 1, i32 %vaddr, i32 0, i32 8, i32 4, i32 4, i32 1, i32 0, i32 1,
i32 1, i32 0)
ret void
}
-declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
-declare void @llvm.SI.tbuffer.store.v2i32(<16 x i8>, <2 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
-declare void @llvm.SI.tbuffer.store.v4i32(<16 x i8>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
+declare void @llvm.SI.tbuffer.store.i32(<4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
+declare void @llvm.SI.tbuffer.store.v2i32(<4 x i32>, <2 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
+declare void @llvm.SI.tbuffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
diff --git a/test/CodeGen/AMDGPU/misched-killflags.mir b/test/CodeGen/AMDGPU/misched-killflags.mir
new file mode 100644
index 000000000000..ac3a25e5e4b3
--- /dev/null
+++ b/test/CodeGen/AMDGPU/misched-killflags.mir
@@ -0,0 +1,45 @@
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs -run-pass=post-RA-sched -o - %s | FileCheck %s
+# Make sure ScheduleDAGInstrs::fixupKills does not produce invalid kill flags.
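+# Kill flags mark the last use of a register, so after the post-RA scheduler
+# reorders instructions the flags must migrate to the new last uses (note the
+# 'killed' operands in the CHECK lines below).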
+---
+name: func0
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3
+
+ %sgpr33 = S_MOV_B32 %sgpr7
+ %sgpr32 = S_MOV_B32 %sgpr33
+ %sgpr10 = S_MOV_B32 5
+ %sgpr9 = S_MOV_B32 4
+ %sgpr8 = S_MOV_B32 3
+ BUNDLE implicit-def %sgpr6_sgpr7, implicit-def %sgpr6, implicit-def %sgpr7, implicit-def %scc {
+ %sgpr6_sgpr7 = S_GETPC_B64
+ %sgpr6 = S_ADD_U32 internal %sgpr6, 0, implicit-def %scc
+ %sgpr7 = S_ADDC_U32 internal %sgpr7,0, implicit-def %scc, implicit internal %scc
+ }
+ %sgpr4 = S_MOV_B32 %sgpr33
+ %vgpr0 = V_MOV_B32_e32 %sgpr8, implicit %exec, implicit-def %vgpr0_vgpr1_vgpr2_vgpr3, implicit %sgpr8_sgpr9_sgpr10_sgpr11
+ %vgpr1 = V_MOV_B32_e32 %sgpr9, implicit %exec, implicit %sgpr8_sgpr9_sgpr10_sgpr11
+ %vgpr2 = V_MOV_B32_e32 %sgpr10, implicit %exec, implicit %sgpr8_sgpr9_sgpr10_sgpr11
+ %vgpr3 = V_MOV_B32_e32 %sgpr11, implicit %exec, implicit %sgpr8_sgpr9_sgpr10_sgpr11, implicit %exec
+ S_NOP 0, implicit killed %sgpr6_sgpr7, implicit %sgpr0_sgpr1_sgpr2_sgpr3, implicit %sgpr4, implicit killed %vgpr0_vgpr1_vgpr2_vgpr3
+ S_ENDPGM
+...
+# CHECK-LABEL: name: func0
+# CHECK: %sgpr10 = S_MOV_B32 5
+# CHECK: %sgpr9 = S_MOV_B32 4
+# CHECK: %sgpr8 = S_MOV_B32 3
+# CHECK: %sgpr33 = S_MOV_B32 killed %sgpr7
+# CHECK: %vgpr0 = V_MOV_B32_e32 %sgpr8, implicit %exec, implicit-def %vgpr0_vgpr1_vgpr2_vgpr3, implicit %sgpr8_sgpr9_sgpr10_sgpr11
+# CHECK: BUNDLE implicit-def %sgpr6_sgpr7, implicit-def %sgpr6, implicit-def %sgpr7, implicit-def %scc {
+# CHECK: %sgpr6_sgpr7 = S_GETPC_B64
+# CHECK: %sgpr6 = S_ADD_U32 internal %sgpr6, 0, implicit-def %scc
+# CHECK: %sgpr7 = S_ADDC_U32 internal %sgpr7, 0, implicit-def %scc, implicit internal %scc
+# CHECK: }
+# CHECK: %sgpr4 = S_MOV_B32 %sgpr33
+# CHECK: %vgpr1 = V_MOV_B32_e32 %sgpr9, implicit %exec, implicit %sgpr8_sgpr9_sgpr10_sgpr11
+# CHECK: %vgpr2 = V_MOV_B32_e32 %sgpr10, implicit %exec, implicit %sgpr8_sgpr9_sgpr10_sgpr11
+# CHECK: %vgpr3 = V_MOV_B32_e32 killed %sgpr11, implicit %exec, implicit %sgpr8_sgpr9_sgpr10_sgpr11, implicit %exec
+# CHECK: %sgpr32 = S_MOV_B32 killed %sgpr33
+# CHECK: S_NOP 0, implicit killed %sgpr6_sgpr7, implicit %sgpr0_sgpr1_sgpr2_sgpr3, implicit %sgpr4, implicit killed %vgpr0_vgpr1_vgpr2_vgpr3
+# CHECK: S_ENDPGM
diff --git a/test/CodeGen/AMDGPU/mubuf.ll b/test/CodeGen/AMDGPU/mubuf.ll
index d883b87ec401..b23b21118aaa 100644
--- a/test/CodeGen/AMDGPU/mubuf.ll
+++ b/test/CodeGen/AMDGPU/mubuf.ll
@@ -55,14 +55,14 @@ entry:
; CHECK-LABEL: {{^}}soffset_max_imm:
; CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 64 offen glc
-define amdgpu_gs void @soffset_max_imm([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) {
+define amdgpu_gs void @soffset_max_imm([6 x <4 x i32>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) {
main_body:
- %tmp0 = getelementptr [6 x <16 x i8>], [6 x <16 x i8>] addrspace(2)* %0, i32 0, i32 0
- %tmp1 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp0
+ %tmp0 = getelementptr [6 x <4 x i32>], [6 x <4 x i32>] addrspace(2)* %0, i32 0, i32 0
+ %tmp1 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp0
%tmp2 = shl i32 %6, 2
- %tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp1, i32 %tmp2, i32 64, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
+ %tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<4 x i32> %tmp1, i32 %tmp2, i32 64, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
%tmp4 = add i32 %6, 16
- %tmp1.4xi32 = bitcast <16 x i8> %tmp1 to <4 x i32>
+ %tmp1.4xi32 = bitcast <4 x i32> %tmp1 to <4 x i32>
call void @llvm.amdgcn.tbuffer.store.i32(i32 %tmp3, <4 x i32> %tmp1.4xi32, i32 0, i32 %tmp4, i32 %4, i32 0, i32 4, i32 4, i1 1, i1 1)
ret void
}
@@ -74,14 +74,14 @@ main_body:
; CHECK-LABEL: {{^}}soffset_no_fold:
; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x41
; CHECK: buffer_load_dword v{{[0-9+]}}, v{{[0-9+]}}, s[{{[0-9]+}}:{{[0-9]+}}], [[SOFFSET]] offen glc
-define amdgpu_gs void @soffset_no_fold([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) {
+define amdgpu_gs void @soffset_no_fold([6 x <4 x i32>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) {
main_body:
- %tmp0 = getelementptr [6 x <16 x i8>], [6 x <16 x i8>] addrspace(2)* %0, i32 0, i32 0
- %tmp1 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp0
+ %tmp0 = getelementptr [6 x <4 x i32>], [6 x <4 x i32>] addrspace(2)* %0, i32 0, i32 0
+ %tmp1 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp0
%tmp2 = shl i32 %6, 2
- %tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp1, i32 %tmp2, i32 65, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
+ %tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<4 x i32> %tmp1, i32 %tmp2, i32 65, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
%tmp4 = add i32 %6, 16
- %tmp1.4xi32 = bitcast <16 x i8> %tmp1 to <4 x i32>
+ %tmp1.4xi32 = bitcast <4 x i32> %tmp1 to <4 x i32>
call void @llvm.amdgcn.tbuffer.store.i32(i32 %tmp3, <4 x i32> %tmp1.4xi32, i32 0, i32 %tmp4, i32 %4, i32 0, i32 4, i32 4, i1 1, i1 1)
ret void
}
@@ -176,7 +176,7 @@ define amdgpu_kernel void @store_vgpr_ptr(i32 addrspace(1)* %out) #0 {
ret void
}
-declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare i32 @llvm.SI.buffer.load.dword.i32.i32(<4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare void @llvm.amdgcn.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
attributes #0 = { nounwind readonly }
diff --git a/test/CodeGen/AMDGPU/rename-independent-subregs-invalid-mac-operands.mir b/test/CodeGen/AMDGPU/rename-independent-subregs-invalid-mac-operands.mir
deleted file mode 100644
index 31024277871d..000000000000
--- a/test/CodeGen/AMDGPU/rename-independent-subregs-invalid-mac-operands.mir
+++ /dev/null
@@ -1,69 +0,0 @@
-# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass=simple-register-coalescing,rename-independent-subregs -o - %s | FileCheck -check-prefix=GCN %s
----
-
-# GCN-LABEL: name: mac_invalid_operands
-# GCN: undef %18.sub0 = V_MAC_F32_e32 undef %3, undef %9, undef %18.sub0, implicit %exec
-
-name: mac_invalid_operands
-alignment: 0
-exposesReturnsTwice: false
-legalized: false
-regBankSelected: false
-selected: false
-tracksRegLiveness: true
-registers:
- - { id: 0, class: vreg_128 }
- - { id: 1, class: vreg_128 }
- - { id: 2, class: sgpr_64 }
- - { id: 3, class: vgpr_32 }
- - { id: 4, class: vgpr_32 }
- - { id: 5, class: vgpr_32 }
- - { id: 6, class: vgpr_32 }
- - { id: 7, class: sreg_64 }
- - { id: 8, class: vgpr_32 }
- - { id: 9, class: vgpr_32 }
- - { id: 10, class: vreg_64 }
- - { id: 11, class: vreg_64 }
- - { id: 12, class: vreg_128 }
- - { id: 13, class: vreg_128 }
- - { id: 14, class: vgpr_32 }
- - { id: 15, class: vreg_64 }
- - { id: 16, class: vgpr_32 }
- - { id: 17, class: vreg_128 }
-body: |
- bb.0:
- successors: %bb.2, %bb.1
-
- %7 = V_CMP_NEQ_F32_e64 0, 0, 0, undef %3, 0, 0, implicit %exec
- %vcc = COPY killed %7
- S_CBRANCH_VCCZ %bb.2, implicit killed %vcc
-
- bb.1:
- successors: %bb.3
-
- %4 = V_ADD_F32_e32 undef %6, undef %5, implicit %exec
- undef %12.sub0 = COPY killed %4
- %17 = COPY killed %12
- S_BRANCH %bb.3
-
- bb.2:
- successors: %bb.3
-
- %8 = V_MAC_F32_e32 undef %3, undef %9, undef %8, implicit %exec
- undef %13.sub0 = COPY %8
- %13.sub1 = COPY %8
- %13.sub2 = COPY killed %8
- %0 = COPY killed %13
- %17 = COPY killed %0
-
- bb.3:
- %1 = COPY killed %17
- FLAT_STORE_DWORD undef %10, %1.sub2, 0, 0, 0, implicit %exec, implicit %flat_scr
- %14 = COPY %1.sub1
- %16 = COPY killed %1.sub0
- undef %15.sub0 = COPY killed %16
- %15.sub1 = COPY killed %14
- FLAT_STORE_DWORDX2 undef %11, killed %15, 0, 0, 0, implicit %exec, implicit %flat_scr
- S_ENDPGM
-
-...
diff --git a/test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir b/test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir
new file mode 100644
index 000000000000..770bfaddb23e
--- /dev/null
+++ b/test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir
@@ -0,0 +1,155 @@
+# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass=simple-register-coalescing,rename-independent-subregs -o - %s | FileCheck -check-prefix=GCN %s
+---
+
+# GCN-LABEL: name: mac_invalid_operands
+# GCN: undef %18.sub0 = V_MAC_F32_e32 undef %3, undef %9, undef %18.sub0, implicit %exec
+
+name: mac_invalid_operands
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vreg_128 }
+ - { id: 1, class: vreg_128 }
+ - { id: 2, class: sgpr_64 }
+ - { id: 3, class: vgpr_32 }
+ - { id: 4, class: vgpr_32 }
+ - { id: 5, class: vgpr_32 }
+ - { id: 6, class: vgpr_32 }
+ - { id: 7, class: sreg_64 }
+ - { id: 8, class: vgpr_32 }
+ - { id: 9, class: vgpr_32 }
+ - { id: 10, class: vreg_64 }
+ - { id: 11, class: vreg_64 }
+ - { id: 12, class: vreg_128 }
+ - { id: 13, class: vreg_128 }
+ - { id: 14, class: vgpr_32 }
+ - { id: 15, class: vreg_64 }
+ - { id: 16, class: vgpr_32 }
+ - { id: 17, class: vreg_128 }
+body: |
+ bb.0:
+ successors: %bb.2, %bb.1
+
+ %7 = V_CMP_NEQ_F32_e64 0, 0, 0, undef %3, 0, 0, implicit %exec
+ %vcc = COPY killed %7
+ S_CBRANCH_VCCZ %bb.2, implicit killed %vcc
+
+ bb.1:
+ successors: %bb.3
+
+ %4 = V_ADD_F32_e32 undef %6, undef %5, implicit %exec
+ undef %12.sub0 = COPY killed %4
+ %17 = COPY killed %12
+ S_BRANCH %bb.3
+
+ bb.2:
+ successors: %bb.3
+
+ %8 = V_MAC_F32_e32 undef %3, undef %9, undef %8, implicit %exec
+ undef %13.sub0 = COPY %8
+ %13.sub1 = COPY %8
+ %13.sub2 = COPY killed %8
+ %0 = COPY killed %13
+ %17 = COPY killed %0
+
+ bb.3:
+ %1 = COPY killed %17
+ FLAT_STORE_DWORD undef %10, %1.sub2, 0, 0, 0, implicit %exec, implicit %flat_scr
+ %14 = COPY %1.sub1
+ %16 = COPY killed %1.sub0
+ undef %15.sub0 = COPY killed %16
+ %15.sub1 = COPY killed %14
+ FLAT_STORE_DWORDX2 undef %11, killed %15, 0, 0, 0, implicit %exec, implicit %flat_scr
+ S_ENDPGM
+
+...
+---
+# Make sure other uses after the mac are properly handled and not
+# left unreplaced due to iterator issues from substituteRegister.
+
+# GCN-LABEL: name: vreg_does_not_dominate
+
+# GCN: undef %8.sub1 = V_MAC_F32_e32 undef %2, undef %1, undef %8.sub1, implicit %exec
+# GCN: undef %7.sub0 = V_MOV_B32_e32 0, implicit %exec
+# GCN: undef %9.sub2 = COPY %7.sub0
+
+# GCN: undef %6.sub3 = V_ADD_F32_e32 undef %3, undef %3, implicit %exec
+# GCN: undef %7.sub0 = V_ADD_F32_e64 0, 0, 0, 0, 0, 0, implicit %exec
+# GCN: %8.sub1 = V_ADD_F32_e32 %8.sub1, %8.sub1, implicit %exec
+
+# GCN: BUFFER_STORE_DWORD_OFFEN %6.sub3, %0,
+# GCN: BUFFER_STORE_DWORD_OFFEN %9.sub2, %0,
+# GCN: BUFFER_STORE_DWORD_OFFEN %8.sub1, %0,
+# GCN: BUFFER_STORE_DWORD_OFFEN %7.sub0, %0,
+name: vreg_does_not_dominate
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vgpr_32, preferred-register: '' }
+ - { id: 1, class: vgpr_32, preferred-register: '' }
+ - { id: 2, class: vgpr_32, preferred-register: '' }
+ - { id: 3, class: vgpr_32, preferred-register: '' }
+ - { id: 4, class: vgpr_32, preferred-register: '' }
+ - { id: 5, class: sreg_64, preferred-register: '' }
+ - { id: 6, class: vreg_128, preferred-register: '' }
+liveins:
+ - { reg: '%vgpr0', virtual-reg: '%0' }
+ - { reg: '%sgpr30_sgpr31', virtual-reg: '%5' }
+body: |
+ bb.0:
+ successors: %bb.2, %bb.1
+ liveins: %vgpr0, %sgpr30_sgpr31, %sgpr5
+
+ %5 = COPY %sgpr30_sgpr31
+ %0 = COPY %vgpr0
+ undef %6.sub1 = V_MAC_F32_e32 undef %2, undef %1, undef %6.sub1, implicit %exec
+ %6.sub0 = V_MOV_B32_e32 0, implicit %exec
+ %6.sub2 = COPY %6.sub0
+ S_CBRANCH_VCCNZ %bb.2, implicit undef %vcc
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2
+
+ %6.sub3 = V_ADD_F32_e32 undef %3, undef %3, implicit %exec
+ %6.sub0 = V_ADD_F32_e64 0, 0, 0, 0, 0, 0, implicit %exec
+ %6.sub1 = V_ADD_F32_e32 %6.sub1, %6.sub1, implicit %exec
+ %6.sub2 = COPY %6.sub0
+
+ bb.2:
+ BUFFER_STORE_DWORD_OFFEN %6.sub3, %0, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 12, 0, 0, 0, implicit %exec
+ BUFFER_STORE_DWORD_OFFEN %6.sub2, %0, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 8, 0, 0, 0, implicit %exec
+ BUFFER_STORE_DWORD_OFFEN %6.sub1, %0, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 4, 0, 0, 0, implicit %exec
+ BUFFER_STORE_DWORD_OFFEN %6.sub0, %0, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
+ %sgpr30_sgpr31 = COPY %5
+ %sgpr5 = COPY %sgpr5
+ S_SETPC_B64_return %sgpr30_sgpr31, implicit %sgpr5
+
+...
+
+# GCN-LABEL: name: inf_loop_tied_operand
+# GCN: bb.0:
+# GCN-NEXT: undef %2.sub0 = V_MAC_F32_e32 1073741824, undef %0, undef %2.sub0, implicit %exec
+# GCN-NEXT: dead undef %3.sub1 = COPY %2.sub0
+
+name: inf_loop_tied_operand
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vgpr_32, preferred-register: '' }
+ - { id: 1, class: vgpr_32, preferred-register: '' }
+ - { id: 2, class: vreg_128, preferred-register: '' }
+body: |
+ bb.0:
+ %1 = V_MAC_F32_e32 1073741824, undef %0, undef %1, implicit %exec
+ undef %2.sub0 = COPY %1
+ %2.sub1 = COPY %1
+
+...
diff --git a/test/CodeGen/AMDGPU/ret_jump.ll b/test/CodeGen/AMDGPU/ret_jump.ll
index e7a05d94cdc4..1acae60f3057 100644
--- a/test/CodeGen/AMDGPU/ret_jump.ll
+++ b/test/CodeGen/AMDGPU/ret_jump.ll
@@ -23,7 +23,7 @@
; GCN-NEXT: [[RET_BB]]:
; GCN-NEXT: ; return
; GCN-NEXT: .Lfunc_end0
-define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @uniform_br_trivial_ret_divergent_br_trivial_unreachable([9 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <8 x i32>] addrspace(2)* byval %arg2, i32 addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, i32 inreg %arg17, i32 %arg18, i32 %arg19, float %arg20, i32 %arg21) #0 {
+define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @uniform_br_trivial_ret_divergent_br_trivial_unreachable([9 x <4 x i32>] addrspace(2)* byval %arg, [17 x <4 x i32>] addrspace(2)* byval %arg1, [17 x <8 x i32>] addrspace(2)* byval %arg2, i32 addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, i32 inreg %arg17, i32 %arg18, i32 %arg19, float %arg20, i32 %arg21) #0 {
entry:
%i.i = extractelement <2 x i32> %arg7, i32 0
%j.i = extractelement <2 x i32> %arg7, i32 1
@@ -75,7 +75,7 @@ ret.bb: ; preds = %else, %main_body
; GCN-NEXT: s_waitcnt
; GCN-NEXT: ; return
; GCN-NEXT: .Lfunc_end
-define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @uniform_br_nontrivial_ret_divergent_br_nontrivial_unreachable([9 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <8 x i32>] addrspace(2)* byval %arg2, i32 addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, i32 inreg %arg18, i32 %arg19, float %arg20, i32 %arg21) #0 {
+define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @uniform_br_nontrivial_ret_divergent_br_nontrivial_unreachable([9 x <4 x i32>] addrspace(2)* byval %arg, [17 x <4 x i32>] addrspace(2)* byval %arg1, [17 x <8 x i32>] addrspace(2)* byval %arg2, i32 addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, i32 inreg %arg18, i32 %arg19, float %arg20, i32 %arg21) #0 {
main_body:
%i.i = extractelement <2 x i32> %arg7, i32 0
%j.i = extractelement <2 x i32> %arg7, i32 1
@@ -119,9 +119,6 @@ declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #1
; Function Attrs: nounwind readnone
-declare float @llvm.SI.load.const(<16 x i8>, i32) #1
-
-; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #1
; Function Attrs: nounwind readnone
diff --git a/test/CodeGen/AMDGPU/scheduler-subrange-crash.ll b/test/CodeGen/AMDGPU/scheduler-subrange-crash.ll
index 47e32724d9ca..5edc2c5c9b71 100644
--- a/test/CodeGen/AMDGPU/scheduler-subrange-crash.ll
+++ b/test/CodeGen/AMDGPU/scheduler-subrange-crash.ll
@@ -15,16 +15,16 @@ target triple = "amdgcn--"
define amdgpu_gs void @main(i32 inreg %arg) #0 {
main_body:
- %tmp = call float @llvm.SI.load.const(<16 x i8> undef, i32 20)
- %tmp1 = call float @llvm.SI.load.const(<16 x i8> undef, i32 24)
- %tmp2 = call float @llvm.SI.load.const(<16 x i8> undef, i32 48)
+ %tmp = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 20)
+ %tmp1 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 24)
+ %tmp2 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 48)
%array_vector3 = insertelement <4 x float> zeroinitializer, float %tmp2, i32 3
%array_vector5 = insertelement <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, float %tmp, i32 1
%array_vector6 = insertelement <4 x float> %array_vector5, float undef, i32 2
%array_vector9 = insertelement <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, float %tmp1, i32 1
%array_vector10 = insertelement <4 x float> %array_vector9, float 0.000000e+00, i32 2
%array_vector11 = insertelement <4 x float> %array_vector10, float undef, i32 3
- %tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> undef, i32 undef, i32 4864, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
+ %tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<4 x i32> undef, i32 undef, i32 4864, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
call void @llvm.amdgcn.tbuffer.store.i32(i32 %tmp3, <4 x i32> undef, i32 0, i32 0, i32 %arg, i32 36, i32 4, i32 4, i1 1, i1 1)
%bc = bitcast <4 x float> %array_vector3 to <4 x i32>
%tmp4 = extractelement <4 x i32> %bc, i32 undef
@@ -45,8 +45,8 @@ main_body:
ret void
}
-declare float @llvm.SI.load.const(<16 x i8>, i32) #1
-declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #2
+declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
+declare i32 @llvm.SI.buffer.load.dword.i32.i32(<4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #2
declare void @llvm.amdgcn.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #3
attributes #0 = { nounwind "target-cpu"="tonga" }
diff --git a/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir b/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
new file mode 100644
index 000000000000..4f5c582f8b58
--- /dev/null
+++ b/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
@@ -0,0 +1,446 @@
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=si-peephole-sdwa -verify-machineinstrs -o - %s | FileCheck -check-prefix=VI -check-prefix=GFX89 -check-prefix=GCN %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=si-peephole-sdwa -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 -check-prefix=GFX89 -check-prefix=GCN %s
+
+# GFX89-LABEL: {{^}}name: vop1_instructions
+
+# GFX89: %{{[0-9]+}} = V_MOV_B32_sdwa 0, %{{[0-9]+}}, 0, 5, 0, 5, implicit %exec
+# GFX89: %{{[0-9]+}} = V_FRACT_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit %exec
+# GFX89: %{{[0-9]+}} = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit %exec
+# GFX89: %{{[0-9]+}} = V_CVT_U32_F32_sdwa 0, %{{[0-9]+}}, 0, 5, 0, 5, implicit %exec
+# GFX89: %{{[0-9]+}} = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit %exec
+
+
+# GFX89: %{{[0-9]+}} = V_MOV_B32_sdwa 0, %{{[0-9]+}}, 0, 6, 0, 5, implicit %exec
+# GFX89: %{{[0-9]+}} = V_FRACT_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit %exec
+# GFX89: %{{[0-9]+}} = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit %exec
+# GFX89: %{{[0-9]+}} = V_CVT_U32_F32_sdwa 0, %{{[0-9]+}}, 0, 5, 0, 5, implicit %exec
+# GFX89: %{{[0-9]+}} = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit %exec
+
+
+# VI: %{{[0-9]+}} = V_FRACT_F32_sdwa 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit %exec
+# VI: %{{[0-9]+}} = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 1, 0, 5, 0, 5, implicit %exec
+# VI: %{{[0-9]+}} = V_CVT_U32_F32_sdwa 1, %{{[0-9]+}}, 0, 5, 0, 5, implicit %exec
+# VI: %{{[0-9]+}} = V_CVT_F32_I32_e64 %{{[0-9]+}}, 0, 1, implicit %exec
+
+# GFX9: %{{[0-9]+}} = V_FRACT_F32_sdwa 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit %exec
+# GFX9: %{{[0-9]+}} = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 1, 0, 5, 0, 5, implicit %exec
+# GFX9: %{{[0-9]+}} = V_CVT_U32_F32_sdwa 1, %{{[0-9]+}}, 0, 5, 0, 5, implicit %exec
+# GFX9: %{{[0-9]+}} = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 1, 5, 0, 5, implicit %exec
+
+
+---
+name: vop1_instructions
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vreg_64 }
+ - { id: 1, class: vreg_64 }
+ - { id: 2, class: sreg_64 }
+ - { id: 3, class: vgpr_32 }
+ - { id: 4, class: sreg_32_xm0 }
+ - { id: 5, class: sreg_32_xm0 }
+ - { id: 6, class: sreg_32_xm0 }
+ - { id: 7, class: sreg_32_xm0 }
+ - { id: 8, class: sreg_32 }
+ - { id: 9, class: vgpr_32 }
+ - { id: 10, class: vgpr_32 }
+ - { id: 11, class: vgpr_32 }
+ - { id: 12, class: vgpr_32 }
+ - { id: 13, class: vgpr_32 }
+ - { id: 14, class: vgpr_32 }
+ - { id: 15, class: vgpr_32 }
+ - { id: 16, class: vgpr_32 }
+ - { id: 17, class: vgpr_32 }
+ - { id: 18, class: vgpr_32 }
+ - { id: 19, class: vgpr_32 }
+ - { id: 20, class: vgpr_32 }
+ - { id: 21, class: vgpr_32 }
+ - { id: 22, class: vgpr_32 }
+ - { id: 23, class: vgpr_32 }
+ - { id: 24, class: vgpr_32 }
+ - { id: 25, class: vgpr_32 }
+ - { id: 26, class: vgpr_32 }
+ - { id: 27, class: vgpr_32 }
+ - { id: 28, class: vgpr_32 }
+ - { id: 29, class: vgpr_32 }
+ - { id: 30, class: vgpr_32 }
+ - { id: 31, class: vgpr_32 }
+ - { id: 32, class: vgpr_32 }
+ - { id: 33, class: vgpr_32 }
+ - { id: 34, class: vgpr_32 }
+ - { id: 35, class: vgpr_32 }
+ - { id: 36, class: vgpr_32 }
+ - { id: 37, class: vgpr_32 }
+ - { id: 38, class: vgpr_32 }
+ - { id: 39, class: vgpr_32 }
+ - { id: 40, class: vgpr_32 }
+ - { id: 41, class: vgpr_32 }
+ - { id: 42, class: vgpr_32 }
+ - { id: 43, class: vgpr_32 }
+ - { id: 44, class: vgpr_32 }
+ - { id: 45, class: vgpr_32 }
+ - { id: 46, class: vgpr_32 }
+ - { id: 47, class: vgpr_32 }
+ - { id: 48, class: vgpr_32 }
+ - { id: 100, class: vgpr_32 }
+body: |
+ bb.0:
+ liveins: %vgpr0_vgpr1, %vgpr2_vgpr3, %sgpr30_sgpr31
+
+ %2 = COPY %sgpr30_sgpr31
+ %1 = COPY %vgpr2_vgpr3
+ %0 = COPY %vgpr0_vgpr1
+ %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4)
+
+ %5 = S_MOV_B32 65535
+ %6 = S_MOV_B32 65535
+
+ %10 = V_LSHRREV_B32_e64 16, %3, implicit %exec
+ %11 = V_MOV_B32_e32 %10, implicit %exec
+ %12 = V_LSHLREV_B32_e64 16, %11, implicit %exec
+ %14 = V_FRACT_F32_e32 123, implicit %exec
+ %15 = V_LSHLREV_B32_e64 16, %14, implicit %exec
+ %16 = V_LSHRREV_B32_e64 16, %15, implicit %exec
+ %17 = V_SIN_F32_e32 %16, implicit %exec
+ %18 = V_LSHLREV_B32_e64 16, %17, implicit %exec
+ %19 = V_LSHRREV_B32_e64 16, %18, implicit %exec
+ %20 = V_CVT_U32_F32_e32 %19, implicit %exec
+ %21 = V_LSHLREV_B32_e64 16, %20, implicit %exec
+ %23 = V_CVT_F32_I32_e32 123, implicit %exec
+ %24 = V_LSHLREV_B32_e64 16, %23, implicit %exec
+
+ %25 = V_LSHRREV_B32_e64 16, %3, implicit %exec
+ %26 = V_MOV_B32_e64 %25, implicit %exec
+ %26 = V_LSHLREV_B32_e64 16, %26, implicit %exec
+ %27 = V_FRACT_F32_e64 0, %6, 0, 0, implicit %exec
+ %28 = V_LSHLREV_B32_e64 16, %27, implicit %exec
+ %29 = V_LSHRREV_B32_e64 16, %28, implicit %exec
+ %30 = V_SIN_F32_e64 0, %29, 0, 0, implicit %exec
+ %31 = V_LSHLREV_B32_e64 16, %30, implicit %exec
+ %32 = V_LSHRREV_B32_e64 16, %31, implicit %exec
+ %33 = V_CVT_U32_F32_e64 0, %32, 0, 0, implicit %exec
+ %34 = V_LSHLREV_B32_e64 16, %33, implicit %exec
+ %35 = V_CVT_F32_I32_e64 %6, 0, 0, implicit %exec
+ %36 = V_LSHLREV_B32_e64 16, %35, implicit %exec
+
+
+ %37 = V_LSHRREV_B32_e64 16, %36, implicit %exec
+ %38 = V_FRACT_F32_e64 1, %37, 0, 0, implicit %exec
+ %39 = V_LSHLREV_B32_e64 16, %38, implicit %exec
+ %40 = V_LSHRREV_B32_e64 16, %39, implicit %exec
+ %41 = V_SIN_F32_e64 0, %40, 1, 0, implicit %exec
+ %42 = V_LSHLREV_B32_e64 16, %41, implicit %exec
+ %43 = V_LSHRREV_B32_e64 16, %42, implicit %exec
+ %44 = V_CVT_U32_F32_e64 1, %43, 0, 0, implicit %exec
+ %45 = V_LSHLREV_B32_e64 16, %44, implicit %exec
+ %46 = V_LSHRREV_B32_e64 16, %45, implicit %exec
+ %47 = V_CVT_F32_I32_e64 %46, 0, 1, implicit %exec
+ %48 = V_LSHLREV_B32_e64 16, %47, implicit %exec
+
+
+ %100 = V_MOV_B32_e32 %48, implicit %exec
+
+ FLAT_STORE_DWORD %0, %100, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4)
+ %sgpr30_sgpr31 = COPY %2
+ S_SETPC_B64_return %sgpr30_sgpr31
+
+...
+---
+# GCN-LABEL: {{^}}name: vop2_instructions
+
+
+# VI: %{{[0-9]+}} = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 0, 6, 5, implicit %exec
+# VI: %{{[0-9]+}} = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec
+# VI: %{{[0-9]+}} = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit %exec
+# VI: %{{[0-9]+}} = V_MAC_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, %{{[0-9]+}}, 0, 0, 6, 0, 6, 1, implicit %exec
+# VI: %{{[0-9]+}} = V_MAC_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit %exec
+
+# GFX9: %{{[0-9]+}} = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 0, 6, 5, implicit %exec
+# GFX9: %{{[0-9]+}} = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec
+# GFX9: %{{[0-9]+}} = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit %exec
+# GFX9: %{{[0-9]+}} = V_MAC_F32_e32 %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, implicit %exec
+# GFX9: %{{[0-9]+}} = V_MAC_F16_e32 %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, implicit %exec
+
+
+# VI: %{{[0-9]+}} = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 5, 0, 6, 5, implicit %exec
+# VI: %{{[0-9]+}} = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec
+# VI: %{{[0-9]+}} = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit %exec
+# VI: %{{[0-9]+}} = V_MAC_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, %{{[0-9]+}}, 0, 0, 6, 0, 6, 1, implicit %exec
+# VI: %{{[0-9]+}} = V_MAC_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit %exec
+
+# GFX9: %{{[0-9]+}} = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 5, 0, 6, 5, implicit %exec
+# GFX9: %{{[0-9]+}} = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec
+# GFX9: %{{[0-9]+}} = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit %exec
+# GFX9: %{{[0-9]+}} = V_MAC_F32_e64 0, 23, 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, implicit %exec
+# GFX9: %{{[0-9]+}} = V_MAC_F16_e64 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, implicit %exec
+
+
+# VI: %{{[0-9]+}} = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec
+# VI: %{{[0-9]+}} = V_SUB_F16_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit %exec
+# VI: %{{[0-9]+}} = V_MAC_F32_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, %{{[0-9]+}}, 1, 0, 6, 0, 6, 1, implicit %exec
+# VI: %{{[0-9]+}} = V_MAC_F16_e64 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 2, implicit %exec
+
+# GFX9: %{{[0-9]+}} = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec
+# GFX9: %{{[0-9]+}} = V_SUB_F16_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit %exec
+# GFX9: %{{[0-9]+}} = V_MAC_F32_e64 1, 23, 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, 0, implicit %exec
+# GFX9: %{{[0-9]+}} = V_MAC_F16_e64 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 2, implicit %exec
+
+name: vop2_instructions
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vreg_64 }
+ - { id: 1, class: vreg_64 }
+ - { id: 2, class: sreg_64 }
+ - { id: 3, class: vgpr_32 }
+ - { id: 4, class: sreg_32_xm0 }
+ - { id: 5, class: sreg_32_xm0 }
+ - { id: 6, class: sreg_32_xm0 }
+ - { id: 7, class: sreg_32_xm0 }
+ - { id: 8, class: sreg_32 }
+ - { id: 9, class: vgpr_32 }
+ - { id: 10, class: vgpr_32 }
+ - { id: 11, class: vgpr_32 }
+ - { id: 12, class: vgpr_32 }
+ - { id: 13, class: vgpr_32 }
+ - { id: 14, class: vgpr_32 }
+ - { id: 15, class: vgpr_32 }
+ - { id: 16, class: vgpr_32 }
+ - { id: 17, class: vgpr_32 }
+ - { id: 18, class: vgpr_32 }
+ - { id: 19, class: vgpr_32 }
+ - { id: 20, class: vgpr_32 }
+ - { id: 21, class: vgpr_32 }
+ - { id: 22, class: vgpr_32 }
+ - { id: 23, class: vgpr_32 }
+ - { id: 24, class: vgpr_32 }
+ - { id: 25, class: vgpr_32 }
+ - { id: 26, class: vgpr_32 }
+ - { id: 27, class: vgpr_32 }
+ - { id: 28, class: vgpr_32 }
+ - { id: 29, class: vgpr_32 }
+ - { id: 30, class: vgpr_32 }
+ - { id: 31, class: vgpr_32 }
+ - { id: 32, class: vgpr_32 }
+ - { id: 33, class: vgpr_32 }
+ - { id: 34, class: vgpr_32 }
+ - { id: 35, class: vgpr_32 }
+ - { id: 36, class: vgpr_32 }
+ - { id: 37, class: vgpr_32 }
+ - { id: 38, class: vgpr_32 }
+ - { id: 39, class: vgpr_32 }
+ - { id: 40, class: vgpr_32 }
+ - { id: 41, class: vgpr_32 }
+ - { id: 42, class: vgpr_32 }
+ - { id: 43, class: vgpr_32 }
+ - { id: 44, class: vgpr_32 }
+ - { id: 45, class: vgpr_32 }
+ - { id: 46, class: vgpr_32 }
+ - { id: 47, class: vgpr_32 }
+ - { id: 48, class: vgpr_32 }
+ - { id: 49, class: vgpr_32 }
+ - { id: 50, class: vgpr_32 }
+ - { id: 51, class: vgpr_32 }
+ - { id: 52, class: vgpr_32 }
+ - { id: 53, class: vgpr_32 }
+ - { id: 54, class: vgpr_32 }
+ - { id: 55, class: vgpr_32 }
+ - { id: 56, class: vgpr_32 }
+ - { id: 57, class: vgpr_32 }
+ - { id: 58, class: vgpr_32 }
+ - { id: 59, class: vgpr_32 }
+ - { id: 60, class: vgpr_32 }
+ - { id: 100, class: vgpr_32 }
+body: |
+ bb.0:
+ liveins: %vgpr0_vgpr1, %vgpr2_vgpr3, %sgpr30_sgpr31
+
+ %2 = COPY %sgpr30_sgpr31
+ %1 = COPY %vgpr2_vgpr3
+ %0 = COPY %vgpr0_vgpr1
+ %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4)
+
+ %5 = S_MOV_B32 65535
+ %6 = S_MOV_B32 65535
+
+ %11 = V_LSHRREV_B32_e64 16, %3, implicit %exec
+ %12 = V_AND_B32_e32 %6, %11, implicit %exec
+ %13 = V_LSHLREV_B32_e64 16, %12, implicit %exec
+ %14 = V_LSHRREV_B32_e64 16, %13, implicit %exec
+ %15 = V_BFE_U32 %13, 8, 8, implicit %exec
+ %16 = V_ADD_F32_e32 %14, %15, implicit %exec
+ %17 = V_LSHLREV_B32_e64 16, %16, implicit %exec
+ %18 = V_LSHRREV_B32_e64 16, %17, implicit %exec
+ %19 = V_BFE_U32 %17, 8, 8, implicit %exec
+ %20 = V_SUB_F16_e32 %18, %19, implicit %exec
+ %21 = V_LSHLREV_B32_e64 16, %20, implicit %exec
+ %22 = V_BFE_U32 %20, 8, 8, implicit %exec
+ %23 = V_MAC_F32_e32 %21, %22, %22, implicit %exec
+ %24 = V_LSHLREV_B32_e64 16, %23, implicit %exec
+ %25 = V_LSHRREV_B32_e64 16, %24, implicit %exec
+ %26 = V_BFE_U32 %24, 8, 8, implicit %exec
+ %27 = V_MAC_F16_e32 %25, %26, %26, implicit %exec
+ %28 = V_LSHLREV_B32_e64 16, %27, implicit %exec
+
+ %29 = V_LSHRREV_B32_e64 16, %28, implicit %exec
+ %30 = V_AND_B32_e64 23, %29, implicit %exec
+ %31 = V_LSHLREV_B32_e64 16, %30, implicit %exec
+ %32 = V_LSHRREV_B32_e64 16, %31, implicit %exec
+ %33 = V_BFE_U32 %31, 8, 8, implicit %exec
+ %34 = V_ADD_F32_e64 0, %32, 0, %33, 0, 0, implicit %exec
+ %35 = V_LSHLREV_B32_e64 16, %34, implicit %exec
+ %37 = V_BFE_U32 %35, 8, 8, implicit %exec
+ %38 = V_SUB_F16_e64 0, 23, 0, %37, 0, 0, implicit %exec
+ %39 = V_LSHLREV_B32_e64 16, %38, implicit %exec
+ %40 = V_BFE_U32 %39, 8, 8, implicit %exec
+ %41 = V_MAC_F32_e64 0, 23, 0, %40, 0, %40, 0, 0, implicit %exec
+ %42 = V_LSHLREV_B32_e64 16, %41, implicit %exec
+ %43 = V_LSHRREV_B32_e64 16, %42, implicit %exec
+ %44 = V_BFE_U32 %42, 8, 8, implicit %exec
+ %45 = V_MAC_F16_e64 0, %43, 0, %44, 0, %44, 0, 0, implicit %exec
+ %46 = V_LSHLREV_B32_e64 16, %45, implicit %exec
+
+ %47 = V_LSHRREV_B32_e64 16, %46, implicit %exec
+ %48 = V_BFE_U32 %46, 8, 8, implicit %exec
+ %49 = V_ADD_F32_e64 0, %47, 1, %48, 0, 0, implicit %exec
+ %50 = V_LSHLREV_B32_e64 16, %49, implicit %exec
+ %51 = V_BFE_U32 %50, 8, 8, implicit %exec
+ %52 = V_SUB_F16_e64 1, 23, 1, %51, 0, 0, implicit %exec
+ %53 = V_LSHLREV_B32_e64 16, %52, implicit %exec
+ %54 = V_BFE_U32 %53, 8, 8, implicit %exec
+ %55 = V_MAC_F32_e64 1, 23, 1, %54, 1, %54, 1, 0, implicit %exec
+ %56 = V_LSHLREV_B32_e64 16, %55, implicit %exec
+ %57 = V_LSHRREV_B32_e64 16, %56, implicit %exec
+ %58 = V_BFE_U32 %56, 8, 8, implicit %exec
+ %59 = V_MAC_F16_e64 1, %57, 1, %58, 1, %58, 0, 2, implicit %exec
+ %60 = V_LSHLREV_B32_e64 16, %59, implicit %exec
+
+ %100 = V_MOV_B32_e32 %60, implicit %exec
+
+ FLAT_STORE_DWORD %0, %100, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4)
+ %sgpr30_sgpr31 = COPY %2
+ S_SETPC_B64_return %sgpr30_sgpr31
+
+...
+---
+
+# GCN-LABEL: {{^}}name: vopc_instructions
+
+# GFX89: %{{[0-9]+}} = V_MOV_B32_e32 123, implicit %exec
+# GFX89: %vcc = V_CMP_EQ_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit %exec
+# GFX89: %vcc = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
+# GFX89: %vcc = V_CMP_LT_I32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit %exec
+# GFX89: %vcc = V_CMPX_EQ_I32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
+
+
+# VI: %vcc = V_CMP_EQ_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit %exec
+# VI: %{{[0-9]+}} = V_CMPX_GT_F32_e64 0, 23, 0, killed %{{[0-9]+}}, 0, 0, implicit-def %exec, implicit %exec
+# VI: %vcc = V_CMP_LT_I32_sdwa 0, %{{[0-9]+}}, 0, %3, 0, 6, 4, implicit-def %vcc, implicit %exec
+# VI: %{{[0-9]+}} = V_CMPX_EQ_I32_e64 23, killed %{{[0-9]+}}, implicit-def %exec, implicit %exec
+
+# GFX9: %vcc = V_CMP_EQ_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit %exec
+# GFX9: %{{[0-9]+}} = V_MOV_B32_e32 23, implicit %exec
+# GFX9: %{{[0-9]+}} = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
+# GFX9: %vcc = V_CMP_LT_I32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit %exec
+# GFX9: %{{[0-9]+}} = V_MOV_B32_e32 23, implicit %exec
+# GFX9: %{{[0-9]+}} = V_CMPX_EQ_I32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
+
+
+# VI: %vcc = V_CMP_EQ_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 1, 6, 4, implicit-def %vcc, implicit %exec
+# VI: %vcc = V_CMPX_GT_F32_e64 0, 23, 0, killed %{{[0-9]+}}, 0, 2, implicit-def %exec, implicit %exec
+# VI: %vcc = V_CMP_EQ_F32_e64 0, %{{[0-9]+}}, 0, killed %{{[0-9]+}}, 1, 2, implicit %exec
+# VI: %vcc = V_CMPX_GT_F32_sdwa 1, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
+# VI: %vcc = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
+# VI: %vcc = V_CMPX_GT_F32_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
+# VI: %vcc = V_CMPX_GT_F32_e64 1, 23, 1, killed %{{[0-9]+}}, 1, 2, implicit-def %exec, implicit %exec
+
+# GFX9: %vcc = V_CMP_EQ_F32_e64 0, %{{[0-9]+}}, 0, killed %{{[0-9]+}}, 1, 0, implicit %exec
+# GFX9: %vcc = V_CMPX_GT_F32_e64 0, 23, 0, killed %{{[0-9]+}}, 0, 2, implicit-def %exec, implicit %exec
+# GFX9: %vcc = V_CMP_EQ_F32_e64 0, %{{[0-9]+}}, 0, killed %{{[0-9]+}}, 1, 2, implicit %exec
+# GFX9: %vcc = V_CMPX_GT_F32_sdwa 1, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
+# GFX9: %vcc = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
+# GFX9: %vcc = V_CMPX_GT_F32_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
+# GFX9: %vcc = V_CMPX_GT_F32_e64 1, 23, 1, killed %{{[0-9]+}}, 1, 2, implicit-def %exec, implicit %exec
+
+
+name: vopc_instructions
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vreg_64 }
+ - { id: 1, class: vreg_64 }
+ - { id: 2, class: sreg_64 }
+ - { id: 3, class: vgpr_32 }
+ - { id: 4, class: sreg_32_xm0 }
+ - { id: 5, class: sreg_32_xm0 }
+ - { id: 6, class: sreg_32_xm0 }
+ - { id: 7, class: sreg_32_xm0 }
+ - { id: 8, class: sreg_32 }
+ - { id: 9, class: vgpr_32 }
+ - { id: 10, class: vgpr_32 }
+ - { id: 11, class: vgpr_32 }
+ - { id: 12, class: vgpr_32 }
+ - { id: 13, class: vgpr_32 }
+ - { id: 14, class: vgpr_32 }
+ - { id: 15, class: vgpr_32 }
+ - { id: 16, class: vgpr_32 }
+ - { id: 17, class: vgpr_32 }
+ - { id: 18, class: sreg_64 }
+ - { id: 19, class: sreg_64 }
+ - { id: 20, class: vgpr_32 }
+ - { id: 21, class: vgpr_32 }
+ - { id: 22, class: vgpr_32 }
+ - { id: 23, class: vgpr_32 }
+ - { id: 24, class: vgpr_32 }
+ - { id: 25, class: vgpr_32 }
+ - { id: 26, class: vgpr_32 }
+ - { id: 27, class: vgpr_32 }
+ - { id: 100, class: vgpr_32 }
+body: |
+ bb.0:
+ liveins: %vgpr0_vgpr1, %vgpr2_vgpr3, %sgpr30_sgpr31
+
+ %2 = COPY %sgpr30_sgpr31
+ %1 = COPY %vgpr2_vgpr3
+ %0 = COPY %vgpr0_vgpr1
+ %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4)
+
+ %5 = S_MOV_B32 65535
+ %6 = S_MOV_B32 65535
+
+ %10 = V_AND_B32_e64 %5, %3, implicit %exec
+ V_CMP_EQ_F32_e32 123, killed %10, implicit-def %vcc, implicit %exec
+ %11 = V_AND_B32_e64 %5, %3, implicit %exec
+ V_CMPX_GT_F32_e32 123, killed %11, implicit-def %vcc, implicit-def %exec, implicit %exec
+ %12 = V_AND_B32_e64 %5, %3, implicit %exec
+ V_CMP_LT_I32_e32 123, killed %12, implicit-def %vcc, implicit %exec
+ %13 = V_AND_B32_e64 %5, %3, implicit %exec
+ V_CMPX_EQ_I32_e32 123, killed %13, implicit-def %vcc, implicit-def %exec, implicit %exec
+
+ %14 = V_AND_B32_e64 %5, %3, implicit %exec
+ %vcc = V_CMP_EQ_F32_e64 0, %6, 0, killed %14, 0, 0, implicit %exec
+ %15 = V_AND_B32_e64 %5, %3, implicit %exec
+ %18 = V_CMPX_GT_F32_e64 0, 23, 0, killed %15, 0, 0, implicit-def %exec, implicit %exec
+ %16 = V_AND_B32_e64 %5, %3, implicit %exec
+ %vcc = V_CMP_LT_I32_e64 %6, killed %16, implicit %exec
+ %17 = V_AND_B32_e64 %5, %3, implicit %exec
+ %19 = V_CMPX_EQ_I32_e64 23, killed %17, implicit-def %exec, implicit %exec
+
+ %20 = V_AND_B32_e64 %5, %3, implicit %exec
+ %vcc = V_CMP_EQ_F32_e64 0, %6, 0, killed %20, 1, 0, implicit %exec
+ %21 = V_AND_B32_e64 %5, %3, implicit %exec
+ %vcc = V_CMPX_GT_F32_e64 0, 23, 0, killed %21, 0, 2, implicit-def %exec, implicit %exec
+ %23 = V_AND_B32_e64 %5, %3, implicit %exec
+ %vcc = V_CMP_EQ_F32_e64 0, %6, 0, killed %23, 1, 2, implicit %exec
+ %24 = V_AND_B32_e64 %5, %3, implicit %exec
+ %vcc = V_CMPX_GT_F32_e64 1, 23, 0, killed %24, 0, 0, implicit-def %exec, implicit %exec
+ %25 = V_AND_B32_e64 %5, %3, implicit %exec
+ %vcc = V_CMPX_GT_F32_e64 0, 23, 1, killed %25, 0, 0, implicit-def %exec, implicit %exec
+ %26 = V_AND_B32_e64 %5, %3, implicit %exec
+ %vcc = V_CMPX_GT_F32_e64 1, 23, 1, killed %26, 0, 0, implicit-def %exec, implicit %exec
+ %27 = V_AND_B32_e64 %5, %3, implicit %exec
+ %vcc = V_CMPX_GT_F32_e64 1, 23, 1, killed %27, 1, 2, implicit-def %exec, implicit %exec
+
+
+ %100 = V_MOV_B32_e32 %vcc_lo, implicit %exec
+
+ FLAT_STORE_DWORD %0, %100, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4)
+ %sgpr30_sgpr31 = COPY %2
+ S_SETPC_B64_return %sgpr30_sgpr31
diff --git a/test/CodeGen/AMDGPU/select-vectors.ll b/test/CodeGen/AMDGPU/select-vectors.ll
index 4b00a48211ec..ebbc675b2bab 100644
--- a/test/CodeGen/AMDGPU/select-vectors.ll
+++ b/test/CodeGen/AMDGPU/select-vectors.ll
@@ -66,7 +66,7 @@ define amdgpu_kernel void @v_select_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8
}
; GCN-LABEL: {{^}}select_v4i8:
-; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32
; GCN-NOT: cndmask
define amdgpu_kernel void @select_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, <4 x i8> %b, i8 %c) #0 {
%cmp = icmp eq i8 %c, 0
diff --git a/test/CodeGen/AMDGPU/setcc-sext.ll b/test/CodeGen/AMDGPU/setcc-sext.ll
new file mode 100644
index 000000000000..eadce225e350
--- /dev/null
+++ b/test/CodeGen/AMDGPU/setcc-sext.ll
@@ -0,0 +1,292 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}setcc_sgt_true_sext:
+; GCN: v_cmp_le_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
+; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
+; GCN-NOT: v_cndmask_
+
+define amdgpu_kernel void @setcc_sgt_true_sext(i32 addrspace(1)* nocapture %arg) {
+bb:
+ %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %y = tail call i32 @llvm.amdgcn.workitem.id.y()
+ %cmp = icmp ugt i32 %x, %y
+ %ext = sext i1 %cmp to i32
+ %cond = icmp sgt i32 %ext, -1
+ br i1 %cond, label %then, label %endif
+
+then:
+ store i32 1, i32 addrspace(1)* %arg, align 4
+ br label %endif
+
+endif:
+ ret void
+}
+
+; GCN-LABEL: {{^}}setcc_sgt_true_sext_swap:
+; GCN: v_cmp_le_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
+; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
+; GCN-NOT: v_cndmask_
+
+define amdgpu_kernel void @setcc_sgt_true_sext_swap(i32 addrspace(1)* nocapture %arg) {
+bb:
+ %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %y = tail call i32 @llvm.amdgcn.workitem.id.y()
+ %cmp = icmp ugt i32 %x, %y
+ %ext = sext i1 %cmp to i32
+ %cond = icmp slt i32 -1, %ext
+ br i1 %cond, label %then, label %endif
+
+then:
+ store i32 1, i32 addrspace(1)* %arg, align 4
+ br label %endif
+
+endif:
+ ret void
+}
+
+; GCN-LABEL: {{^}}setcc_ne_true_sext:
+; GCN: v_cmp_le_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
+; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
+; GCN-NOT: v_cndmask_
+
+define amdgpu_kernel void @setcc_ne_true_sext(i32 addrspace(1)* nocapture %arg) {
+bb:
+ %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %y = tail call i32 @llvm.amdgcn.workitem.id.y()
+ %cmp = icmp ugt i32 %x, %y
+ %ext = sext i1 %cmp to i32
+ %cond = icmp ne i32 %ext, -1
+ br i1 %cond, label %then, label %endif
+
+then:
+ store i32 1, i32 addrspace(1)* %arg, align 4
+ br label %endif
+
+endif:
+ ret void
+}
+
+; GCN-LABEL: {{^}}setcc_ult_true_sext:
+; GCN: v_cmp_le_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
+; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
+; GCN-NOT: v_cndmask_
+
+define amdgpu_kernel void @setcc_ult_true_sext(i32 addrspace(1)* nocapture %arg) {
+bb:
+ %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %y = tail call i32 @llvm.amdgcn.workitem.id.y()
+ %cmp = icmp ugt i32 %x, %y
+ %ext = sext i1 %cmp to i32
+ %cond = icmp ult i32 %ext, -1
+ br i1 %cond, label %then, label %endif
+
+then:
+ store i32 1, i32 addrspace(1)* %arg, align 4
+ br label %endif
+
+endif:
+ ret void
+}
+
+; GCN-LABEL: {{^}}setcc_eq_true_sext:
+; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
+; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
+; GCN-NOT: v_cndmask_
+
+define amdgpu_kernel void @setcc_eq_true_sext(i32 addrspace(1)* nocapture %arg) {
+bb:
+ %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %y = tail call i32 @llvm.amdgcn.workitem.id.y()
+ %cmp = icmp ugt i32 %x, %y
+ %ext = sext i1 %cmp to i32
+ %cond = icmp eq i32 %ext, -1
+ br i1 %cond, label %then, label %endif
+
+then:
+ store i32 1, i32 addrspace(1)* %arg, align 4
+ br label %endif
+
+endif:
+ ret void
+}
+
+; GCN-LABEL: {{^}}setcc_sle_true_sext:
+; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
+; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
+; GCN-NOT: v_cndmask_
+
+define amdgpu_kernel void @setcc_sle_true_sext(i32 addrspace(1)* nocapture %arg) {
+bb:
+ %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %y = tail call i32 @llvm.amdgcn.workitem.id.y()
+ %cmp = icmp ugt i32 %x, %y
+ %ext = sext i1 %cmp to i32
+ %cond = icmp sle i32 %ext, -1
+ br i1 %cond, label %then, label %endif
+
+then:
+ store i32 1, i32 addrspace(1)* %arg, align 4
+ br label %endif
+
+endif:
+ ret void
+}
+
+; GCN-LABEL: {{^}}setcc_uge_true_sext:
+; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
+; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
+; GCN-NOT: v_cndmask_
+
+define amdgpu_kernel void @setcc_uge_true_sext(i32 addrspace(1)* nocapture %arg) {
+bb:
+ %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %y = tail call i32 @llvm.amdgcn.workitem.id.y()
+ %cmp = icmp ugt i32 %x, %y
+ %ext = sext i1 %cmp to i32
+ %cond = icmp uge i32 %ext, -1
+ br i1 %cond, label %then, label %endif
+
+then:
+ store i32 1, i32 addrspace(1)* %arg, align 4
+ br label %endif
+
+endif:
+ ret void
+}
+
+; GCN-LABEL: {{^}}setcc_eq_false_sext:
+; GCN: v_cmp_le_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
+; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
+; GCN-NOT: v_cndmask_
+
+define amdgpu_kernel void @setcc_eq_false_sext(i32 addrspace(1)* nocapture %arg) {
+bb:
+ %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %y = tail call i32 @llvm.amdgcn.workitem.id.y()
+ %cmp = icmp ugt i32 %x, %y
+ %ext = sext i1 %cmp to i32
+ %cond = icmp eq i32 %ext, 0
+ br i1 %cond, label %then, label %endif
+
+then:
+ store i32 1, i32 addrspace(1)* %arg, align 4
+ br label %endif
+
+endif:
+ ret void
+}
+
+; GCN-LABEL: {{^}}setcc_sge_false_sext:
+; GCN: v_cmp_le_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
+; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
+; GCN-NOT: v_cndmask_
+
+define amdgpu_kernel void @setcc_sge_false_sext(i32 addrspace(1)* nocapture %arg) {
+bb:
+ %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %y = tail call i32 @llvm.amdgcn.workitem.id.y()
+ %cmp = icmp ugt i32 %x, %y
+ %ext = sext i1 %cmp to i32
+ %cond = icmp sge i32 %ext, 0
+ br i1 %cond, label %then, label %endif
+
+then:
+ store i32 1, i32 addrspace(1)* %arg, align 4
+ br label %endif
+
+endif:
+ ret void
+}
+
+; GCN-LABEL: {{^}}setcc_ule_false_sext:
+; GCN: v_cmp_le_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
+; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
+; GCN-NOT: v_cndmask_
+
+define amdgpu_kernel void @setcc_ule_false_sext(i32 addrspace(1)* nocapture %arg) {
+bb:
+ %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %y = tail call i32 @llvm.amdgcn.workitem.id.y()
+ %cmp = icmp ugt i32 %x, %y
+ %ext = sext i1 %cmp to i32
+ %cond = icmp ule i32 %ext, 0
+ br i1 %cond, label %then, label %endif
+
+then:
+ store i32 1, i32 addrspace(1)* %arg, align 4
+ br label %endif
+
+endif:
+ ret void
+}
+
+; GCN-LABEL: {{^}}setcc_ne_false_sext:
+; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
+; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
+; GCN-NOT: v_cndmask_
+
+define amdgpu_kernel void @setcc_ne_false_sext(i32 addrspace(1)* nocapture %arg) {
+bb:
+ %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %y = tail call i32 @llvm.amdgcn.workitem.id.y()
+ %cmp = icmp ugt i32 %x, %y
+ %ext = sext i1 %cmp to i32
+ %cond = icmp ne i32 %ext, 0
+ br i1 %cond, label %then, label %endif
+
+then:
+ store i32 1, i32 addrspace(1)* %arg, align 4
+ br label %endif
+
+endif:
+ ret void
+}
+; GCN-LABEL: {{^}}setcc_ugt_false_sext:
+; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
+; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
+; GCN-NOT: v_cndmask_
+
+define amdgpu_kernel void @setcc_ugt_false_sext(i32 addrspace(1)* nocapture %arg) {
+bb:
+ %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %y = tail call i32 @llvm.amdgcn.workitem.id.y()
+ %cmp = icmp ugt i32 %x, %y
+ %ext = sext i1 %cmp to i32
+ %cond = icmp ugt i32 %ext, 0
+ br i1 %cond, label %then, label %endif
+
+then:
+ store i32 1, i32 addrspace(1)* %arg, align 4
+ br label %endif
+
+endif:
+ ret void
+}
+; GCN-LABEL: {{^}}setcc_slt_false_sext:
+; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
+; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
+; GCN-NOT: v_cndmask_
+
+define amdgpu_kernel void @setcc_slt_false_sext(i32 addrspace(1)* nocapture %arg) {
+bb:
+ %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %y = tail call i32 @llvm.amdgcn.workitem.id.y()
+ %cmp = icmp ugt i32 %x, %y
+ %ext = sext i1 %cmp to i32
+ %cond = icmp slt i32 %ext, 0
+ br i1 %cond, label %then, label %endif
+
+then:
+ store i32 1, i32 addrspace(1)* %arg, align 4
+ br label %endif
+
+endif:
+ ret void
+}
+
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+
+declare i32 @llvm.amdgcn.workitem.id.y() #0
+
+attributes #0 = { nounwind readnone speculatable }
diff --git a/test/CodeGen/AMDGPU/sgpr-copy.ll b/test/CodeGen/AMDGPU/sgpr-copy.ll
index 5c20e9a8d585..931051102cd5 100644
--- a/test/CodeGen/AMDGPU/sgpr-copy.ll
+++ b/test/CodeGen/AMDGPU/sgpr-copy.ll
@@ -4,13 +4,13 @@
; CHECK-LABEL: {{^}}phi1:
; CHECK: s_buffer_load_dword [[DST:s[0-9]]], {{s\[[0-9]+:[0-9]+\]}}, 0x0
; CHECK: v_mov_b32_e32 v{{[0-9]}}, [[DST]]
-define amdgpu_ps void @phi1(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
+define amdgpu_ps void @phi1(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
- %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
- %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
- %tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 0)
- %tmp22 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 16)
- %tmp23 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 32)
+ %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
+ %tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
+ %tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 0)
+ %tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 16)
+ %tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 32)
%tmp24 = fptosi float %tmp22 to i32
%tmp25 = icmp ne i32 %tmp24, 0
br i1 %tmp25, label %ENDIF, label %ELSE
@@ -28,29 +28,29 @@ ENDIF: ; preds = %ELSE, %main_body
; Make sure this program doesn't crash
; CHECK-LABEL: {{^}}phi2:
-define amdgpu_ps void @phi2(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #1 {
+define amdgpu_ps void @phi2(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #1 {
main_body:
- %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
- %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
- %tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 16)
- %tmp22 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 32)
- %tmp23 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 36)
- %tmp24 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 40)
- %tmp25 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 48)
- %tmp26 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 52)
- %tmp27 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 56)
- %tmp28 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 64)
- %tmp29 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 68)
- %tmp30 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 72)
- %tmp31 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 76)
- %tmp32 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 80)
- %tmp33 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 84)
- %tmp34 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 88)
- %tmp35 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 92)
+ %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
+ %tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
+ %tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 16)
+ %tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 32)
+ %tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 36)
+ %tmp24 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 40)
+ %tmp25 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 48)
+ %tmp26 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 52)
+ %tmp27 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 56)
+ %tmp28 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 64)
+ %tmp29 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 68)
+ %tmp30 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 72)
+ %tmp31 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 76)
+ %tmp32 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 80)
+ %tmp33 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 84)
+ %tmp34 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 88)
+ %tmp35 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 92)
%tmp36 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %arg2, i32 0
%tmp37 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp36, !tbaa !0
- %tmp38 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg1, i32 0
- %tmp39 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp38, !tbaa !0
+ %tmp38 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg1, i32 0
+ %tmp39 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp38, !tbaa !0
%i.i = extractelement <2 x i32> %arg5, i32 0
%j.i = extractelement <2 x i32> %arg5, i32 1
%i.f.i = bitcast i32 %i.i to float
@@ -85,7 +85,7 @@ main_body:
%tmp46 = bitcast float %p2.i24 to i32
%tmp47 = insertelement <2 x i32> undef, i32 %tmp45, i32 0
%tmp48 = insertelement <2 x i32> %tmp47, i32 %tmp46, i32 1
- %tmp39.bc = bitcast <16 x i8> %tmp39 to <4 x i32>
+ %tmp39.bc = bitcast <4 x i32> %tmp39 to <4 x i32>
%a.bc.i = bitcast <2 x i32> %tmp48 to <2 x float>
%tmp1 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i, <8 x i32> %tmp37, <4 x i32> %tmp39.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp50 = extractelement <4 x float> %tmp1, i32 2
@@ -173,14 +173,14 @@ ENDIF24: ; preds = %IF25, %ENDIF
; We just want to make sure the program doesn't crash
; CHECK-LABEL: {{^}}loop:
-define amdgpu_ps void @loop(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
+define amdgpu_ps void @loop(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
- %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
- %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
- %tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 0)
- %tmp22 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 4)
- %tmp23 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 8)
- %tmp24 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 12)
+ %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
+ %tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
+ %tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 0)
+ %tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 4)
+ %tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 8)
+ %tmp24 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 12)
%tmp25 = fptosi float %tmp24 to i32
%tmp26 = bitcast i32 %tmp25 to float
%tmp27 = bitcast float %tmp26 to i32
@@ -226,17 +226,17 @@ ENDIF: ; preds = %LOOP
; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[SAMPLE_LO]]:[[SAMPLE_HI]]{{\]}}
; CHECK: exp
; CHECK: s_endpgm
-define amdgpu_ps void @sample_v3([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
+define amdgpu_ps void @sample_v3([17 x <4 x i32>] addrspace(2)* byval %arg, [32 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
entry:
- %tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0
- %tmp21 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
- %tmp22 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 16)
+ %tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg, i64 0, i32 0
+ %tmp21 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
+ %tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 16)
%tmp23 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 0
%tmp24 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp23, !tbaa !0
- %tmp25 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 0
- %tmp26 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp25, !tbaa !0
+ %tmp25 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 0
+ %tmp26 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp25, !tbaa !0
%tmp27 = fcmp oeq float %tmp22, 0.000000e+00
- %tmp26.bc = bitcast <16 x i8> %tmp26 to <4 x i32>
+ %tmp26.bc = bitcast <4 x i32> %tmp26 to <4 x i32>
br i1 %tmp27, label %if, label %else
if: ; preds = %entry
@@ -290,7 +290,7 @@ endif: ; preds = %if1, %if0, %entry
; This test is just checking that we don't crash or hit an assertion failure.
; CHECK-LABEL: {{^}}copy2:
; CHECK: s_endpgm
-define amdgpu_ps void @copy2([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
+define amdgpu_ps void @copy2([17 x <4 x i32>] addrspace(2)* byval %arg, [32 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
entry:
br label %LOOP68
@@ -326,11 +326,11 @@ ENDIF69: ; preds = %LOOP68
; [[END]]:
; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[ADD]]{{\]}}
; CHECK: s_endpgm
-define amdgpu_ps void @sample_rsrc([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <4 x i32>] addrspace(2)* byval %arg2, [32 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
+define amdgpu_ps void @sample_rsrc([6 x <4 x i32>] addrspace(2)* byval %arg, [17 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <4 x i32>] addrspace(2)* byval %arg2, [32 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
bb:
- %tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg1, i32 0, i32 0
- %tmp22 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !3
- %tmp23 = call float @llvm.SI.load.const(<16 x i8> %tmp22, i32 16)
+ %tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg1, i32 0, i32 0
+ %tmp22 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !3
+ %tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp22, i32 16)
%tmp25 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %arg3, i32 0, i32 0
%tmp26 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp25, !tbaa !3
%tmp27 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %arg2, i32 0, i32 0
@@ -420,7 +420,7 @@ declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1)
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
-declare float @llvm.SI.load.const(<16 x i8>, i32) #1
+declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll b/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll
index a6026785b173..c70eb9b9c4a5 100644
--- a/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll
+++ b/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll
@@ -151,10 +151,11 @@ define amdgpu_kernel void @v_uextract_bit_1_31_i64(i64 addrspace(1)* %out, i64 a
ret void
}
-; Spans the dword boundary, so requires full shift
+; Spans the dword boundary, so requires full shift.
+; Truncated after the shift, so only the low half of the shift result is used.
; GCN-LABEL: {{^}}v_uextract_bit_31_32_i64:
-; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
-; GCN: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 31
+; GCN: buffer_load_dwordx2 v{{\[}}[[VALLO:[0-9]+]]:[[VALHI:[0-9]+]]{{\]}}
+; GCN: v_alignbit_b32 v[[SHRLO:[0-9]+]], v[[VALHI]], v[[VALLO]], 31
; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 3, v[[SHRLO]]{{$}}
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO]]{{\]}}
@@ -188,8 +189,8 @@ define amdgpu_kernel void @v_uextract_bit_32_33_i64(i64 addrspace(1)* %out, i64
; GCN-LABEL: {{^}}v_uextract_bit_30_60_i64:
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
-; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
-; GCN: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 30
+; GCN: buffer_load_dwordx2 v{{\[}}[[VALLO:[0-9]+]]:[[VALHI:[0-9]+]]{{\]}}
+; GCN: v_alignbit_b32 v[[SHRLO:[0-9]+]], v[[VALHI]], v[[VALLO]], 30
; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 0x3fffffff, v[[SHRLO]]{{$}}
; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]
; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO1]]{{\]}}
@@ -223,10 +224,9 @@ define amdgpu_kernel void @v_uextract_bit_33_63_i64(i64 addrspace(1)* %out, i64
; GCN-LABEL: {{^}}v_uextract_bit_31_63_i64:
; GCN: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
-; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
-; GCN: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 31
-; GCN-NEXT: v_mov_b32_e32 v[[SHRHI]], v[[ZERO]]
-; GCN: buffer_store_dwordx2 v{{\[}}[[SHRLO]]:[[SHRHI]]{{\]}}
+; GCN: buffer_load_dwordx2 v{{\[}}[[VALLO:[0-9]+]]:[[VALHI:[0-9]+]]{{\]}}
+; GCN: v_alignbit_b32 v[[SHRLO:[0-9]+]], v[[VALHI]], v[[VALLO]], 31
+; GCN: buffer_store_dwordx2 v{{\[}}[[SHRLO]]:[[ZERO]]{{\]}}
define amdgpu_kernel void @v_uextract_bit_31_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
@@ -288,8 +288,8 @@ define amdgpu_kernel void @v_uextract_bit_33_i64_trunc_i32(i32 addrspace(1)* %ou
}
; GCN-LABEL: {{^}}v_uextract_bit_31_32_i64_trunc_i32:
-; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
-; GCN: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 31
+; GCN: buffer_load_dwordx2 v{{\[}}[[VALLO:[0-9]+]]:[[VALHI:[0-9]+]]{{\]}}
+; GCN: v_alignbit_b32 v[[SHRLO:[0-9]+]], v[[VALHI]], v[[VALLO]], 31
; GCN-NEXT: v_and_b32_e32 v[[SHRLO]], 3, v[[SHRLO]]
; GCN-NOT: v[[SHRLO]]
; GCN: buffer_store_dword v[[SHRLO]]
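The updated checks above capture a lowering improvement: a 64-bit right shift that spans the dword boundary, but whose high half goes unused, can now be formed with a single v_alignbit_b32 of the two 32-bit input halves instead of a v_lshr_b64 pair. A minimal sketch of the IR pattern the _trunc_i32 tests exercise (illustrative only, not part of this patch):

define i32 @sketch_uextract_bit_31_32(i64 %x) {
  ; The two extracted bits straddle the low/high dword boundary.
  %shr = lshr i64 %x, 31
  %and = and i64 %shr, 3
  ; Only the low 32 bits are consumed, and ((hi:lo) >> 31)[31:0] is
  ; exactly what "v_alignbit_b32 dst, hi, lo, 31" computes.
  %lo = trunc i64 %and to i32
  ret i32 %lo
}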
diff --git a/test/CodeGen/AMDGPU/shift-i64-opts.ll b/test/CodeGen/AMDGPU/shift-i64-opts.ll
index a803849be02c..5306e190a4f9 100644
--- a/test/CodeGen/AMDGPU/shift-i64-opts.ll
+++ b/test/CodeGen/AMDGPU/shift-i64-opts.ll
@@ -243,3 +243,77 @@ define amdgpu_kernel void @trunc_shl_31_i32_i64_multi_use(i32 addrspace(1)* %out
store volatile i64 %shl, i64 addrspace(1)* %in
ret void
}
+
+; GCN-LABEL: {{^}}trunc_shl_and31:
+; GCN: s_and_b32 s[[AMT:[0-9]+]], s{{[0-9]+}}, 31
+; GCN: v_lshlrev_b32_e32 v{{[0-9]+}}, s[[AMT]], v{{[0-9]+}}
+; GCN-NOT: v_lshl_b64
+; GCN-NOT: v_lshlrev_b64
+define amdgpu_kernel void @trunc_shl_and31(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) {
+bb:
+ %tmp = load i64, i64 addrspace(1)* %arg, align 8
+ %tmp3 = and i32 %arg2, 31
+ %tmp4 = zext i32 %tmp3 to i64
+ %tmp5 = shl i64 %tmp, %tmp4
+ %tmp6 = trunc i64 %tmp5 to i32
+ store i32 %tmp6, i32 addrspace(1)* %arg1, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}trunc_shl_and30:
+; GCN: s_and_b32 s[[AMT:[0-9]+]], s{{[0-9]+}}, 30
+; GCN: v_lshlrev_b32_e32 v{{[0-9]+}}, s[[AMT]], v{{[0-9]+}}
+; GCN-NOT: v_lshl_b64
+; GCN-NOT: v_lshlrev_b64
+define amdgpu_kernel void @trunc_shl_and30(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) {
+bb:
+ %tmp = load i64, i64 addrspace(1)* %arg, align 8
+ %tmp3 = and i32 %arg2, 30
+ %tmp4 = zext i32 %tmp3 to i64
+ %tmp5 = shl i64 %tmp, %tmp4
+ %tmp6 = trunc i64 %tmp5 to i32
+ store i32 %tmp6, i32 addrspace(1)* %arg1, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}trunc_shl_wrong_and63:
+; Negative test, wrong constant: masking with 63 does not bound the shift amount below 32.
+; GCN: v_lshl_b64
+define amdgpu_kernel void @trunc_shl_wrong_and63(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) {
+bb:
+ %tmp = load i64, i64 addrspace(1)* %arg, align 8
+ %tmp3 = and i32 %arg2, 63
+ %tmp4 = zext i32 %tmp3 to i64
+ %tmp5 = shl i64 %tmp, %tmp4
+ %tmp6 = trunc i64 %tmp5 to i32
+ store i32 %tmp6, i32 addrspace(1)* %arg1, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}trunc_shl_no_and:
+; Negative test, the shift amount can be the full 64 bits.
+; GCN: v_lshl_b64
+define amdgpu_kernel void @trunc_shl_no_and(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) {
+bb:
+ %tmp = load i64, i64 addrspace(1)* %arg, align 8
+ %tmp4 = zext i32 %arg2 to i64
+ %tmp5 = shl i64 %tmp, %tmp4
+ %tmp6 = trunc i64 %tmp5 to i32
+ store i32 %tmp6, i32 addrspace(1)* %arg1, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}trunc_shl_vec_vec:
+; GCN-DAG: v_lshlrev_b32_e32 v{{[0-9]+}}, 3, v{{[0-9]+}}
+; GCN-DAG: v_lshlrev_b32_e32 v{{[0-9]+}}, 4, v{{[0-9]+}}
+; GCN-DAG: v_lshlrev_b32_e32 v{{[0-9]+}}, 5, v{{[0-9]+}}
+; GCN-DAG: v_lshlrev_b32_e32 v{{[0-9]+}}, 6, v{{[0-9]+}}
+; GCN-NOT: v_lshl_b64
+; GCN-NOT: v_lshlrev_b64
+define amdgpu_kernel void @trunc_shl_vec_vec(<4 x i64> addrspace(1)* %arg) {
+bb:
+ %v = load <4 x i64>, <4 x i64> addrspace(1)* %arg, align 32
+ %shl = shl <4 x i64> %v, <i64 3, i64 4, i64 5, i64 6>
+ store <4 x i64> %shl, <4 x i64> addrspace(1)* %arg, align 32
+ ret void
+}
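The functions added above test narrowing of a 64-bit left shift whose result is only used truncated to 32 bits: when the preceding 'and' proves the shift amount is below 32, the low dword of the 64-bit shl equals a 32-bit shl of the low dword, so a single v_lshlrev_b32 can replace the 64-bit shift, as the GCN-NOT lines assert. A reduced sketch of the positive case (function name illustrative):

define i32 @sketch_trunc_shl_and31(i64 %x, i32 %amt) {
  %amt.masked = and i32 %amt, 31         ; bounds the amount below 32
  %amt.wide = zext i32 %amt.masked to i64
  %shl = shl i64 %x, %amt.wide
  %lo = trunc i64 %shl to i32            ; only the low dword is used
  ret i32 %lo
}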
diff --git a/test/CodeGen/AMDGPU/si-lod-bias.ll b/test/CodeGen/AMDGPU/si-lod-bias.ll
index 3a7359ea4ffa..422498066509 100644
--- a/test/CodeGen/AMDGPU/si-lod-bias.ll
+++ b/test/CodeGen/AMDGPU/si-lod-bias.ll
@@ -6,15 +6,15 @@
; GCN-LABEL: {{^}}main:
; GCN: image_sample_b v{{\[[0-9]:[0-9]\]}}, v{{\[[0-9]:[0-9]\]}}, s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf
-define amdgpu_ps void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
+define amdgpu_ps void @main(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
- %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
- %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
- %tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 16)
+ %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
+ %tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
+ %tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 16)
%tmp22 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %arg2, i32 0
%tmp23 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp22, !tbaa !0
- %tmp24 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg1, i32 0
- %tmp25 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp24, !tbaa !0
+ %tmp24 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg1, i32 0
+ %tmp25 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp24, !tbaa !0
%i.i = extractelement <2 x i32> %arg5, i32 0
%j.i = extractelement <2 x i32> %arg5, i32 1
%i.f.i = bitcast i32 %i.i to float
@@ -34,9 +34,8 @@ main_body:
%tmp32 = insertelement <4 x i32> %tmp31, i32 %tmp29, i32 1
%tmp33 = insertelement <4 x i32> %tmp32, i32 %tmp30, i32 2
%tmp34 = insertelement <4 x i32> %tmp33, i32 undef, i32 3
- %tmp25.bc = bitcast <16 x i8> %tmp25 to <4 x i32>
%tmp34.bc = bitcast <4 x i32> %tmp34 to <4 x float>
- %tmp35 = call <4 x float> @llvm.amdgcn.image.sample.b.v4f32.v4f32.v8i32(<4 x float> %tmp34.bc, <8 x i32> %tmp23, <4 x i32> %tmp25.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp35 = call <4 x float> @llvm.amdgcn.image.sample.b.v4f32.v4f32.v8i32(<4 x float> %tmp34.bc, <8 x i32> %tmp23, <4 x i32> %tmp25, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp36 = extractelement <4 x float> %tmp35, i32 0
%tmp37 = extractelement <4 x float> %tmp35, i32 1
%tmp38 = extractelement <4 x float> %tmp35, i32 2
@@ -49,7 +48,7 @@ declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
declare <4 x float> @llvm.amdgcn.image.sample.b.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
-declare float @llvm.SI.load.const(<16 x i8>, i32) #1
+declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
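As elsewhere in this import, the buffer descriptors change from <16 x i8> to <4 x i32>, matching the renamed llvm.SI.load.const.v4i32 overload and removing the bitcast that previously fed the image-sample rsrc operand. A minimal before/after sketch of the call-site update (the wrapper function is hypothetical):

; Before: %buf = load <16 x i8>, <16 x i8> addrspace(2)* %p
;         %c = call float @llvm.SI.load.const(<16 x i8> %buf, i32 16)
define float @sketch_load_const(<4 x i32> addrspace(2)* %p) {
  ; After: the descriptor is already <4 x i32>, so no bitcast is needed.
  %buf = load <4 x i32>, <4 x i32> addrspace(2)* %p
  %c = call float @llvm.SI.load.const.v4i32(<4 x i32> %buf, i32 16)
  ret float %c
}
declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32)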
diff --git a/test/CodeGen/AMDGPU/si-sgpr-spill.ll b/test/CodeGen/AMDGPU/si-sgpr-spill.ll
index 8731e74d63a0..3e70f2c77826 100644
--- a/test/CodeGen/AMDGPU/si-sgpr-spill.ll
+++ b/test/CodeGen/AMDGPU/si-sgpr-spill.ll
@@ -24,81 +24,81 @@
; GCN: s_endpgm
; TOVGPR: ScratchSize: 0{{$}}
-define amdgpu_ps void @main([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) {
+define amdgpu_ps void @main([17 x <4 x i32>] addrspace(2)* byval %arg, [32 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) {
main_body:
- %tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0
- %tmp21 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
- %tmp22 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 96)
- %tmp23 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 100)
- %tmp24 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 104)
- %tmp25 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 112)
- %tmp26 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 116)
- %tmp27 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 120)
- %tmp28 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 128)
- %tmp29 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 132)
- %tmp30 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 140)
- %tmp31 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 144)
- %tmp32 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 160)
- %tmp33 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 176)
- %tmp34 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 180)
- %tmp35 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 184)
- %tmp36 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 192)
- %tmp37 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 196)
- %tmp38 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 200)
- %tmp39 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 208)
- %tmp40 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 212)
- %tmp41 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 216)
- %tmp42 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 224)
- %tmp43 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 240)
- %tmp44 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 244)
- %tmp45 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 248)
- %tmp46 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 256)
- %tmp47 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 272)
- %tmp48 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 276)
- %tmp49 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 280)
- %tmp50 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 288)
- %tmp51 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 292)
- %tmp52 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 296)
- %tmp53 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 304)
- %tmp54 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 308)
- %tmp55 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 312)
- %tmp56 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 368)
- %tmp57 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 372)
- %tmp58 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 376)
- %tmp59 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 384)
+ %tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg, i64 0, i32 0
+ %tmp21 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
+ %tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 96)
+ %tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 100)
+ %tmp24 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 104)
+ %tmp25 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 112)
+ %tmp26 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 116)
+ %tmp27 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 120)
+ %tmp28 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 128)
+ %tmp29 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 132)
+ %tmp30 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 140)
+ %tmp31 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 144)
+ %tmp32 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 160)
+ %tmp33 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 176)
+ %tmp34 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 180)
+ %tmp35 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 184)
+ %tmp36 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 192)
+ %tmp37 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 196)
+ %tmp38 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 200)
+ %tmp39 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 208)
+ %tmp40 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 212)
+ %tmp41 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 216)
+ %tmp42 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 224)
+ %tmp43 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 240)
+ %tmp44 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 244)
+ %tmp45 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 248)
+ %tmp46 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 256)
+ %tmp47 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 272)
+ %tmp48 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 276)
+ %tmp49 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 280)
+ %tmp50 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 288)
+ %tmp51 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 292)
+ %tmp52 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 296)
+ %tmp53 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 304)
+ %tmp54 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 308)
+ %tmp55 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 312)
+ %tmp56 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 368)
+ %tmp57 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 372)
+ %tmp58 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 376)
+ %tmp59 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 384)
%tmp60 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 0
%tmp61 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp60, !tbaa !0
- %tmp62 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 0
- %tmp63 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp62, !tbaa !0
- %tmp63.bc = bitcast <16 x i8> %tmp63 to <4 x i32>
+ %tmp62 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 0
+ %tmp63 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp62, !tbaa !0
+ %tmp63.bc = bitcast <4 x i32> %tmp63 to <4 x i32>
%tmp64 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 1
%tmp65 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp64, !tbaa !0
- %tmp66 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 1
- %tmp67 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp66, !tbaa !0
+ %tmp66 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 1
+ %tmp67 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp66, !tbaa !0
%tmp68 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 2
%tmp69 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp68, !tbaa !0
- %tmp70 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 2
- %tmp71 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp70, !tbaa !0
+ %tmp70 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 2
+ %tmp71 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp70, !tbaa !0
%tmp72 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 3
%tmp73 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp72, !tbaa !0
- %tmp74 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 3
- %tmp75 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp74, !tbaa !0
+ %tmp74 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 3
+ %tmp75 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp74, !tbaa !0
%tmp76 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 4
%tmp77 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp76, !tbaa !0
- %tmp78 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 4
- %tmp79 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp78, !tbaa !0
+ %tmp78 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 4
+ %tmp79 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp78, !tbaa !0
%tmp80 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 5
%tmp81 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp80, !tbaa !0
- %tmp82 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 5
- %tmp83 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp82, !tbaa !0
+ %tmp82 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 5
+ %tmp83 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp82, !tbaa !0
%tmp84 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 6
%tmp85 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp84, !tbaa !0
- %tmp86 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 6
- %tmp87 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp86, !tbaa !0
+ %tmp86 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 6
+ %tmp87 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp86, !tbaa !0
%tmp88 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 7
%tmp89 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp88, !tbaa !0
- %tmp90 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 7
- %tmp91 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp90, !tbaa !0
+ %tmp90 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 7
+ %tmp91 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp90, !tbaa !0
%i.i = extractelement <2 x i32> %arg6, i32 0
%j.i = extractelement <2 x i32> %arg6, i32 1
%i.f.i = bitcast i32 %i.i to float
@@ -410,7 +410,7 @@ IF67: ; preds = %LOOP65
%tmp274 = insertelement <8 x i32> %tmp273, i32 %tmp268, i32 5
%tmp275 = insertelement <8 x i32> %tmp274, i32 undef, i32 6
%tmp276 = insertelement <8 x i32> %tmp275, i32 undef, i32 7
- %tmp67.bc = bitcast <16 x i8> %tmp67 to <4 x i32>
+ %tmp67.bc = bitcast <4 x i32> %tmp67 to <4 x i32>
%tmp276.bc = bitcast <8 x i32> %tmp276 to <8 x float>
%tmp277 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp276.bc, <8 x i32> %tmp65, <4 x i32> %tmp67.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp278 = extractelement <4 x float> %tmp277, i32 0
@@ -432,7 +432,7 @@ IF67: ; preds = %LOOP65
%tmp294 = insertelement <8 x i32> %tmp293, i32 %tmp288, i32 5
%tmp295 = insertelement <8 x i32> %tmp294, i32 undef, i32 6
%tmp296 = insertelement <8 x i32> %tmp295, i32 undef, i32 7
- %tmp83.bc = bitcast <16 x i8> %tmp83 to <4 x i32>
+ %tmp83.bc = bitcast <4 x i32> %tmp83 to <4 x i32>
%tmp296.bc = bitcast <8 x i32> %tmp296 to <8 x float>
%tmp297 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp296.bc, <8 x i32> %tmp81, <4 x i32> %tmp83.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp298 = extractelement <4 x float> %tmp297, i32 0
@@ -452,7 +452,7 @@ IF67: ; preds = %LOOP65
%tmp312 = insertelement <8 x i32> %tmp311, i32 %tmp306, i32 5
%tmp313 = insertelement <8 x i32> %tmp312, i32 undef, i32 6
%tmp314 = insertelement <8 x i32> %tmp313, i32 undef, i32 7
- %tmp79.bc = bitcast <16 x i8> %tmp79 to <4 x i32>
+ %tmp79.bc = bitcast <4 x i32> %tmp79 to <4 x i32>
%tmp314.bc = bitcast <8 x i32> %tmp314 to <8 x float>
%tmp315 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp314.bc, <8 x i32> %tmp77, <4 x i32> %tmp79.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp316 = extractelement <4 x float> %tmp315, i32 0
@@ -515,7 +515,7 @@ IF67: ; preds = %LOOP65
%tmp372 = insertelement <8 x i32> %tmp371, i32 %tmp366, i32 5
%tmp373 = insertelement <8 x i32> %tmp372, i32 undef, i32 6
%tmp374 = insertelement <8 x i32> %tmp373, i32 undef, i32 7
- %tmp71.bc = bitcast <16 x i8> %tmp71 to <4 x i32>
+ %tmp71.bc = bitcast <4 x i32> %tmp71 to <4 x i32>
%tmp374.bc = bitcast <8 x i32> %tmp374 to <8 x float>
%tmp375 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp374.bc, <8 x i32> %tmp69, <4 x i32> %tmp71.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp376 = extractelement <4 x float> %tmp375, i32 0
@@ -571,7 +571,7 @@ IF67: ; preds = %LOOP65
%tmp426 = insertelement <8 x i32> %tmp425, i32 %tmp420, i32 5
%tmp427 = insertelement <8 x i32> %tmp426, i32 undef, i32 6
%tmp428 = insertelement <8 x i32> %tmp427, i32 undef, i32 7
- %tmp87.bc = bitcast <16 x i8> %tmp87 to <4 x i32>
+ %tmp87.bc = bitcast <4 x i32> %tmp87 to <4 x i32>
%tmp428.bc = bitcast <8 x i32> %tmp428 to <8 x float>
%tmp429 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp428.bc, <8 x i32> %tmp85, <4 x i32> %tmp87.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp430 = extractelement <4 x float> %tmp429, i32 0
@@ -624,7 +624,7 @@ IF67: ; preds = %LOOP65
%tmp467 = insertelement <4 x i32> %tmp466, i32 %tmp464, i32 1
%tmp468 = insertelement <4 x i32> %tmp467, i32 %tmp465, i32 2
%tmp469 = insertelement <4 x i32> %tmp468, i32 undef, i32 3
- %tmp91.bc = bitcast <16 x i8> %tmp91 to <4 x i32>
+ %tmp91.bc = bitcast <4 x i32> %tmp91 to <4 x i32>
%tmp469.bc = bitcast <4 x i32> %tmp469 to <4 x float>
%tmp470 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tmp469.bc, <8 x i32> %tmp89, <4 x i32> %tmp91.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
%tmp471 = extractelement <4 x float> %tmp470, i32 0
@@ -727,7 +727,7 @@ IF67: ; preds = %LOOP65
%tmp568 = insertelement <8 x i32> %tmp567, i32 %tmp562, i32 5
%tmp569 = insertelement <8 x i32> %tmp568, i32 undef, i32 6
%tmp570 = insertelement <8 x i32> %tmp569, i32 undef, i32 7
- %tmp75.bc = bitcast <16 x i8> %tmp75 to <4 x i32>
+ %tmp75.bc = bitcast <4 x i32> %tmp75 to <4 x i32>
%tmp570.bc = bitcast <8 x i32> %tmp570 to <8 x float>
%tmp571 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp570.bc, <8 x i32> %tmp73, <4 x i32> %tmp75.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp572 = extractelement <4 x float> %tmp571, i32 0
@@ -778,149 +778,149 @@ ENDIF66: ; preds = %LOOP65
; GCN-LABEL: {{^}}main1:
; GCN: s_endpgm
; TOVGPR: ScratchSize: 0{{$}}
-define amdgpu_ps void @main1([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
+define amdgpu_ps void @main1([17 x <4 x i32>] addrspace(2)* byval %arg, [32 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
main_body:
- %tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0
- %tmp21 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
- %tmp22 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 0)
- %tmp23 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 4)
- %tmp24 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 8)
- %tmp25 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 12)
- %tmp26 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 28)
- %tmp27 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 48)
- %tmp28 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 52)
- %tmp29 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 56)
- %tmp30 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 64)
- %tmp31 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 68)
- %tmp32 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 72)
- %tmp33 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 76)
- %tmp34 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 128)
- %tmp35 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 132)
- %tmp36 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 144)
- %tmp37 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 148)
- %tmp38 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 152)
- %tmp39 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 160)
- %tmp40 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 164)
- %tmp41 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 168)
- %tmp42 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 172)
- %tmp43 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 176)
- %tmp44 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 180)
- %tmp45 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 184)
- %tmp46 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 192)
- %tmp47 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 196)
- %tmp48 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 200)
- %tmp49 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 208)
- %tmp50 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 212)
- %tmp51 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 216)
- %tmp52 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 220)
- %tmp53 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 236)
- %tmp54 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 240)
- %tmp55 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 244)
- %tmp56 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 248)
- %tmp57 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 252)
- %tmp58 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 256)
- %tmp59 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 260)
- %tmp60 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 264)
- %tmp61 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 268)
- %tmp62 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 272)
- %tmp63 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 276)
- %tmp64 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 280)
- %tmp65 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 284)
- %tmp66 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 288)
- %tmp67 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 292)
- %tmp68 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 464)
- %tmp69 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 468)
- %tmp70 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 472)
- %tmp71 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 496)
- %tmp72 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 500)
- %tmp73 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 504)
- %tmp74 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 512)
- %tmp75 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 516)
- %tmp76 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 524)
- %tmp77 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 532)
- %tmp78 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 536)
- %tmp79 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 540)
- %tmp80 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 544)
- %tmp81 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 548)
- %tmp82 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 552)
- %tmp83 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 556)
- %tmp84 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 560)
- %tmp85 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 564)
- %tmp86 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 568)
- %tmp87 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 572)
- %tmp88 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 576)
- %tmp89 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 580)
- %tmp90 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 584)
- %tmp91 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 588)
- %tmp92 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 592)
- %tmp93 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 596)
- %tmp94 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 600)
- %tmp95 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 604)
- %tmp96 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 608)
- %tmp97 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 612)
- %tmp98 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 616)
- %tmp99 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 624)
- %tmp100 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 628)
- %tmp101 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 632)
- %tmp102 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 636)
- %tmp103 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 640)
- %tmp104 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 644)
- %tmp105 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 648)
- %tmp106 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 652)
- %tmp107 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 656)
- %tmp108 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 660)
- %tmp109 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 664)
- %tmp110 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 668)
- %tmp111 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 672)
- %tmp112 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 676)
- %tmp113 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 680)
- %tmp114 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 684)
- %tmp115 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 688)
- %tmp116 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 692)
- %tmp117 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 696)
- %tmp118 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 700)
- %tmp119 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 704)
- %tmp120 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 708)
- %tmp121 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 712)
- %tmp122 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 716)
- %tmp123 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 864)
- %tmp124 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 868)
+ %tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg, i64 0, i32 0
+ %tmp21 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
+ %tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 0)
+ %tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 4)
+ %tmp24 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 8)
+ %tmp25 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 12)
+ %tmp26 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 28)
+ %tmp27 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 48)
+ %tmp28 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 52)
+ %tmp29 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 56)
+ %tmp30 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 64)
+ %tmp31 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 68)
+ %tmp32 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 72)
+ %tmp33 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 76)
+ %tmp34 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 128)
+ %tmp35 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 132)
+ %tmp36 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 144)
+ %tmp37 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 148)
+ %tmp38 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 152)
+ %tmp39 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 160)
+ %tmp40 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 164)
+ %tmp41 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 168)
+ %tmp42 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 172)
+ %tmp43 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 176)
+ %tmp44 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 180)
+ %tmp45 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 184)
+ %tmp46 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 192)
+ %tmp47 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 196)
+ %tmp48 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 200)
+ %tmp49 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 208)
+ %tmp50 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 212)
+ %tmp51 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 216)
+ %tmp52 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 220)
+ %tmp53 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 236)
+ %tmp54 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 240)
+ %tmp55 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 244)
+ %tmp56 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 248)
+ %tmp57 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 252)
+ %tmp58 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 256)
+ %tmp59 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 260)
+ %tmp60 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 264)
+ %tmp61 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 268)
+ %tmp62 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 272)
+ %tmp63 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 276)
+ %tmp64 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 280)
+ %tmp65 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 284)
+ %tmp66 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 288)
+ %tmp67 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 292)
+ %tmp68 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 464)
+ %tmp69 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 468)
+ %tmp70 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 472)
+ %tmp71 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 496)
+ %tmp72 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 500)
+ %tmp73 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 504)
+ %tmp74 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 512)
+ %tmp75 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 516)
+ %tmp76 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 524)
+ %tmp77 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 532)
+ %tmp78 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 536)
+ %tmp79 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 540)
+ %tmp80 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 544)
+ %tmp81 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 548)
+ %tmp82 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 552)
+ %tmp83 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 556)
+ %tmp84 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 560)
+ %tmp85 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 564)
+ %tmp86 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 568)
+ %tmp87 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 572)
+ %tmp88 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 576)
+ %tmp89 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 580)
+ %tmp90 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 584)
+ %tmp91 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 588)
+ %tmp92 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 592)
+ %tmp93 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 596)
+ %tmp94 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 600)
+ %tmp95 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 604)
+ %tmp96 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 608)
+ %tmp97 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 612)
+ %tmp98 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 616)
+ %tmp99 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 624)
+ %tmp100 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 628)
+ %tmp101 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 632)
+ %tmp102 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 636)
+ %tmp103 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 640)
+ %tmp104 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 644)
+ %tmp105 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 648)
+ %tmp106 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 652)
+ %tmp107 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 656)
+ %tmp108 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 660)
+ %tmp109 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 664)
+ %tmp110 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 668)
+ %tmp111 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 672)
+ %tmp112 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 676)
+ %tmp113 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 680)
+ %tmp114 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 684)
+ %tmp115 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 688)
+ %tmp116 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 692)
+ %tmp117 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 696)
+ %tmp118 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 700)
+ %tmp119 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 704)
+ %tmp120 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 708)
+ %tmp121 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 712)
+ %tmp122 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 716)
+ %tmp123 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 864)
+ %tmp124 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 868)
%tmp125 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 0
%tmp126 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp125, !tbaa !0
- %tmp127 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 0
- %tmp128 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp127, !tbaa !0
+ %tmp127 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 0
+ %tmp128 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp127, !tbaa !0
%tmp129 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 1
%tmp130 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp129, !tbaa !0
- %tmp131 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 1
- %tmp132 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp131, !tbaa !0
+ %tmp131 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 1
+ %tmp132 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp131, !tbaa !0
%tmp133 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 2
%tmp134 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp133, !tbaa !0
- %tmp135 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 2
- %tmp136 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp135, !tbaa !0
+ %tmp135 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 2
+ %tmp136 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp135, !tbaa !0
%tmp137 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 3
%tmp138 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp137, !tbaa !0
- %tmp139 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 3
- %tmp140 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp139, !tbaa !0
+ %tmp139 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 3
+ %tmp140 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp139, !tbaa !0
%tmp141 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 4
%tmp142 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp141, !tbaa !0
- %tmp143 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 4
- %tmp144 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp143, !tbaa !0
+ %tmp143 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 4
+ %tmp144 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp143, !tbaa !0
%tmp145 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 5
%tmp146 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp145, !tbaa !0
- %tmp147 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 5
- %tmp148 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp147, !tbaa !0
+ %tmp147 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 5
+ %tmp148 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp147, !tbaa !0
%tmp149 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 6
%tmp150 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp149, !tbaa !0
- %tmp151 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 6
- %tmp152 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp151, !tbaa !0
+ %tmp151 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 6
+ %tmp152 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp151, !tbaa !0
%tmp153 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 7
%tmp154 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp153, !tbaa !0
- %tmp155 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 7
- %tmp156 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp155, !tbaa !0
+ %tmp155 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 7
+ %tmp156 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp155, !tbaa !0
%tmp157 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 8
%tmp158 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp157, !tbaa !0
- %tmp159 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 8
- %tmp160 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp159, !tbaa !0
+ %tmp159 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 8
+ %tmp160 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp159, !tbaa !0
%tmp161 = fcmp ugt float %arg17, 0.000000e+00
%tmp162 = select i1 %tmp161, float 1.000000e+00, float 0.000000e+00
%i.i = extractelement <2 x i32> %arg6, i32 0
@@ -1144,7 +1144,7 @@ main_body:
%tmp222 = bitcast float %p2.i126 to i32
%tmp223 = insertelement <2 x i32> undef, i32 %tmp221, i32 0
%tmp224 = insertelement <2 x i32> %tmp223, i32 %tmp222, i32 1
- %tmp132.bc = bitcast <16 x i8> %tmp132 to <4 x i32>
+ %tmp132.bc = bitcast <4 x i32> %tmp132 to <4 x i32>
%tmp224.bc = bitcast <2 x i32> %tmp224 to <2 x float>
%tmp225 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %tmp224.bc, <8 x i32> %tmp130, <4 x i32> %tmp132.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp226 = extractelement <4 x float> %tmp225, i32 0
@@ -1218,7 +1218,7 @@ LOOP: ; preds = %LOOP, %main_body
%tmp279 = insertelement <4 x i32> %tmp278, i32 %tmp277, i32 1
%tmp280 = insertelement <4 x i32> %tmp279, i32 0, i32 2
%tmp281 = insertelement <4 x i32> %tmp280, i32 undef, i32 3
- %tmp148.bc = bitcast <16 x i8> %tmp148 to <4 x i32>
+ %tmp148.bc = bitcast <4 x i32> %tmp148 to <4 x i32>
%tmp281.bc = bitcast <4 x i32> %tmp281 to <4 x float>
%tmp282 = call <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float> %tmp281.bc, <8 x i32> %tmp146, <4 x i32> %tmp148.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp283 = extractelement <4 x float> %tmp282, i32 3
@@ -1283,7 +1283,7 @@ IF189: ; preds = %LOOP
%tmp339 = bitcast float %tmp335 to i32
%tmp340 = insertelement <2 x i32> undef, i32 %tmp338, i32 0
%tmp341 = insertelement <2 x i32> %tmp340, i32 %tmp339, i32 1
- %tmp136.bc = bitcast <16 x i8> %tmp136 to <4 x i32>
+ %tmp136.bc = bitcast <4 x i32> %tmp136 to <4 x i32>
%a.bc.i = bitcast <2 x i32> %tmp341 to <2 x float>
%tmp0 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i, <8 x i32> %tmp134, <4 x i32> %tmp136.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp343 = extractelement <4 x float> %tmp0, i32 0
@@ -1317,7 +1317,7 @@ IF189: ; preds = %LOOP
%tmp359 = bitcast float %tmp337 to i32
%tmp360 = insertelement <2 x i32> undef, i32 %tmp358, i32 0
%tmp361 = insertelement <2 x i32> %tmp360, i32 %tmp359, i32 1
- %tmp152.bc = bitcast <16 x i8> %tmp152 to <4 x i32>
+ %tmp152.bc = bitcast <4 x i32> %tmp152 to <4 x i32>
%a.bc.i3 = bitcast <2 x i32> %tmp361 to <2 x float>
%tmp1 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i3, <8 x i32> %tmp150, <4 x i32> %tmp152.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp363 = extractelement <4 x float> %tmp1, i32 2
@@ -1329,7 +1329,7 @@ IF189: ; preds = %LOOP
%tmp369 = bitcast float %tmp311 to i32
%tmp370 = insertelement <2 x i32> undef, i32 %tmp368, i32 0
%tmp371 = insertelement <2 x i32> %tmp370, i32 %tmp369, i32 1
- %tmp140.bc = bitcast <16 x i8> %tmp140 to <4 x i32>
+ %tmp140.bc = bitcast <4 x i32> %tmp140 to <4 x i32>
%a.bc.i2 = bitcast <2 x i32> %tmp371 to <2 x float>
%tmp2 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i2, <8 x i32> %tmp138, <4 x i32> %tmp140.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp373 = extractelement <4 x float> %tmp2, i32 0
@@ -1347,7 +1347,7 @@ IF189: ; preds = %LOOP
%tmp383 = bitcast float %tmp321 to i32
%tmp384 = insertelement <2 x i32> undef, i32 %tmp382, i32 0
%tmp385 = insertelement <2 x i32> %tmp384, i32 %tmp383, i32 1
- %tmp144.bc = bitcast <16 x i8> %tmp144 to <4 x i32>
+ %tmp144.bc = bitcast <4 x i32> %tmp144 to <4 x i32>
%a.bc.i1 = bitcast <2 x i32> %tmp385 to <2 x float>
%tmp3 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i1, <8 x i32> %tmp142, <4 x i32> %tmp144.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp387 = extractelement <4 x float> %tmp3, i32 0
@@ -1446,7 +1446,7 @@ ENDIF197: ; preds = %IF198, %IF189
%tmp467 = bitcast float %tmp220 to i32
%tmp468 = insertelement <2 x i32> undef, i32 %tmp466, i32 0
%tmp469 = insertelement <2 x i32> %tmp468, i32 %tmp467, i32 1
- %tmp160.bc = bitcast <16 x i8> %tmp160 to <4 x i32>
+ %tmp160.bc = bitcast <4 x i32> %tmp160 to <4 x i32>
%tmp469.bc = bitcast <2 x i32> %tmp469 to <2 x float>
%tmp470 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %tmp469.bc, <8 x i32> %tmp158, <4 x i32> %tmp160.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp471 = extractelement <4 x float> %tmp470, i32 0
@@ -1465,7 +1465,7 @@ ENDIF197: ; preds = %IF198, %IF189
%tmp484 = bitcast float %p2.i138 to i32
%tmp485 = insertelement <2 x i32> undef, i32 %tmp483, i32 0
%tmp486 = insertelement <2 x i32> %tmp485, i32 %tmp484, i32 1
- %tmp156.bc = bitcast <16 x i8> %tmp156 to <4 x i32>
+ %tmp156.bc = bitcast <4 x i32> %tmp156 to <4 x i32>
%tmp486.bc = bitcast <2 x i32> %tmp486 to <2 x float>
%tmp487 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %tmp486.bc, <8 x i32> %tmp154, <4 x i32> %tmp156.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp488 = extractelement <4 x float> %tmp487, i32 0
@@ -1674,7 +1674,7 @@ ENDIF209: ; preds = %ELSE214, %ELSE211,
%tmp657 = insertelement <4 x i32> %tmp656, i32 %tmp654, i32 1
%tmp658 = insertelement <4 x i32> %tmp657, i32 %tmp655, i32 2
%tmp659 = insertelement <4 x i32> %tmp658, i32 undef, i32 3
- %tmp128.bc = bitcast <16 x i8> %tmp128 to <4 x i32>
+ %tmp128.bc = bitcast <4 x i32> %tmp128 to <4 x i32>
%tmp659.bc = bitcast <4 x i32> %tmp659 to <4 x float>
%tmp660 = call <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float> %tmp659.bc, <8 x i32> %tmp126, <4 x i32> %tmp128.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp661 = extractelement <4 x float> %tmp660, i32 0
@@ -1869,7 +1869,7 @@ declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
declare <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
declare <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
-declare float @llvm.SI.load.const(<16 x i8>, i32) #1
+declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
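Note that the mechanical type replacement leaves self-bitcasts behind (e.g. %tmp67.bc = bitcast <4 x i32> %tmp67 to <4 x i32> above). A bitcast between identical types is a legal no-op in LLVM IR, so the updated tests still verify; a standalone sketch:

define <4 x i32> @sketch_noop_bitcast(<4 x i32> %v) {
  ; Same-type bitcast: accepted by the verifier, folded away by instcombine.
  %v.bc = bitcast <4 x i32> %v to <4 x i32>
  ret <4 x i32> %v.bc
}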
diff --git a/test/CodeGen/AMDGPU/si-spill-cf.ll b/test/CodeGen/AMDGPU/si-spill-cf.ll
index 926702645d9e..2a8ced59ddef 100644
--- a/test/CodeGen/AMDGPU/si-spill-cf.ll
+++ b/test/CodeGen/AMDGPU/si-spill-cf.ll
@@ -9,73 +9,73 @@
define amdgpu_ps void @main() #0 {
main_body:
- %tmp = call float @llvm.SI.load.const(<16 x i8> undef, i32 16)
- %tmp1 = call float @llvm.SI.load.const(<16 x i8> undef, i32 32)
- %tmp2 = call float @llvm.SI.load.const(<16 x i8> undef, i32 80)
- %tmp3 = call float @llvm.SI.load.const(<16 x i8> undef, i32 84)
- %tmp4 = call float @llvm.SI.load.const(<16 x i8> undef, i32 88)
- %tmp5 = call float @llvm.SI.load.const(<16 x i8> undef, i32 96)
- %tmp6 = call float @llvm.SI.load.const(<16 x i8> undef, i32 100)
- %tmp7 = call float @llvm.SI.load.const(<16 x i8> undef, i32 104)
- %tmp8 = call float @llvm.SI.load.const(<16 x i8> undef, i32 112)
- %tmp9 = call float @llvm.SI.load.const(<16 x i8> undef, i32 116)
- %tmp10 = call float @llvm.SI.load.const(<16 x i8> undef, i32 120)
- %tmp11 = call float @llvm.SI.load.const(<16 x i8> undef, i32 128)
- %tmp12 = call float @llvm.SI.load.const(<16 x i8> undef, i32 132)
- %tmp13 = call float @llvm.SI.load.const(<16 x i8> undef, i32 136)
- %tmp14 = call float @llvm.SI.load.const(<16 x i8> undef, i32 144)
- %tmp15 = call float @llvm.SI.load.const(<16 x i8> undef, i32 148)
- %tmp16 = call float @llvm.SI.load.const(<16 x i8> undef, i32 152)
- %tmp17 = call float @llvm.SI.load.const(<16 x i8> undef, i32 160)
- %tmp18 = call float @llvm.SI.load.const(<16 x i8> undef, i32 164)
- %tmp19 = call float @llvm.SI.load.const(<16 x i8> undef, i32 168)
- %tmp20 = call float @llvm.SI.load.const(<16 x i8> undef, i32 176)
- %tmp21 = call float @llvm.SI.load.const(<16 x i8> undef, i32 180)
- %tmp22 = call float @llvm.SI.load.const(<16 x i8> undef, i32 184)
- %tmp23 = call float @llvm.SI.load.const(<16 x i8> undef, i32 192)
- %tmp24 = call float @llvm.SI.load.const(<16 x i8> undef, i32 196)
- %tmp25 = call float @llvm.SI.load.const(<16 x i8> undef, i32 200)
- %tmp26 = call float @llvm.SI.load.const(<16 x i8> undef, i32 208)
- %tmp27 = call float @llvm.SI.load.const(<16 x i8> undef, i32 212)
- %tmp28 = call float @llvm.SI.load.const(<16 x i8> undef, i32 216)
- %tmp29 = call float @llvm.SI.load.const(<16 x i8> undef, i32 224)
- %tmp30 = call float @llvm.SI.load.const(<16 x i8> undef, i32 228)
- %tmp31 = call float @llvm.SI.load.const(<16 x i8> undef, i32 232)
- %tmp32 = call float @llvm.SI.load.const(<16 x i8> undef, i32 240)
- %tmp33 = call float @llvm.SI.load.const(<16 x i8> undef, i32 244)
- %tmp34 = call float @llvm.SI.load.const(<16 x i8> undef, i32 248)
- %tmp35 = call float @llvm.SI.load.const(<16 x i8> undef, i32 256)
- %tmp36 = call float @llvm.SI.load.const(<16 x i8> undef, i32 260)
- %tmp37 = call float @llvm.SI.load.const(<16 x i8> undef, i32 264)
- %tmp38 = call float @llvm.SI.load.const(<16 x i8> undef, i32 272)
- %tmp39 = call float @llvm.SI.load.const(<16 x i8> undef, i32 276)
- %tmp40 = call float @llvm.SI.load.const(<16 x i8> undef, i32 280)
- %tmp41 = call float @llvm.SI.load.const(<16 x i8> undef, i32 288)
- %tmp42 = call float @llvm.SI.load.const(<16 x i8> undef, i32 292)
- %tmp43 = call float @llvm.SI.load.const(<16 x i8> undef, i32 296)
- %tmp44 = call float @llvm.SI.load.const(<16 x i8> undef, i32 304)
- %tmp45 = call float @llvm.SI.load.const(<16 x i8> undef, i32 308)
- %tmp46 = call float @llvm.SI.load.const(<16 x i8> undef, i32 312)
- %tmp47 = call float @llvm.SI.load.const(<16 x i8> undef, i32 320)
- %tmp48 = call float @llvm.SI.load.const(<16 x i8> undef, i32 324)
- %tmp49 = call float @llvm.SI.load.const(<16 x i8> undef, i32 328)
- %tmp50 = call float @llvm.SI.load.const(<16 x i8> undef, i32 336)
- %tmp51 = call float @llvm.SI.load.const(<16 x i8> undef, i32 340)
- %tmp52 = call float @llvm.SI.load.const(<16 x i8> undef, i32 344)
- %tmp53 = call float @llvm.SI.load.const(<16 x i8> undef, i32 352)
- %tmp54 = call float @llvm.SI.load.const(<16 x i8> undef, i32 356)
- %tmp55 = call float @llvm.SI.load.const(<16 x i8> undef, i32 360)
- %tmp56 = call float @llvm.SI.load.const(<16 x i8> undef, i32 368)
- %tmp57 = call float @llvm.SI.load.const(<16 x i8> undef, i32 372)
- %tmp58 = call float @llvm.SI.load.const(<16 x i8> undef, i32 376)
- %tmp59 = call float @llvm.SI.load.const(<16 x i8> undef, i32 384)
- %tmp60 = call float @llvm.SI.load.const(<16 x i8> undef, i32 388)
- %tmp61 = call float @llvm.SI.load.const(<16 x i8> undef, i32 392)
- %tmp62 = call float @llvm.SI.load.const(<16 x i8> undef, i32 400)
- %tmp63 = call float @llvm.SI.load.const(<16 x i8> undef, i32 404)
- %tmp64 = call float @llvm.SI.load.const(<16 x i8> undef, i32 408)
- %tmp65 = call float @llvm.SI.load.const(<16 x i8> undef, i32 416)
- %tmp66 = call float @llvm.SI.load.const(<16 x i8> undef, i32 420)
+ %tmp = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 16)
+ %tmp1 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 32)
+ %tmp2 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 80)
+ %tmp3 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 84)
+ %tmp4 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 88)
+ %tmp5 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 96)
+ %tmp6 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 100)
+ %tmp7 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 104)
+ %tmp8 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 112)
+ %tmp9 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 116)
+ %tmp10 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 120)
+ %tmp11 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 128)
+ %tmp12 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 132)
+ %tmp13 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 136)
+ %tmp14 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 144)
+ %tmp15 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 148)
+ %tmp16 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 152)
+ %tmp17 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 160)
+ %tmp18 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 164)
+ %tmp19 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 168)
+ %tmp20 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 176)
+ %tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 180)
+ %tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 184)
+ %tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 192)
+ %tmp24 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 196)
+ %tmp25 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 200)
+ %tmp26 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 208)
+ %tmp27 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 212)
+ %tmp28 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 216)
+ %tmp29 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 224)
+ %tmp30 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 228)
+ %tmp31 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 232)
+ %tmp32 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 240)
+ %tmp33 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 244)
+ %tmp34 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 248)
+ %tmp35 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 256)
+ %tmp36 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 260)
+ %tmp37 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 264)
+ %tmp38 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 272)
+ %tmp39 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 276)
+ %tmp40 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 280)
+ %tmp41 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 288)
+ %tmp42 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 292)
+ %tmp43 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 296)
+ %tmp44 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 304)
+ %tmp45 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 308)
+ %tmp46 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 312)
+ %tmp47 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 320)
+ %tmp48 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 324)
+ %tmp49 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 328)
+ %tmp50 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 336)
+ %tmp51 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 340)
+ %tmp52 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 344)
+ %tmp53 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 352)
+ %tmp54 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 356)
+ %tmp55 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 360)
+ %tmp56 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 368)
+ %tmp57 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 372)
+ %tmp58 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 376)
+ %tmp59 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 384)
+ %tmp60 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 388)
+ %tmp61 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 392)
+ %tmp62 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 400)
+ %tmp63 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 404)
+ %tmp64 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 408)
+ %tmp65 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 416)
+ %tmp66 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 420)
br label %LOOP
LOOP: ; preds = %ENDIF2795, %main_body
@@ -497,7 +497,7 @@ declare float @llvm.minnum.f32(float, float) #1
declare float @llvm.maxnum.f32(float, float) #1
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-declare float @llvm.SI.load.const(<16 x i8>, i32) #1
+declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/AMDGPU/smrd.ll b/test/CodeGen/AMDGPU/smrd.ll
index 50f72c670598..3f1e1cacb879 100644
--- a/test/CodeGen/AMDGPU/smrd.ll
+++ b/test/CodeGen/AMDGPU/smrd.ll
@@ -84,34 +84,34 @@ entry:
ret void
}
-; SMRD load using the load.const intrinsic with an immediate offset
+; SMRD load using the load.const.v4i32 intrinsic with an immediate offset
; GCN-LABEL: {{^}}smrd_load_const0:
; SICI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x10
-define amdgpu_ps void @smrd_load_const0(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
+define amdgpu_ps void @smrd_load_const0(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
- %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
- %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp
- %tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 16)
+ %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
+ %tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp
+ %tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 16)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
ret void
}
-; SMRD load using the load.const intrinsic with the largest possible immediate
+; SMRD load using the load.const.v4i32 intrinsic with the largest possible immediate
; offset.
; GCN-LABEL: {{^}}smrd_load_const1:
; SICI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
-define amdgpu_ps void @smrd_load_const1(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
+define amdgpu_ps void @smrd_load_const1(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
- %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
- %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp
- %tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 1020)
+ %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
+ %tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp
+ %tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 1020)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
ret void
}
-; SMRD load using the load.const intrinsic with an offset greater than the
+; SMRD load using the load.const.v4i32 intrinsic with an offset greater than the
; largest possible immediate offset.
; GCN-LABEL: {{^}}smrd_load_const2:
@@ -119,11 +119,11 @@ main_body:
; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
-define amdgpu_ps void @smrd_load_const2(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
+define amdgpu_ps void @smrd_load_const2(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
- %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
- %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp
- %tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 1024)
+ %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
+ %tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp
+ %tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 1024)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
ret void
}
@@ -134,11 +134,11 @@ main_body:
; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc
-define amdgpu_ps void @smrd_load_const3(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
+define amdgpu_ps void @smrd_load_const3(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
- %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
- %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp
- %tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 1048572)
+ %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
+ %tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp
+ %tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 1048572)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
ret void
}
@@ -149,17 +149,17 @@ main_body:
; SIVI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
; GCN: s_endpgm
-define amdgpu_ps void @smrd_load_const4(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
+define amdgpu_ps void @smrd_load_const4(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
- %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
- %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp
- %tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 1048576)
+ %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
+ %tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp
+ %tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 1048576)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
ret void
}
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-declare float @llvm.SI.load.const(<16 x i8>, i32) #1
+declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
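Side note on the immediates checked above: SI/CI encode the s_buffer_load_dword offset in dwords while VI encodes it in bytes, so the same byte offset shows up as 0x4 vs 0x10 for 16 bytes, and 0xff vs 0x3fc for 1020 bytes. As a minimal sketch of the renamed, explicitly overloaded intrinsic outside these tests (hypothetical function name; the <4 x i32> resource signature matches the declares above):

declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1

define amdgpu_ps float @load_const_at_16(<4 x i32> inreg %rsrc) #0 {
  ; read one dword of constant data at byte offset 16 of the resource
  %v = call float @llvm.SI.load.const.v4i32(<4 x i32> %rsrc, i32 16)
  ret float %v
}

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }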
diff --git a/test/CodeGen/AMDGPU/spill-to-smem-m0.ll b/test/CodeGen/AMDGPU/spill-to-smem-m0.ll
new file mode 100644
index 000000000000..c6691e7bb2f8
--- /dev/null
+++ b/test/CodeGen/AMDGPU/spill-to-smem-m0.ll
@@ -0,0 +1,22 @@
+; RUN: llc -O0 -march=amdgcn -mcpu=fiji -amdgpu-spill-sgpr-to-smem=1 -verify-machineinstrs -stop-before=prologepilog < %s | FileCheck %s
+
+; Spill to SMEM clobbers M0. Check that the implicit-def dead operand is present
+; in the pseudo instructions.
+
+; CHECK-LABEL: {{^}}spill_sgpr:
+; CHECK: SI_SPILL_S32_SAVE {{.*}}, implicit-def dead %m0
+; CHECK: SI_SPILL_S32_RESTORE {{.*}}, implicit-def dead %m0
+define amdgpu_kernel void @spill_sgpr(i32 addrspace(1)* %out, i32 %in) #0 {
+ %sgpr = call i32 asm sideeffect "; def $0", "=s" () #0
+ %cmp = icmp eq i32 %in, 0
+ br i1 %cmp, label %bb0, label %ret
+
+bb0:
+ call void asm sideeffect "; use $0", "s"(i32 %sgpr) #0
+ br label %ret
+
+ret:
+ ret void
+}
+
+attributes #0 = { nounwind }
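For context on the new test: when SGPR spills are routed to scratch via SMEM, the expansion of the SI_SPILL_S32_SAVE/RESTORE pseudos uses M0 as a temporary for the scratch offset, so the pseudos must carry the implicit-def dead %m0 operand matched above; otherwise the allocator could keep a value live in M0 across the spill. A sketch of the operand shape being matched, with the stack index and memory operands elided:

  SI_SPILL_S32_SAVE %sgprN, %stack.0, ..., implicit-def dead %m0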
diff --git a/test/CodeGen/AMDGPU/split-smrd.ll b/test/CodeGen/AMDGPU/split-smrd.ll
index cdb1b1e3b503..5fc69067760a 100644
--- a/test/CodeGen/AMDGPU/split-smrd.ll
+++ b/test/CodeGen/AMDGPU/split-smrd.ll
@@ -8,7 +8,7 @@
; GCN: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1
define amdgpu_ps void @split_smrd_add_worklist([34 x <8 x i32>] addrspace(2)* byval %arg) #0 {
bb:
- %tmp = call float @llvm.SI.load.const(<16 x i8> undef, i32 96)
+ %tmp = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 96)
%tmp1 = bitcast float %tmp to i32
br i1 undef, label %bb2, label %bb3
@@ -31,7 +31,7 @@ bb3: ; preds = %bb
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
-declare float @llvm.SI.load.const(<16 x i8>, i32) #1
+declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll b/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
index c9c8583d5e87..ca2366a361fb 100644
--- a/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
+++ b/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
@@ -27,17 +27,17 @@
; GCN: NumVgprs: 256
; GCN: ScratchSize: 1536
-define amdgpu_vs void @main([9 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, [16 x <16 x i8>] addrspace(2)* byval %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 {
+define amdgpu_vs void @main([9 x <4 x i32>] addrspace(2)* byval %arg, [17 x <4 x i32>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, [16 x <4 x i32>] addrspace(2)* byval %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 {
bb:
- %tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg1, i64 0, i64 0
- %tmp11 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, align 16, !tbaa !0
- %tmp12 = call float @llvm.SI.load.const(<16 x i8> %tmp11, i32 0)
- %tmp13 = call float @llvm.SI.load.const(<16 x i8> %tmp11, i32 16)
- %tmp14 = call float @llvm.SI.load.const(<16 x i8> %tmp11, i32 32)
- %tmp15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %arg4, i64 0, i64 0
- %tmp16 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp15, align 16, !tbaa !0
+ %tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg1, i64 0, i64 0
+ %tmp11 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, align 16, !tbaa !0
+ %tmp12 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp11, i32 0)
+ %tmp13 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp11, i32 16)
+ %tmp14 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp11, i32 32)
+ %tmp15 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %arg4, i64 0, i64 0
+ %tmp16 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp15, align 16, !tbaa !0
%tmp17 = add i32 %arg5, %arg7
- %tmp16.cast = bitcast <16 x i8> %tmp16 to <4 x i32>
+ %tmp16.cast = bitcast <4 x i32> %tmp16 to <4 x i32>
%tmp18 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp16.cast, i32 %tmp17, i32 0, i1 false, i1 false)
%tmp19 = extractelement <4 x float> %tmp18, i32 0
%tmp20 = extractelement <4 x float> %tmp18, i32 1
@@ -488,7 +488,7 @@ bb157: ; preds = %bb24
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-declare float @llvm.SI.load.const(<16 x i8>, i32) #1
+declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #2
attributes #0 = { nounwind }
diff --git a/test/CodeGen/ARM/2012-10-18-PR14099-ByvalFrameAddress.ll b/test/CodeGen/ARM/2012-10-18-PR14099-ByvalFrameAddress.ll
index ff3b7e16188e..fefe16747f10 100644
--- a/test/CodeGen/ARM/2012-10-18-PR14099-ByvalFrameAddress.ll
+++ b/test/CodeGen/ARM/2012-10-18-PR14099-ByvalFrameAddress.ll
@@ -24,7 +24,7 @@ entry:
; CHECK-LABEL: caller:
define void @caller() {
-; CHECK: ldm r0, {r1, r2, r3}
+; CHECK: ldm r{{[0-9]+}}, {r1, r2, r3}
call void @t(i32 0, %struct.s* @v);
ret void
}
diff --git a/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir b/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir
index 16642d85d9cf..6a1da0dfe85f 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir
+++ b/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir
@@ -42,6 +42,9 @@
define void @test_constant_imm() { ret void }
define void @test_constant_cimm() { ret void }
+ define void @test_select_s32() { ret void }
+ define void @test_select_ptr() { ret void }
+
define void @test_soft_fp_double() #0 { ret void }
attributes #0 = { "target-features"="+vfp2,-neonfp" }
@@ -1100,6 +1103,76 @@ body: |
BX_RET 14, _, implicit %r0
...
---
+name: test_select_s32
+# CHECK-LABEL: name: test_select_s32
+legalized: true
+regBankSelected: true
+selected: false
+# CHECK: selected: true
+registers:
+ - { id: 0, class: gprb }
+ - { id: 1, class: gprb }
+ - { id: 2, class: gprb }
+ - { id: 3, class: gprb }
+body: |
+ bb.0:
+ liveins: %r0, %r1, %r2
+
+ %0(s32) = COPY %r0
+ ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
+
+ %1(s32) = COPY %r1
+ ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
+
+ %2(s1) = COPY %r2
+ ; CHECK: [[VREGC:%[0-9]+]] = COPY %r2
+
+ %3(s32) = G_SELECT %2(s1), %0, %1
+ ; CHECK: CMPri [[VREGC]], 0, 14, _, implicit-def %cpsr
+ ; CHECK: [[RES:%[0-9]+]] = MOVCCr [[VREGX]], [[VREGY]], 0, %cpsr
+
+ %r0 = COPY %3(s32)
+ ; CHECK: %r0 = COPY [[RES]]
+
+ BX_RET 14, _, implicit %r0
+ ; CHECK: BX_RET 14, _, implicit %r0
+...
+---
+name: test_select_ptr
+# CHECK-LABEL: name: test_select_ptr
+legalized: true
+regBankSelected: true
+selected: false
+# CHECK: selected: true
+registers:
+ - { id: 0, class: gprb }
+ - { id: 1, class: gprb }
+ - { id: 2, class: gprb }
+ - { id: 3, class: gprb }
+body: |
+ bb.0:
+ liveins: %r0, %r1, %r2
+
+ %0(p0) = COPY %r0
+ ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
+
+ %1(p0) = COPY %r1
+ ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
+
+ %2(s1) = COPY %r2
+ ; CHECK: [[VREGC:%[0-9]+]] = COPY %r2
+
+ %3(p0) = G_SELECT %2(s1), %0, %1
+ ; CHECK: CMPri [[VREGC]], 0, 14, _, implicit-def %cpsr
+ ; CHECK: [[RES:%[0-9]+]] = MOVCCr [[VREGX]], [[VREGY]], 0, %cpsr
+
+ %r0 = COPY %3(p0)
+ ; CHECK: %r0 = COPY [[RES]]
+
+ BX_RET 14, _, implicit %r0
+ ; CHECK: BX_RET 14, _, implicit %r0
+...
+---
name: test_soft_fp_double
# CHECK-LABEL: name: test_soft_fp_double
legalized: true
diff --git a/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll b/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
index 0ff8d52e94c6..f50916e4b474 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
+++ b/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
@@ -910,7 +910,7 @@ define arm_aapcscc {i32, i32} @test_structs({i32, i32} %x) {
define i32 @test_shufflevector_s32_v2s32(i32 %arg) {
; CHECK-LABEL: name: test_shufflevector_s32_v2s32
; CHECK: [[ARG:%[0-9]+]](s32) = COPY %r0
-; CHECK-DAG: [[UNDEF:%[0-9]+]](s32) = IMPLICIT_DEF
+; CHECK-DAG: [[UNDEF:%[0-9]+]](s32) = G_IMPLICIT_DEF
; CHECK-DAG: [[C0:%[0-9]+]](s32) = G_CONSTANT i32 0
; CHECK-DAG: [[MASK:%[0-9]+]](<2 x s32>) = G_MERGE_VALUES [[C0]](s32), [[C0]](s32)
; CHECK: [[VEC:%[0-9]+]](<2 x s32>) = G_SHUFFLE_VECTOR [[ARG]](s32), [[UNDEF]], [[MASK]](<2 x s32>)
@@ -925,7 +925,7 @@ define i32 @test_shufflevector_v2s32_v3s32(i32 %arg1, i32 %arg2) {
; CHECK-LABEL: name: test_shufflevector_v2s32_v3s32
; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %r0
; CHECK: [[ARG2:%[0-9]+]](s32) = COPY %r1
-; CHECK-DAG: [[UNDEF:%[0-9]+]](<2 x s32>) = IMPLICIT_DEF
+; CHECK-DAG: [[UNDEF:%[0-9]+]](<2 x s32>) = G_IMPLICIT_DEF
; CHECK-DAG: [[C0:%[0-9]+]](s32) = G_CONSTANT i32 0
; CHECK-DAG: [[C1:%[0-9]+]](s32) = G_CONSTANT i32 1
; CHECK-DAG: [[MASK:%[0-9]+]](<3 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C0]](s32), [[C1]](s32)
@@ -945,7 +945,7 @@ define i32 @test_shufflevector_v2s32_v4s32(i32 %arg1, i32 %arg2) {
; CHECK-LABEL: name: test_shufflevector_v2s32_v4s32
; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %r0
; CHECK: [[ARG2:%[0-9]+]](s32) = COPY %r1
-; CHECK-DAG: [[UNDEF:%[0-9]+]](<2 x s32>) = IMPLICIT_DEF
+; CHECK-DAG: [[UNDEF:%[0-9]+]](<2 x s32>) = G_IMPLICIT_DEF
; CHECK-DAG: [[C0:%[0-9]+]](s32) = G_CONSTANT i32 0
; CHECK-DAG: [[C1:%[0-9]+]](s32) = G_CONSTANT i32 1
; CHECK-DAG: [[MASK:%[0-9]+]](<4 x s32>) = G_MERGE_VALUES [[C0]](s32), [[C0]](s32), [[C0]](s32), [[C0]](s32)
@@ -966,7 +966,7 @@ define i32 @test_shufflevector_v4s32_v2s32(i32 %arg1, i32 %arg2, i32 %arg3, i32
; CHECK: [[ARG2:%[0-9]+]](s32) = COPY %r1
; CHECK: [[ARG3:%[0-9]+]](s32) = COPY %r2
; CHECK: [[ARG4:%[0-9]+]](s32) = COPY %r3
-; CHECK-DAG: [[UNDEF:%[0-9]+]](<4 x s32>) = IMPLICIT_DEF
+; CHECK-DAG: [[UNDEF:%[0-9]+]](<4 x s32>) = G_IMPLICIT_DEF
; CHECK-DAG: [[C0:%[0-9]+]](s32) = G_CONSTANT i32 0
; CHECK-DAG: [[C1:%[0-9]+]](s32) = G_CONSTANT i32 1
; CHECK-DAG: [[C2:%[0-9]+]](s32) = G_CONSTANT i32 2
@@ -1009,7 +1009,7 @@ define i32 @test_constantstruct_v2s32_s32_s32() {
; CHECK: [[VEC:%[0-9]+]](<2 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C2]](s32)
; CHECK: [[C3:%[0-9]+]](s32) = G_CONSTANT i32 3
; CHECK: [[C4:%[0-9]+]](s32) = G_CONSTANT i32 4
-; CHECK: [[C5:%[0-9]+]](s128) = IMPLICIT_DEF
+; CHECK: [[C5:%[0-9]+]](s128) = G_IMPLICIT_DEF
; CHECK: [[C6:%[0-9]+]](s128) = G_INSERT [[C5]], [[VEC]](<2 x s32>), 0
; CHECK: [[C7:%[0-9]+]](s128) = G_INSERT [[C6]], [[C3]](s32), 64
; CHECK: [[C8:%[0-9]+]](s128) = G_INSERT [[C7]], [[C4]](s32), 96
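The IMPLICIT_DEF -> G_IMPLICIT_DEF updates throughout this file follow the introduction of a dedicated generic opcode for undef values: unlike the target-level IMPLICIT_DEF, G_IMPLICIT_DEF carries an LLT (s32, <2 x s32>, s128 in the checks above), so later GlobalISel phases can legalize and bank-assign it like any other generic instruction. Any translated use of undef produces one; a minimal sketch (hypothetical function name):

define i32 @use_of_undef(i32 %x) {
  ; the undef vector operand becomes a G_IMPLICIT_DEF in the translator
  %v = insertelement <2 x i32> undef, i32 %x, i32 0
  %r = extractelement <2 x i32> %v, i32 0
  ret i32 %r
}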
diff --git a/test/CodeGen/ARM/GlobalISel/arm-isel.ll b/test/CodeGen/ARM/GlobalISel/arm-isel.ll
index 76fb39ecea01..4c498ff6ca9b 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-isel.ll
+++ b/test/CodeGen/ARM/GlobalISel/arm-isel.ll
@@ -400,3 +400,23 @@ entry:
%r = zext i1 %v to i32
ret i32 %r
}
+
+define arm_aapcscc i32 @test_select_i32(i32 %a, i32 %b, i1 %cond) {
+; CHECK-LABEL: test_select_i32
+; CHECK: cmp r2, #0
+; CHECK: moveq r0, r1
+; CHECK: bx lr
+entry:
+ %r = select i1 %cond, i32 %a, i32 %b
+ ret i32 %r
+}
+
+define arm_aapcscc i32* @test_select_ptr(i32* %a, i32* %b, i1 %cond) {
+; CHECK-LABEL: test_select_ptr
+; CHECK: cmp r2, #0
+; CHECK: moveq r0, r1
+; CHECK: bx lr
+entry:
+ %r = select i1 %cond, i32* %a, i32* %b
+ ret i32* %r
+}
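These end-to-end checks tie together the G_SELECT support added in the MIR tests around them: the IR select is translated to G_SELECT, left alone by the legalizer (s32 and p0 are legal, per arm-legalizer.mir below), mapped to the gprb bank, and selected to a compare plus predicated move, which is the cmp/moveq pair matched here. Roughly, under AAPCS with %cond in r2:

  %r = select i1 %cond, i32 %a, i32 %b
  ; cmp   r2, #0      @ test the i1 condition
  ; moveq r0, r1      @ result = %b when the condition is false; %a is already in r0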
diff --git a/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir b/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir
index 2def31eb1592..bf759728c365 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir
+++ b/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir
@@ -39,6 +39,9 @@
define void @test_icmp_s16() { ret void }
define void @test_icmp_s32() { ret void }
+ define void @test_select_s32() { ret void }
+ define void @test_select_ptr() { ret void }
+
define void @test_fadd_s32() #0 { ret void }
define void @test_fadd_s64() #0 { ret void }
@@ -775,6 +778,58 @@ body: |
BX_RET 14, _, implicit %r0
...
---
+name: test_select_s32
+# CHECK-LABEL: name: test_select_s32
+legalized: false
+# CHECK: legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+ - { id: 3, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1, %r2
+
+ %0(s32) = COPY %r0
+ %1(s32) = COPY %r1
+ %2(s1) = COPY %r2
+ %3(s32) = G_SELECT %2(s1), %0, %1
+ ; G_SELECT with s32 is legal, so we should find it unchanged in the output
+ ; CHECK: {{%[0-9]+}}(s32) = G_SELECT {{%[0-9]+}}(s1), {{%[0-9]+}}, {{%[0-9]+}}
+ %r0 = COPY %3(s32)
+ BX_RET 14, _, implicit %r0
+...
+---
+name: test_select_ptr
+# CHECK-LABEL: name: test_select_ptr
+legalized: false
+# CHECK: legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+ - { id: 3, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1, %r2
+
+ %0(p0) = COPY %r0
+ %1(p0) = COPY %r1
+ %2(s1) = COPY %r2
+ %3(p0) = G_SELECT %2(s1), %0, %1
+ ; G_SELECT with p0 is legal, so we should find it unchanged in the output
+ ; CHECK: {{%[0-9]+}}(p0) = G_SELECT {{%[0-9]+}}(s1), {{%[0-9]+}}, {{%[0-9]+}}
+ %r0 = COPY %3(p0)
+ BX_RET 14, _, implicit %r0
+...
+---
name: test_fadd_s32
# CHECK-LABEL: name: test_fadd_s32
legalized: false
diff --git a/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir b/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir
index d97dd60bac22..d3b93e488ef4 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir
+++ b/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir
@@ -36,6 +36,8 @@
define void @test_icmp_eq_s32() { ret void }
+ define void @test_select_s32() { ret void }
+
define void @test_fadd_s32() #0 { ret void }
define void @test_fadd_s64() #0 { ret void }
@@ -741,6 +743,35 @@ body: |
...
---
+name: test_select_s32
+# CHECK-LABEL: name: test_select_s32
+legalized: true
+regBankSelected: false
+selected: false
+# CHECK: registers:
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
+# CHECK: - { id: 3, class: gprb, preferred-register: '' }
+
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+ - { id: 3, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1, %r2
+
+ %0(s32) = COPY %r0
+ %1(s32) = COPY %r1
+ %2(s1) = COPY %r2
+ %3(s32) = G_SELECT %2(s1), %0, %1
+ %r0 = COPY %3(s32)
+ BX_RET 14, _, implicit %r0
+
+...
+---
name: test_fadd_s32
# CHECK-LABEL: name: test_fadd_s32
legalized: true
diff --git a/test/CodeGen/ARM/Windows/chkstk-movw-movt-isel.ll b/test/CodeGen/ARM/Windows/chkstk-movw-movt-isel.ll
index d303e9da8604..a73a7cf8414f 100644
--- a/test/CodeGen/ARM/Windows/chkstk-movw-movt-isel.ll
+++ b/test/CodeGen/ARM/Windows/chkstk-movw-movt-isel.ll
@@ -19,9 +19,9 @@ entry:
; CHECK-LABEL: isel
; CHECK: push {r4, r5}
-; CHECK: movw r4, #{{\d*}}
; CHECK: movw r12, #0
; CHECK: movt r12, #0
+; CHECK: movw r4, #{{\d*}}
; CHECK: blx r12
; CHECK: sub.w sp, sp, r4
diff --git a/test/CodeGen/ARM/Windows/no-arm-mode.ll b/test/CodeGen/ARM/Windows/no-arm-mode.ll
deleted file mode 100644
index 30353640a4cc..000000000000
--- a/test/CodeGen/ARM/Windows/no-arm-mode.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: not llc -mtriple=armv7-windows-itanium -mcpu=cortex-a9 -o /dev/null %s 2>&1 \
-; RUN: | FileCheck %s -check-prefix CHECK-WIN
-
-; RUN: not llc -mtriple=armv7-windows-gnu -mcpu=cortex-a9 -o /dev/null %s 2>&1 \
-; RUN: | FileCheck %s -check-prefix CHECK-GNU
-
-; CHECK-WIN: does not support ARM mode execution
-
-; CHECK-GNU: does not support ARM mode execution
-
diff --git a/test/CodeGen/ARM/Windows/tls.ll b/test/CodeGen/ARM/Windows/tls.ll
index 947e29dfa65c..2c38ad3e58f7 100644
--- a/test/CodeGen/ARM/Windows/tls.ll
+++ b/test/CodeGen/ARM/Windows/tls.ll
@@ -15,11 +15,11 @@ define i32 @f() {
; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2
+; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index
; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index
; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]]
-; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2]
; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]]
@@ -36,11 +36,11 @@ define i32 @e() {
; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2
+; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index
; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index
; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]]
-; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2]
; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]]
@@ -57,11 +57,11 @@ define i32 @d() {
; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2
+; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index
; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index
; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]]
-; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2]
; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]]
@@ -78,11 +78,11 @@ define i32 @c() {
; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2
+; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index
; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index
; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]]
-; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2]
; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]]
@@ -99,11 +99,11 @@ define i32 @b() {
; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2
+; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index
; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index
; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]]
-; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2]
; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]]
@@ -120,11 +120,11 @@ define i16 @a() {
; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2
+; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index
; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index
; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]]
-; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2]
; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]]
@@ -141,11 +141,11 @@ define i8 @Z() {
; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2
+; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index
; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index
; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]]
-; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2]
; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]]
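The scheduling change accepted throughout this file leaves the underlying Windows-on-ARM TLS sequence intact: read the TEB from CP15 (mrc p15, #0, rN, c13, c0, #2), load ThreadLocalStoragePointer from TEB offset 0x2C (the #44 above), index it by _tls_index to find this module's TLS block, then add the variable's SECREL offset from the constant pool; only the order of the independent loads moved. In effect:

  slot = TEB->ThreadLocalStoragePointer[_tls_index] + secrel32(var)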
diff --git a/test/CodeGen/ARM/alloca.ll b/test/CodeGen/ARM/alloca.ll
index 4a0835a2c0ca..82b6b11ea4b2 100644
--- a/test/CodeGen/ARM/alloca.ll
+++ b/test/CodeGen/ARM/alloca.ll
@@ -2,11 +2,11 @@
define void @f(i32 %a) {
entry:
-; CHECK: add r11, sp, #4
+; CHECK: add r11, sp, #8
%tmp = alloca i8, i32 %a ; <i8*> [#uses=1]
call void @g( i8* %tmp, i32 %a, i32 1, i32 2, i32 3 )
ret void
-; CHECK: sub sp, r11, #4
+; CHECK: sub sp, r11, #8
}
declare void @g(i8*, i32, i32, i32, i32)
diff --git a/test/CodeGen/ARM/arg-copy-elide.ll b/test/CodeGen/ARM/arg-copy-elide.ll
index 739b560b0833..625b57073406 100644
--- a/test/CodeGen/ARM/arg-copy-elide.ll
+++ b/test/CodeGen/ARM/arg-copy-elide.ll
@@ -31,8 +31,8 @@ entry:
; CHECK-LABEL: use_arg:
; CHECK: push {[[csr:[^ ]*]], lr}
-; CHECK: ldr [[csr]], [sp, #8]
; CHECK: add r0, sp, #8
+; CHECK: ldr [[csr]], [sp, #8]
; CHECK: bl addrof_i32
; CHECK: mov r0, [[csr]]
; CHECK: pop {[[csr]], pc}
@@ -50,8 +50,8 @@ entry:
; CHECK: push {r4, r5, r11, lr}
; CHECK: sub sp, sp, #8
; CHECK: ldr r4, [sp, #28]
-; CHECK: ldr r5, [sp, #24]
; CHECK: mov r0, sp
+; CHECK: ldr r5, [sp, #24]
; CHECK: str r4, [sp, #4]
; CHECK: str r5, [sp]
; CHECK: bl addrof_i64
diff --git a/test/CodeGen/ARM/arm-abi-attr.ll b/test/CodeGen/ARM/arm-abi-attr.ll
index 61cb6cefa170..f05e6e788d6f 100644
--- a/test/CodeGen/ARM/arm-abi-attr.ll
+++ b/test/CodeGen/ARM/arm-abi-attr.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm-linux-gnu < %s | FileCheck %s --check-prefix=APCS
+; RUN: llc -mtriple=arm-linux-gnu < %s | FileCheck %s --check-prefix=AAPCS
; RUN: llc -mtriple=arm-linux-gnu -target-abi=apcs < %s | \
; RUN: FileCheck %s --check-prefix=APCS
; RUN: llc -mtriple=arm-linux-gnueabi -target-abi=apcs < %s | \
diff --git a/test/CodeGen/ARM/arm-and-tst-peephole.ll b/test/CodeGen/ARM/arm-and-tst-peephole.ll
index 31691e9468c9..af05392c98a5 100644
--- a/test/CodeGen/ARM/arm-and-tst-peephole.ll
+++ b/test/CodeGen/ARM/arm-and-tst-peephole.ll
@@ -171,8 +171,8 @@ define i32 @test_tst_assessment(i32 %a, i32 %b) {
;
; V8-LABEL: test_tst_assessment:
; V8: @ BB#0:
-; V8-NEXT: lsls r1, r1, #31
; V8-NEXT: and r0, r0, #1
+; V8-NEXT: lsls r1, r1, #31
; V8-NEXT: it ne
; V8-NEXT: subne r0, #1
; V8-NEXT: bx lr
diff --git a/test/CodeGen/ARM/arm-position-independence-jump-table.ll b/test/CodeGen/ARM/arm-position-independence-jump-table.ll
index 790b4f41776e..afc2d38be18c 100644
--- a/test/CodeGen/ARM/arm-position-independence-jump-table.ll
+++ b/test/CodeGen/ARM/arm-position-independence-jump-table.ll
@@ -47,8 +47,8 @@ lab4:
; CHECK-LABEL: jump_table:
-; ARM: lsl r[[R_TAB_IDX:[0-9]+]], r{{[0-9]+}}, #2
; ARM: adr r[[R_TAB_BASE:[0-9]+]], [[LJTI:\.LJTI[0-9]+_[0-9]+]]
+; ARM: lsl r[[R_TAB_IDX:[0-9]+]], r{{[0-9]+}}, #2
; ARM_ABS: ldr pc, [r[[R_TAB_IDX]], r[[R_TAB_BASE]]]
; ARM_PC: ldr r[[R_OFFSET:[0-9]+]], [r[[R_TAB_IDX]], r[[R_TAB_BASE]]]
; ARM_PC: add pc, r[[R_OFFSET]], r[[R_TAB_BASE]]
diff --git a/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll b/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll
index 1434f40137b5..7007018dd0b2 100644
--- a/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll
+++ b/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll
@@ -45,15 +45,19 @@ target triple = "armv7--linux-gnueabi"
; CHECK: @ %while.cond2
; CHECK: add
; CHECK-NEXT: cmp r{{[0-1]+}}, #1
-; Set the return value.
-; CHECK-NEXT: moveq r0,
-; CHECK-NEXT: popeq
+; Jump to the return block
+; CHECK-NEXT: beq [[RETURN_BLOCK:[.a-zA-Z0-9_]+]]
;
; Use the back edge to check we get the label of the loop right.
; This is to make sure we check the right loop pattern.
; CHECK: @ %while.body24.land.rhs14_crit_edge
; CHECK: cmp r{{[0-9]+}}, #192
; CHECK-NEXT: bhs [[LOOP_HEADER]]
+;
+; CHECK: [[RETURN_BLOCK]]:
+; Set the return value.
+; CHECK-NEXT: mov r0,
+; CHECK-NEXT: pop
define fastcc i8* @wrongUseOfPostDominate(i8* readonly %s, i32 %off, i8* readnone %lim) {
entry:
%cmp = icmp sgt i32 %off, -1
diff --git a/test/CodeGen/ARM/atomic-cmpxchg.ll b/test/CodeGen/ARM/atomic-cmpxchg.ll
index e026bae361e1..a136e44fc196 100644
--- a/test/CodeGen/ARM/atomic-cmpxchg.ll
+++ b/test/CodeGen/ARM/atomic-cmpxchg.ll
@@ -70,8 +70,8 @@ entry:
; CHECK-ARMV7-NEXT: ldrexb [[SUCCESS]], [r0]
; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], r1
; CHECK-ARMV7-NEXT: beq [[HEAD]]
-; CHECK-ARMV7-NEXT: clrex
; CHECK-ARMV7-NEXT: mov r0, #0
+; CHECK-ARMV7-NEXT: clrex
; CHECK-ARMV7-NEXT: bx lr
; CHECK-THUMBV7-LABEL: test_cmpxchg_res_i8:
@@ -88,6 +88,6 @@ entry:
; CHECK-THUMBV7-NEXT: ldrexb [[LD:r[0-9]+]], [r0]
; CHECK-THUMBV7-NEXT: cmp [[LD]], [[DESIRED]]
; CHECK-THUMBV7-NEXT: beq [[TRYST:.LBB[0-9_]+]]
-; CHECK-THUMBV7-NEXT: clrex
; CHECK-THUMBV7-NEXT: movs r0, #0
+; CHECK-THUMBV7-NEXT: clrex
; CHECK-THUMBV7-NEXT: bx lr
diff --git a/test/CodeGen/ARM/bool-ext-inc.ll b/test/CodeGen/ARM/bool-ext-inc.ll
index 5f2ba8b109a7..ca9c9ab079db 100644
--- a/test/CodeGen/ARM/bool-ext-inc.ll
+++ b/test/CodeGen/ARM/bool-ext-inc.ll
@@ -16,8 +16,8 @@ define <4 x i32> @sext_inc_vec(<4 x i1> %x) {
; CHECK: @ BB#0:
; CHECK-NEXT: vmov.i16 d16, #0x1
; CHECK-NEXT: vmov d17, r0, r1
-; CHECK-NEXT: vmov.i32 q9, #0x1
; CHECK-NEXT: veor d16, d17, d16
+; CHECK-NEXT: vmov.i32 q9, #0x1
; CHECK-NEXT: vmovl.u16 q8, d16
; CHECK-NEXT: vand q8, q8, q9
; CHECK-NEXT: vmov r0, r1, d16
@@ -31,13 +31,13 @@ define <4 x i32> @sext_inc_vec(<4 x i1> %x) {
define <4 x i32> @cmpgt_sext_inc_vec(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: cmpgt_sext_inc_vec:
; CHECK: @ BB#0:
-; CHECK-NEXT: mov r12, sp
-; CHECK-NEXT: vmov d19, r2, r3
-; CHECK-NEXT: vmov.i32 q10, #0x1
-; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
-; CHECK-NEXT: vmov d18, r0, r1
-; CHECK-NEXT: vcge.s32 q8, q8, q9
-; CHECK-NEXT: vand q8, q8, q10
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: mov r0, sp
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vcge.s32 q8, q9, q8
+; CHECK-NEXT: vmov.i32 q9, #0x1
+; CHECK-NEXT: vand q8, q8, q9
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: mov pc, lr
@@ -50,13 +50,13 @@ define <4 x i32> @cmpgt_sext_inc_vec(<4 x i32> %x, <4 x i32> %y) {
define <4 x i32> @cmpne_sext_inc_vec(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: cmpne_sext_inc_vec:
; CHECK: @ BB#0:
+; CHECK-NEXT: vmov d17, r2, r3
; CHECK-NEXT: mov r12, sp
-; CHECK-NEXT: vmov d19, r2, r3
-; CHECK-NEXT: vmov.i32 q10, #0x1
-; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
-; CHECK-NEXT: vmov d18, r0, r1
-; CHECK-NEXT: vceq.i32 q8, q9, q8
-; CHECK-NEXT: vand q8, q8, q10
+; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vceq.i32 q8, q8, q9
+; CHECK-NEXT: vmov.i32 q9, #0x1
+; CHECK-NEXT: vand q8, q8, q9
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: mov pc, lr
diff --git a/test/CodeGen/ARM/cmpxchg-O0-be.ll b/test/CodeGen/ARM/cmpxchg-O0-be.ll
new file mode 100644
index 000000000000..9e9a93e19b6a
--- /dev/null
+++ b/test/CodeGen/ARM/cmpxchg-O0-be.ll
@@ -0,0 +1,26 @@
+; RUN: llc -verify-machineinstrs -mtriple=armebv8-linux-gnueabi -O0 %s -o - | FileCheck %s
+
+@x = global i64 10, align 8
+@y = global i64 20, align 8
+@z = global i64 20, align 8
+
+; CHECK-LABEL: main:
+; CHECK: ldr [[R2:r[0-9]+]], {{\[}}[[R1:r[0-9]+]]{{\]}}
+; CHECK-NEXT: ldr [[R1]], {{\[}}[[R1]], #4]
+; CHECK: mov [[R4:r[0-9]+]], [[R2]]
+; CHECK-NEXT: mov [[R5:r[0-9]+]], [[R1]]
+; CHECK: ldr [[R2]], {{\[}}[[R1]]{{\]}}
+; CHECK-NEXT: ldr [[R1]], {{\[}}[[R1]], #4]
+; CHECK: mov [[R6:r[0-9]+]], [[R2]]
+; CHECK-NEXT: mov [[R7:r[0-9]+]], [[R1]]
+
+define arm_aapcs_vfpcc i32 @main() #0 {
+entry:
+ %retval = alloca i32, align 4
+ store i32 0, i32* %retval, align 4
+ %0 = load i64, i64* @z, align 8
+ %1 = load i64, i64* @x, align 8
+ %2 = cmpxchg i64* @y, i64 %0, i64 %1 seq_cst seq_cst
+ %3 = extractvalue { i64, i1 } %2, 1
+ ret i32 0
+}
diff --git a/test/CodeGen/ARM/cmpxchg-weak.ll b/test/CodeGen/ARM/cmpxchg-weak.ll
index 0d5681aafbcb..29d97fef0606 100644
--- a/test/CodeGen/ARM/cmpxchg-weak.ll
+++ b/test/CodeGen/ARM/cmpxchg-weak.ll
@@ -47,12 +47,12 @@ define i1 @test_cmpxchg_weak_to_bool(i32, i32 *%addr, i32 %desired, i32 %new) {
; CHECK-NEXT: strex [[SUCCESS:r[0-9]+]], r3, [r1]
; CHECK-NEXT: cmp [[SUCCESS]], #0
; CHECK-NEXT: bxne lr
-; CHECK-NEXT: dmb ish
; CHECK-NEXT: mov r0, #1
+; CHECK-NEXT: dmb ish
; CHECK-NEXT: bx lr
; CHECK-NEXT: [[LDFAILBB]]:
-; CHECK-NEXT: clrex
; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: clrex
; CHECK-NEXT: bx lr
ret i1 %success
diff --git a/test/CodeGen/ARM/code-placement.ll b/test/CodeGen/ARM/code-placement.ll
index b9d90249e9f6..b381aecc69a6 100644
--- a/test/CodeGen/ARM/code-placement.ll
+++ b/test/CodeGen/ARM/code-placement.ll
@@ -38,9 +38,8 @@ entry:
br i1 %0, label %bb5, label %bb.nph15
bb1: ; preds = %bb2.preheader, %bb1
-; CHECK: LBB1_[[BB3:.]]: @ %bb3
; CHECK: LBB1_[[PREHDR:.]]: @ %bb2.preheader
-; CHECK: blt LBB1_[[BB3]]
+; CHECK: blt LBB1_[[BB3:.]]
%indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %bb2.preheader ] ; <i32> [#uses=2]
%sum.08 = phi i32 [ %2, %bb1 ], [ %sum.110, %bb2.preheader ] ; <i32> [#uses=1]
%tmp17 = sub i32 %i.07, %indvar ; <i32> [#uses=1]
@@ -54,7 +53,7 @@ bb1: ; preds = %bb2.preheader, %bb1
bb3: ; preds = %bb1, %bb2.preheader
; CHECK: LBB1_[[BB1:.]]: @ %bb1
; CHECK: bne LBB1_[[BB1]]
-; CHECK: b LBB1_[[BB3]]
+; CHECK: LBB1_[[BB3]]: @ %bb3
%sum.0.lcssa = phi i32 [ %sum.110, %bb2.preheader ], [ %2, %bb1 ] ; <i32> [#uses=2]
%3 = add i32 %pass.011, 1 ; <i32> [#uses=2]
%exitcond18 = icmp eq i32 %3, %passes ; <i1> [#uses=1]
diff --git a/test/CodeGen/ARM/constantfp.ll b/test/CodeGen/ARM/constantfp.ll
index 0b431f47f50b..f825061d1169 100644
--- a/test/CodeGen/ARM/constantfp.ll
+++ b/test/CodeGen/ARM/constantfp.ll
@@ -5,25 +5,25 @@
; RUN: llc -mtriple=thumbv7m -mcpu=cortex-m4 %s -o - \
; RUN: | FileCheck --check-prefix=CHECK-NO-XO %s
-; RUN: llc -mtriple=thumbv7m -arm-execute-only -mcpu=cortex-m4 %s -o - \
+; RUN: llc -mtriple=thumbv7m -mattr=+execute-only -mcpu=cortex-m4 %s -o - \
; RUN: | FileCheck --check-prefix=CHECK-XO-FLOAT --check-prefix=CHECK-XO-DOUBLE %s
-; RUN: llc -mtriple=thumbv7meb -arm-execute-only -mcpu=cortex-m4 %s -o - \
+; RUN: llc -mtriple=thumbv7meb -mattr=+execute-only -mcpu=cortex-m4 %s -o - \
; RUN: | FileCheck --check-prefix=CHECK-XO-FLOAT --check-prefix=CHECK-XO-DOUBLE-BE %s
-; RUN: llc -mtriple=thumbv7m -arm-execute-only -mcpu=cortex-m4 -relocation-model=ropi %s -o - \
+; RUN: llc -mtriple=thumbv7m -mattr=+execute-only -mcpu=cortex-m4 -relocation-model=ropi %s -o - \
; RUN: | FileCheck --check-prefix=CHECK-XO-ROPI %s
; RUN: llc -mtriple=thumbv8m.main -mattr=fp-armv8 %s -o - \
; RUN: | FileCheck --check-prefix=CHECK-NO-XO %s
-; RUN: llc -mtriple=thumbv8m.main -arm-execute-only -mattr=fp-armv8 %s -o - \
+; RUN: llc -mtriple=thumbv8m.main -mattr=+execute-only -mattr=fp-armv8 %s -o - \
; RUN: | FileCheck --check-prefix=CHECK-XO-FLOAT --check-prefix=CHECK-XO-DOUBLE %s
-; RUN: llc -mtriple=thumbv8m.maineb -arm-execute-only -mattr=fp-armv8 %s -o - \
+; RUN: llc -mtriple=thumbv8m.maineb -mattr=+execute-only -mattr=fp-armv8 %s -o - \
; RUN: | FileCheck --check-prefix=CHECK-XO-FLOAT --check-prefix=CHECK-XO-DOUBLE-BE %s
-; RUN: llc -mtriple=thumbv8m.main -arm-execute-only -mattr=fp-armv8 -relocation-model=ropi %s -o - \
+; RUN: llc -mtriple=thumbv8m.main -mattr=+execute-only -mattr=fp-armv8 -relocation-model=ropi %s -o - \
; RUN: | FileCheck --check-prefix=CHECK-XO-ROPI %s
define arm_aapcs_vfpcc float @test_vmov_f32() {
diff --git a/test/CodeGen/ARM/cortex-a57-misched-basic.ll b/test/CodeGen/ARM/cortex-a57-misched-basic.ll
index 2ec50b9d3343..cfbef7bd4293 100644
--- a/test/CodeGen/ARM/cortex-a57-misched-basic.ll
+++ b/test/CodeGen/ARM/cortex-a57-misched-basic.ll
@@ -8,14 +8,14 @@
; CHECK: ********** MI Scheduling **********
; CHECK: foo:BB#0 entry
-; GENERIC: SDIV
+; GENERIC: LDRi12
; GENERIC: Latency : 1
; GENERIC: EORrr
; GENERIC: Latency : 1
-; GENERIC: LDRi12
-; GENERIC: Latency : 4
; GENERIC: ADDrr
; GENERIC: Latency : 1
+; GENERIC: SDIV
+; GENERIC: Latency : 0
; GENERIC: SUBrr
; GENERIC: Latency : 1
diff --git a/test/CodeGen/ARM/cortexr52-misched-basic.ll b/test/CodeGen/ARM/cortexr52-misched-basic.ll
index eb2c29a3a5d1..614157eb0e10 100644
--- a/test/CodeGen/ARM/cortexr52-misched-basic.ll
+++ b/test/CodeGen/ARM/cortexr52-misched-basic.ll
@@ -12,10 +12,10 @@
; GENERIC: Latency : 1
; R52_SCHED: Latency : 3
; CHECK: MLA
-; GENERIC: Latency : 1
+; GENERIC: Latency : 2
; R52_SCHED: Latency : 4
; CHECK: SDIV
-; GENERIC: Latency : 1
+; GENERIC: Latency : 0
; R52_SCHED: Latency : 8
; CHECK: ** Final schedule for BB#0 ***
; GENERIC: EORrr
diff --git a/test/CodeGen/ARM/ctor_order.ll b/test/CodeGen/ARM/ctor_order.ll
index 7fcc8cba0c8f..0cf87d7a97b7 100644
--- a/test/CodeGen/ARM/ctor_order.ll
+++ b/test/CodeGen/ARM/ctor_order.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=DARWIN
; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=dynamic-no-pic | FileCheck %s --check-prefix=DARWIN
; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=static | FileCheck %s -check-prefix=DARWIN-STATIC
-; RUN: llc < %s -mtriple=arm-linux-gnu | FileCheck %s -check-prefix=ELF
+; RUN: llc < %s -mtriple=arm-linux-gnu -target-abi=apcs | FileCheck %s -check-prefix=ELF
; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=GNUEABI
; DARWIN: .section __DATA,__mod_init_func,mod_init_funcs
diff --git a/test/CodeGen/ARM/ctors_dtors.ll b/test/CodeGen/ARM/ctors_dtors.ll
index fb94626ab7dd..c097ade3c846 100644
--- a/test/CodeGen/ARM/ctors_dtors.ll
+++ b/test/CodeGen/ARM/ctors_dtors.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=DARWIN
-; RUN: llc < %s -mtriple=arm-linux-gnu | FileCheck %s -check-prefix=ELF
+; RUN: llc < %s -mtriple=arm-linux-gnu -target-abi=apcs | FileCheck %s -check-prefix=ELF
; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=GNUEABI
; DARWIN: .section __DATA,__mod_init_func,mod_init_funcs
diff --git a/test/CodeGen/ARM/cttz.ll b/test/CodeGen/ARM/cttz.ll
index dacfca505931..cba7be583310 100644
--- a/test/CodeGen/ARM/cttz.ll
+++ b/test/CodeGen/ARM/cttz.ll
@@ -40,8 +40,8 @@ define i64 @test_i64(i64 %a) {
; CHECK-LABEL: test_i64:
; CHECK: rbit
; CHECK: rbit
-; CHECK: cmp
; CHECK: clz
+; CHECK: cmp
; CHECK: add
; CHECK: clzne
%tmp = call i64 @llvm.cttz.i64(i64 %a, i1 false)
@@ -81,8 +81,8 @@ define i64 @test_i64_zero_undef(i64 %a) {
; CHECK-LABEL: test_i64_zero_undef:
; CHECK: rbit
; CHECK: rbit
-; CHECK: cmp
; CHECK: clz
+; CHECK: cmp
; CHECK: add
; CHECK: clzne
%tmp = call i64 @llvm.cttz.i64(i64 %a, i1 true)
diff --git a/test/CodeGen/ARM/cttz_vector.ll b/test/CodeGen/ARM/cttz_vector.ll
index 9480d75db47a..bed644980415 100644
--- a/test/CodeGen/ARM/cttz_vector.ll
+++ b/test/CodeGen/ARM/cttz_vector.ll
@@ -168,17 +168,17 @@ define void @test_v4i32(<4 x i32>* %p) {
define void @test_v1i64(<1 x i64>* %p) {
; CHECK-LABEL: test_v1i64:
-; CHECK: vldr [[D1:d[0-9]+]], [r0]
; CHECK: vmov.i32 [[D2:d[0-9]+]], #0x0
+; CHECK: vldr [[D1:d[0-9]+]], [r0]
; CHECK: vmov.i64 [[D3:d[0-9]+]], #0xffffffffffffffff
; CHECK: vsub.i64 [[D2]], [[D2]], [[D1]]
-; CHECK: vand [[D1]], [[D1]], [[D2]]
-; CHECK: vadd.i64 [[D1]], [[D1]], [[D3]]
-; CHECK: vcnt.8 [[D1]], [[D1]]
-; CHECK: vpaddl.u8 [[D1]], [[D1]]
-; CHECK: vpaddl.u16 [[D1]], [[D1]]
-; CHECK: vpaddl.u32 [[D1]], [[D1]]
-; CHECK: vstr [[D1]], [r0]
+; CHECK: vand [[D2]], [[D1]], [[D2]]
+; CHECK: vadd.i64 [[D2]], [[D2]], [[D3]]
+; CHECK: vcnt.8 [[D2]], [[D2]]
+; CHECK: vpaddl.u8 [[D2]], [[D2]]
+; CHECK: vpaddl.u16 [[D2]], [[D2]]
+; CHECK: vpaddl.u32 [[D2]], [[D2]]
+; CHECK: vstr [[D2]], [r0]
%a = load <1 x i64>, <1 x i64>* %p
%tmp = call <1 x i64> @llvm.cttz.v1i64(<1 x i64> %a, i1 false)
store <1 x i64> %tmp, <1 x i64>* %p
@@ -187,17 +187,17 @@ define void @test_v1i64(<1 x i64>* %p) {
define void @test_v2i64(<2 x i64>* %p) {
; CHECK-LABEL: test_v2i64:
-; CHECK: vld1.64 {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [r0]
; CHECK: vmov.i32 [[Q2:q[0-9]+]], #0x0
+; CHECK: vld1.64 {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [r0]
; CHECK: vmov.i64 [[Q3:q[0-9]+]], #0xffffffffffffffff
; CHECK: vsub.i64 [[Q2]], [[Q2]], [[Q1:q[0-9]+]]
-; CHECK: vand [[Q1]], [[Q1]], [[Q2]]
-; CHECK: vadd.i64 [[Q1]], [[Q1]], [[Q3]]
-; CHECK: vcnt.8 [[Q1]], [[Q1]]
-; CHECK: vpaddl.u8 [[Q1]], [[Q1]]
-; CHECK: vpaddl.u16 [[Q1]], [[Q1]]
-; CHECK: vpaddl.u32 [[Q1]], [[Q1]]
-; CHECK: vst1.64 {[[D1]], [[D2]]}, [r0]
+; CHECK: vand [[Q2]], [[Q1]], [[Q2]]
+; CHECK: vadd.i64 [[Q2]], [[Q2]], [[Q3]]
+; CHECK: vcnt.8 [[Q2]], [[Q2]]
+; CHECK: vpaddl.u8 [[Q2]], [[Q2]]
+; CHECK: vpaddl.u16 [[Q2]], [[Q2]]
+; CHECK: vpaddl.u32 [[Q2]], [[Q2]]
+; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
%a = load <2 x i64>, <2 x i64>* %p
%tmp = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
store <2 x i64> %tmp, <2 x i64>* %p
@@ -346,17 +346,17 @@ define void @test_v4i32_zero_undef(<4 x i32>* %p) {
define void @test_v1i64_zero_undef(<1 x i64>* %p) {
; CHECK-LABEL: test_v1i64_zero_undef:
-; CHECK: vldr [[D1:d[0-9]+]], [r0]
; CHECK: vmov.i32 [[D2:d[0-9]+]], #0x0
+; CHECK: vldr [[D1:d[0-9]+]], [r0]
; CHECK: vmov.i64 [[D3:d[0-9]+]], #0xffffffffffffffff
; CHECK: vsub.i64 [[D2]], [[D2]], [[D1]]
-; CHECK: vand [[D1]], [[D1]], [[D2]]
-; CHECK: vadd.i64 [[D1]], [[D1]], [[D3]]
-; CHECK: vcnt.8 [[D1]], [[D1]]
-; CHECK: vpaddl.u8 [[D1]], [[D1]]
-; CHECK: vpaddl.u16 [[D1]], [[D1]]
-; CHECK: vpaddl.u32 [[D1]], [[D1]]
-; CHECK: vstr [[D1]], [r0]
+; CHECK: vand [[D2]], [[D1]], [[D2]]
+; CHECK: vadd.i64 [[D2]], [[D2]], [[D3]]
+; CHECK: vcnt.8 [[D2]], [[D2]]
+; CHECK: vpaddl.u8 [[D2]], [[D2]]
+; CHECK: vpaddl.u16 [[D2]], [[D2]]
+; CHECK: vpaddl.u32 [[D2]], [[D2]]
+; CHECK: vstr [[D2]], [r0]
%a = load <1 x i64>, <1 x i64>* %p
%tmp = call <1 x i64> @llvm.cttz.v1i64(<1 x i64> %a, i1 true)
store <1 x i64> %tmp, <1 x i64>* %p
@@ -365,17 +365,17 @@ define void @test_v1i64_zero_undef(<1 x i64>* %p) {
define void @test_v2i64_zero_undef(<2 x i64>* %p) {
; CHECK-LABEL: test_v2i64_zero_undef:
-; CHECK: vld1.64 {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [r0]
; CHECK: vmov.i32 [[Q2:q[0-9]+]], #0x0
+; CHECK: vld1.64 {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [r0]
; CHECK: vmov.i64 [[Q3:q[0-9]+]], #0xffffffffffffffff
; CHECK: vsub.i64 [[Q2]], [[Q2]], [[Q1:q[0-9]+]]
-; CHECK: vand [[Q1]], [[Q1]], [[Q2]]
-; CHECK: vadd.i64 [[Q1]], [[Q1]], [[Q3]]
-; CHECK: vcnt.8 [[Q1]], [[Q1]]
-; CHECK: vpaddl.u8 [[Q1]], [[Q1]]
-; CHECK: vpaddl.u16 [[Q1]], [[Q1]]
-; CHECK: vpaddl.u32 [[Q1]], [[Q1]]
-; CHECK: vst1.64 {[[D1]], [[D2]]}, [r0]
+; CHECK: vand [[Q2]], [[Q1]], [[Q2]]
+; CHECK: vadd.i64 [[Q2]], [[Q2]], [[Q3]]
+; CHECK: vcnt.8 [[Q2]], [[Q2]]
+; CHECK: vpaddl.u8 [[Q2]], [[Q2]]
+; CHECK: vpaddl.u16 [[Q2]], [[Q2]]
+; CHECK: vpaddl.u32 [[Q2]], [[Q2]]
+; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
%a = load <2 x i64>, <2 x i64>* %p
%tmp = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
store <2 x i64> %tmp, <2 x i64>* %p
diff --git a/test/CodeGen/ARM/cxx-tlscc.ll b/test/CodeGen/ARM/cxx-tlscc.ll
index 6a5aa12ac5a6..6a66c5f197ef 100644
--- a/test/CodeGen/ARM/cxx-tlscc.ll
+++ b/test/CodeGen/ARM/cxx-tlscc.ll
@@ -26,7 +26,7 @@ declare i32 @_tlv_atexit(void (i8*)*, i8*, i8*)
; THUMB-LABEL: _ZTW2sg
; THUMB: push {{.*}}lr
; THUMB: blx
-; THUMB: bne [[TH_end:.?LBB0_[0-9]+]]
+; THUMB: bne{{(.w)?}} [[TH_end:.?LBB0_[0-9]+]]
; THUMB: blx
; THUMB: tlv_atexit
; THUMB: [[TH_end]]:
diff --git a/test/CodeGen/ARM/execute-only-big-stack-frame.ll b/test/CodeGen/ARM/execute-only-big-stack-frame.ll
index 0fe67f9863a5..24c6a06d6af1 100644
--- a/test/CodeGen/ARM/execute-only-big-stack-frame.ll
+++ b/test/CodeGen/ARM/execute-only-big-stack-frame.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -mtriple=thumbv7m -arm-execute-only -O0 %s -o - \
+; RUN: llc < %s -mtriple=thumbv7m -mattr=+execute-only -O0 %s -o - \
; RUN: | FileCheck --check-prefix=CHECK-SUBW-ADDW %s
-; RUN: llc < %s -mtriple=thumbv8m.base -arm-execute-only -O0 %s -o - \
+; RUN: llc < %s -mtriple=thumbv8m.base -mattr=+execute-only -O0 %s -o - \
; RUN: | FileCheck --check-prefix=CHECK-MOVW-MOVT-ADD %s
-; RUN: llc < %s -mtriple=thumbv8m.main -arm-execute-only -O0 %s -o - \
+; RUN: llc < %s -mtriple=thumbv8m.main -mattr=+execute-only -O0 %s -o - \
; RUN: | FileCheck --check-prefix=CHECK-SUBW-ADDW %s
define i8 @test_big_stack_frame() {
diff --git a/test/CodeGen/ARM/execute-only-section.ll b/test/CodeGen/ARM/execute-only-section.ll
index 6e1973cd0f14..a3313d8c2f73 100644
--- a/test/CodeGen/ARM/execute-only-section.ll
+++ b/test/CodeGen/ARM/execute-only-section.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -mtriple=thumbv7m -arm-execute-only %s -o - | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv8m.base -arm-execute-only %s -o - | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv8m.main -arm-execute-only %s -o - | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7m -mattr=+execute-only %s -o - | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv8m.base -mattr=+execute-only %s -o - | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv8m.main -mattr=+execute-only %s -o - | FileCheck %s
; CHECK: .section .text,"axy",%progbits,unique,0
; CHECK-NOT: .section
diff --git a/test/CodeGen/ARM/execute-only.ll b/test/CodeGen/ARM/execute-only.ll
index 1f9e8bf2813c..f8c3d279573b 100644
--- a/test/CodeGen/ARM/execute-only.ll
+++ b/test/CodeGen/ARM/execute-only.ll
@@ -1,6 +1,6 @@
-; RUN: llc -mtriple=thumbv8m.base-eabi -arm-execute-only %s -o - | FileCheck --check-prefix=CHECK --check-prefix=CHECK-T2BASE %s
-; RUN: llc -mtriple=thumbv7m-eabi -arm-execute-only %s -o - | FileCheck --check-prefix=CHECK --check-prefix=CHECK-T2 %s
-; RUN: llc -mtriple=thumbv8m.main-eabi -arm-execute-only %s -o - | FileCheck --check-prefix=CHECK --check-prefix=CHECK-T2 %s
+; RUN: llc -mtriple=thumbv8m.base-eabi -mattr=+execute-only %s -o - | FileCheck --check-prefix=CHECK --check-prefix=CHECK-T2BASE %s
+; RUN: llc -mtriple=thumbv7m-eabi -mattr=+execute-only %s -o - | FileCheck --check-prefix=CHECK --check-prefix=CHECK-T2 %s
+; RUN: llc -mtriple=thumbv8m.main-eabi -mattr=+execute-only %s -o - | FileCheck --check-prefix=CHECK --check-prefix=CHECK-T2 %s
@var = global i32 0
diff --git a/test/CodeGen/ARM/fp16-promote.ll b/test/CodeGen/ARM/fp16-promote.ll
index 9148ac109ae3..257d99d11928 100644
--- a/test/CodeGen/ARM/fp16-promote.ll
+++ b/test/CodeGen/ARM/fp16-promote.ll
@@ -687,8 +687,8 @@ define void @test_maxnan(half* %p) #0 {
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-VFP-LIBCALL: vbsl
-; CHECK-NOVFP: bic
; CHECK-NOVFP: and
+; CHECK-NOVFP: bic
; CHECK-NOVFP: orr
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_copysign(half* %p, half* %q) #0 {
@@ -818,25 +818,24 @@ define void @test_fmuladd(half* %p, half* %q, half* %r) #0 {
; CHECK-ALL-LABEL: test_insertelement:
; CHECK-ALL: sub sp, sp, #8
; CHECK-ALL: ldrh
-; CHECK-ALL: strh
; CHECK-ALL: ldrh
-; CHECK-ALL: strh
; CHECK-ALL: ldrh
-; CHECK-ALL: strh
; CHECK-ALL: ldrh
-; CHECK-ALL: strh
-; CHECK-ALL: mov
+; CHECK-ALL-DAG: strh
+; CHECK-ALL-DAG: strh
+; CHECK-ALL-DAG: mov
; CHECK-ALL-DAG: ldrh
; CHECK-ALL-DAG: orr
-; CHECK-ALL: strh
-; CHECK-ALL: ldrh
-; CHECK-ALL: strh
-; CHECK-ALL: ldrh
-; CHECK-ALL: strh
-; CHECK-ALL: ldrh
-; CHECK-ALL: strh
-; CHECK-ALL: ldrh
-; CHECK-ALL: strh
+; CHECK-ALL-DAG: strh
+; CHECK-ALL-DAG: strh
+; CHECK-ALL-DAG: strh
+; CHECK-ALL-DAG: ldrh
+; CHECK-ALL-DAG: ldrh
+; CHECK-ALL-DAG: ldrh
+; CHECK-ALL-DAG: strh
+; CHECK-ALL-DAG: strh
+; CHECK-ALL-DAG: strh
+; CHECK-ALL-DAG: strh
; CHECK-ALL: add sp, sp, #8
define void @test_insertelement(half* %p, <4 x half>* %q, i32 %i) #0 {
%a = load half, half* %p, align 2
diff --git a/test/CodeGen/ARM/fp16-v3.ll b/test/CodeGen/ARM/fp16-v3.ll
index a37f71d9ba88..e84fee2c2e1b 100644
--- a/test/CodeGen/ARM/fp16-v3.ll
+++ b/test/CodeGen/ARM/fp16-v3.ll
@@ -11,8 +11,8 @@ target triple = "armv7a--none-eabi"
; CHECK: vadd.f32 [[SREG5:s[0-9]+]], [[SREG4]], [[SREG1]]
; CHECK-NEXT: vcvtb.f16.f32 [[SREG6:s[0-9]+]], [[SREG5]]
; CHECK-NEXT: vmov [[RREG1:r[0-9]+]], [[SREG6]]
-; CHECK-NEXT: uxth [[RREG2:r[0-9]+]], [[RREG1]]
-; CHECK-NEXT: pkhbt [[RREG3:r[0-9]+]], [[RREG1]], [[RREG1]], lsl #16
+; CHECK-DAG: uxth [[RREG2:r[0-9]+]], [[RREG1]]
+; CHECK-DAG: pkhbt [[RREG3:r[0-9]+]], [[RREG1]], [[RREG1]], lsl #16
; CHECK-DAG: strh [[RREG1]], [r0, #4]
; CHECK-DAG: vmov [[DREG:d[0-9]+]], [[RREG3]], [[RREG2]]
; CHECK-DAG: vst1.32 {[[DREG]][0]}, [r0:32]
diff --git a/test/CodeGen/ARM/ifcvt7.ll b/test/CodeGen/ARM/ifcvt7.ll
index e0d2b7cffb44..ed443a1814e6 100644
--- a/test/CodeGen/ARM/ifcvt7.ll
+++ b/test/CodeGen/ARM/ifcvt7.ll
@@ -5,8 +5,6 @@
define fastcc i32 @CountTree(%struct.quad_struct* %tree) {
; CHECK: cmpeq
-; CHECK: moveq
-; CHECK: popeq
entry:
br label %tailrecurse
diff --git a/test/CodeGen/ARM/illegal-bitfield-loadstore.ll b/test/CodeGen/ARM/illegal-bitfield-loadstore.ll
index 74117d3896bd..a633c0291c60 100644
--- a/test/CodeGen/ARM/illegal-bitfield-loadstore.ll
+++ b/test/CodeGen/ARM/illegal-bitfield-loadstore.ll
@@ -55,8 +55,8 @@ define void @i24_and_or(i24* %a) {
define void @i24_insert_bit(i24* %a, i1 zeroext %bit) {
; LE-LABEL: i24_insert_bit:
; LE: @ BB#0:
-; LE-NEXT: ldrh r2, [r0]
; LE-NEXT: mov r3, #255
+; LE-NEXT: ldrh r2, [r0]
; LE-NEXT: orr r3, r3, #57088
; LE-NEXT: and r2, r2, r3
; LE-NEXT: orr r1, r2, r1, lsl #13
@@ -99,8 +99,8 @@ define void @i56_or(i56* %a) {
; BE-NEXT: orr r2, r3, r2, lsl #8
; BE-NEXT: orr r2, r2, r12, lsl #24
; BE-NEXT: orr r2, r2, #384
-; BE-NEXT: lsr r3, r2, #8
; BE-NEXT: strb r2, [r1, #2]
+; BE-NEXT: lsr r3, r2, #8
; BE-NEXT: strh r3, [r1]
; BE-NEXT: bic r1, r12, #255
; BE-NEXT: orr r1, r1, r2, lsr #24
@@ -127,8 +127,8 @@ define void @i56_and_or(i56* %a) {
; BE-NEXT: mov r3, #128
; BE-NEXT: ldrh r2, [r1, #4]!
; BE-NEXT: strb r3, [r1, #2]
-; BE-NEXT: lsl r2, r2, #8
; BE-NEXT: ldr r12, [r0]
+; BE-NEXT: lsl r2, r2, #8
; BE-NEXT: orr r2, r2, r12, lsl #24
; BE-NEXT: orr r2, r2, #384
; BE-NEXT: lsr r3, r2, #8
diff --git a/test/CodeGen/ARM/indirectbr.ll b/test/CodeGen/ARM/indirectbr.ll
index 90defad43a7d..a3ec2a7f3e77 100644
--- a/test/CodeGen/ARM/indirectbr.ll
+++ b/test/CodeGen/ARM/indirectbr.ll
@@ -56,9 +56,11 @@ L2: ; preds = %L3, %bb2
L1: ; preds = %L2, %bb2
%res.3 = phi i32 [ %phitmp, %L2 ], [ 2, %bb2 ] ; <i32> [#uses=1]
; ARM-LABEL: %L1
+; ARM: ldr [[R_NEXTADDR:r[0-9]+]], LCPI
; ARM: ldr [[R1:r[0-9]+]], LCPI
+; ARM: add [[R_NEXTADDR_b:r[0-9]+]], pc, [[R_NEXTADDR]]
; ARM: add [[R1b:r[0-9]+]], pc, [[R1]]
-; ARM: str [[R1b]]
+; ARM: str [[R1b]], {{\[}}[[R_NEXTADDR_b]]]
; THUMB-LABEL: %L1
; THUMB: ldr [[R2:r[0-9]+]], LCPI
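
Two FileCheck idioms do the work in the indirectbr hunk above: [[R_NEXTADDR:r[0-9]+]] captures whatever register the compiler picked and [[R_NEXTADDR_b]] later requires that same register, while {{\[}} matches a literal '[' in the store's address operand, which cannot be written bare because '[[' would be parsed as the start of a variable reference. In isolation:

; Capture the destination register once, then match the literal bracket of
; the address operand with a regex block:
; CHECK: add [[DEST:r[0-9]+]], pc, {{r[0-9]+}}
; CHECK: str {{r[0-9]+}}, {{\[}}[[DEST]]]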
diff --git a/test/CodeGen/ARM/jump-table-islands.ll b/test/CodeGen/ARM/jump-table-islands.ll
index 6b4f174c0928..755ca30199ad 100644
--- a/test/CodeGen/ARM/jump-table-islands.ll
+++ b/test/CodeGen/ARM/jump-table-islands.ll
@@ -13,7 +13,7 @@ define %BigInt @test_moved_jumptable(i1 %tst, i32 %sw, %BigInt %l) {
; CHECK: .long LBB{{[0-9]+_[0-9]+}}-[[JUMP_TABLE]]
; CHECK: [[SKIP_TABLE]]:
-; CHECK: add pc, {{r[0-9]+}}, {{r[0-9]+}}
+; CHECK: add pc, {{r[0-9]+|lr}}, {{r[0-9]+|lr}}
br i1 %tst, label %simple, label %complex
simple:
diff --git a/test/CodeGen/ARM/jump-table-tbh.ll b/test/CodeGen/ARM/jump-table-tbh.ll
index 2da8a5fafc40..b3ee68ea0758 100644
--- a/test/CodeGen/ARM/jump-table-tbh.ll
+++ b/test/CodeGen/ARM/jump-table-tbh.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=thumbv7m-linux-gnu -o - %s | FileCheck %s --check-prefix=T2
-; RUN: llc -mtriple=thumbv6m-linux-gnu -o - %s | FileCheck %s --check-prefix=T1
+; RUN: llc -mtriple=thumbv7m-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=T2
+; RUN: llc -mtriple=thumbv6m-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=T1
declare void @foo(double)
declare i32 @llvm.arm.space(i32, i32)
@@ -10,7 +10,7 @@ define i32 @test_tbh(i1 %tst, i32 %sw, i32 %l) {
; T2-LABEL: test_tbh:
; T2: [[ANCHOR:.LCPI[0-9_]+]]:
; T2: tbh [pc, r{{[0-9]+}}, lsl #1]
-; T2-NEXT: @ BB#1
+; T2-NEXT: @ BB#{{[0-9]+}}
; T2-NEXT: LJTI
; T2-NEXT: .short (.LBB0_[[x:[0-9]+]]-([[ANCHOR]]+4))/2
; T2-NEXT: .short (.LBB0_{{[0-9]+}}-([[ANCHOR]]+4))/2
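
The -verify-machineinstrs added to both RUN lines above runs the MachineVerifier between codegen passes, so malformed machine IR (bad register classes, broken liveness, ill-formed bundles) turns into an immediate test failure instead of a latent miscompile; it only checks and does not change the emitted code. The pattern, as used here:

; RUN: llc -mtriple=thumbv7m-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=T2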
diff --git a/test/CodeGen/ARM/ldm-stm-i256.ll b/test/CodeGen/ARM/ldm-stm-i256.ll
index 7b4151dabf6d..151c42e0e158 100644
--- a/test/CodeGen/ARM/ldm-stm-i256.ll
+++ b/test/CodeGen/ARM/ldm-stm-i256.ll
@@ -17,22 +17,24 @@ entry:
%add6 = add nsw i256 %or, %d
store i256 %add6, i256* %b, align 8
ret void
- ; CHECK-DAG: ldm r3
; CHECK-DAG: ldm r2
- ; CHECK-DAG: ldr {{.*}}, [r3, #20]
+ ; CHECK-DAG: ldr {{.*}}, [r3]
+ ; CHECK-DAG: ldr {{.*}}, [r3, #4]
+ ; CHECK-DAG: ldr {{.*}}, [r3, #8]
+ ; CHECK-DAG: ldr {{.*}}, [r3, #12]
; CHECK-DAG: ldr {{.*}}, [r3, #16]
- ; CHECK-DAG: ldr {{.*}}, [r3, #28]
+ ; CHECK-DAG: ldr {{.*}}, [r3, #20]
; CHECK-DAG: ldr {{.*}}, [r3, #24]
+ ; CHECK-DAG: ldr {{.*}}, [r3, #28]
; CHECK-DAG: ldr {{.*}}, [r2, #20]
- ; CHECK-DAG: ldr {{.*}}, [r2, #16]
- ; CHECK-DAG: ldr {{.*}}, [r2, #28]
; CHECK-DAG: ldr {{.*}}, [r2, #24]
- ; CHECK-DAG: stmib r0
- ; CHECK-DAG: str {{.*}}, [r0]
+ ; CHECK-DAG: ldr {{.*}}, [r2, #28]
+ ; CHECK-DAG: stm r0
+ ; CHECK-DAG: str {{.*}}, [r0, #20]
; CHECK-DAG: str {{.*}}, [r0, #24]
; CHECK-DAG: str {{.*}}, [r0, #28]
- ; CHECK-DAG: str {{.*}}, [r1]
- ; CHECK-DAG: stmib r1
+ ; CHECK-DAG: stm r1
+ ; CHECK-DAG: str {{.*}}, [r1, #20]
; CHECK-DAG: str {{.*}}, [r1, #24]
; CHECK-DAG: str {{.*}}, [r1, #28]
}
diff --git a/test/CodeGen/ARM/legalize-unaligned-load.ll b/test/CodeGen/ARM/legalize-unaligned-load.ll
index eb4e942f0742..ccf93c3ef55e 100644
--- a/test/CodeGen/ARM/legalize-unaligned-load.ll
+++ b/test/CodeGen/ARM/legalize-unaligned-load.ll
@@ -10,7 +10,7 @@
; CHECK-NOT: str
; CHECK: ldr
; CHECK: str
-; CHECK: bx
+; CHECK: {{bx|pop.*pc}}
define i32 @get_set_complex({ float, float }* noalias nocapture %retptr,
{ i8*, i32 }** noalias nocapture readnone %excinfo,
i8* noalias nocapture readnone %env,
diff --git a/test/CodeGen/ARM/long-setcc.ll b/test/CodeGen/ARM/long-setcc.ll
index f09167ed9e78..1fbc3f2c0838 100644
--- a/test/CodeGen/ARM/long-setcc.ll
+++ b/test/CodeGen/ARM/long-setcc.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm-eabi < %s | FileCheck %s
define i1 @t1(i64 %x) {
%B = icmp slt i64 %x, 0
diff --git a/test/CodeGen/ARM/long_shift.ll b/test/CodeGen/ARM/long_shift.ll
index 3ec5fa41aa6f..cf8396db9db5 100644
--- a/test/CodeGen/ARM/long_shift.ll
+++ b/test/CodeGen/ARM/long_shift.ll
@@ -28,15 +28,15 @@ define i32 @f1(i64 %x, i64 %y) {
define i32 @f2(i64 %x, i64 %y) {
; CHECK-LABEL: f2:
-; CHECK-LE: lsr{{.*}}r2
-; CHECK-LE-NEXT: rsb r3, r2, #32
+; CHECK-LE: rsb r3, r2, #32
+; CHECK-LE-NEXT: lsr{{.*}}r2
; CHECK-LE-NEXT: sub r2, r2, #32
; CHECK-LE-NEXT: orr r0, r0, r1, lsl r3
; CHECK-LE-NEXT: cmp r2, #0
; CHECK-LE-NEXT: asrge r0, r1, r2
-; CHECK-BE: lsr{{.*}}r3
-; CHECK-BE-NEXT: rsb r2, r3, #32
+; CHECK-BE: rsb r2, r3, #32
+; CHECK-BE-NEXT: lsr{{.*}}r3
; CHECK-BE-NEXT: orr r1, r1, r0, lsl r2
; CHECK-BE-NEXT: sub r2, r3, #32
; CHECK-BE-NEXT: cmp r2, #0
@@ -49,15 +49,15 @@ define i32 @f2(i64 %x, i64 %y) {
define i32 @f3(i64 %x, i64 %y) {
; CHECK-LABEL: f3:
-; CHECK-LE: lsr{{.*}}r2
-; CHECK-LE-NEXT: rsb r3, r2, #32
+; CHECK-LE: rsb r3, r2, #32
+; CHECK-LE-NEXT: lsr{{.*}}r2
; CHECK-LE-NEXT: sub r2, r2, #32
; CHECK-LE-NEXT: orr r0, r0, r1, lsl r3
; CHECK-LE-NEXT: cmp r2, #0
; CHECK-LE-NEXT: lsrge r0, r1, r2
-; CHECK-BE: lsr{{.*}}r3
-; CHECK-BE-NEXT: rsb r2, r3, #32
+; CHECK-BE: rsb r2, r3, #32
+; CHECK-BE-NEXT: lsr{{.*}}r3
; CHECK-BE-NEXT: orr r1, r1, r0, lsl r2
; CHECK-BE-NEXT: sub r2, r3, #32
; CHECK-BE-NEXT: cmp r2, #0
diff --git a/test/CodeGen/ARM/misched-fusion-aes.ll b/test/CodeGen/ARM/misched-fusion-aes.ll
index d3558ab4abb0..483f26cc8e00 100644
--- a/test/CodeGen/ARM/misched-fusion-aes.ll
+++ b/test/CodeGen/ARM/misched-fusion-aes.ll
@@ -74,15 +74,16 @@ define void @aesea(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d,
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QA]]
; CHECK: aese.8 [[QB:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QB]]
+; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
; CHECK: aese.8 [[QC:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QC]]
; CHECK: aese.8 [[QD:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QD]]
; CHECK: aese.8 [[QE:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QE]]
+; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
; CHECK: aese.8 [[QF:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QF]]
-; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
; CHECK: aese.8 [[QG:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QG]]
; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
@@ -159,15 +160,16 @@ define void @aesda(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d,
; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QA]]
; CHECK: aesd.8 [[QB:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QB]]
+; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
; CHECK: aesd.8 [[QC:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QC]]
; CHECK: aesd.8 [[QD:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QD]]
; CHECK: aesd.8 [[QE:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QE]]
+; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
; CHECK: aesd.8 [[QF:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QF]]
-; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
; CHECK: aesd.8 [[QG:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QG]]
; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
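
The reshuffled checks above preserve the property this test exists for: macro-fusion keeps each aese.8/aesd.8 immediately followed by the aesmc.8/aesimc.8 that consumes its result, so the fused pairs stay CHECK plus CHECK-NEXT while the odd unpaired aese.8/aesd.8 lines are free to float. A reduced form of one fused round (hypothetical function; the intrinsics are the standard ARM NEON AES ones):

declare <16 x i8> @llvm.arm.neon.aese(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.arm.neon.aesmc(<16 x i8>)

; aesmc consumes the aese result, so a fusion-aware scheduler emits the two
; instructions back to back:
; CHECK: aese.8 [[Q:q[0-9]+]], {{q[0-9]+}}
; CHECK-NEXT: aesmc.8 {{q[0-9]+}}, [[Q]]
define <16 x i8> @one_round(<16 x i8> %d, <16 x i8> %k) {
  %e = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %d, <16 x i8> %k)
  %m = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %e)
  ret <16 x i8> %m
}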
diff --git a/test/CodeGen/ARM/select_const.ll b/test/CodeGen/ARM/select_const.ll
index 48fe572bf8a7..23de9c35a5b8 100644
--- a/test/CodeGen/ARM/select_const.ll
+++ b/test/CodeGen/ARM/select_const.ll
@@ -281,16 +281,16 @@ define i64 @opaque_constant1(i1 %cond, i64 %x) {
; CHECK: @ BB#0:
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
-; CHECK-NEXT: ands r12, r0, #1
; CHECK-NEXT: mov lr, #1
+; CHECK-NEXT: ands r12, r0, #1
; CHECK-NEXT: mov r0, #23
-; CHECK-NEXT: eor r3, r3, #1
; CHECK-NEXT: orr lr, lr, #65536
; CHECK-NEXT: mvnne r0, #3
-; CHECK-NEXT: movne r12, #1
; CHECK-NEXT: and r4, r0, lr
-; CHECK-NEXT: eor r2, r2, lr
+; CHECK-NEXT: movne r12, #1
; CHECK-NEXT: subs r0, r4, #1
+; CHECK-NEXT: eor r2, r2, lr
+; CHECK-NEXT: eor r3, r3, #1
; CHECK-NEXT: sbc r1, r12, #0
; CHECK-NEXT: orrs r2, r2, r3
; CHECK-NEXT: movne r0, r4
diff --git a/test/CodeGen/ARM/shift-i64.ll b/test/CodeGen/ARM/shift-i64.ll
index 12cc5fbe03e4..3644afa17ca4 100644
--- a/test/CodeGen/ARM/shift-i64.ll
+++ b/test/CodeGen/ARM/shift-i64.ll
@@ -29,8 +29,8 @@ define i64 @test_shl(i64 %val, i64 %amt) {
; Explanation for lshr is pretty much the reverse of shl.
define i64 @test_lshr(i64 %val, i64 %amt) {
; CHECK-LABEL: test_lshr:
-; CHECK: lsr r0, r0, r2
; CHECK: rsb [[REVERSE_SHIFT:.*]], r2, #32
+; CHECK: lsr r0, r0, r2
; CHECK: orr r0, r0, r1, lsl [[REVERSE_SHIFT]]
; CHECK: sub [[EXTRA_SHIFT:.*]], r2, #32
; CHECK: cmp [[EXTRA_SHIFT]], #0
diff --git a/test/CodeGen/ARM/ssp-data-layout.ll b/test/CodeGen/ARM/ssp-data-layout.ll
index 92fa0809ed2d..39c279eb90d4 100644
--- a/test/CodeGen/ARM/ssp-data-layout.ll
+++ b/test/CodeGen/ARM/ssp-data-layout.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -disable-fp-elim -march=arm -mcpu=cortex-a8 -mtriple arm-linux-gnu -o - | FileCheck %s
+; RUN: llc < %s -disable-fp-elim -march=arm -mcpu=cortex-a8 -mtriple arm-linux-gnu -target-abi=apcs -o - | FileCheck %s
; This test is fairly fragile. The goal is to ensure that "large" stack
; objects are allocated closest to the stack protector (i.e., farthest away
; from the Stack Pointer.) In standard SSP mode this means that large (>=
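
For the layout rule that comment describes, a reduced illustration (hypothetical function, not from the test): with stack protection enabled, allocas that look like buffers are placed nearest the canary, so an overflow has to cross the guard before it can reach small scalars or saved registers.

declare void @use(i32*, [64 x i8]*)

; %buf is an array, so it is laid out closer to the stack guard than the
; small scalar %x; overflowing %buf corrupts the canary first.
define void @f() ssp {
  %x = alloca i32, align 4
  %buf = alloca [64 x i8], align 1
  call void @use(i32* %x, [64 x i8]* %buf)
  ret void
}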
diff --git a/test/CodeGen/ARM/str_pre-2.ll b/test/CodeGen/ARM/str_pre-2.ll
index 4b8b4c6bca72..1c6c05de2579 100644
--- a/test/CodeGen/ARM/str_pre-2.ll
+++ b/test/CodeGen/ARM/str_pre-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv6-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=armv6-linux-gnu -target-abi=apcs | FileCheck %s
@b = external global i64*
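
This RUN-line change, like the one in ssp-data-layout.ll above, pins the ABI explicitly: the checks in these tests appear to assume APCS frame layout and stack offsets, and spelling out -target-abi=apcs keeps them stable if the triple's default ABI ever changes. The resulting invocation:

; RUN: llc < %s -mtriple=armv6-linux-gnu -target-abi=apcs | FileCheck %s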
diff --git a/test/CodeGen/ARM/swifterror.ll b/test/CodeGen/ARM/swifterror.ll
index 3fd57c592bfb..b02adf7912b5 100644
--- a/test/CodeGen/ARM/swifterror.ll
+++ b/test/CodeGen/ARM/swifterror.ll
@@ -420,10 +420,10 @@ define swiftcc void @swifterror_reg_clobber(%swift_error** nocapture %err) {
; CHECK-ARMV7-DAG: str r8, [s[[STK1:.*]]]
; CHECK-ARMV7-DAG: str r10, [s[[STK2:.*]]]
; Store arguments.
-; CHECK-ARMV7: mov r6, r3
-; CHECK-ARMV7: mov r4, r2
-; CHECK-ARMV7: mov r11, r1
-; CHECK-ARMV7: mov r5, r0
+; CHECK-ARMV7-DAG: mov r6, r3
+; CHECK-ARMV7-DAG: mov r4, r2
+; CHECK-ARMV7-DAG: mov r11, r1
+; CHECK-ARMV7-DAG: mov r5, r0
; Setup call.
; CHECK-ARMV7: mov r0, #1
; CHECK-ARMV7: mov r1, #2
@@ -435,10 +435,10 @@ define swiftcc void @swifterror_reg_clobber(%swift_error** nocapture %err) {
; Restore original arguments.
; CHECK-ARMV7-DAG: ldr r10, [s[[STK2]]]
; CHECK-ARMV7-DAG: ldr r8, [s[[STK1]]]
-; CHECK-ARMV7: mov r0, r5
-; CHECK-ARMV7: mov r1, r11
-; CHECK-ARMV7: mov r2, r4
-; CHECK-ARMV7: mov r3, r6
+; CHECK-ARMV7-DAG: mov r0, r5
+; CHECK-ARMV7-DAG: mov r1, r11
+; CHECK-ARMV7-DAG: mov r2, r4
+; CHECK-ARMV7-DAG: mov r3, r6
; CHECK-ARMV7: bl _params_in_reg2
; CHECK-ARMV7: pop {r4, r5, r6, r7, r10, r11, pc}
define swiftcc void @params_in_reg(i32, i32, i32, i32, i8* swiftself, %swift_error** nocapture swifterror %err) {
@@ -469,25 +469,25 @@ declare swiftcc void @params_in_reg2(i32, i32, i32, i32, i8* swiftself, %swift_e
; CHECK-ARMV7: mov r8, #0
; CHECK-ARMV7: bl _params_in_reg2
; Restore original arguments.
-; CHECK-ARMV7: ldr r3, [s[[STK2]]]
-; CHECK-ARMV7: ldr r10, [s[[STK1]]]
+; CHECK-ARMV7-DAG: ldr r3, [s[[STK2]]]
+; CHECK-ARMV7-DAG: ldr r10, [s[[STK1]]]
; Store %error_ptr_ref;
-; CHECK-ARMV7: str r8, [s[[STK3:.*]]]
+; CHECK-ARMV7-DAG: str r8, [s[[STK3:.*]]]
; Restore original arguments.
-; CHECK-ARMV7: mov r0, r5
-; CHECK-ARMV7: mov r1, r11
-; CHECK-ARMV7: mov r2, r4
-; CHECK-ARMV7: mov r8, r6
+; CHECK-ARMV7-DAG: mov r0, r5
+; CHECK-ARMV7-DAG: mov r1, r11
+; CHECK-ARMV7-DAG: mov r2, r4
+; CHECK-ARMV7-DAG: mov r8, r6
; CHECK-ARMV7: bl _params_and_return_in_reg2
; Store swifterror return %err;
-; CHECK-ARMV7: str r8, [s[[STK1]]]
+; CHECK-ARMV7-DAG: str r8, [s[[STK1]]]
; Load swifterror value %error_ptr_ref.
-; CHECK-ARMV7: ldr r8, [s[[STK3]]]
+; CHECK-ARMV7-DAG: ldr r8, [s[[STK3]]]
; Save return values.
-; CHECK-ARMV7: mov r4, r0
-; CHECK-ARMV7: mov r5, r1
-; CHECK-ARMV7: mov r6, r2
-; CHECK-ARMV7: mov r11, r3
+; CHECK-ARMV7-DAG: mov r4, r0
+; CHECK-ARMV7-DAG: mov r5, r1
+; CHECK-ARMV7-DAG: mov r6, r2
+; CHECK-ARMV7-DAG: mov r11, r3
; Setup call.
; CHECK-ARMV7: mov r0, #1
; CHECK-ARMV7: mov r1, #2
@@ -496,12 +496,12 @@ declare swiftcc void @params_in_reg2(i32, i32, i32, i32, i8* swiftself, %swift_e
; CHECK-ARMV7: mov r10, #0
; CHECK-ARMV7: bl _params_in_reg2
; Load swifterror %err;
-; CHECK-ARMV7: ldr r8, [s[[STK1]]]
+; CHECK-ARMV7-DAG: ldr r8, [s[[STK1]]]
; Restore return values for returning.
-; CHECK-ARMV7: mov r0, r4
-; CHECK-ARMV7: mov r1, r5
-; CHECK-ARMV7: mov r2, r6
-; CHECK-ARMV7: mov r3, r11
+; CHECK-ARMV7-DAG: mov r0, r4
+; CHECK-ARMV7-DAG: mov r1, r5
+; CHECK-ARMV7-DAG: mov r2, r6
+; CHECK-ARMV7-DAG: mov r3, r11
; CHECK-ARMV7: pop {r4, r5, r6, r7, r10, r11, pc}
define swiftcc { i32, i32, i32, i32} @params_and_return_in_reg(i32, i32, i32, i32, i8* swiftself, %swift_error** nocapture swifterror %err) {
%error_ptr_ref = alloca swifterror %swift_error*, align 8
diff --git a/test/CodeGen/ARM/thumb2-it-block.ll b/test/CodeGen/ARM/thumb2-it-block.ll
index aaefc0a14863..6d93869ec10f 100644
--- a/test/CodeGen/ARM/thumb2-it-block.ll
+++ b/test/CodeGen/ARM/thumb2-it-block.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
-; RUN: llc -mtriple=thumbv8 %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 < %s | FileCheck %s
+; RUN: llc -mtriple=thumbv8 < %s | FileCheck %s
; PR11107
define i32 @test(i32 %a, i32 %b) {
diff --git a/test/CodeGen/ARM/vcgt.ll b/test/CodeGen/ARM/vcgt.ll
index c39c939d6c95..1e68ff13699a 100644
--- a/test/CodeGen/ARM/vcgt.ll
+++ b/test/CodeGen/ARM/vcgt.ll
@@ -162,8 +162,8 @@ define <4 x i32> @vacgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
; rdar://7923010
define <4 x i32> @vcgt_zext(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: vcgt_zext:
-;CHECK: vmov.i32 [[Q0:q[0-9]+]], #0x1
-;CHECK: vcgt.f32 [[Q1:q[0-9]+]]
+;CHECK-DAG: vmov.i32 [[Q0:q[0-9]+]], #0x1
+;CHECK-DAG: vcgt.f32 [[Q1:q[0-9]+]]
;CHECK: vand [[Q2:q[0-9]+]], [[Q1]], [[Q0]]
%tmp1 = load <4 x float>, <4 x float>* %A
%tmp2 = load <4 x float>, <4 x float>* %B
diff --git a/test/CodeGen/ARM/vector-DAGCombine.ll b/test/CodeGen/ARM/vector-DAGCombine.ll
index 2ef2a0697ec9..8623d2c164ba 100644
--- a/test/CodeGen/ARM/vector-DAGCombine.ll
+++ b/test/CodeGen/ARM/vector-DAGCombine.ll
@@ -237,14 +237,14 @@ entry:
; illegal type to a legal type.
define <2 x i8> @test_truncate(<2 x i128> %in) {
; CHECK-LABEL: test_truncate:
-; CHECK: mov [[BASE:r[0-9]+]], sp
-; CHECK-NEXT: vld1.32 {[[REG1:d[0-9]+]][0]}, {{\[}}[[BASE]]:32]
-; CHECK-NEXT: add [[BASE2:r[0-9]+]], [[BASE]], #4
-; CHECK-NEXT: vld1.32 {[[REG1]][1]}, {{\[}}[[BASE2]]:32]
; REG2 should map onto the same Q register as REG1, i.e., REG2 = REG1 - 1, but we
; cannot express that.
-; CHECK-NEXT: vmov.32 [[REG2:d[0-9]+]][0], r0
+; CHECK: vmov.32 [[REG2:d[0-9]+]][0], r0
+; CHECK-NEXT: mov [[BASE:r[0-9]+]], sp
+; CHECK-NEXT: vld1.32 {[[REG1:d[0-9]+]][0]}, {{\[}}[[BASE]]:32]
+; CHECK-NEXT: add [[BASE2:r[0-9]+]], [[BASE]], #4
; CHECK-NEXT: vmov.32 [[REG2]][1], r1
+; CHECK-NEXT: vld1.32 {[[REG1]][1]}, {{\[}}[[BASE2]]:32]
; The Q register used here should match floor(REG1/2), but we cannot express that.
; CHECK-NEXT: vmovn.i64 [[RES:d[0-9]+]], q{{[0-9]+}}
; CHECK-NEXT: vmov r0, r1, [[RES]]
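
The "REG2 should map onto the same Q register as REG1" caveat refers to NEON register aliasing: each quad register qN overlaps the double registers d(2N) and d(2N+1), so the vmovn.i64 that reads q{{[0-9]+}} implicitly names the d-register pair captured earlier. FileCheck cannot do arithmetic on captured variables, so the relationship stays unchecked. The aliasing, for reference:

; q0 = {d0, d1}, q1 = {d2, d3}, ..., q8 = {d16, d17}, ..., q15 = {d30, d31};
; writing q8 therefore clobbers both d16 and d17.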
diff --git a/test/CodeGen/ARM/vext.ll b/test/CodeGen/ARM/vext.ll
index 5742dc314978..5b524145be76 100644
--- a/test/CodeGen/ARM/vext.ll
+++ b/test/CodeGen/ARM/vext.ll
@@ -182,9 +182,9 @@ define <4 x i16> @test_interleaved(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: test_interleaved:
; CHECK: @ BB#0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
-; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
; CHECK-NEXT: vext.16 d16, d16, d17, #3
; CHECK-NEXT: vorr d17, d16, d16
+; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
; CHECK-NEXT: vuzp.16 d16, d17
; CHECK-NEXT: vzip.16 d16, d18
; CHECK-NEXT: vmov r0, r1, d16
@@ -217,16 +217,16 @@ define <4 x i16> @test_multisource(<32 x i16>* %B) nounwind {
; CHECK-LABEL: test_multisource:
; CHECK: @ BB#0:
; CHECK-NEXT: mov r1, r0
-; CHECK-NEXT: add r2, r0, #32
-; CHECK-NEXT: add r0, r0, #48
+; CHECK-NEXT: add r2, r0, #48
+; CHECK-NEXT: add r0, r0, #32
; CHECK-NEXT: vld1.16 {d16, d17}, [r1:128]!
-; CHECK-NEXT: vld1.64 {d20, d21}, [r2:128]
-; CHECK-NEXT: vld1.64 {d18, d19}, [r0:128]
-; CHECK-NEXT: vld1.64 {d22, d23}, [r1:128]
+; CHECK-NEXT: vld1.64 {d20, d21}, [r0:128]
; CHECK-NEXT: vorr d24, d20, d20
+; CHECK-NEXT: vld1.64 {d18, d19}, [r2:128]
+; CHECK-NEXT: vld1.64 {d22, d23}, [r1:128]
; CHECK-NEXT: vzip.16 d24, d18
-; CHECK-NEXT: vext.16 d18, d20, d24, #2
; CHECK-NEXT: vtrn.16 q8, q11
+; CHECK-NEXT: vext.16 d18, d20, d24, #2
; CHECK-NEXT: vext.16 d16, d18, d16, #2
; CHECK-NEXT: vext.16 d16, d16, d16, #2
; CHECK-NEXT: vmov r0, r1, d16
@@ -259,24 +259,24 @@ define <4 x i16> @test_largespan(<8 x i16>* %B) nounwind {
define <8 x i16> @test_illegal(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: test_illegal:
; CHECK: @ BB#0:
-; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
-; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
-; CHECK-NEXT: vmov.u16 r1, d16[0]
-; CHECK-NEXT: vmov.u16 r0, d17[3]
-; CHECK-NEXT: vorr d22, d16, d16
-; CHECK-NEXT: vorr d23, d16, d16
-; CHECK-NEXT: vmov.16 d20[0], r1
-; CHECK-NEXT: vuzp.16 d22, d23
-; CHECK-NEXT: vmov.u16 r1, d17[1]
-; CHECK-NEXT: vmov.16 d20[1], r0
-; CHECK-NEXT: vuzp.16 d22, d18
-; CHECK-NEXT: vmov.16 d20[2], r1
-; CHECK-NEXT: vmov.u16 r0, d19[1]
-; CHECK-NEXT: vext.16 d21, d16, d18, #3
-; CHECK-NEXT: vmov.16 d20[3], r0
-; CHECK-NEXT: vmov r0, r1, d20
-; CHECK-NEXT: vmov r2, r3, d21
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
+; CHECK-NEXT: vorr d22, d16, d16
+; CHECK-NEXT: vmov.u16 r0, d16[0]
+; CHECK-NEXT: vorr d23, d16, d16
+; CHECK-NEXT: vmov.u16 r2, d17[3]
+; CHECK-NEXT: vmov.u16 r3, d17[1]
+; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
+; CHECK-NEXT: vmov.u16 r1, d19[1]
+; CHECK-NEXT: vuzp.16 d22, d23
+; CHECK-NEXT: vuzp.16 d22, d18
+; CHECK-NEXT: vmov.16 d20[0], r0
+; CHECK-NEXT: vmov.16 d20[1], r2
+; CHECK-NEXT: vmov.16 d20[2], r3
+; CHECK-NEXT: vmov.16 d20[3], r1
+; CHECK-NEXT: vext.16 d21, d16, d18, #3
+; CHECK-NEXT: vmov r0, r1, d20
+; CHECK-NEXT: vmov r2, r3, d21
+; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 7, i32 5, i32 13, i32 3, i32 2, i32 2, i32 9>
@@ -289,10 +289,10 @@ define arm_aapcscc void @test_elem_mismatch(<2 x i64>* nocapture %src, <4 x i16>
; CHECK-LABEL: test_elem_mismatch:
; CHECK: @ BB#0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r0:128]
-; CHECK-NEXT: vmov.32 r2, d16[0]
-; CHECK-NEXT: vmov.32 r0, d17[0]
-; CHECK-NEXT: vmov.16 d16[0], r2
-; CHECK-NEXT: vmov.16 d16[1], r0
+; CHECK-NEXT: vmov.32 r0, d16[0]
+; CHECK-NEXT: vmov.32 r2, d17[0]
+; CHECK-NEXT: vmov.16 d16[0], r0
+; CHECK-NEXT: vmov.16 d16[1], r2
; CHECK-NEXT: vstr d16, [r1]
; CHECK-NEXT: mov pc, lr
%tmp0 = load <2 x i64>, <2 x i64>* %src, align 16
diff --git a/test/CodeGen/ARM/vfp.ll b/test/CodeGen/ARM/vfp.ll
index 03c0354aa1df..8fa5113d8a31 100644
--- a/test/CodeGen/ARM/vfp.ll
+++ b/test/CodeGen/ARM/vfp.ll
@@ -40,8 +40,8 @@ define void @test_add(float* %P, double* %D) {
define void @test_ext_round(float* %P, double* %D) {
;CHECK-LABEL: test_ext_round:
%a = load float, float* %P ; <float> [#uses=1]
-;CHECK: vcvt.f64.f32
-;CHECK: vcvt.f32.f64
+;CHECK-DAG: vcvt.f64.f32
+;CHECK-DAG: vcvt.f32.f64
%b = fpext float %a to double ; <double> [#uses=1]
%A = load double, double* %D ; <double> [#uses=1]
%B = fptrunc double %A to float ; <float> [#uses=1]
diff --git a/test/CodeGen/ARM/vld1.ll b/test/CodeGen/ARM/vld1.ll
index bdb384769741..c50e0beea4d1 100644
--- a/test/CodeGen/ARM/vld1.ll
+++ b/test/CodeGen/ARM/vld1.ll
@@ -78,7 +78,7 @@ define <16 x i8> @vld1Qi8(i8* %A) nounwind {
;Check for a post-increment updating load.
define <16 x i8> @vld1Qi8_update(i8** %ptr) nounwind {
;CHECK-LABEL: vld1Qi8_update:
-;CHECK: vld1.8 {d16, d17}, [{{r[0-9]+}}:64]!
+;CHECK: vld1.8 {d16, d17}, [{{r[0-9]+|lr}}:64]!
%A = load i8*, i8** %ptr
%tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %A, i32 8)
%tmp2 = getelementptr i8, i8* %A, i32 16
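
Loosening [r0] to [{{r[0-9]+|lr}}] is the recurring pattern in the vld/vst tests that follow: once scheduling changes let the register allocator pick a different base register (lr included, once it has been saved), hard-coded register numbers only encode allocator noise, and the part of the check worth keeping is the instruction shape, the register list, and the alignment annotation. In isolation:

; Accept any base register, including lr, for the post-incremented load:
; CHECK: vld1.8 {d16, d17}, [{{r[0-9]+|lr}}:64]!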
diff --git a/test/CodeGen/ARM/vld2.ll b/test/CodeGen/ARM/vld2.ll
index 1ca16587bd91..6ef37c1b6678 100644
--- a/test/CodeGen/ARM/vld2.ll
+++ b/test/CodeGen/ARM/vld2.ll
@@ -14,7 +14,7 @@
define <8 x i8> @vld2i8(i8* %A) nounwind {
;CHECK-LABEL: vld2i8:
;Check the alignment value. Max for this instruction is 128 bits:
-;CHECK: vld2.8 {d16, d17}, [r0:64]
+;CHECK: vld2.8 {d16, d17}, [{{r[0-9]+|lr}}:64]
%tmp1 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8.p0i8(i8* %A, i32 8)
%tmp2 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 1
@@ -25,7 +25,7 @@ define <8 x i8> @vld2i8(i8* %A) nounwind {
define <4 x i16> @vld2i16(i16* %A) nounwind {
;CHECK-LABEL: vld2i16:
;Check the alignment value. Max for this instruction is 128 bits:
-;CHECK: vld2.16 {d16, d17}, [r0:128]
+;CHECK: vld2.16 {d16, d17}, [{{r[0-9]+|lr}}:128]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16.p0i8(i8* %tmp0, i32 32)
%tmp2 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 0
@@ -59,7 +59,7 @@ define <2 x float> @vld2f(float* %A) nounwind {
;Check for a post-increment updating load.
define <2 x float> @vld2f_update(float** %ptr) nounwind {
;CHECK-LABEL: vld2f_update:
-;CHECK: vld2.32 {d16, d17}, [r1]!
+;CHECK: vld2.32 {d16, d17}, [{{r[0-9]+|lr}}]!
%A = load float*, float** %ptr
%tmp0 = bitcast float* %A to i8*
%tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32.p0i8(i8* %tmp0, i32 1)
@@ -74,7 +74,7 @@ define <2 x float> @vld2f_update(float** %ptr) nounwind {
define <1 x i64> @vld2i64(i64* %A) nounwind {
;CHECK-LABEL: vld2i64:
;Check the alignment value. Max for this instruction is 128 bits:
-;CHECK: vld1.64 {d16, d17}, [r0:128]
+;CHECK: vld1.64 {d16, d17}, [{{r[0-9]+|lr}}:128]
%tmp0 = bitcast i64* %A to i8*
%tmp1 = call %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64.p0i8(i8* %tmp0, i32 32)
%tmp2 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 0
@@ -86,7 +86,7 @@ define <1 x i64> @vld2i64(i64* %A) nounwind {
define <16 x i8> @vld2Qi8(i8* %A) nounwind {
;CHECK-LABEL: vld2Qi8:
;Check the alignment value. Max for this instruction is 256 bits:
-;CHECK: vld2.8 {d16, d17, d18, d19}, [r0:64]
+;CHECK: vld2.8 {d16, d17, d18, d19}, [{{r[0-9]+|lr}}:64]
%tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8.p0i8(i8* %A, i32 8)
%tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1
@@ -97,7 +97,7 @@ define <16 x i8> @vld2Qi8(i8* %A) nounwind {
;Check for a post-increment updating load with register increment.
define <16 x i8> @vld2Qi8_update(i8** %ptr, i32 %inc) nounwind {
;CHECK-LABEL: vld2Qi8_update:
-;CHECK: vld2.8 {d16, d17, d18, d19}, [r2:128], r1
+;CHECK: vld2.8 {d16, d17, d18, d19}, [{{r[0-9]+|lr}}:128], r1
%A = load i8*, i8** %ptr
%tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8.p0i8(i8* %A, i32 16)
%tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0
@@ -111,7 +111,7 @@ define <16 x i8> @vld2Qi8_update(i8** %ptr, i32 %inc) nounwind {
define <8 x i16> @vld2Qi16(i16* %A) nounwind {
;CHECK-LABEL: vld2Qi16:
;Check the alignment value. Max for this instruction is 256 bits:
-;CHECK: vld2.16 {d16, d17, d18, d19}, [r0:128]
+;CHECK: vld2.16 {d16, d17, d18, d19}, [{{r[0-9]+|lr}}:128]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16.p0i8(i8* %tmp0, i32 16)
%tmp2 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 0
@@ -123,7 +123,7 @@ define <8 x i16> @vld2Qi16(i16* %A) nounwind {
define <4 x i32> @vld2Qi32(i32* %A) nounwind {
;CHECK-LABEL: vld2Qi32:
;Check the alignment value. Max for this instruction is 256 bits:
-;CHECK: vld2.32 {d16, d17, d18, d19}, [r0:256]
+;CHECK: vld2.32 {d16, d17, d18, d19}, [{{r[0-9]+|lr}}:256]
%tmp0 = bitcast i32* %A to i8*
%tmp1 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32.p0i8(i8* %tmp0, i32 64)
%tmp2 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 0
diff --git a/test/CodeGen/ARM/vld3.ll b/test/CodeGen/ARM/vld3.ll
index c3e8ee8691fd..0eaad0f90035 100644
--- a/test/CodeGen/ARM/vld3.ll
+++ b/test/CodeGen/ARM/vld3.ll
@@ -15,7 +15,7 @@
define <8 x i8> @vld3i8(i8* %A) nounwind {
;CHECK-LABEL: vld3i8:
;Check the alignment value. Max for this instruction is 64 bits:
-;CHECK: vld3.8 {d16, d17, d18}, [r0:64]
+;CHECK: vld3.8 {d16, d17, d18}, [{{r[0-9]+|lr}}:64]
%tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8.p0i8(i8* %A, i32 32)
%tmp2 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 2
@@ -37,7 +37,7 @@ define <4 x i16> @vld3i16(i16* %A) nounwind {
;Check for a post-increment updating load with register increment.
define <4 x i16> @vld3i16_update(i16** %ptr, i32 %inc) nounwind {
;CHECK-LABEL: vld3i16_update:
-;CHECK: vld3.16 {d16, d17, d18}, [{{r[0-9]+}}], {{r[0-9]+}}
+;CHECK: vld3.16 {d16, d17, d18}, [{{r[0-9]+|lr}}], {{r[0-9]+|lr}}
%A = load i16*, i16** %ptr
%tmp0 = bitcast i16* %A to i8*
%tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16.p0i8(i8* %tmp0, i32 1)
@@ -74,7 +74,7 @@ define <2 x float> @vld3f(float* %A) nounwind {
define <1 x i64> @vld3i64(i64* %A) nounwind {
;CHECK-LABEL: vld3i64:
;Check the alignment value. Max for this instruction is 64 bits:
-;CHECK: vld1.64 {d16, d17, d18}, [r0:64]
+;CHECK: vld1.64 {d16, d17, d18}, [{{r[0-9]+|lr}}:64]
%tmp0 = bitcast i64* %A to i8*
%tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64.p0i8(i8* %tmp0, i32 16)
%tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0
@@ -85,7 +85,7 @@ define <1 x i64> @vld3i64(i64* %A) nounwind {
define <1 x i64> @vld3i64_update(i64** %ptr, i64* %A) nounwind {
;CHECK-LABEL: vld3i64_update:
-;CHECK: vld1.64 {d16, d17, d18}, [r1:64]!
+;CHECK: vld1.64 {d16, d17, d18}, [{{r[0-9]+|lr}}:64]!
%tmp0 = bitcast i64* %A to i8*
%tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64.p0i8(i8* %tmp0, i32 16)
%tmp5 = getelementptr i64, i64* %A, i32 3
@@ -99,8 +99,8 @@ define <1 x i64> @vld3i64_update(i64** %ptr, i64* %A) nounwind {
define <16 x i8> @vld3Qi8(i8* %A) nounwind {
;CHECK-LABEL: vld3Qi8:
;Check the alignment value. Max for this instruction is 64 bits:
-;CHECK: vld3.8 {d16, d18, d20}, [r0:64]!
-;CHECK: vld3.8 {d17, d19, d21}, [r0:64]
+;CHECK: vld3.8 {d16, d18, d20}, [{{r[0-9]+|lr}}:64]!
+;CHECK: vld3.8 {d17, d19, d21}, [{{r[0-9]+|lr}}:64]
%tmp1 = call %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8.p0i8(i8* %A, i32 32)
%tmp2 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 2
@@ -135,8 +135,8 @@ define <4 x i32> @vld3Qi32(i32* %A) nounwind {
;Check for a post-increment updating load.
define <4 x i32> @vld3Qi32_update(i32** %ptr) nounwind {
;CHECK-LABEL: vld3Qi32_update:
-;CHECK: vld3.32 {d16, d18, d20}, [r[[R:[0-9]+]]]!
-;CHECK: vld3.32 {d17, d19, d21}, [r[[R]]]!
+;CHECK: vld3.32 {d16, d18, d20}, {{\[}}[[R:r[0-9]+|lr]]]!
+;CHECK: vld3.32 {d17, d19, d21}, {{\[}}[[R]]]!
%A = load i32*, i32** %ptr
%tmp0 = bitcast i32* %A to i8*
%tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32.p0i8(i8* %tmp0, i32 1)
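
Note how the capture in vld3Qi32_update was widened: r[[R:[0-9]+]] kept the 'r' outside the variable and could only match numbered registers, whereas {{\[}}[[R:r[0-9]+|lr]]] moves the whole register name into the pattern so lr is a legal match, and the second vld3.32 still requires the same base via [[R]]. In isolation:

; Capture the post-incremented base once, whatever register it is, then
; require the second half of the load to reuse it:
; CHECK: vld3.32 {d16, d18, d20}, {{\[}}[[R:r[0-9]+|lr]]]!
; CHECK: vld3.32 {d17, d19, d21}, {{\[}}[[R]]]!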
diff --git a/test/CodeGen/ARM/vld4.ll b/test/CodeGen/ARM/vld4.ll
index 10570039a9d2..5663e6d41f02 100644
--- a/test/CodeGen/ARM/vld4.ll
+++ b/test/CodeGen/ARM/vld4.ll
@@ -14,7 +14,7 @@
define <8 x i8> @vld4i8(i8* %A) nounwind {
;CHECK-LABEL: vld4i8:
;Check the alignment value. Max for this instruction is 256 bits:
-;CHECK: vld4.8 {d16, d17, d18, d19}, [r0:64]
+;CHECK: vld4.8 {d16, d17, d18, d19}, [{{r[0-9]+|lr}}:64]
%tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8.p0i8(i8* %A, i32 8)
%tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 2
@@ -25,7 +25,7 @@ define <8 x i8> @vld4i8(i8* %A) nounwind {
;Check for a post-increment updating load with register increment.
define <8 x i8> @vld4i8_update(i8** %ptr, i32 %inc) nounwind {
;CHECK-LABEL: vld4i8_update:
-;CHECK: vld4.8 {d16, d17, d18, d19}, [r2:128], r1
+;CHECK: vld4.8 {d16, d17, d18, d19}, [{{r[0-9]+|lr}}:128], r1
%A = load i8*, i8** %ptr
%tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8.p0i8(i8* %A, i32 16)
%tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0
@@ -39,7 +39,7 @@ define <8 x i8> @vld4i8_update(i8** %ptr, i32 %inc) nounwind {
define <4 x i16> @vld4i16(i16* %A) nounwind {
;CHECK-LABEL: vld4i16:
;Check the alignment value. Max for this instruction is 256 bits:
-;CHECK: vld4.16 {d16, d17, d18, d19}, [r0:128]
+;CHECK: vld4.16 {d16, d17, d18, d19}, [{{r[0-9]+|lr}}:128]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16.p0i8(i8* %tmp0, i32 16)
%tmp2 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 0
@@ -51,7 +51,7 @@ define <4 x i16> @vld4i16(i16* %A) nounwind {
define <2 x i32> @vld4i32(i32* %A) nounwind {
;CHECK-LABEL: vld4i32:
;Check the alignment value. Max for this instruction is 256 bits:
-;CHECK: vld4.32 {d16, d17, d18, d19}, [r0:256]
+;CHECK: vld4.32 {d16, d17, d18, d19}, [{{r[0-9]+|lr}}:256]
%tmp0 = bitcast i32* %A to i8*
%tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32.p0i8(i8* %tmp0, i32 32)
%tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0
@@ -74,7 +74,7 @@ define <2 x float> @vld4f(float* %A) nounwind {
define <1 x i64> @vld4i64(i64* %A) nounwind {
;CHECK-LABEL: vld4i64:
;Check the alignment value. Max for this instruction is 256 bits:
-;CHECK: vld1.64 {d16, d17, d18, d19}, [r0:256]
+;CHECK: vld1.64 {d16, d17, d18, d19}, [{{r[0-9]+|lr}}:256]
%tmp0 = bitcast i64* %A to i8*
%tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64.p0i8(i8* %tmp0, i32 64)
%tmp2 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 0
@@ -85,7 +85,7 @@ define <1 x i64> @vld4i64(i64* %A) nounwind {
define <1 x i64> @vld4i64_update(i64** %ptr, i64* %A) nounwind {
;CHECK-LABEL: vld4i64_update:
-;CHECK: vld1.64 {d16, d17, d18, d19}, [r1:256]!
+;CHECK: vld1.64 {d16, d17, d18, d19}, [{{r[0-9]+|lr}}:256]!
%tmp0 = bitcast i64* %A to i8*
%tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64.p0i8(i8* %tmp0, i32 64)
%tmp5 = getelementptr i64, i64* %A, i32 4
@@ -99,8 +99,8 @@ define <1 x i64> @vld4i64_update(i64** %ptr, i64* %A) nounwind {
define <16 x i8> @vld4Qi8(i8* %A) nounwind {
;CHECK-LABEL: vld4Qi8:
;Check the alignment value. Max for this instruction is 256 bits:
-;CHECK: vld4.8 {d16, d18, d20, d22}, [r0:256]!
-;CHECK: vld4.8 {d17, d19, d21, d23}, [r0:256]
+;CHECK: vld4.8 {d16, d18, d20, d22}, [{{r[0-9]+|lr}}:256]!
+;CHECK: vld4.8 {d17, d19, d21, d23}, [{{r[0-9]+|lr}}:256]
%tmp1 = call %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8.p0i8(i8* %A, i32 64)
%tmp2 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 2
@@ -111,8 +111,8 @@ define <16 x i8> @vld4Qi8(i8* %A) nounwind {
define <8 x i16> @vld4Qi16(i16* %A) nounwind {
;CHECK-LABEL: vld4Qi16:
;Check for no alignment specifier.
-;CHECK: vld4.16 {d16, d18, d20, d22}, [r0]!
-;CHECK: vld4.16 {d17, d19, d21, d23}, [r0]
+;CHECK: vld4.16 {d16, d18, d20, d22}, [{{r[0-9]+|lr}}]!
+;CHECK: vld4.16 {d17, d19, d21, d23}, [{{r[0-9]+|lr}}]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16.p0i8(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 0
@@ -124,8 +124,8 @@ define <8 x i16> @vld4Qi16(i16* %A) nounwind {
;Check for a post-increment updating load.
define <8 x i16> @vld4Qi16_update(i16** %ptr) nounwind {
;CHECK-LABEL: vld4Qi16_update:
-;CHECK: vld4.16 {d16, d18, d20, d22}, [r1:64]!
-;CHECK: vld4.16 {d17, d19, d21, d23}, [r1:64]!
+;CHECK: vld4.16 {d16, d18, d20, d22}, [{{r[0-9]+|lr}}:64]!
+;CHECK: vld4.16 {d17, d19, d21, d23}, [{{r[0-9]+|lr}}:64]!
%A = load i16*, i16** %ptr
%tmp0 = bitcast i16* %A to i8*
%tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16.p0i8(i8* %tmp0, i32 8)
diff --git a/test/CodeGen/ARM/vlddup.ll b/test/CodeGen/ARM/vlddup.ll
index 71ca0f791524..72f9434fd10a 100644
--- a/test/CodeGen/ARM/vlddup.ll
+++ b/test/CodeGen/ARM/vlddup.ll
@@ -3,7 +3,7 @@
define <8 x i8> @vld1dupi8(i8* %A) nounwind {
;CHECK-LABEL: vld1dupi8:
;Check the (default) alignment value.
-;CHECK: vld1.8 {d16[]}, [r0]
+;CHECK: vld1.8 {d16[]}, [{{r[0-9]+|lr}}]
%tmp1 = load i8, i8* %A, align 8
%tmp2 = insertelement <8 x i8> undef, i8 %tmp1, i32 0
%tmp3 = shufflevector <8 x i8> %tmp2, <8 x i8> undef, <8 x i32> zeroinitializer
@@ -13,7 +13,7 @@ define <8 x i8> @vld1dupi8(i8* %A) nounwind {
define <8 x i8> @vld1dupi8_preinc(i8** noalias nocapture %a, i32 %b) nounwind {
entry:
;CHECK-LABEL: vld1dupi8_preinc:
-;CHECK: vld1.8 {d16[]}, [r1]
+;CHECK: vld1.8 {d16[]}, [{{r[0-9]+|lr}}]
%0 = load i8*, i8** %a, align 4
%add.ptr = getelementptr inbounds i8, i8* %0, i32 %b
%1 = load i8, i8* %add.ptr, align 1
@@ -26,7 +26,7 @@ entry:
define <8 x i8> @vld1dupi8_postinc_fixed(i8** noalias nocapture %a) nounwind {
entry:
;CHECK-LABEL: vld1dupi8_postinc_fixed:
-;CHECK: vld1.8 {d16[]}, [r1]!
+;CHECK: vld1.8 {d16[]}, [{{r[0-9]+|lr}}]!
%0 = load i8*, i8** %a, align 4
%1 = load i8, i8* %0, align 1
%2 = insertelement <8 x i8> undef, i8 %1, i32 0
@@ -39,7 +39,7 @@ entry:
define <8 x i8> @vld1dupi8_postinc_register(i8** noalias nocapture %a, i32 %n) nounwind {
entry:
;CHECK-LABEL: vld1dupi8_postinc_register:
-;CHECK: vld1.8 {d16[]}, [r2], r1
+;CHECK: vld1.8 {d16[]}, [{{r[0-9]+|lr}}], r1
%0 = load i8*, i8** %a, align 4
%1 = load i8, i8* %0, align 1
%2 = insertelement <8 x i8> undef, i8 %1, i32 0
@@ -52,7 +52,7 @@ entry:
define <16 x i8> @vld1dupqi8_preinc(i8** noalias nocapture %a, i32 %b) nounwind {
entry:
;CHECK-LABEL: vld1dupqi8_preinc:
-;CHECK: vld1.8 {d16[], d17[]}, [r1]
+;CHECK: vld1.8 {d16[], d17[]}, [{{r[0-9]+|lr}}]
%0 = load i8*, i8** %a, align 4
%add.ptr = getelementptr inbounds i8, i8* %0, i32 %b
%1 = load i8, i8* %add.ptr, align 1
@@ -65,7 +65,7 @@ entry:
define <16 x i8> @vld1dupqi8_postinc_fixed(i8** noalias nocapture %a) nounwind {
entry:
;CHECK-LABEL: vld1dupqi8_postinc_fixed:
-;CHECK: vld1.8 {d16[], d17[]}, [r1]!
+;CHECK: vld1.8 {d16[], d17[]}, [{{r[0-9]+|lr}}]!
%0 = load i8*, i8** %a, align 4
%1 = load i8, i8* %0, align 1
%2 = insertelement <16 x i8> undef, i8 %1, i32 0
@@ -78,7 +78,7 @@ entry:
define <16 x i8> @vld1dupqi8_postinc_register(i8** noalias nocapture %a, i32 %n) nounwind {
entry:
;CHECK-LABEL: vld1dupqi8_postinc_register:
-;CHECK: vld1.8 {d16[], d17[]}, [r2], r1
+;CHECK: vld1.8 {d16[], d17[]}, [{{r[0-9]+|lr}}], r1
%0 = load i8*, i8** %a, align 4
%1 = load i8, i8* %0, align 1
%2 = insertelement <16 x i8> undef, i8 %1, i32 0
@@ -91,7 +91,7 @@ entry:
define <4 x i16> @vld1dupi16(i16* %A) nounwind {
;CHECK-LABEL: vld1dupi16:
;Check the alignment value. Max for this instruction is 16 bits:
-;CHECK: vld1.16 {d16[]}, [r0:16]
+;CHECK: vld1.16 {d16[]}, [{{r[0-9]+|lr}}:16]
%tmp1 = load i16, i16* %A, align 8
%tmp2 = insertelement <4 x i16> undef, i16 %tmp1, i32 0
%tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> undef, <4 x i32> zeroinitializer
@@ -100,7 +100,7 @@ define <4 x i16> @vld1dupi16(i16* %A) nounwind {
define <4 x i16> @vld1dupi16_misaligned(i16* %A) nounwind {
;CHECK-LABEL: vld1dupi16_misaligned:
-;CHECK: vld1.16 {d16[]}, [r0]
+;CHECK: vld1.16 {d16[]}, [{{r[0-9]+|lr}}]
%tmp1 = load i16, i16* %A, align 1
%tmp2 = insertelement <4 x i16> undef, i16 %tmp1, i32 0
%tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> undef, <4 x i32> zeroinitializer
@@ -110,7 +110,7 @@ define <4 x i16> @vld1dupi16_misaligned(i16* %A) nounwind {
; This sort of looks like a vld1dup, but there's an extension in the way.
define <4 x i16> @load_i16_dup_zext(i8* %A) nounwind {
;CHECK-LABEL: load_i16_dup_zext:
-;CHECK: ldrb r0, [r0]
+;CHECK: ldrb r0, [{{r[0-9]+|lr}}]
;CHECK-NEXT: vdup.16 d16, r0
%tmp1 = load i8, i8* %A, align 1
%tmp2 = zext i8 %tmp1 to i16
@@ -122,7 +122,7 @@ define <4 x i16> @load_i16_dup_zext(i8* %A) nounwind {
; This sort of looks like a vld1dup, but there's an extension in the way.
define <4 x i16> @load_i16_dup_sext(i8* %A) nounwind {
;CHECK-LABEL: load_i16_dup_sext:
-;CHECK: ldrsb r0, [r0]
+;CHECK: ldrsb r0, [{{r[0-9]+|lr}}]
;CHECK-NEXT: vdup.16 d16, r0
%tmp1 = load i8, i8* %A, align 1
%tmp2 = sext i8 %tmp1 to i16
@@ -134,7 +134,7 @@ define <4 x i16> @load_i16_dup_sext(i8* %A) nounwind {
; This sort of looks like a vld1dup, but there's an extension in the way.
define <8 x i16> @load_i16_dupq_zext(i8* %A) nounwind {
;CHECK-LABEL: load_i16_dupq_zext:
-;CHECK: ldrb r0, [r0]
+;CHECK: ldrb r0, [{{r[0-9]+|lr}}]
;CHECK-NEXT: vdup.16 q8, r0
%tmp1 = load i8, i8* %A, align 1
%tmp2 = zext i8 %tmp1 to i16
@@ -146,7 +146,7 @@ define <8 x i16> @load_i16_dupq_zext(i8* %A) nounwind {
define <2 x i32> @vld1dupi32(i32* %A) nounwind {
;CHECK-LABEL: vld1dupi32:
;Check the alignment value. Max for this instruction is 32 bits:
-;CHECK: vld1.32 {d16[]}, [r0:32]
+;CHECK: vld1.32 {d16[]}, [{{r[0-9]+|lr}}:32]
%tmp1 = load i32, i32* %A, align 8
%tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i32 0
%tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -156,7 +156,7 @@ define <2 x i32> @vld1dupi32(i32* %A) nounwind {
; This sort of looks like a vld1dup, but there's an extension in the way.
define <4 x i32> @load_i32_dup_zext(i8* %A) nounwind {
;CHECK-LABEL: load_i32_dup_zext:
-;CHECK: ldrb r0, [r0]
+;CHECK: ldrb r0, [{{r[0-9]+|lr}}]
;CHECK-NEXT: vdup.32 q8, r0
%tmp1 = load i8, i8* %A, align 1
%tmp2 = zext i8 %tmp1 to i32
@@ -168,7 +168,7 @@ define <4 x i32> @load_i32_dup_zext(i8* %A) nounwind {
; This sort of looks like a vld1dup, but there's an extension in the way.
define <4 x i32> @load_i32_dup_sext(i8* %A) nounwind {
;CHECK-LABEL: load_i32_dup_sext:
-;CHECK: ldrsb r0, [r0]
+;CHECK: ldrsb r0, [{{r[0-9]+|lr}}]
;CHECK-NEXT: vdup.32 q8, r0
%tmp1 = load i8, i8* %A, align 1
%tmp2 = sext i8 %tmp1 to i32
@@ -179,7 +179,7 @@ define <4 x i32> @load_i32_dup_sext(i8* %A) nounwind {
define <2 x float> @vld1dupf(float* %A) nounwind {
;CHECK-LABEL: vld1dupf:
-;CHECK: vld1.32 {d16[]}, [r0:32]
+;CHECK: vld1.32 {d16[]}, [{{r[0-9]+|lr}}:32]
%tmp0 = load float, float* %A
%tmp1 = insertelement <2 x float> undef, float %tmp0, i32 0
%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
@@ -189,7 +189,7 @@ define <2 x float> @vld1dupf(float* %A) nounwind {
define <16 x i8> @vld1dupQi8(i8* %A) nounwind {
;CHECK-LABEL: vld1dupQi8:
;Check the (default) alignment value.
-;CHECK: vld1.8 {d16[], d17[]}, [r0]
+;CHECK: vld1.8 {d16[], d17[]}, [{{r[0-9]+|lr}}]
%tmp1 = load i8, i8* %A, align 8
%tmp2 = insertelement <16 x i8> undef, i8 %tmp1, i32 0
%tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> zeroinitializer
@@ -198,7 +198,7 @@ define <16 x i8> @vld1dupQi8(i8* %A) nounwind {
define <4 x float> @vld1dupQf(float* %A) nounwind {
;CHECK-LABEL: vld1dupQf:
-;CHECK: vld1.32 {d16[], d17[]}, [r0:32]
+;CHECK: vld1.32 {d16[], d17[]}, [{{r[0-9]+|lr}}:32]
%tmp0 = load float, float* %A
%tmp1 = insertelement <4 x float> undef, float %tmp0, i32 0
%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
@@ -212,7 +212,7 @@ define <4 x float> @vld1dupQf(float* %A) nounwind {
define <8 x i8> @vld2dupi8(i8* %A) nounwind {
;CHECK-LABEL: vld2dupi8:
;Check the (default) alignment value.
-;CHECK: vld2.8 {d16[], d17[]}, [r0]
+;CHECK: vld2.8 {d16[], d17[]}, [{{r[0-9]+|lr}}]
%tmp0 = tail call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %A, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
%tmp1 = extractvalue %struct.__neon_int8x8x2_t %tmp0, 0
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer
@@ -283,7 +283,7 @@ define <4 x i16> @vld2dupi16(i8* %A) nounwind {
;CHECK-LABEL: vld2dupi16:
;Check that a power-of-two alignment smaller than the total size of the memory
;being loaded is ignored.
-;CHECK: vld2.16 {d16[], d17[]}, [r0]
+;CHECK: vld2.16 {d16[], d17[]}, [{{r[0-9]+|lr}}]
%tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* %A, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
%tmp1 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 0
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
@@ -296,7 +296,7 @@ define <4 x i16> @vld2dupi16(i8* %A) nounwind {
;Check for a post-increment updating load.
define <4 x i16> @vld2dupi16_update(i16** %ptr) nounwind {
;CHECK-LABEL: vld2dupi16_update:
-;CHECK: vld2.16 {d16[], d17[]}, [r1]!
+;CHECK: vld2.16 {d16[], d17[]}, [{{r[0-9]+|lr}}]!
%A = load i16*, i16** %ptr
%A2 = bitcast i16* %A to i8*
%tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* %A2, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
@@ -313,7 +313,7 @@ define <4 x i16> @vld2dupi16_update(i16** %ptr) nounwind {
define <4 x i16> @vld2dupi16_odd_update(i16** %ptr) nounwind {
;CHECK-LABEL: vld2dupi16_odd_update:
;CHECK: mov [[INC:r[0-9]+]], #6
-;CHECK: vld2.16 {d16[], d17[]}, [r1], [[INC]]
+;CHECK: vld2.16 {d16[], d17[]}, [{{r[0-9]+|lr}}], [[INC]]
%A = load i16*, i16** %ptr
%A2 = bitcast i16* %A to i8*
%tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* %A2, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
@@ -330,7 +330,7 @@ define <4 x i16> @vld2dupi16_odd_update(i16** %ptr) nounwind {
define <2 x i32> @vld2dupi32(i8* %A) nounwind {
;CHECK-LABEL: vld2dupi32:
;Check the alignment value. Max for this instruction is 64 bits:
-;CHECK: vld2.32 {d16[], d17[]}, [r0:64]
+;CHECK: vld2.32 {d16[], d17[]}, [{{r[0-9]+|lr}}:64]
%tmp0 = tail call %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* %A, <2 x i32> undef, <2 x i32> undef, i32 0, i32 16)
%tmp1 = extractvalue %struct.__neon_int2x32x2_t %tmp0, 0
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -350,7 +350,7 @@ declare %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32.p0i8(i8*, <2 x
;Check for a post-increment updating load with register increment.
define <8 x i8> @vld3dupi8_update(i8** %ptr, i32 %inc) nounwind {
;CHECK-LABEL: vld3dupi8_update:
-;CHECK: vld3.8 {d16[], d17[], d18[]}, [r2], r1
+;CHECK: vld3.8 {d16[], d17[], d18[]}, [{{r[0-9]+|lr}}], r1
%A = load i8*, i8** %ptr
%tmp0 = tail call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8.p0i8(i8* %A, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 8)
%tmp1 = extractvalue %struct.__neon_int8x8x3_t %tmp0, 0
@@ -369,7 +369,7 @@ define <8 x i8> @vld3dupi8_update(i8** %ptr, i32 %inc) nounwind {
define <4 x i16> @vld3dupi16(i8* %A) nounwind {
;CHECK-LABEL: vld3dupi16:
;Check the (default) alignment value. VLD3 does not support alignment.
-;CHECK: vld3.16 {d16[], d17[], d18[]}, [r0]
+;CHECK: vld3.16 {d16[], d17[], d18[]}, [{{r[0-9]+|lr}}]
%tmp0 = tail call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* %A, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 8)
%tmp1 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 0
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
@@ -391,7 +391,7 @@ declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16.p0i8(i8*, <4 x
;Check for a post-increment updating load.
define <4 x i16> @vld4dupi16_update(i16** %ptr) nounwind {
;CHECK-LABEL: vld4dupi16_update:
-;CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r1]!
+;CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [{{r[0-9]+|lr}}]!
%A = load i16*, i16** %ptr
%A2 = bitcast i16* %A to i8*
%tmp0 = tail call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* %A2, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 1)
@@ -415,7 +415,7 @@ define <2 x i32> @vld4dupi32(i8* %A) nounwind {
;CHECK-LABEL: vld4dupi32:
;Check the alignment value. An 8-byte alignment is allowed here even though
;it is smaller than the total size of the memory being loaded.
-;CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r0:64]
+;CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [{{r[0-9]+|lr}}:64]
%tmp0 = tail call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32.p0i8(i8* %A, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 8)
%tmp1 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 0
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
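
The :16/:32/:64/:128/:256 suffixes these checks keep are NEON address-alignment annotations: the instruction promises the base is at least that aligned, each vldN/vstN encoding has a maximum it can express (the "Max for this instruction" comments), and an IR alignment that reaches no encodable boundary is simply dropped. For instance, vld2dupi32 above passes alignment 16 (128 bits) to the intrinsic, but the duplicating vld2.32 can encode at most 64:

; i32 16 in the intrinsic call = 128-bit alignment requested; this encoding
; can state at most :64, so that is what the test checks:
; CHECK: vld2.32 {d16[], d17[]}, [{{r[0-9]+|lr}}:64]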
diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll
index 866641f3fbbd..f5c0f09ed440 100644
--- a/test/CodeGen/ARM/vldlane.ll
+++ b/test/CodeGen/ARM/vldlane.ll
@@ -308,7 +308,7 @@ define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;Check for a post-increment updating load with register increment.
define <8 x i16> @vld3laneQi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounwind {
;CHECK-LABEL: vld3laneQi16_update:
-;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}], {{r[0-9]+}}
+;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+|lr}}], {{r[0-9]+}}
%A = load i16*, i16** %ptr
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>, <8 x i16>* %B
diff --git a/test/CodeGen/ARM/vpadd.ll b/test/CodeGen/ARM/vpadd.ll
index 3409d37a31f4..3fa93bb43f03 100644
--- a/test/CodeGen/ARM/vpadd.ll
+++ b/test/CodeGen/ARM/vpadd.ll
@@ -285,17 +285,17 @@ define void @addCombineToVPADDLq_s8(<16 x i8> *%cbcr, <8 x i16> *%X) nounwind ss
define void @addCombineToVPADDL_s8(<16 x i8> *%cbcr, <4 x i16> *%X) nounwind ssp {
; CHECK-LABEL: addCombineToVPADDL_s8:
; CHECK: @ BB#0:
-; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
-; CHECK-NEXT: vmov.i16 d18, #0x8
-; CHECK-NEXT: vneg.s16 d18, d18
-; CHECK-NEXT: vext.8 d19, d16, d16, #1
-; CHECK-NEXT: vshl.i16 d16, d16, #8
-; CHECK-NEXT: vshl.i16 d17, d19, #8
-; CHECK-NEXT: vshl.s16 d16, d16, d18
-; CHECK-NEXT: vshl.s16 d17, d17, d18
-; CHECK-NEXT: vadd.i16 d16, d17, d16
-; CHECK-NEXT: vstr d16, [r1]
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vmov.i16 d16, #0x8
+; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT: vext.8 d17, d18, d16, #1
+; CHECK-NEXT: vneg.s16 d16, d16
+; CHECK-NEXT: vshl.i16 d18, d18, #8
+; CHECK-NEXT: vshl.i16 d17, d17, #8
+; CHECK-NEXT: vshl.s16 d18, d18, d16
+; CHECK-NEXT: vshl.s16 d16, d17, d16
+; CHECK-NEXT: vadd.i16 d16, d16, d18
+; CHECK-NEXT: vstr d16, [r1]
+; CHECK-NEXT: mov pc, lr
%tmp = load <16 x i8>, <16 x i8>* %cbcr
%tmp1 = shufflevector <16 x i8> %tmp, <16 x i8> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
diff --git a/test/CodeGen/ARM/vst1.ll b/test/CodeGen/ARM/vst1.ll
index 404129a7e6ad..e351a2ec2373 100644
--- a/test/CodeGen/ARM/vst1.ll
+++ b/test/CodeGen/ARM/vst1.ll
@@ -39,7 +39,7 @@ define void @vst1f(float* %A, <2 x float>* %B) nounwind {
;Check for a post-increment updating store.
define void @vst1f_update(float** %ptr, <2 x float>* %B) nounwind {
;CHECK-LABEL: vst1f_update:
-;CHECK: vst1.32 {d16}, [r1]!
+;CHECK: vst1.32 {d16}, [r{{[0-9]+}}]!
%A = load float*, float** %ptr
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <2 x float>, <2 x float>* %B
diff --git a/test/CodeGen/ARM/vst4.ll b/test/CodeGen/ARM/vst4.ll
index 188955102290..afa4321c91a0 100644
--- a/test/CodeGen/ARM/vst4.ll
+++ b/test/CodeGen/ARM/vst4.ll
@@ -12,7 +12,7 @@ define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind {
;Check for a post-increment updating store with register increment.
define void @vst4i8_update(i8** %ptr, <8 x i8>* %B, i32 %inc) nounwind {
;CHECK-LABEL: vst4i8_update:
-;CHECK: vst4.8 {d16, d17, d18, d19}, [r1:128], r2
+;CHECK: vst4.8 {d16, d17, d18, d19}, [r{{[0-9]+}}:128], r2
%A = load i8*, i8** %ptr
%tmp1 = load <8 x i8>, <8 x i8>* %B
call void @llvm.arm.neon.vst4.p0i8.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 16)
@@ -62,7 +62,7 @@ define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind {
define void @vst4i64_update(i64** %ptr, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vst4i64_update:
-;CHECK: vst1.64 {d16, d17, d18, d19}, [r1]!
+;CHECK: vst1.64 {d16, d17, d18, d19}, [r{{[0-9]+}}]!
%A = load i64*, i64** %ptr
%tmp0 = bitcast i64* %A to i8*
%tmp1 = load <1 x i64>, <1 x i64>* %B
@@ -116,8 +116,8 @@ define void @vst4Qf(float* %A, <4 x float>* %B) nounwind {
;Check for a post-increment updating store.
define void @vst4Qf_update(float** %ptr, <4 x float>* %B) nounwind {
;CHECK-LABEL: vst4Qf_update:
-;CHECK: vst4.32 {d16, d18, d20, d22}, [r1]!
-;CHECK: vst4.32 {d17, d19, d21, d23}, [r1]!
+;CHECK: vst4.32 {d16, d18, d20, d22}, [r[[REG:[0-9]+]]]!
+;CHECK: vst4.32 {d17, d19, d21, d23}, [r[[REG]]]!
%A = load float*, float** %ptr
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <4 x float>, <4 x float>* %B
diff --git a/test/CodeGen/ARM/vstlane.ll b/test/CodeGen/ARM/vstlane.ll
index 7e130ea01b64..49af0be92316 100644
--- a/test/CodeGen/ARM/vstlane.ll
+++ b/test/CodeGen/ARM/vstlane.ll
@@ -127,7 +127,7 @@ define void @vst2lanei16(i16* %A, <4 x i16>* %B) nounwind {
;Check for a post-increment updating store with register increment.
define void @vst2lanei16_update(i16** %ptr, <4 x i16>* %B, i32 %inc) nounwind {
;CHECK-LABEL: vst2lanei16_update:
-;CHECK: vst2.16 {d16[1], d17[1]}, [r1], r2
+;CHECK: vst2.16 {d16[1], d17[1]}, [r{{[0-9]+}}], r{{[0-9]+}}
%A = load i16*, i16** %ptr
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>, <4 x i16>* %B
@@ -251,7 +251,7 @@ define void @vst3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;Check for a post-increment updating store.
define void @vst3laneQi32_update(i32** %ptr, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vst3laneQi32_update:
-;CHECK: vst3.32 {d16[0], d18[0], d20[0]}, [r1]!
+;CHECK: vst3.32 {d16[0], d18[0], d20[0]}, [r{{[0-9]+}}]!
%A = load i32*, i32** %ptr
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>, <4 x i32>* %B
@@ -292,7 +292,7 @@ define void @vst4lanei8(i8* %A, <8 x i8>* %B) nounwind {
;Check for a post-increment updating store.
define void @vst4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vst4lanei8_update:
-;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1:32]!
+;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r{{[0-9]+}}:32]!
%A = load i8*, i8** %ptr
%tmp1 = load <8 x i8>, <8 x i8>* %B
call void @llvm.arm.neon.vst4lane.p0i8.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
diff --git a/test/CodeGen/ARM/vuzp.ll b/test/CodeGen/ARM/vuzp.ll
index 0a5235df319f..24090cfd6c65 100644
--- a/test/CodeGen/ARM/vuzp.ll
+++ b/test/CodeGen/ARM/vuzp.ll
@@ -324,26 +324,23 @@ define <8 x i8> @cmpsel_trunc(<8 x i8> %in0, <8 x i8> %in1, <8 x i32> %cmp0, <8
; truncate from i32 to i16 and one vmovn.i16 to perform the final truncation for i8.
; CHECK-LABEL: cmpsel_trunc:
; CHECK: @ BB#0:
-; CHECK-NEXT: .save {r4, r5, r11, lr}
-; CHECK-NEXT: push {r4, r5, r11, lr}
-; CHECK-NEXT: add r4, sp, #64
-; CHECK-NEXT: add r5, sp, #32
-; CHECK-NEXT: add r12, sp, #48
-; CHECK-NEXT: add lr, sp, #16
-; CHECK-NEXT: vld1.64 {d16, d17}, [r5]
-; CHECK-NEXT: vld1.64 {d18, d19}, [r4]
-; CHECK-NEXT: vld1.64 {d20, d21}, [lr]
-; CHECK-NEXT: vld1.64 {d22, d23}, [r12]
-; CHECK-NEXT: vcgt.u32 q8, q9, q8
-; CHECK-NEXT: vcgt.u32 q9, q11, q10
-; CHECK-NEXT: vmovn.i32 d17, q8
-; CHECK-NEXT: vmovn.i32 d16, q9
-; CHECK-NEXT: vmov d18, r2, r3
-; CHECK-NEXT: vmov d19, r0, r1
-; CHECK-NEXT: vmovn.i16 d16, q8
-; CHECK-NEXT: vbsl d16, d19, d18
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: pop {r4, r5, r11, lr}
+; CHECK-NEXT: add r12, sp, #16
+; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT: add r12, sp, #48
+; CHECK-NEXT: vld1.64 {d20, d21}, [r12]
+; CHECK-NEXT: add r12, sp, #32
+; CHECK-NEXT: vcgt.u32 q8, q10, q8
+; CHECK-NEXT: vld1.64 {d20, d21}, [r12]
+; CHECK-NEXT: vcgt.u32 q9, q10, q9
+; CHECK-NEXT: vmov d20, r2, r3
+; CHECK-NEXT: vmovn.i32 d17, q8
+; CHECK-NEXT: vmovn.i32 d16, q9
+; CHECK-NEXT: vmov d18, r0, r1
+; CHECK-NEXT: vmovn.i16 d16, q8
+; CHECK-NEXT: vbsl d16, d18, d20
+; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: mov pc, lr
%c = icmp ult <8 x i32> %cmp0, %cmp1
%res = select <8 x i1> %c, <8 x i8> %in0, <8 x i8> %in1
@@ -356,28 +353,28 @@ define <8 x i8> @cmpsel_trunc(<8 x i8> %in0, <8 x i8> %in1, <8 x i32> %cmp0, <8
define <8 x i8> @vuzp_trunc_and_shuffle(<8 x i8> %tr0, <8 x i8> %tr1,
; CHECK-LABEL: vuzp_trunc_and_shuffle:
; CHECK: @ BB#0:
-; CHECK-NEXT: .save {r4, lr}
-; CHECK-NEXT: push {r4, lr}
-; CHECK-NEXT: ldr r12, [sp, #40]
-; CHECK-NEXT: add lr, sp, #24
-; CHECK-NEXT: add r4, sp, #8
-; CHECK-NEXT: vld1.64 {d16, d17}, [r4]
-; CHECK-NEXT: vld1.64 {d18, d19}, [lr]
-; CHECK-NEXT: vld1.32 {d20[0]}, [r12:32]
-; CHECK-NEXT: vcgt.u32 q8, q9, q8
-; CHECK-NEXT: vmovn.i32 d16, q8
-; CHECK-NEXT: vmov.i8 d17, #0x7
-; CHECK-NEXT: vneg.s8 d17, d17
-; CHECK-NEXT: vmovl.u8 q9, d20
-; CHECK-NEXT: vuzp.8 d16, d18
-; CHECK-NEXT: vshl.i8 d16, d16, #7
-; CHECK-NEXT: vmov d18, r2, r3
-; CHECK-NEXT: vmov d19, r0, r1
-; CHECK-NEXT: vshl.s8 d16, d16, d17
-; CHECK-NEXT: vbsl d16, d19, d18
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: pop {r4, lr}
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: add r12, sp, #8
+; CHECK-NEXT: add lr, sp, #24
+; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
+; CHECK-NEXT: ldr r12, [sp, #40]
+; CHECK-NEXT: vld1.64 {d18, d19}, [lr]
+; CHECK-NEXT: vcgt.u32 q8, q9, q8
+; CHECK-NEXT: vld1.32 {d18[0]}, [r12:32]
+; CHECK-NEXT: vmov.i8 d19, #0x7
+; CHECK-NEXT: vmovl.u8 q10, d18
+; CHECK-NEXT: vmovn.i32 d16, q8
+; CHECK-NEXT: vneg.s8 d17, d19
+; CHECK-NEXT: vmov d18, r2, r3
+; CHECK-NEXT: vuzp.8 d16, d20
+; CHECK-NEXT: vshl.i8 d16, d16, #7
+; CHECK-NEXT: vshl.s8 d16, d16, d17
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vbsl d16, d17, d18
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: pop {r11, lr}
+; CHECK-NEXT: mov pc, lr
<4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {
%cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4
%cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1>
@@ -392,25 +389,22 @@ define <8 x i8> @vuzp_trunc_and_shuffle(<8 x i8> %tr0, <8 x i8> %tr1,
define <8 x i8> @vuzp_trunc_and_shuffle_undef_right(<8 x i8> %tr0, <8 x i8> %tr1,
; CHECK-LABEL: vuzp_trunc_and_shuffle_undef_right:
; CHECK: @ BB#0:
-; CHECK-NEXT: .save {r11, lr}
-; CHECK-NEXT: push {r11, lr}
-; CHECK-NEXT: add r12, sp, #24
-; CHECK-NEXT: add lr, sp, #8
-; CHECK-NEXT: vld1.64 {d16, d17}, [lr]
-; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
-; CHECK-NEXT: vcgt.u32 q8, q9, q8
-; CHECK-NEXT: vmov d19, r0, r1
-; CHECK-NEXT: vmovn.i32 d16, q8
-; CHECK-NEXT: vmov.i8 d17, #0x7
-; CHECK-NEXT: vuzp.8 d16, d18
-; CHECK-NEXT: vneg.s8 d17, d17
-; CHECK-NEXT: vshl.i8 d16, d16, #7
-; CHECK-NEXT: vmov d18, r2, r3
-; CHECK-NEXT: vshl.s8 d16, d16, d17
-; CHECK-NEXT: vbsl d16, d19, d18
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: pop {r11, lr}
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
+; CHECK-NEXT: add r12, sp, #16
+; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT: vcgt.u32 q8, q9, q8
+; CHECK-NEXT: vmov.i8 d18, #0x7
+; CHECK-NEXT: vmovn.i32 d16, q8
+; CHECK-NEXT: vuzp.8 d16, d17
+; CHECK-NEXT: vneg.s8 d17, d18
+; CHECK-NEXT: vshl.i8 d16, d16, #7
+; CHECK-NEXT: vmov d18, r2, r3
+; CHECK-NEXT: vshl.s8 d16, d16, d17
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vbsl d16, d17, d18
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
<4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {
%cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4
%cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1>
@@ -423,26 +417,23 @@ define <8 x i8> @vuzp_trunc_and_shuffle_undef_right(<8 x i8> %tr0, <8 x i8> %tr1
define <8 x i8> @vuzp_trunc_and_shuffle_undef_left(<8 x i8> %tr0, <8 x i8> %tr1,
; CHECK-LABEL: vuzp_trunc_and_shuffle_undef_left:
; CHECK: @ BB#0:
-; CHECK-NEXT: .save {r11, lr}
-; CHECK-NEXT: push {r11, lr}
-; CHECK-NEXT: add r12, sp, #24
-; CHECK-NEXT: add lr, sp, #8
-; CHECK-NEXT: vldr d20, .LCPI22_0
-; CHECK-NEXT: vld1.64 {d16, d17}, [lr]
-; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
-; CHECK-NEXT: vcgt.u32 q8, q9, q8
-; CHECK-NEXT: vmov d18, r2, r3
-; CHECK-NEXT: vmov d19, r0, r1
-; CHECK-NEXT: vmovn.i32 d16, q8
-; CHECK-NEXT: vmov.i8 d17, #0x7
-; CHECK-NEXT: vtbl.8 d16, {d16}, d20
-; CHECK-NEXT: vneg.s8 d17, d17
-; CHECK-NEXT: vshl.i8 d16, d16, #7
-; CHECK-NEXT: vshl.s8 d16, d16, d17
-; CHECK-NEXT: vbsl d16, d19, d18
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: pop {r11, lr}
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
+; CHECK-NEXT: add r12, sp, #16
+; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT: vcgt.u32 q8, q9, q8
+; CHECK-NEXT: vldr d18, .LCPI22_0
+; CHECK-NEXT: vmov.i8 d19, #0x7
+; CHECK-NEXT: vmovn.i32 d16, q8
+; CHECK-NEXT: vtbl.8 d16, {d16}, d18
+; CHECK-NEXT: vneg.s8 d17, d19
+; CHECK-NEXT: vmov d18, r2, r3
+; CHECK-NEXT: vshl.i8 d16, d16, #7
+; CHECK-NEXT: vshl.s8 d16, d16, d17
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vbsl d16, d17, d18
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ BB#1:
; CHECK-NEXT: .LCPI22_0:
@@ -468,65 +459,63 @@ define <8 x i8> @vuzp_trunc_and_shuffle_undef_left(<8 x i8> %tr0, <8 x i8> %tr1,
define <10 x i8> @vuzp_wide_type(<10 x i8> %tr0, <10 x i8> %tr1,
; CHECK-LABEL: vuzp_wide_type:
; CHECK: @ BB#0:
-; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
-; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
-; CHECK-NEXT: .setfp r11, sp, #16
-; CHECK-NEXT: add r11, sp, #16
-; CHECK-NEXT: .pad #8
-; CHECK-NEXT: sub sp, sp, #8
-; CHECK-NEXT: bic sp, sp, #15
-; CHECK-NEXT: add r5, r11, #52
-; CHECK-NEXT: add r7, r11, #32
-; CHECK-NEXT: add r4, r11, #44
-; CHECK-NEXT: add r6, r11, #24
-; CHECK-NEXT: add r12, r11, #60
-; CHECK-NEXT: add lr, r11, #40
-; CHECK-NEXT: vld1.32 {d17[0]}, [r7:32]
-; CHECK-NEXT: vld1.32 {d19[0]}, [r5:32]
-; CHECK-NEXT: vld1.32 {d22[0]}, [r12:32]
-; CHECK-NEXT: ldr r12, [r11, #64]
-; CHECK-NEXT: vld1.32 {d20[0]}, [lr:32]
-; CHECK-NEXT: add r7, r11, #48
-; CHECK-NEXT: add r5, r11, #28
-; CHECK-NEXT: vld1.32 {d16[0]}, [r6:32]
-; CHECK-NEXT: vld1.32 {d18[0]}, [r4:32]
-; CHECK-NEXT: add r6, r11, #56
-; CHECK-NEXT: add r4, r11, #36
-; CHECK-NEXT: vcgt.u32 q10, q11, q10
-; CHECK-NEXT: vld1.32 {d19[1]}, [r6:32]
-; CHECK-NEXT: vld1.32 {d17[1]}, [r4:32]
-; CHECK-NEXT: add r6, r12, #4
-; CHECK-NEXT: vld1.32 {d18[1]}, [r7:32]
-; CHECK-NEXT: vld1.32 {d16[1]}, [r5:32]
-; CHECK-NEXT: ldr r7, [r12]
-; CHECK-NEXT: vcgt.u32 q8, q9, q8
-; CHECK-NEXT: vmovn.i32 d18, q10
-; CHECK-NEXT: vmov.32 d21[0], r7
-; CHECK-NEXT: vmovn.i32 d16, q8
-; CHECK-NEXT: vmov.u8 r7, d21[3]
-; CHECK-NEXT: vmov.i8 d17, #0x7
-; CHECK-NEXT: vuzp.8 d16, d18
-; CHECK-NEXT: vmov.8 d23[0], r7
-; CHECK-NEXT: vneg.s8 d17, d17
-; CHECK-NEXT: add r7, r11, #8
-; CHECK-NEXT: vldr d18, .LCPI23_0
-; CHECK-NEXT: vld1.8 {d23[1]}, [r6]
-; CHECK-NEXT: vshl.i8 d16, d16, #7
-; CHECK-NEXT: vshl.s8 d20, d16, d17
-; CHECK-NEXT: vmov.i8 q8, #0x7
-; CHECK-NEXT: vneg.s8 q8, q8
-; CHECK-NEXT: vtbl.8 d22, {d20, d21}, d18
-; CHECK-NEXT: vld1.64 {d18, d19}, [r7]
-; CHECK-NEXT: vshl.i8 q10, q11, #7
-; CHECK-NEXT: vmov d23, r2, r3
-; CHECK-NEXT: vmov d22, r0, r1
-; CHECK-NEXT: vshl.s8 q8, q10, q8
-; CHECK-NEXT: vbsl q8, q11, q9
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: sub sp, r11, #16
-; CHECK-NEXT: pop {r4, r5, r6, r7, r11, lr}
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: .save {r4, r10, r11, lr}
+; CHECK-NEXT: push {r4, r10, r11, lr}
+; CHECK-NEXT: .setfp r11, sp, #8
+; CHECK-NEXT: add r11, sp, #8
+; CHECK-NEXT: bic sp, sp, #15
+; CHECK-NEXT: add r12, r11, #32
+; CHECK-NEXT: add lr, r11, #60
+; CHECK-NEXT: vld1.32 {d17[0]}, [r12:32]
+; CHECK-NEXT: add r12, r11, #24
+; CHECK-NEXT: vld1.32 {d22[0]}, [lr:32]
+; CHECK-NEXT: add lr, r11, #36
+; CHECK-NEXT: vld1.32 {d16[0]}, [r12:32]
+; CHECK-NEXT: add r12, r11, #52
+; CHECK-NEXT: vld1.32 {d19[0]}, [r12:32]
+; CHECK-NEXT: add r12, r11, #44
+; CHECK-NEXT: vld1.32 {d17[1]}, [lr:32]
+; CHECK-NEXT: vld1.32 {d18[0]}, [r12:32]
+; CHECK-NEXT: add r12, r11, #40
+; CHECK-NEXT: vld1.32 {d20[0]}, [r12:32]
+; CHECK-NEXT: ldr r12, [r11, #64]
+; CHECK-NEXT: vcgt.u32 q10, q11, q10
+; CHECK-NEXT: ldr r4, [r12]
+; CHECK-NEXT: vmov.32 d25[0], r4
+; CHECK-NEXT: add r4, r11, #28
+; CHECK-NEXT: vld1.32 {d16[1]}, [r4:32]
+; CHECK-NEXT: add r4, r11, #56
+; CHECK-NEXT: vld1.32 {d19[1]}, [r4:32]
+; CHECK-NEXT: add r4, r11, #48
+; CHECK-NEXT: vmov.u8 lr, d25[3]
+; CHECK-NEXT: vld1.32 {d18[1]}, [r4:32]
+; CHECK-NEXT: add r4, r12, #4
+; CHECK-NEXT: vcgt.u32 q8, q9, q8
+; CHECK-NEXT: vmovn.i32 d19, q10
+; CHECK-NEXT: vldr d20, .LCPI23_0
+; CHECK-NEXT: vmov.i8 d18, #0x7
+; CHECK-NEXT: vmovn.i32 d16, q8
+; CHECK-NEXT: vneg.s8 d17, d18
+; CHECK-NEXT: vuzp.8 d16, d19
+; CHECK-NEXT: vmov.i8 q9, #0x7
+; CHECK-NEXT: vshl.i8 d16, d16, #7
+; CHECK-NEXT: vneg.s8 q9, q9
+; CHECK-NEXT: vshl.s8 d24, d16, d17
+; CHECK-NEXT: vmov.8 d17[0], lr
+; CHECK-NEXT: vtbl.8 d16, {d24, d25}, d20
+; CHECK-NEXT: vld1.8 {d17[1]}, [r4]
+; CHECK-NEXT: add r4, r11, #8
+; CHECK-NEXT: vshl.i8 q8, q8, #7
+; CHECK-NEXT: vld1.64 {d20, d21}, [r4]
+; CHECK-NEXT: vshl.s8 q8, q8, q9
+; CHECK-NEXT: vmov d19, r2, r3
+; CHECK-NEXT: vmov d18, r0, r1
+; CHECK-NEXT: vbsl q8, q9, q10
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: sub sp, r11, #8
+; CHECK-NEXT: pop {r4, r10, r11, lr}
+; CHECK-NEXT: mov pc, lr
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ BB#1:
; CHECK-NEXT: .LCPI23_0:
diff --git a/test/CodeGen/BPF/remove_truncate_1.ll b/test/CodeGen/BPF/remove_truncate_1.ll
new file mode 100644
index 000000000000..65433853b9d5
--- /dev/null
+++ b/test/CodeGen/BPF/remove_truncate_1.ll
@@ -0,0 +1,87 @@
+; RUN: llc < %s -march=bpf -verify-machineinstrs | FileCheck %s
+
+; Source code:
+; struct xdp_md {
+; unsigned data;
+; unsigned data_end;
+; };
+;
+; int gbl;
+; int xdp_dummy(struct xdp_md *xdp)
+; {
+; char tmp;
+; long addr;
+;
+; if (gbl) {
+; long addr1 = (long)xdp->data;
+; tmp = *(char *)addr1;
+; if (tmp == 1)
+; return 3;
+; } else {
+; tmp = *(volatile char *)(long)xdp->data_end;
+; if (tmp == 1)
+; return 2;
+; }
+; addr = (long)xdp->data;
+; tmp = *(volatile char *)addr;
+; if (tmp == 0)
+; return 1;
+; return 0;
+; }
+
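+; On BPF a 32-bit load already zeroes the upper half of the destination
+; register, so the "r1 <<= 32; r1 >>= 32" pair that would naively implement
+; the zero extension of xdp->data is redundant; the CHECK-NOT lines below
+; verify that it is removed.
+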
+%struct.xdp_md = type { i32, i32 }
+
+@gbl = common local_unnamed_addr global i32 0, align 4
+
+; Function Attrs: norecurse nounwind
+define i32 @xdp_dummy(%struct.xdp_md* nocapture readonly %xdp) local_unnamed_addr #0 {
+entry:
+ %0 = load i32, i32* @gbl, align 4
+ %tobool = icmp eq i32 %0, 0
+ br i1 %tobool, label %if.else, label %if.then
+
+if.then: ; preds = %entry
+ %data = getelementptr inbounds %struct.xdp_md, %struct.xdp_md* %xdp, i64 0, i32 0
+ %1 = load i32, i32* %data, align 4
+ %conv = zext i32 %1 to i64
+ %2 = inttoptr i64 %conv to i8*
+ %3 = load i8, i8* %2, align 1
+ %cmp = icmp eq i8 %3, 1
+ br i1 %cmp, label %cleanup20, label %if.end12
+; CHECK: r1 = *(u32 *)(r1 + 0)
+; CHECK: r2 = *(u8 *)(r1 + 0)
+
+if.else: ; preds = %entry
+ %data_end = getelementptr inbounds %struct.xdp_md, %struct.xdp_md* %xdp, i64 0, i32 1
+ %4 = load i32, i32* %data_end, align 4
+ %conv6 = zext i32 %4 to i64
+; CHECK: r2 = *(u32 *)(r1 + 4)
+ %5 = inttoptr i64 %conv6 to i8*
+ %6 = load volatile i8, i8* %5, align 1
+ %cmp8 = icmp eq i8 %6, 1
+ br i1 %cmp8, label %cleanup20, label %if.else.if.end12_crit_edge
+
+if.else.if.end12_crit_edge: ; preds = %if.else
+ %data13.phi.trans.insert = getelementptr inbounds %struct.xdp_md, %struct.xdp_md* %xdp, i64 0, i32 0
+ %.pre = load i32, i32* %data13.phi.trans.insert, align 4
+ br label %if.end12
+; CHECK: r1 = *(u32 *)(r1 + 0)
+
+if.end12: ; preds = %if.else.if.end12_crit_edge, %if.then
+ %7 = phi i32 [ %.pre, %if.else.if.end12_crit_edge ], [ %1, %if.then ]
+ %conv14 = zext i32 %7 to i64
+; CHECK-NOT: r1 <<= 32
+; CHECK-NOT: r1 >>= 32
+ %8 = inttoptr i64 %conv14 to i8*
+ %9 = load volatile i8, i8* %8, align 1
+; CHECK: r1 = *(u8 *)(r1 + 0)
+ %cmp16 = icmp eq i8 %9, 0
+ %.28 = zext i1 %cmp16 to i32
+ br label %cleanup20
+
+cleanup20: ; preds = %if.then, %if.end12, %if.else
+ %retval.1 = phi i32 [ 3, %if.then ], [ 2, %if.else ], [ %.28, %if.end12 ]
+ ret i32 %retval.1
+}
+
+attributes #0 = { norecurse nounwind }
diff --git a/test/CodeGen/BPF/remove_truncate_2.ll b/test/CodeGen/BPF/remove_truncate_2.ll
new file mode 100644
index 000000000000..979d820dd857
--- /dev/null
+++ b/test/CodeGen/BPF/remove_truncate_2.ll
@@ -0,0 +1,65 @@
+; RUN: llc < %s -march=bpf -verify-machineinstrs | FileCheck %s
+
+; Source code:
+; struct xdp_md {
+; unsigned data;
+; unsigned data_end;
+; };
+;
+; int gbl;
+; int xdp_dummy(struct xdp_md *xdp)
+; {
+; char addr = *(char *)(long)xdp->data;
+; if (gbl) {
+; if (gbl == 1)
+; return 1;
+; if (addr == 1)
+; return 3;
+; } else if (addr == 0)
+; return 2;
+; return 0;
+; }
+
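+; An 8-bit load leaves the destination register zero-extended, so comparing
+; the loaded character does not need an explicit "r1 &= 255" mask; the
+; CHECK-NOT lines below verify that the mask is removed.
+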
+%struct.xdp_md = type { i32, i32 }
+
+@gbl = common local_unnamed_addr global i32 0, align 4
+
+; Function Attrs: norecurse nounwind readonly
+define i32 @xdp_dummy(%struct.xdp_md* nocapture readonly %xdp) local_unnamed_addr #0 {
+entry:
+ %data = getelementptr inbounds %struct.xdp_md, %struct.xdp_md* %xdp, i64 0, i32 0
+ %0 = load i32, i32* %data, align 4
+ %conv = zext i32 %0 to i64
+ %1 = inttoptr i64 %conv to i8*
+ %2 = load i8, i8* %1, align 1
+; CHECK: r1 = *(u32 *)(r1 + 0)
+; CHECK: r1 = *(u8 *)(r1 + 0)
+ %3 = load i32, i32* @gbl, align 4
+ switch i32 %3, label %if.end [
+ i32 0, label %if.else
+ i32 1, label %cleanup
+ ]
+
+if.end: ; preds = %entry
+ %cmp4 = icmp eq i8 %2, 1
+; CHECK: r0 = 3
+; CHECK-NOT: r1 &= 255
+; CHECK: if r1 == 1 goto
+ br i1 %cmp4, label %cleanup, label %if.end13
+
+if.else: ; preds = %entry
+ %cmp9 = icmp eq i8 %2, 0
+; CHECK: r0 = 2
+; CHECK-NOT: r1 &= 255
+; CHECK: if r1 == 0 goto
+ br i1 %cmp9, label %cleanup, label %if.end13
+
+if.end13: ; preds = %if.else, %if.end
+ br label %cleanup
+
+cleanup: ; preds = %if.else, %if.end, %entry, %if.end13
+ %retval.0 = phi i32 [ 0, %if.end13 ], [ 1, %entry ], [ 3, %if.end ], [ 2, %if.else ]
+ ret i32 %retval.0
+}
+
+attributes #0 = { norecurse nounwind readonly }
diff --git a/test/CodeGen/Hexagon/addrmode-keepdeadphis.mir b/test/CodeGen/Hexagon/addrmode-keepdeadphis.mir
new file mode 100644
index 000000000000..b77a7b1bd365
--- /dev/null
+++ b/test/CodeGen/Hexagon/addrmode-keepdeadphis.mir
@@ -0,0 +1,30 @@
+# RUN: llc -march=hexagon -run-pass amode-opt %s -o - | FileCheck %s
+
+# Check that the addasl is not propagated into the addressing mode.
+# CHECK-NOT: L4_loadri_ur
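+# The value of r1 differs between the two paths into bb.2 (it is redefined
+# in bb.1), so folding the shifted add into the load's addressing mode would
+# read the wrong r1.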
+
+--- |
+ @g = global i32 zeroinitializer
+ define void @fred() { ret void }
+...
+
+---
+name: fred
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: %p0
+ %r0 = A2_tfrsi @g
+ %r1 = A2_tfrsi 1
+ %r2 = S2_addasl_rrri %r0, %r1, 1
+ J2_jumpt %p0, %bb.2, implicit-def %pc
+
+ bb.1:
+ liveins: %r0, %r2
+ %r1 = A2_tfrsi 2
+
+ bb.2:
+ liveins: %r0, %r2
+ %r3 = L2_loadri_io %r2, 0
+...
diff --git a/test/CodeGen/Hexagon/expand-condsets-undefvni.ll b/test/CodeGen/Hexagon/expand-condsets-undefvni.ll
new file mode 100644
index 000000000000..45ba5131e668
--- /dev/null
+++ b/test/CodeGen/Hexagon/expand-condsets-undefvni.ll
@@ -0,0 +1,49 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+;
+; Check that this compiles successfully.
+; CHECK: jumpr r31
+
+target triple = "hexagon"
+
+define i64 @fred(i64 %a0, i64 %a1) local_unnamed_addr #0 {
+b2:
+ %v3 = lshr i64 %a1, 52
+ %v4 = trunc i64 %v3 to i11
+ switch i11 %v4, label %b15 [
+ i11 -1, label %b5
+ i11 0, label %b14
+ ]
+
+b5: ; preds = %b2
+ br i1 undef, label %b13, label %b6
+
+b6: ; preds = %b5
+ %v7 = or i64 %a1, 2251799813685248
+ br i1 undef, label %b8, label %b10
+
+b8: ; preds = %b6
+ %v9 = select i1 undef, i64 %v7, i64 undef
+ br label %b16
+
+b10: ; preds = %b6
+ br i1 undef, label %b16, label %b11
+
+b11: ; preds = %b10
+ %v12 = select i1 undef, i64 undef, i64 %v7
+ br label %b16
+
+b13: ; preds = %b5
+ br label %b16
+
+b14: ; preds = %b2
+ br label %b16
+
+b15: ; preds = %b2
+ br label %b16
+
+b16: ; preds = %b15, %b14, %b13, %b11, %b10, %b8
+ %v17 = phi i64 [ undef, %b13 ], [ -2251799813685248, %b14 ], [ 0, %b15 ], [ %v12, %b11 ], [ %v9, %b8 ], [ %v7, %b10 ]
+ ret i64 %v17
+}
+
+attributes #0 = { nounwind "target-cpu"="hexagonv62" }
diff --git a/test/CodeGen/Hexagon/expand-vselect-kill.ll b/test/CodeGen/Hexagon/expand-vselect-kill.ll
new file mode 100644
index 000000000000..1d07859665c0
--- /dev/null
+++ b/test/CodeGen/Hexagon/expand-vselect-kill.ll
@@ -0,0 +1,53 @@
+; RUN: llc -march=hexagon -verify-machineinstrs < %s | FileCheck %s
+;
+; Check that this does not crash.
+
+target triple = "hexagon"
+
+; CHECK-LABEL: danny:
+; CHECK-DAG: if ([[PREG:p[0-3]]]) [[VREG:v[0-9]+]]
+; CHECK-DAG: if (![[PREG]]) [[VREG]]
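+; i.e. the vector select is expanded into a pair of predicated vector
+; transfers guarded by complementary senses of the same predicate register.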
+define void @danny() local_unnamed_addr #0 {
+b0:
+ %v1 = icmp eq i32 0, undef
+ %v2 = select i1 %v1, <16 x i32> zeroinitializer, <16 x i32> undef
+ %v3 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %v2, <16 x i32> zeroinitializer, i32 2)
+ %v4 = tail call <32 x i32> @llvm.hexagon.V6.vswap(<512 x i1> undef, <16 x i32> undef, <16 x i32> %v3)
+ %v5 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v4)
+ %v6 = tail call <32 x i32> @llvm.hexagon.V6.vshuffvdd(<16 x i32> undef, <16 x i32> %v5, i32 62)
+ %v7 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v6)
+ store <16 x i32> %v7, <16 x i32>* undef, align 64
+ unreachable
+}
+
+declare <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32>, <16 x i32>, i32) #2
+declare <32 x i32> @llvm.hexagon.V6.vswap(<512 x i1>, <16 x i32>, <16 x i32>) #2
+declare <16 x i32> @llvm.hexagon.V6.hi(<32 x i32>) #2
+declare <16 x i32> @llvm.hexagon.V6.lo(<32 x i32>) #2
+declare <32 x i32> @llvm.hexagon.V6.vshuffvdd(<16 x i32>, <16 x i32>, i32) #2
+
+; CHECK-LABEL: sammy:
+; CHECK-DAG: if ([[PREG:p[0-3]]]) [[VREG:v[0-9]+]]
+; CHECK-DAG: if (![[PREG]]) [[VREG]]
+define void @sammy() local_unnamed_addr #1 {
+b0:
+ %v1 = icmp eq i32 0, undef
+ %v2 = select i1 %v1, <32 x i32> zeroinitializer, <32 x i32> undef
+ %v3 = tail call <32 x i32> @llvm.hexagon.V6.valignbi.128B(<32 x i32> %v2, <32 x i32> zeroinitializer, i32 2)
+ %v4 = tail call <64 x i32> @llvm.hexagon.V6.vswap.128B(<1024 x i1> undef, <32 x i32> undef, <32 x i32> %v3)
+ %v5 = tail call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %v4)
+ %v6 = tail call <64 x i32> @llvm.hexagon.V6.vshuffvdd.128B(<32 x i32> undef, <32 x i32> %v5, i32 62)
+ %v7 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %v6)
+ store <32 x i32> %v7, <32 x i32>* undef, align 128
+ unreachable
+}
+
+declare <32 x i32> @llvm.hexagon.V6.valignbi.128B(<32 x i32>, <32 x i32>, i32) #2
+declare <64 x i32> @llvm.hexagon.V6.vswap.128B(<1024 x i1>, <32 x i32>, <32 x i32>) #2
+declare <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32>) #2
+declare <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32>) #2
+declare <64 x i32> @llvm.hexagon.V6.vshuffvdd.128B(<32 x i32>, <32 x i32>, i32) #2
+
+attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx" }
+attributes #1 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx-double" }
+attributes #2 = { nounwind readnone }
diff --git a/test/CodeGen/Hexagon/fpelim-basic.ll b/test/CodeGen/Hexagon/fpelim-basic.ll
new file mode 100644
index 000000000000..ffec07f7dbfe
--- /dev/null
+++ b/test/CodeGen/Hexagon/fpelim-basic.ll
@@ -0,0 +1,91 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+target triple = "hexagon"
+
+; FP elimination enabled.
+;
+; CHECK-LABEL: danny:
+; CHECK: r29 = add(r29,#-[[SIZE:[0-9]+]])
+; CHECK: r29 = add(r29,#[[SIZE]])
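+;
+; With the frame pointer eliminated, the frame is allocated and released by
+; adjusting the stack pointer (r29) directly, instead of the
+; allocframe/dealloc_return pair checked for in sammy below.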
+define i32 @danny(i32 %a0, i32 %a1) local_unnamed_addr #0 {
+b2:
+ %v3 = alloca [32 x i32], align 8
+ %v4 = bitcast [32 x i32]* %v3 to i8*
+ call void @llvm.lifetime.start.p0i8(i64 128, i8* nonnull %v4) #3
+ br label %b5
+
+b5: ; preds = %b5, %b2
+ %v6 = phi i32 [ 0, %b2 ], [ %v8, %b5 ]
+ %v7 = getelementptr inbounds [32 x i32], [32 x i32]* %v3, i32 0, i32 %v6
+ store i32 %v6, i32* %v7, align 4
+ %v8 = add nuw nsw i32 %v6, 1
+ %v9 = icmp eq i32 %v8, 32
+ br i1 %v9, label %b10, label %b5
+
+b10: ; preds = %b5
+ %v11 = getelementptr inbounds [32 x i32], [32 x i32]* %v3, i32 0, i32 %a0
+ store i32 %a1, i32* %v11, align 4
+ br label %b12
+
+b12: ; preds = %b12, %b10
+ %v13 = phi i32 [ 0, %b10 ], [ %v18, %b12 ]
+ %v14 = phi i32 [ 0, %b10 ], [ %v17, %b12 ]
+ %v15 = getelementptr inbounds [32 x i32], [32 x i32]* %v3, i32 0, i32 %v13
+ %v16 = load i32, i32* %v15, align 4
+ %v17 = add nsw i32 %v16, %v14
+ %v18 = add nuw nsw i32 %v13, 1
+ %v19 = icmp eq i32 %v18, 32
+ br i1 %v19, label %b20, label %b12
+
+b20: ; preds = %b12
+ call void @llvm.lifetime.end.p0i8(i64 128, i8* nonnull %v4) #3
+ ret i32 %v17
+}
+
+; FP elimination disabled.
+;
+; CHECK-LABEL: sammy:
+; CHECK: allocframe
+; CHECK: dealloc_return
+define i32 @sammy(i32 %a0, i32 %a1) local_unnamed_addr #1 {
+b2:
+ %v3 = alloca [32 x i32], align 8
+ %v4 = bitcast [32 x i32]* %v3 to i8*
+ call void @llvm.lifetime.start.p0i8(i64 128, i8* nonnull %v4) #3
+ br label %b5
+
+b5: ; preds = %b5, %b2
+ %v6 = phi i32 [ 0, %b2 ], [ %v8, %b5 ]
+ %v7 = getelementptr inbounds [32 x i32], [32 x i32]* %v3, i32 0, i32 %v6
+ store i32 %v6, i32* %v7, align 4
+ %v8 = add nuw nsw i32 %v6, 1
+ %v9 = icmp eq i32 %v8, 32
+ br i1 %v9, label %b10, label %b5
+
+b10: ; preds = %b5
+ %v11 = getelementptr inbounds [32 x i32], [32 x i32]* %v3, i32 0, i32 %a0
+ store i32 %a1, i32* %v11, align 4
+ br label %b12
+
+b12: ; preds = %b12, %b10
+ %v13 = phi i32 [ 0, %b10 ], [ %v18, %b12 ]
+ %v14 = phi i32 [ 0, %b10 ], [ %v17, %b12 ]
+ %v15 = getelementptr inbounds [32 x i32], [32 x i32]* %v3, i32 0, i32 %v13
+ %v16 = load i32, i32* %v15, align 4
+ %v17 = add nsw i32 %v16, %v14
+ %v18 = add nuw nsw i32 %v13, 1
+ %v19 = icmp eq i32 %v18, 32
+ br i1 %v19, label %b20, label %b12
+
+b20: ; preds = %b12
+ call void @llvm.lifetime.end.p0i8(i64 128, i8* nonnull %v4) #3
+ ret i32 %v17
+}
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #2
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #2
+
+attributes #0 = { nounwind readnone "no-frame-pointer-elim"="false" "target-cpu"="hexagonv60" }
+attributes #1 = { nounwind readnone "no-frame-pointer-elim"="true" "target-cpu"="hexagonv60" }
+attributes #2 = { argmemonly nounwind }
+attributes #3 = { nounwind }
diff --git a/test/CodeGen/Hexagon/frame.ll b/test/CodeGen/Hexagon/frame.ll
deleted file mode 100644
index e87acb8cd796..000000000000
--- a/test/CodeGen/Hexagon/frame.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
-
-@num = external global i32
-@acc = external global i32
-@num2 = external global i32
-
-; CHECK: allocframe
-; CHECK: dealloc_return
-
-define i32 @foo() nounwind {
-entry:
- %i = alloca i32, align 4
- %0 = load i32, i32* @num, align 4
- store i32 %0, i32* %i, align 4
- %1 = load i32, i32* %i, align 4
- %2 = load i32, i32* @acc, align 4
- %mul = mul nsw i32 %1, %2
- %3 = load i32, i32* @num2, align 4
- %add = add nsw i32 %mul, %3
- store i32 %add, i32* %i, align 4
- %4 = load i32, i32* %i, align 4
- ret i32 %4
-}
diff --git a/test/CodeGen/Hexagon/jt-in-text.ll b/test/CodeGen/Hexagon/jt-in-text.ll
new file mode 100644
index 000000000000..62b5caef6aaa
--- /dev/null
+++ b/test/CodeGen/Hexagon/jt-in-text.ll
@@ -0,0 +1,57 @@
+; RUN: llc -hexagon-emit-jt-text=true < %s | FileCheck %s
+target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+target triple = "hexagon-unknown--elf"
+
+; CHECK: .text
+; CHECK-NOT: .rodata
+; CHECK: .word
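+;
+; The .word entries are the jump table for the switch below; with
+; -hexagon-emit-jt-text=true it is emitted into .text instead of .rodata.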
+
+@lane0_pwr_st = global i32 0, align 4
+@lane1_pwr_st = global i32 0, align 4
+@lane2_pwr_st = global i32 0, align 4
+@lane3_pwr_st = global i32 0, align 4
+
+; Function Attrs: noinline nounwind
+define void @test2(i32 %lane_id, i32 %rx_pwr_st) #0 {
+entry:
+ %lane_id.addr = alloca i32, align 4
+ %rx_pwr_st.addr = alloca i32, align 4
+ store i32 %lane_id, i32* %lane_id.addr, align 4
+ store i32 %rx_pwr_st, i32* %rx_pwr_st.addr, align 4
+ %0 = load i32, i32* %lane_id.addr, align 4
+ switch i32 %0, label %sw.epilog [
+ i32 0, label %sw.bb
+ i32 1, label %sw.bb1
+ i32 2, label %sw.bb2
+ i32 3, label %sw.bb3
+ i32 15, label %sw.bb4
+ ]
+
+sw.bb: ; preds = %entry
+ store i32 1, i32* @lane0_pwr_st, align 4
+ br label %sw.epilog
+
+sw.bb1: ; preds = %entry
+ store i32 1, i32* @lane1_pwr_st, align 4
+ br label %sw.epilog
+
+sw.bb2: ; preds = %entry
+ store i32 1, i32* @lane2_pwr_st, align 4
+ br label %sw.epilog
+
+sw.bb3: ; preds = %entry
+ store i32 1, i32* @lane3_pwr_st, align 4
+ br label %sw.epilog
+
+sw.bb4: ; preds = %entry
+ store i32 1, i32* @lane0_pwr_st, align 4
+ store i32 1, i32* @lane1_pwr_st, align 4
+ store i32 1, i32* @lane2_pwr_st, align 4
+ store i32 1, i32* @lane3_pwr_st, align 4
+ br label %sw.epilog
+
+sw.epilog: ; preds = %entry, %sw.bb4, %sw.bb3, %sw.bb2, %sw.bb1, %sw.bb
+ ret void
+}
+
+attributes #0 = { noinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="-hvx-double,-long-calls" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Hexagon/newvaluejump-kill2.mir b/test/CodeGen/Hexagon/newvaluejump-kill2.mir
new file mode 100644
index 000000000000..565d07dc87ee
--- /dev/null
+++ b/test/CodeGen/Hexagon/newvaluejump-kill2.mir
@@ -0,0 +1,18 @@
+# RUN: llc -march=hexagon -run-pass hexagon-nvj -verify-machineinstrs %s -o - | FileCheck %s
+# CHECK: J4_cmpgtu_t_jumpnv_t killed %r3, killed %r1, %bb.1, implicit-def %pc
+
+---
+name: fred
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: %r0
+ %r1 = A2_addi %r0, -1
+ %r2 = A2_tfrsi -1431655765
+ %r3 = A2_tfrsi 2
+ %p0 = C2_cmpgtu killed %r3, %r1
+ %r2 = S4_subaddi killed %r1, 1, killed %r2
+ J2_jumpt killed %p0, %bb.1, implicit-def %pc
+ bb.1:
+...
diff --git a/test/CodeGen/Hexagon/newvaluejump2.ll b/test/CodeGen/Hexagon/newvaluejump2.ll
index 4c897f0830f3..fbc3f2925d19 100644
--- a/test/CodeGen/Hexagon/newvaluejump2.ll
+++ b/test/CodeGen/Hexagon/newvaluejump2.ll
@@ -6,7 +6,7 @@
@Reg = common global i32 0, align 4
define i32 @main() nounwind {
entry:
-; CHECK: if (cmp.gt(r{{[0-9]+}},r{{[0-9]+}}.new)) jump:{{[t|nt]}} .LBB{{[0-9]+}}_{{[0-9]+}}
+; CHECK: if (cmp.gt(r{{[0-9]+}}.new,r{{[0-9]+}})) jump:{{[t|nt]}} .LBB{{[0-9]+}}_{{[0-9]+}}
%Reg2 = alloca i32, align 4
%0 = load i32, i32* %Reg2, align 4
%1 = load i32, i32* @Reg, align 4
diff --git a/test/CodeGen/Hexagon/regalloc-liveout-undef.mir b/test/CodeGen/Hexagon/regalloc-liveout-undef.mir
new file mode 100644
index 000000000000..6a41514b060e
--- /dev/null
+++ b/test/CodeGen/Hexagon/regalloc-liveout-undef.mir
@@ -0,0 +1,35 @@
+# RUN: llc -march=hexagon -run-pass liveintervals -run-pass machineverifier -run-pass simple-register-coalescing %s -o - | FileCheck %s
+#
+# If there is no consumer of the live intervals, the live intervals pass
+# will be freed immediately after it runs, before the verifier. Add a
+# user (register coalescer in this case), so that the verification will
+# cover live intervals as well.
+#
+# Make sure that this compiles successfully.
+# CHECK: undef %1.isub_lo = A2_addi %1.isub_lo, 1
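+#
+# (a def of a subregister marked "undef" writes only that subregister and
+# leaves the rest of the register undefined instead of reading it)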
+
+---
+name: fred
+tracksRegLiveness: true
+
+registers:
+ - { id: 0, class: intregs }
+ - { id: 1, class: doubleregs }
+ - { id: 2, class: predregs }
+ - { id: 3, class: doubleregs }
+body: |
+ bb.0:
+ liveins: %d0
+ successors: %bb.1
+ %0 = IMPLICIT_DEF
+ %1 = COPY %d0
+
+ bb.1:
+ successors: %bb.1
+ %2 = C2_cmpgt %0, %1.isub_lo
+ %3 = COPY %1
+ %1 = COPY %3
+ undef %1.isub_lo = A2_addi %1.isub_lo, 1
+ J2_jump %bb.1, implicit-def %pc
+...
+
diff --git a/test/CodeGen/MIR/Generic/multiRunPass.mir b/test/CodeGen/MIR/Generic/multiRunPass.mir
index bd1c0d0b458e..e055c44205b5 100644
--- a/test/CodeGen/MIR/Generic/multiRunPass.mir
+++ b/test/CodeGen/MIR/Generic/multiRunPass.mir
@@ -7,7 +7,8 @@
# This test ensures that the command line accepts
# several run passes on the same command line and
# actually creates the proper pipeline for it.
-# PSEUDO_PEEPHOLE: -expand-isel-pseudos {{(-machineverifier )?}}-peephole-opt
+# PSEUDO_PEEPHOLE: -expand-isel-pseudos
+# PSEUDO_PEEPHOLE-SAME: {{(-machineverifier )?}}-peephole-opt
# PEEPHOLE_PSEUDO: -peephole-opt {{(-machineverifier )?}}-expand-isel-pseudos
# Make sure there are no other passes happening after what we asked.
diff --git a/test/CodeGen/Mips/2008-06-05-Carry.ll b/test/CodeGen/Mips/2008-06-05-Carry.ll
index 5e6092fc7848..c61e1cdedea7 100644
--- a/test/CodeGen/Mips/2008-06-05-Carry.ll
+++ b/test/CodeGen/Mips/2008-06-05-Carry.ll
@@ -2,21 +2,20 @@
define i64 @add64(i64 %u, i64 %v) nounwind {
entry:
-; CHECK-LABEL: add64:
; CHECK: addu
-; CHECK-DAG: sltu
-; CHECK-DAG: addu
+; CHECK: sltu
; CHECK: addu
- %tmp2 = add i64 %u, %v
+; CHECK: addu
+ %tmp2 = add i64 %u, %v
ret i64 %tmp2
}
define i64 @sub64(i64 %u, i64 %v) nounwind {
entry:
-; CHECK-LABEL: sub64
-; CHECK-DAG: sltu
-; CHECK-DAG: subu
+; CHECK: sub64
; CHECK: subu
+; CHECK: sltu
+; CHECK: addu
; CHECK: subu
%tmp2 = sub i64 %u, %v
ret i64 %tmp2
diff --git a/test/CodeGen/Mips/dsp-patterns.ll b/test/CodeGen/Mips/dsp-patterns.ll
index 250d3eff37dc..837c0d8bfc52 100644
--- a/test/CodeGen/Mips/dsp-patterns.ll
+++ b/test/CodeGen/Mips/dsp-patterns.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=mips -mcpu=mips32r2 -mattr=dsp < %s | FileCheck %s -check-prefix=R1
-; RUN: llc -march=mips -mcpu=mips32r2 -mattr=dspr2 < %s | FileCheck %s -check-prefix=R2
+; RUN: llc -march=mips -mattr=dsp < %s | FileCheck %s -check-prefix=R1
+; RUN: llc -march=mips -mattr=dspr2 < %s | FileCheck %s -check-prefix=R2
; R1-LABEL: test_lbux:
; R1: lbux ${{[0-9]+}}
diff --git a/test/CodeGen/Mips/llcarry.ll b/test/CodeGen/Mips/llcarry.ll
index b7cc6fc8ea75..fcf129420234 100644
--- a/test/CodeGen/Mips/llcarry.ll
+++ b/test/CodeGen/Mips/llcarry.ll
@@ -14,9 +14,9 @@ entry:
%add = add nsw i64 %1, %0
store i64 %add, i64* @k, align 8
; 16: addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
-; 16: addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
; 16: sltu ${{[0-9]+}}, ${{[0-9]+}}
-; 16: move ${{[0-9]+}}, $24
+; 16: move ${{[0-9]+}}, $t8
+; 16: addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
; 16: addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
ret void
}
@@ -28,8 +28,8 @@ entry:
%sub = sub nsw i64 %0, %1
; 16: subu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
; 16: sltu ${{[0-9]+}}, ${{[0-9]+}}
-; 16: move ${{[0-9]+}}, $24
-; 16: subu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
+; 16: move ${{[0-9]+}}, $t8
+; 16: addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
; 16: subu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
store i64 %sub, i64* @l, align 8
ret void
@@ -41,7 +41,8 @@ entry:
%add = add nsw i64 %0, 15
; 16: addiu ${{[0-9]+}}, 15
; 16: sltu ${{[0-9]+}}, ${{[0-9]+}}
-; 16: move ${{[0-9]+}}, $24
+; 16: move ${{[0-9]+}}, $t8
+; 16: addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
; 16: addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
store i64 %add, i64* @m, align 8
ret void
diff --git a/test/CodeGen/Mips/llvm-ir/add.ll b/test/CodeGen/Mips/llvm-ir/add.ll
index 63884eb03b8c..a5ecdda94ce2 100644
--- a/test/CodeGen/Mips/llvm-ir/add.ll
+++ b/test/CodeGen/Mips/llvm-ir/add.ll
@@ -1,35 +1,35 @@
; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s \
-; RUN: -check-prefixes=ALL,NOT-R2-R6,GP32,PRE4
+; RUN: -check-prefixes=ALL,NOT-R2-R6,GP32
; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \
-; RUN: -check-prefixes=ALL,NOT-R2-R6,GP32,GP32-CMOV
+; RUN: -check-prefixes=ALL,NOT-R2-R6,GP32
; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \
-; RUN: -check-prefixes=ALL,R2-R6,GP32,GP32-CMOV
+; RUN: -check-prefixes=ALL,R2-R6,GP32
; RUN: llc < %s -march=mips -mcpu=mips32r3 | FileCheck %s \
-; RUN: -check-prefixes=ALL,R2-R6,GP32,GP32-CMOV
+; RUN: -check-prefixes=ALL,R2-R6,GP32
; RUN: llc < %s -march=mips -mcpu=mips32r5 | FileCheck %s \
-; RUN: -check-prefixes=ALL,R2-R6,GP32,GP32-CMOV
+; RUN: -check-prefixes=ALL,R2-R6,GP32
; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \
; RUN: -check-prefixes=ALL,R2-R6,GP32
; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \
-; RUN: -check-prefixes=ALL,NOT-R2-R6,GP64,GP64-NOT-R2-R6
+; RUN: -check-prefixes=ALL,NOT-R2-R6,GP64
; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s \
-; RUN: -check-prefixes=ALL,NOT-R2-R6,GP64,GP64-NOT-R2-R6
+; RUN: -check-prefixes=ALL,NOT-R2-R6,GP64
; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \
-; RUN: -check-prefixes=ALL,NOT-R2-R6,GP64,GP64-NOT-R2-R6
+; RUN: -check-prefixes=ALL,NOT-R2-R6,GP64
; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \
-; RUN: -check-prefixes=ALL,R2-R6,GP64,GP64-R2-R6
+; RUN: -check-prefixes=ALL,R2-R6,GP64
; RUN: llc < %s -march=mips64 -mcpu=mips64r3 | FileCheck %s \
-; RUN: -check-prefixes=ALL,R2-R6,GP64,GP64-R2-R6
+; RUN: -check-prefixes=ALL,R2-R6,GP64
; RUN: llc < %s -march=mips64 -mcpu=mips64r5 | FileCheck %s \
-; RUN: -check-prefixes=ALL,R2-R6,GP64,GP64-R2-R6
+; RUN: -check-prefixes=ALL,R2-R6,GP64
; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \
-; RUN: -check-prefixes=ALL,R2-R6,GP64,GP64-R2-R6
+; RUN: -check-prefixes=ALL,R2-R6,GP64
; RUN: llc < %s -march=mips -mcpu=mips32r3 -mattr=+micromips -O2 -verify-machineinstrs | FileCheck %s \
-; RUN: -check-prefixes=ALL,MMR3,MM32
+; RUN: -check-prefixes=ALL,MMR6,MM32
; RUN: llc < %s -march=mips -mcpu=mips32r6 -mattr=+micromips -O2 | FileCheck %s \
; RUN: -check-prefixes=ALL,MMR6,MM32
; RUN: llc < %s -march=mips -mcpu=mips64r6 -target-abi n64 -mattr=+micromips -O2 | FileCheck %s \
-; RUN: -check-prefixes=ALL,MM64
+; RUN: -check-prefixes=ALL,MMR6,MM64
; FIXME: This code sequence is inefficient as it should be 'subu $[[T0]], $zero, $[[T0]]'.
@@ -110,17 +110,17 @@ define signext i64 @add_i64(i64 signext %a, i64 signext %b) {
entry:
; ALL-LABEL: add_i64:
- ; GP32-DAG: addu $[[T0:[0-9]+]], $4, $6
- ; GP32-DAG: addu $3, $5, $7
- ; GP32: sltu $[[T1:[0-9]+]], $3, $5
- ; GP32: addu $2, $[[T0]], $[[T1]]
+ ; GP32: addu $3, $5, $7
+ ; GP32: sltu $[[T0:[0-9]+]], $3, $7
+ ; GP32: addu $[[T1:[0-9]+]], $[[T0]], $6
+ ; GP32: addu $2, $4, $[[T1]]
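+ ; (the carry out of the low-word add is recovered with sltu against an
+ ; operand and folded into the high-word add)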
; GP64: daddu $2, $4, $5
- ; MM32-DAG: addu16 $3, $5, $7
- ; MM32-DAG: addu16 $[[T0:[0-9]+]], $4, $6
- ; MM32: sltu $[[T1:[0-9]+]], $3, $5
- ; MM32: addu16 $2, $[[T0]], $[[T1]]
+ ; MM32: addu16 $3, $5, $7
+ ; MM32: sltu $[[T0:[0-9]+]], $3, $7
+ ; MM32: addu $[[T1:[0-9]+]], $[[T0]], $6
+ ; MM32: addu $2, $4, $[[T1]]
; MM64: daddu $2, $4, $5
@@ -132,108 +132,49 @@ define signext i128 @add_i128(i128 signext %a, i128 signext %b) {
entry:
; ALL-LABEL: add_i128:
- ; PRE4: move $[[R1:[0-9]+]], $5
- ; PRE4: move $[[R2:[0-9]+]], $4
- ; PRE4: lw $[[R3:[0-9]+]], 24($sp)
- ; PRE4: addu $[[R4:[0-9]+]], $6, $[[R3]]
- ; PRE4: lw $[[R5:[0-9]+]], 28($sp)
- ; PRE4: addu $[[R6:[0-9]+]], $7, $[[R5]]
- ; PRE4: sltu $[[R7:[0-9]+]], $[[R6]], $7
- ; PRE4: addu $[[R8:[0-9]+]], $[[R4]], $[[R7]]
- ; PRE4: xor $[[R9:[0-9]+]], $[[R8]], $6
- ; PRE4: sltiu $[[R10:[0-9]+]], $[[R9]], 1
- ; PRE4: bnez $[[R10]], $BB5_2
- ; PRE4: sltu $[[R7]], $[[R8]], $6
- ; PRE4: lw $[[R12:[0-9]+]], 20($sp)
- ; PRE4: addu $[[R13:[0-9]+]], $[[R1]], $[[R12]]
- ; PRE4: lw $[[R14:[0-9]+]], 16($sp)
- ; PRE4: addu $[[R15:[0-9]+]], $[[R13]], $[[R7]]
- ; PRE4: addu $[[R16:[0-9]+]], $[[R2]], $[[R14]]
- ; PRE4: sltu $[[R17:[0-9]+]], $[[R15]], $[[R13]]
- ; PRE4: sltu $[[R18:[0-9]+]], $[[R13]], $[[R1]]
- ; PRE4: addu $[[R19:[0-9]+]], $[[R16]], $[[R18]]
- ; PRE4: addu $2, $[[R19]], $[[R17]]
-
- ; GP32-CMOV: lw $[[T0:[0-9]+]], 24($sp)
- ; GP32-CMOV: addu $[[T1:[0-9]+]], $6, $[[T0]]
- ; GP32-CMOV: lw $[[T2:[0-9]+]], 28($sp)
- ; GP32-CMOV: addu $[[T3:[0-9]+]], $7, $[[T2]]
- ; GP32-CMOV: sltu $[[T4:[0-9]+]], $[[T3]], $7
- ; GP32-CMOV: addu $[[T5:[0-9]+]], $[[T1]], $[[T4]]
- ; GP32-CMOV: sltu $[[T6:[0-9]+]], $[[T5]], $6
- ; GP32-CMOV: xor $[[T7:[0-9]+]], $[[T5]], $6
- ; GP32-CMOV: movz $[[T8:[0-9]+]], $[[T4]], $[[T7]]
- ; GP32-CMOV: lw $[[T9:[0-9]+]], 20($sp)
- ; GP32-CMOV: addu $[[T10:[0-9]+]], $5, $[[T4]]
- ; GP32-CMOV: addu $[[T11:[0-9]+]], $[[T10]], $[[T8]]
- ; GP32-CMOV: lw $[[T12:[0-9]+]], 16($sp)
- ; GP32-CMOV: sltu $[[T13:[0-9]+]], $[[T11]], $[[T10]]
- ; GP32-CMOV: addu $[[T14:[0-9]+]], $4, $[[T12]]
- ; GP32-CMOV: sltu $[[T15:[0-9]+]], $[[T10]], $5
- ; GP32-CMOV: addu $[[T16:[0-9]+]], $[[T14]], $[[T15]]
- ; GP32-CMOV: addu $[[T17:[0-9]+]], $[[T16]], $[[T13]]
- ; GP32-CMOV: move $4, $[[T5]]
- ; GP32-CMOV: move $5, $[[T3]]
-
- ; GP64: daddu $[[T0:[0-9]+]], $4, $6
- ; GP64: daddu $[[T1:[0-9]+]], $5, $7
- ; GP64: sltu $[[T2:[0-9]+]], $[[T1]], $5
- ; GP64-NOT-R2-R6: dsll $[[T3:[0-9]+]], $[[T2]], 32
- ; GP64-NOT-R2-R6: dsrl $[[T4:[0-9]+]], $[[T3]], 32
- ; GP64-R2-R6: dext $[[T4:[0-9]+]], $[[T2]], 0, 32
-
- ; GP64: daddu $2, $[[T0]], $[[T4]]
-
- ; MMR3: move $[[T1:[0-9]+]], $5
- ; MMR3-DAG: lw $[[T2:[0-9]+]], 32($sp)
- ; MMR3: addu16 $[[T3:[0-9]+]], $6, $[[T2]]
- ; MMR3-DAG: lw $[[T4:[0-9]+]], 36($sp)
- ; MMR3: addu16 $[[T5:[0-9]+]], $7, $[[T4]]
- ; MMR3: sltu $[[T6:[0-9]+]], $[[T5]], $7
- ; MMR3: addu16 $[[T7:[0-9]+]], $[[T3]], $[[T6]]
- ; MMR3: sltu $[[T8:[0-9]+]], $[[T7]], $6
- ; MMR3: xor $[[T9:[0-9]+]], $[[T7]], $6
- ; MMR3: movz $[[T8]], $[[T6]], $[[T9]]
- ; MMR3: lw $[[T10:[0-9]+]], 28($sp)
- ; MMR3: addu16 $[[T11:[0-9]+]], $[[T1]], $[[T10]]
- ; MMR3: addu16 $[[T12:[0-9]+]], $[[T11]], $[[T8]]
- ; MMR3: lw $[[T13:[0-9]+]], 24($sp)
- ; MMR3: sltu $[[T14:[0-9]+]], $[[T12]], $[[T11]]
- ; MMR3: addu16 $[[T15:[0-9]+]], $4, $[[T13]]
- ; MMR3: sltu $[[T16:[0-9]+]], $[[T11]], $[[T1]]
- ; MMR3: addu16 $[[T17:[0-9]+]], $[[T15]], $[[T16]]
- ; MMR3: addu16 $2, $2, $[[T14]]
-
- ; MMR6: move $[[T1:[0-9]+]], $5
- ; MMR6: move $[[T2:[0-9]+]], $4
- ; MMR6: lw $[[T3:[0-9]+]], 32($sp)
- ; MMR6: addu16 $[[T4:[0-9]+]], $6, $[[T3]]
- ; MMR6: lw $[[T5:[0-9]+]], 36($sp)
- ; MMR6: addu16 $[[T6:[0-9]+]], $7, $[[T5]]
- ; MMR6: sltu $[[T7:[0-9]+]], $[[T6]], $7
- ; MMR6: addu16 $[[T8:[0-9]+]], $[[T4]], $7
- ; MMR6: sltu $[[T9:[0-9]+]], $[[T8]], $6
- ; MMR6: xor $[[T10:[0-9]+]], $[[T4]], $6
- ; MMR6: sltiu $[[T11:[0-9]+]], $[[T10]], 1
- ; MMR6: seleqz $[[T12:[0-9]+]], $[[T9]], $[[T11]]
- ; MMR6: selnez $[[T13:[0-9]+]], $[[T7]], $[[T11]]
- ; MMR6: lw $[[T14:[0-9]+]], 24($sp)
- ; MMR6: or $[[T15:[0-9]+]], $[[T13]], $[[T12]]
- ; MMR6: addu16 $[[T16:[0-9]+]], $[[T2]], $[[T14]]
- ; MMR6: lw $[[T17:[0-9]+]], 28($sp)
- ; MMR6: addu16 $[[T18:[0-9]+]], $[[T1]], $[[T17]]
- ; MMR6: addu16 $[[T19:[0-9]+]], $[[T18]], $[[T15]]
- ; MMR6: sltu $[[T20:[0-9]+]], $[[T18]], $[[T1]]
- ; MMR6: sltu $[[T21:[0-9]+]], $[[T17]], $[[T18]]
- ; MMR6: addu16 $2, $[[T16]], $[[T20]]
- ; MMR6: addu16 $2, $[[T20]], $[[T21]]
-
- ; MM64: daddu $[[T0:[0-9]+]], $4, $6
+ ; GP32: lw $[[T0:[0-9]+]], 28($sp)
+ ; GP32: addu $[[T1:[0-9]+]], $7, $[[T0]]
+ ; GP32: sltu $[[T2:[0-9]+]], $[[T1]], $[[T0]]
+ ; GP32: lw $[[T3:[0-9]+]], 24($sp)
+ ; GP32: addu $[[T4:[0-9]+]], $[[T2]], $[[T3]]
+ ; GP32: addu $[[T5:[0-9]+]], $6, $[[T4]]
+ ; GP32: sltu $[[T6:[0-9]+]], $[[T5]], $[[T3]]
+ ; GP32: lw $[[T7:[0-9]+]], 20($sp)
+ ; GP32: addu $[[T8:[0-9]+]], $[[T6]], $[[T7]]
+ ; GP32: lw $[[T9:[0-9]+]], 16($sp)
+ ; GP32: addu $3, $5, $[[T8]]
+ ; GP32: sltu $[[T10:[0-9]+]], $3, $[[T7]]
+ ; GP32: addu $[[T11:[0-9]+]], $[[T10]], $[[T9]]
+ ; GP32: addu $2, $4, $[[T11]]
+ ; GP32: move $4, $[[T5]]
+ ; GP32: move $5, $[[T1]]
+
+ ; GP64: daddu $3, $5, $7
+ ; GP64: sltu $[[T0:[0-9]+]], $3, $7
+ ; GP64: daddu $[[T1:[0-9]+]], $[[T0]], $6
+ ; GP64: daddu $2, $4, $[[T1]]
+
+ ; MM32: lw $[[T0:[0-9]+]], 28($sp)
+ ; MM32: addu $[[T1:[0-9]+]], $7, $[[T0]]
+ ; MM32: sltu $[[T2:[0-9]+]], $[[T1]], $[[T0]]
+ ; MM32: lw $[[T3:[0-9]+]], 24($sp)
+ ; MM32: addu16 $[[T4:[0-9]+]], $[[T2]], $[[T3]]
+ ; MM32: addu16 $[[T5:[0-9]+]], $6, $[[T4]]
+ ; MM32: sltu $[[T6:[0-9]+]], $[[T5]], $[[T3]]
+ ; MM32: lw $[[T7:[0-9]+]], 20($sp)
+ ; MM32: addu16 $[[T8:[0-9]+]], $[[T6]], $[[T7]]
+ ; MM32: lw $[[T9:[0-9]+]], 16($sp)
+ ; MM32: addu16 $[[T10:[0-9]+]], $5, $[[T8]]
+ ; MM32: sltu $[[T11:[0-9]+]], $[[T10]], $[[T7]]
+ ; MM32: addu $[[T12:[0-9]+]], $[[T11]], $[[T9]]
+ ; MM32: addu16 $[[T13:[0-9]+]], $4, $[[T12]]
+ ; MM32: move $4, $[[T5]]
+ ; MM32: move $5, $[[T1]]
+
; MM64: daddu $3, $5, $7
- ; MM64: sltu $[[T1:[0-9]+]], $3, $5
- ; MM64: dsll $[[T2:[0-9]+]], $[[T1]], 32
- ; MM64: dsrl $[[T3:[0-9]+]], $[[T2]], 32
- ; MM64: daddu $2, $[[T0]], $[[T3]]
+ ; MM64: sltu $[[T0:[0-9]+]], $3, $7
+ ; MM64: daddu $[[T1:[0-9]+]], $[[T0]], $6
+ ; MM64: daddu $2, $4, $[[T1]]
%r = add i128 %a, %b
ret i128 %r
@@ -308,16 +249,17 @@ define signext i32 @add_i32_4(i32 signext %a) {
define signext i64 @add_i64_4(i64 signext %a) {
; ALL-LABEL: add_i64_4:
- ; GP32: addiu $3, $5, 4
- ; GP32: sltu $[[T0:[0-9]+]], $3, $5
- ; GP32: addu $2, $4, $[[T0]]
-
- ; MM32: addiur2 $[[T1:[0-9]+]], $5, 4
- ; MM32: sltu $[[T2:[0-9]+]], $[[T1]], $5
- ; MM32: addu16 $2, $4, $[[T2]]
+ ; GP32: addiu $[[T0:[0-9]+]], $5, 4
+ ; GP32: addiu $[[T1:[0-9]+]], $zero, 4
+ ; GP32: sltu $[[T1]], $[[T0]], $[[T1]]
+ ; GP32: addu $2, $4, $[[T1]]
; GP64: daddiu $2, $4, 4
+ ; MM32: addiu $[[T0:[0-9]+]], $5, 4
+ ; MM32: li16 $[[T1:[0-9]+]], 4
+ ; MM32: sltu $[[T2:[0-9]+]], $[[T0]], $[[T1]]
+ ; MM32: addu $2, $4, $[[T2]]
; MM64: daddiu $2, $4, 4
@@ -328,67 +270,38 @@ define signext i64 @add_i64_4(i64 signext %a) {
define signext i128 @add_i128_4(i128 signext %a) {
; ALL-LABEL: add_i128_4:
- ; PRE4: move $[[T0:[0-9]+]], $5
- ; PRE4: addiu $[[T1:[0-9]+]], $7, 4
- ; PRE4: sltu $[[T2:[0-9]+]], $[[T1]], $7
- ; PRE4: xori $[[T3:[0-9]+]], $[[T2]], 1
- ; PRE4: bnez $[[T3]], $BB[[BB0:[0-9_]+]]
- ; PRE4: addu $[[T4:[0-9]+]], $6, $[[T2]]
- ; PRE4: sltu $[[T5:[0-9]+]], $[[T4]], $6
- ; PRE4; $BB[[BB0:[0-9]+]]:
- ; PRE4: addu $[[T6:[0-9]+]], $[[T0]], $[[T5]]
- ; PRE4: sltu $[[T7:[0-9]+]], $[[T6]], $[[T0]]
- ; PRE4: addu $[[T8:[0-9]+]], $4, $[[T7]]
- ; PRE4: move $4, $[[T4]]
-
- ; GP32-CMOV: addiu $[[T0:[0-9]+]], $7, 4
- ; GP32-CMOV: sltu $[[T1:[0-9]+]], $[[T0]], $7
- ; GP32-CMOV: addu $[[T2:[0-9]+]], $6, $[[T1]]
- ; GP32-CMOV: sltu $[[T3:[0-9]+]], $[[T2]], $6
- ; GP32-CMOV: movz $[[T3]], $[[T1]], $[[T1]]
- ; GP32-CMOV: addu $[[T4:[0-9]+]], $5, $[[T3]]
- ; GP32-CMOV: sltu $[[T5:[0-9]+]], $[[T4]], $5
- ; GP32-CMOV: addu $[[T7:[0-9]+]], $4, $[[T5]]
- ; GP32-CMOV: move $4, $[[T2]]
- ; GP32-CMOV: move $5, $[[T0]]
-
- ; GP64: daddiu $[[T0:[0-9]+]], $5, 4
- ; GP64: sltu $[[T1:[0-9]+]], $[[T0]], $5
- ; GP64-NOT-R2-R6: dsll $[[T2:[0-9]+]], $[[T1]], 32
- ; GP64-NOT-R2-R6: dsrl $[[T3:[0-9]+]], $[[T2]], 32
- ; GP64-R2-R6: dext $[[T3:[0-9]+]], $[[T1]], 0, 32
-
- ; GP64: daddu $2, $4, $[[T3]]
-
- ; MMR3: addiur2 $[[T0:[0-9]+]], $7, 4
- ; MMR3: sltu $[[T1:[0-9]+]], $[[T0]], $7
- ; MMR3: sltu $[[T2:[0-9]+]], $[[T0]], $7
- ; MMR3: addu16 $[[T3:[0-9]+]], $6, $[[T2]]
- ; MMR3: sltu $[[T4:[0-9]+]], $[[T3]], $6
- ; MMR3: movz $[[T4]], $[[T2]], $[[T1]]
- ; MMR3: addu16 $[[T6:[0-9]+]], $5, $[[T4]]
- ; MMR3: sltu $[[T7:[0-9]+]], $[[T6]], $5
- ; MMR3: addu16 $2, $4, $[[T7]]
-
- ; MMR6: addiur2 $[[T1:[0-9]+]], $7, 4
- ; MMR6: sltu $[[T2:[0-9]+]], $[[T1]], $7
- ; MMR6: xori $[[T3:[0-9]+]], $[[T2]], 1
- ; MMR6: selnez $[[T4:[0-9]+]], $[[T2]], $[[T3]]
- ; MMR6: addu16 $[[T5:[0-9]+]], $6, $[[T2]]
- ; MMR6: sltu $[[T6:[0-9]+]], $[[T5]], $6
- ; MMR6: seleqz $[[T7:[0-9]+]], $[[T6]], $[[T3]]
- ; MMR6: or $[[T8:[0-9]+]], $[[T4]], $[[T7]]
- ; MMR6: addu16 $[[T9:[0-9]+]], $5, $[[T8]]
- ; MMR6: sltu $[[T10:[0-9]+]], $[[T9]], $5
- ; MMR6: addu16 $[[T11:[0-9]+]], $4, $[[T10]]
- ; MMR6: move $4, $7
- ; MMR6: move $5, $[[T1]]
+ ; GP32: addiu $[[T0:[0-9]+]], $7, 4
+ ; GP32: addiu $[[T1:[0-9]+]], $zero, 4
+ ; GP32: sltu $[[T1]], $[[T0]], $[[T1]]
+ ; GP32: addu $[[T2:[0-9]+]], $6, $[[T1]]
+ ; GP32: sltu $[[T1]], $[[T2]], $zero
+ ; GP32: addu $[[T3:[0-9]+]], $5, $[[T1]]
+ ; GP32: sltu $[[T1]], $[[T3]], $zero
+ ; GP32: addu $[[T1]], $4, $[[T1]]
+ ; GP32: move $4, $[[T2]]
+ ; GP32: move $5, $[[T0]]
+
+ ; GP64: daddiu $[[T0:[0-9]+]], $5, 4
+ ; GP64: daddiu $[[T1:[0-9]+]], $zero, 4
+ ; GP64: sltu $[[T1]], $[[T0]], $[[T1]]
+ ; GP64: daddu $2, $4, $[[T1]]
+
+ ; MM32: addiu $[[T0:[0-9]+]], $7, 4
+ ; MM32: li16 $[[T1:[0-9]+]], 4
+ ; MM32: sltu $[[T1]], $[[T0]], $[[T1]]
+ ; MM32: addu16 $[[T2:[0-9]+]], $6, $[[T1]]
+ ; MM32: li16 $[[T1]], 0
+ ; MM32: sltu $[[T3:[0-9]+]], $[[T2]], $[[T1]]
+ ; MM32: addu16 $[[T3]], $5, $[[T3]]
+ ; MM32: sltu $[[T1]], $[[T3]], $[[T1]]
+ ; MM32: addu16 $[[T1]], $4, $[[T1]]
+ ; MM32: move $4, $[[T2]]
+ ; MM32: move $5, $[[T0]]
; MM64: daddiu $[[T0:[0-9]+]], $5, 4
- ; MM64: sltu $[[T1:[0-9]+]], $[[T0]], $5
- ; MM64: dsll $[[T2:[0-9]+]], $[[T1]], 32
- ; MM64: dsrl $[[T3:[0-9]+]], $[[T2]], 32
- ; MM64: daddu $2, $4, $[[T3]]
+ ; MM64: daddiu $[[T1:[0-9]+]], $zero, 4
+ ; MM64: sltu $[[T1]], $[[T0]], $[[T1]]
+ ; MM64: daddu $2, $4, $[[T1]]
%r = add i128 4, %a
ret i128 %r
@@ -467,15 +380,16 @@ define signext i64 @add_i64_3(i64 signext %a) {
; ALL-LABEL: add_i64_3:
; GP32: addiu $[[T0:[0-9]+]], $5, 3
- ; GP32: sltu $[[T1:[0-9]+]], $[[T0]], $5
+ ; GP32: addiu $[[T1:[0-9]+]], $zero, 3
+ ; GP32: sltu $[[T1]], $[[T0]], $[[T1]]
; GP32: addu $2, $4, $[[T1]]
; GP64: daddiu $2, $4, 3
- ; MM32: move $[[T1:[0-9]+]], $5
- ; MM32: addius5 $[[T1]], 3
- ; MM32: sltu $[[T2:[0-9]+]], $[[T1]], $5
- ; MM32: addu16 $2, $4, $[[T2]]
+ ; MM32: addiu $[[T0:[0-9]+]], $5, 3
+ ; MM32: li16 $[[T1:[0-9]+]], 3
+ ; MM32: sltu $[[T2:[0-9]+]], $[[T0]], $[[T1]]
+ ; MM32: addu $2, $4, $[[T2]]
; MM64: daddiu $2, $4, 3
@@ -486,70 +400,38 @@ define signext i64 @add_i64_3(i64 signext %a) {
define signext i128 @add_i128_3(i128 signext %a) {
; ALL-LABEL: add_i128_3:
- ; PRE4: move $[[T0:[0-9]+]], $5
- ; PRE4: addiu $[[T1:[0-9]+]], $7, 3
- ; PRE4: sltu $[[T2:[0-9]+]], $[[T1]], $7
- ; PRE4: xori $[[T3:[0-9]+]], $[[T2]], 1
- ; PRE4: bnez $[[T3]], $BB[[BB0:[0-9_]+]]
- ; PRE4: addu $[[T4:[0-9]+]], $6, $[[T2]]
- ; PRE4: sltu $[[T5:[0-9]+]], $[[T4]], $6
- ; PRE4; $BB[[BB0:[0-9]+]]:
- ; PRE4: addu $[[T6:[0-9]+]], $[[T0]], $[[T5]]
- ; PRE4: sltu $[[T7:[0-9]+]], $[[T6]], $[[T0]]
- ; PRE4: addu $[[T8:[0-9]+]], $4, $[[T7]]
- ; PRE4: move $4, $[[T4]]
-
- ; GP32-CMOV: addiu $[[T0:[0-9]+]], $7, 3
- ; GP32-CMOV: sltu $[[T1:[0-9]+]], $[[T0]], $7
- ; GP32-CMOV: addu $[[T2:[0-9]+]], $6, $[[T1]]
- ; GP32-CMOV: sltu $[[T3:[0-9]+]], $[[T2]], $6
- ; GP32-CMOV: movz $[[T3]], $[[T1]], $[[T1]]
- ; GP32-CMOV: addu $[[T4:[0-9]+]], $5, $[[T3]]
- ; GP32-CMOV: sltu $[[T5:[0-9]+]], $[[T4]], $5
- ; GP32-CMOV: addu $[[T7:[0-9]+]], $4, $[[T5]]
- ; GP32-CMOV: move $4, $[[T2]]
- ; GP32-CMOV: move $5, $[[T0]]
-
- ; GP64: daddiu $[[T0:[0-9]+]], $5, 3
- ; GP64: sltu $[[T1:[0-9]+]], $[[T0]], $5
-
- ; GP64-NOT-R2-R6: dsll $[[T2:[0-9]+]], $[[T1]], 32
- ; GP64-NOT-R2-R6: dsrl $[[T3:[0-9]+]], $[[T2]], 32
- ; GP64-R2-R6: dext $[[T3:[0-9]+]], $[[T1]], 0, 32
-
- ; GP64: daddu $2, $4, $[[T3]]
-
- ; MMR3: move $[[T1:[0-9]+]], $7
- ; MMR3: addius5 $[[T1]], 3
- ; MMR3: sltu $[[T2:[0-9]+]], $[[T1]], $7
- ; MMR3: sltu $[[T3:[0-9]+]], $[[T1]], $7
- ; MMR3: addu16 $[[T4:[0-9]+]], $6, $[[T3]]
- ; MMR3: sltu $[[T5:[0-9]+]], $[[T4]], $6
- ; MMR3: movz $[[T5]], $[[T3]], $[[T2]]
- ; MMR3: addu16 $[[T6:[0-9]+]], $5, $[[T5]]
- ; MMR3: sltu $[[T7:[0-9]+]], $[[T6]], $5
- ; MMR3: addu16 $2, $4, $[[T7]]
-
- ; MMR6: move $[[T1:[0-9]+]], $7
- ; MMR6: addius5 $[[T1]], 3
- ; MMR6: sltu $[[T2:[0-9]+]], $[[T1]], $7
- ; MMR6: xori $[[T3:[0-9]+]], $[[T2]], 1
- ; MMR6: selnez $[[T4:[0-9]+]], $[[T2]], $[[T3]]
- ; MMR6: addu16 $[[T5:[0-9]+]], $6, $[[T2]]
- ; MMR6: sltu $[[T6:[0-9]+]], $[[T5]], $6
- ; MMR6: seleqz $[[T7:[0-9]+]], $[[T6]], $[[T3]]
- ; MMR6: or $[[T8:[0-9]+]], $[[T4]], $[[T7]]
- ; MMR6: addu16 $[[T9:[0-9]+]], $5, $[[T8]]
- ; MMR6: sltu $[[T10:[0-9]+]], $[[T9]], $5
- ; MMR6: addu16 $[[T11:[0-9]+]], $4, $[[T10]]
- ; MMR6: move $4, $[[T5]]
- ; MMR6: move $5, $[[T1]]
+ ; GP32: addiu $[[T0:[0-9]+]], $7, 3
+ ; GP32: addiu $[[T1:[0-9]+]], $zero, 3
+ ; GP32: sltu $[[T1]], $[[T0]], $[[T1]]
+ ; GP32: addu $[[T2:[0-9]+]], $6, $[[T1]]
+ ; GP32: sltu $[[T3:[0-9]+]], $[[T2]], $zero
+ ; GP32: addu $[[T4:[0-9]+]], $5, $[[T3]]
+ ; GP32: sltu $[[T5:[0-9]+]], $[[T4]], $zero
+ ; GP32: addu $[[T5]], $4, $[[T5]]
+ ; GP32: move $4, $[[T2]]
+ ; GP32: move $5, $[[T0]]
+
+ ; GP64: daddiu $[[T0:[0-9]+]], $5, 3
+ ; GP64: daddiu $[[T1:[0-9]+]], $zero, 3
+ ; GP64: sltu $[[T1]], $[[T0]], $[[T1]]
+ ; GP64: daddu $2, $4, $[[T1]]
+
+ ; MM32: addiu $[[T0:[0-9]+]], $7, 3
+ ; MM32: li16 $[[T1:[0-9]+]], 3
+ ; MM32: sltu $[[T1]], $[[T0]], $[[T1]]
+ ; MM32: addu16 $[[T2:[0-9]+]], $6, $[[T1]]
+ ; MM32: li16 $[[T3:[0-9]+]], 0
+ ; MM32: sltu $[[T4:[0-9]+]], $[[T2]], $[[T3]]
+ ; MM32: addu16 $[[T4]], $5, $[[T4]]
+ ; MM32: sltu $[[T5:[0-9]+]], $[[T4]], $[[T3]]
+ ; MM32: addu16 $[[T5]], $4, $[[T5]]
+ ; MM32: move $4, $[[T2]]
+ ; MM32: move $5, $[[T0]]
; MM64: daddiu $[[T0:[0-9]+]], $5, 3
- ; MM64: sltu $[[T1:[0-9]+]], $[[T0]], $5
- ; MM64: dsll $[[T2:[0-9]+]], $[[T1]], 32
- ; MM64: dsrl $[[T3:[0-9]+]], $[[T2]], 32
- ; MM64: daddu $2, $4, $[[T3]]
+ ; MM64: daddiu $[[T1:[0-9]+]], $zero, 3
+ ; MM64: sltu $[[T1]], $[[T0]], $[[T1]]
+ ; MM64: daddu $2, $4, $[[T1]]
%r = add i128 3, %a
ret i128 %r
diff --git a/test/CodeGen/Mips/llvm-ir/sub.ll b/test/CodeGen/Mips/llvm-ir/sub.ll
index 655addb10a64..a730063c552f 100644
--- a/test/CodeGen/Mips/llvm-ir/sub.ll
+++ b/test/CodeGen/Mips/llvm-ir/sub.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s \
-; RUN: -check-prefixes=NOT-R2-R6,GP32,GP32-NOT-MM,NOT-MM,PRE4
+; RUN: -check-prefixes=NOT-R2-R6,GP32,GP32-NOT-MM,NOT-MM
; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \
; RUN: -check-prefixes=NOT-R2-R6,GP32,GP32-NOT-MM,NOT-MM
; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \
@@ -11,25 +11,25 @@
; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \
; RUN: -check-prefixes=R2-R6,GP32,GP32-NOT-MM,NOT-MM
; RUN: llc < %s -march=mips -mcpu=mips32r3 -mattr=+micromips -verify-machineinstrs | FileCheck %s \
-; RUN: -check-prefixes=GP32-MM,GP32,MM32,MMR3
+; RUN: -check-prefixes=GP32-MM,GP32,MM
; RUN: llc < %s -march=mips -mcpu=mips32r6 -mattr=+micromips | FileCheck %s \
-; RUN: -check-prefixes=GP32-MM,GP32,MM32,MMR6
+; RUN: -check-prefixes=GP32-MM,GP32,MM
; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \
-; RUN: -check-prefixes=NOT-R2-R6,GP64,NOT-MM,GP64-NOT-R2
+; RUN: -check-prefixes=NOT-R2-R6,GP64,NOT-MM
; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s \
-; RUN: -check-prefixes=NOT-R2-R6,GP64,NOT-MM,GP64-NOT-R2
+; RUN: -check-prefixes=NOT-R2-R6,GP64,NOT-MM
; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \
-; RUN: -check-prefixes=NOT-R2-R6,GP64,NOT-MM,GP64-NOT-R2
+; RUN: -check-prefixes=NOT-R2-R6,GP64,NOT-MM
; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \
-; RUN: -check-prefixes=R2-R6,GP64,NOT-MM,GP64-R2
+; RUN: -check-prefixes=R2-R6,GP64,NOT-MM
; RUN: llc < %s -march=mips64 -mcpu=mips64r3 | FileCheck %s \
-; RUN: -check-prefixes=R2-R6,GP64,NOT-MM,GP64-R2
+; RUN: -check-prefixes=R2-R6,GP64,NOT-MM
; RUN: llc < %s -march=mips64 -mcpu=mips64r5 | FileCheck %s \
-; RUN: -check-prefixes=R2-R6,GP64,NOT-MM,GP64-R2
+; RUN: -check-prefixes=R2-R6,GP64,NOT-MM
; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \
-; RUN: -check-prefixes=R2-R6,GP64,NOT-MM,GP64-R2
+; RUN: -check-prefixes=R2-R6,GP64,NOT-MM
; RUN: llc < %s -march=mips64 -mcpu=mips64r6 -mattr=+micromips | FileCheck %s \
-; RUN: -check-prefixes=GP64,MM64
+; RUN: -check-prefixes=GP64,MM
define signext i1 @sub_i1(i1 signext %a, i1 signext %b) {
entry:
@@ -100,15 +100,10 @@ define signext i64 @sub_i64(i64 signext %a, i64 signext %b) {
entry:
; ALL-LABEL: sub_i64:
- ; GP32-NOT-MM: sltu $[[T0:[0-9]+]], $5, $7
- ; GP32-NOT-MM: subu $2, $4, $6
- ; GP32-NOT-MM: subu $2, $2, $[[T0]]
- ; GP32-NOT-MM: subu $3, $5, $7
-
- ; MM32: sltu $[[T0:[0-9]+]], $5, $7
- ; MM32: subu16 $3, $4, $6
- ; MM32: subu16 $2, $3, $[[T0]]
- ; MM32: subu16 $3, $5, $7
+ ; GP32-NOT-MM: subu $3, $5, $7
+ ; GP32: sltu $[[T0:[0-9]+]], $5, $7
+ ; GP32: addu $[[T1:[0-9]+]], $[[T0]], $6
+ ; GP32: subu $2, $4, $[[T1]]
; GP64: dsubu $2, $4, $5
@@ -120,109 +115,42 @@ define signext i128 @sub_i128(i128 signext %a, i128 signext %b) {
entry:
; ALL-LABEL: sub_i128:
-; PRE4: lw $[[T0:[0-9]+]], 24($sp)
-; PRE4: lw $[[T1:[0-9]+]], 28($sp)
-; PRE4: sltu $[[T2:[0-9]+]], $7, $[[T1]]
-; PRE4: xor $[[T3:[0-9]+]], $6, $[[T0]]
-; PRE4: sltiu $[[T4:[0-9]+]], $[[T3]], 1
-; PRE4: bnez $[[T4]]
-; PRE4: move $[[T5:[0-9]+]], $[[T2]]
-; PRE4: sltu $[[T5]], $6, $[[T0]]
-
-; PRE4: lw $[[T6:[0-9]+]], 20($sp)
-; PRE4: subu $[[T7:[0-9]+]], $5, $[[T6]]
-; PRE4: subu $[[T8:[0-9]+]], $[[T7]], $[[T5]]
-; PRE4: sltu $[[T9:[0-9]+]], $[[T7]], $[[T5]]
-; PRE4: sltu $[[T10:[0-9]+]], $5, $[[T6]]
-; PRE4: lw $[[T11:[0-9]+]], 16($sp)
-; PRE4: subu $[[T12:[0-9]+]], $4, $[[T11]]
-; PRE4: subu $[[T13:[0-9]+]], $[[T12]], $[[T10]]
-; PRE4: subu $[[T14:[0-9]+]], $[[T13]], $[[T9]]
-; PRE4: subu $[[T15:[0-9]+]], $6, $[[T0]]
-; PRE4: subu $[[T16:[0-9]+]], $[[T15]], $[[T2]]
-; PRE4: subu $5, $7, $[[T1]]
-
-; MMR3: lw $[[T1:[0-9]+]], 48($sp)
-; MMR3: sltu $[[T2:[0-9]+]], $6, $[[T1]]
-; MMR3: xor $[[T3:[0-9]+]], $6, $[[T1]]
-; MMR3: lw $[[T4:[0-9]+]], 52($sp)
-; MMR3: sltu $[[T5:[0-9]+]], $7, $[[T4]]
-; MMR3: movz $[[T6:[0-9]+]], $[[T5]], $[[T3]]
-; MMR3: lw $[[T7:[0-8]+]], 44($sp)
-; MMR3: subu16 $[[T8:[0-9]+]], $5, $[[T7]]
-; MMR3: subu16 $[[T9:[0-9]+]], $[[T8]], $[[T6]]
-; MMR3: sltu $[[T10:[0-9]+]], $[[T8]], $[[T2]]
-; MMR3: sltu $[[T11:[0-9]+]], $5, $[[T7]]
-; MMR3: lw $[[T12:[0-9]+]], 40($sp)
-; MMR3: lw $[[T13:[0-9]+]], 12($sp)
-; MMR3: subu16 $[[T14:[0-9]+]], $[[T13]], $[[T12]]
-; MMR3: subu16 $[[T15:[0-9]+]], $[[T14]], $[[T11]]
-; MMR3: subu16 $[[T16:[0-9]+]], $[[T15]], $[[T10]]
-; MMR3: subu16 $[[T17:[0-9]+]], $6, $[[T1]]
-; MMR3: subu16 $[[T18:[0-9]+]], $[[T17]], $7
-; MMR3: lw $[[T19:[0-9]+]], 8($sp)
-; MMR3: lw $[[T20:[0-9]+]], 0($sp)
-; MMR3: subu16 $5, $[[T19]], $[[T20]]
-
-; MMR6: move $[[T0:[0-9]+]], $7
-; MMR6: sw $[[T0]], 8($sp)
-; MMR6: move $[[T1:[0-9]+]], $5
-; MMR6: sw $4, 12($sp)
-; MMR6: lw $[[T2:[0-9]+]], 48($sp)
-; MMR6: sltu $[[T3:[0-9]+]], $6, $[[T2]]
-; MMR6: xor $[[T4:[0-9]+]], $6, $[[T2]]
-; MMR6: sltiu $[[T5:[0-9]+]], $[[T4]], 1
-; MMR6: seleqz $[[T6:[0-9]+]], $[[T3]], $[[T5]]
-; MMR6: lw $[[T7:[0-9]+]], 52($sp)
-; MMR6: sltu $[[T8:[0-9]+]], $[[T0]], $[[T7]]
-; MMR6: selnez $[[T9:[0-9]+]], $[[T8]], $[[T5]]
-; MMR6: or $[[T10:[0-9]+]], $[[T9]], $[[T6]]
-; MMR6: lw $[[T11:[0-9]+]], 44($sp)
-; MMR6: subu16 $[[T12:[0-9]+]], $[[T1]], $[[T11]]
-; MMR6: subu16 $[[T13:[0-9]+]], $[[T12]], $[[T7]]
-; MMR6: sltu $[[T16:[0-9]+]], $[[T12]], $[[T7]]
-; MMR6: sltu $[[T17:[0-9]+]], $[[T1]], $[[T11]]
-; MMR6: lw $[[T18:[0-9]+]], 40($sp)
-; MMR6: lw $[[T19:[0-9]+]], 12($sp)
-; MMR6: subu16 $[[T20:[0-9]+]], $[[T19]], $[[T18]]
-; MMR6: subu16 $[[T21:[0-9]+]], $[[T20]], $[[T17]]
-; MMR6: subu16 $[[T22:[0-9]+]], $[[T21]], $[[T16]]
-; MMR6: subu16 $[[T23:[0-9]+]], $6, $[[T2]]
-; MMR6: subu16 $4, $[[T23]], $5
-; MMR6: lw $[[T24:[0-9]+]], 8($sp)
-; MMR6: lw $[[T25:[0-9]+]], 0($sp)
-; MMR6: subu16 $5, $[[T24]], $[[T25]]
-; MMR6: lw $3, 4($sp)
-
-; FIXME: The sltu, dsll, dsrl pattern here occurs when an i32 is zero
-; extended to 64 bits. Fortunately slt(i)(u) actually gives an i1.
-; These should be combined away.
-
-; GP64-NOT-R2: dsubu $1, $4, $6
-; GP64-NOT-R2: sltu $[[T0:[0-9]+]], $5, $7
-; GP64-NOT-R2: dsll $[[T1:[0-9]+]], $[[T0]], 32
-; GP64-NOT-R2: dsrl $[[T2:[0-9]+]], $[[T1]], 32
-; GP64-NOT-R2: dsubu $2, $1, $[[T2]]
-; GP64-NOT-R2: dsubu $3, $5, $7
-
-; FIXME: Likewise for the sltu, dext here.
-
-; GP64-R2: dsubu $1, $4, $6
-; GP64-R2: sltu $[[T0:[0-9]+]], $5, $7
-; GP64-R2: dext $[[T1:[0-9]+]], $[[T0]], 0, 32
-; GP64-R2: dsubu $2, $1, $[[T1]]
-; GP64-R2: dsubu $3, $5, $7
-
-; FIXME: Again, redundant sign extension. Also, microMIPSR6 has the
-; dext instruction which should be used here.
-
-; MM64: dsubu $[[T0:[0-9]+]], $4, $6
-; MM64: sltu $[[T1:[0-9]+]], $5, $7
-; MM64: dsll $[[T2:[0-9]+]], $[[T1]], 32
-; MM64: dsrl $[[T3:[0-9]+]], $[[T2]], 32
-; MM64: dsubu $2, $[[T0]], $[[T3]]
-; MM64: dsubu $3, $5, $7
-; MM64: jr $ra
+ ; GP32-NOT-MM: lw $[[T0:[0-9]+]], 20($sp)
+ ; GP32-NOT-MM: sltu $[[T1:[0-9]+]], $5, $[[T0]]
+ ; GP32-NOT-MM: lw $[[T2:[0-9]+]], 16($sp)
+ ; GP32-NOT-MM: addu $[[T3:[0-9]+]], $[[T1]], $[[T2]]
+ ; GP32-NOT-MM: lw $[[T4:[0-9]+]], 24($sp)
+ ; GP32-NOT-MM: lw $[[T5:[0-9]+]], 28($sp)
+ ; GP32-NOT-MM: subu $[[T6:[0-9]+]], $7, $[[T5]]
+ ; GP32-NOT-MM: subu $2, $4, $[[T3]]
+ ; GP32-NOT-MM: sltu $[[T8:[0-9]+]], $6, $[[T4]]
+ ; GP32-NOT-MM: addu $[[T9:[0-9]+]], $[[T8]], $[[T0]]
+ ; GP32-NOT-MM: subu $3, $5, $[[T9]]
+ ; GP32-NOT-MM: sltu $[[T10:[0-9]+]], $7, $[[T5]]
+ ; GP32-NOT-MM: addu $[[T11:[0-9]+]], $[[T10]], $[[T4]]
+ ; GP32-NOT-MM: subu $4, $6, $[[T11]]
+ ; GP32-NOT-MM: move $5, $[[T6]]
+
+ ; GP32-MM: lw $[[T0:[0-9]+]], 20($sp)
+ ; GP32-MM: sltu $[[T1:[0-9]+]], $[[T2:[0-9]+]], $[[T0]]
+ ; GP32-MM: lw $[[T3:[0-9]+]], 16($sp)
+ ; GP32-MM: addu $[[T3]], $[[T1]], $[[T3]]
+ ; GP32-MM: lw $[[T4:[0-9]+]], 24($sp)
+ ; GP32-MM: lw $[[T5:[0-9]+]], 28($sp)
+ ; GP32-MM: subu $[[T1]], $7, $[[T5]]
+ ; GP32-MM: subu16 $[[T3]], $[[T6:[0-9]+]], $[[T3]]
+ ; GP32-MM: sltu $[[T6]], $6, $[[T4]]
+ ; GP32-MM: addu16 $[[T0]], $[[T6]], $[[T0]]
+ ; GP32-MM: subu16 $[[T0]], $5, $[[T0]]
+ ; GP32-MM: sltu $[[T6]], $7, $[[T5]]
+ ; GP32-MM: addu $[[T6]], $[[T6]], $[[T4]]
+ ; GP32-MM: subu16 $[[T6]], $6, $[[T6]]
+ ; GP32-MM: move $[[T2]], $[[T1]]
+
+ ; GP64: dsubu $3, $5, $7
+ ; GP64: sltu $[[T0:[0-9]+]], $5, $7
+ ; GP64: daddu $[[T1:[0-9]+]], $[[T0]], $6
+ ; GP64: dsubu $2, $4, $[[T1]]
%r = sub i128 %a, %b
ret i128 %r
diff --git a/test/CodeGen/Mips/madd-msub.ll b/test/CodeGen/Mips/madd-msub.ll
index 3e1a2e8b9708..7baba005a072 100644
--- a/test/CodeGen/Mips/madd-msub.ll
+++ b/test/CodeGen/Mips/madd-msub.ll
@@ -25,11 +25,11 @@
; 32R6-DAG: mul $[[T0:[0-9]+]], ${{[45]}}, ${{[45]}}
; 32R6-DAG: addu $[[T1:[0-9]+]], $[[T0]], $6
-; 32R6-DAG: sltu $[[T2:[0-9]+]], $[[T1]], $[[T0]]
-; 32R6-DAG: muh $[[T3:[0-9]+]], ${{[45]}}, ${{[45]}}
-; 32R6-DAG: sra $[[T4:[0-9]+]], $6, 31
-; 32R6-DAG: addu $[[T5:[0-9]+]], $[[T3]], $[[T4]]
-; 32R6-DAG: addu $2, $[[T5]], $[[T2]]
+; 32R6-DAG: sltu $[[T2:[0-9]+]], $[[T1]], $6
+; 32R6-DAG: sra $[[T3:[0-9]+]], $6, 31
+; 32R6-DAG: addu $[[T4:[0-9]+]], $[[T2]], $[[T3]]
+; 32R6-DAG: muh $[[T5:[0-9]+]], ${{[45]}}, ${{[45]}}
+; 32R6-DAG: addu $2, $[[T5]], $[[T4]]
; 64-DAG: sll $[[T0:[0-9]+]], $4, 0
; 64-DAG: sll $[[T1:[0-9]+]], $5, 0
@@ -71,7 +71,7 @@ entry:
; 32R6-DAG: mul $[[T0:[0-9]+]], ${{[45]}}, ${{[45]}}
; 32R6-DAG: addu $[[T1:[0-9]+]], $[[T0]], $6
-; 32R6-DAG: sltu $[[T2:[0-9]+]], $[[T1]], $[[T0]]
+; 32R6-DAG: sltu $[[T2:[0-9]+]], $[[T1]], $6
; FIXME: There's a redundant move here. We should remove it.
; 32R6-DAG: muhu $[[T3:[0-9]+]], ${{[45]}}, ${{[45]}}
; 32R6-DAG: addu $2, $[[T3]], $[[T2]]
@@ -109,10 +109,10 @@ entry:
; 32R6-DAG: mul $[[T0:[0-9]+]], ${{[45]}}, ${{[45]}}
; 32R6-DAG: addu $[[T1:[0-9]+]], $[[T0]], $7
-; 32R6-DAG: sltu $[[T2:[0-9]+]], $[[T1]], $1
-; 32R6-DAG: muh $[[T3:[0-9]+]], ${{[45]}}, ${{[45]}}
-; 32R6-DAG: addu $[[T4:[0-9]+]], $[[T3]], $6
-; 32R6-DAG: addu $2, $[[T4]], $[[T2]]
+; 32R6-DAG: sltu $[[T2:[0-9]+]], $[[T1]], $7
+; 32R6-DAG: addu $[[T4:[0-9]+]], $[[T2]], $6
+; 32R6-DAG: muh $[[T5:[0-9]+]], ${{[45]}}, ${{[45]}}
+; 32R6-DAG: addu $2, $[[T5]], $[[T4]]
; 64-DAG: sll $[[T0:[0-9]+]], $4, 0
; 64-DAG: sll $[[T1:[0-9]+]], $5, 0
@@ -134,17 +134,6 @@ entry:
ret i64 %add
}
-; ALL-LABEL: madd4
-; ALL-NOT: madd ${{[0-9]+}}, ${{[0-9]+}}
-
-define i32 @madd4(i32 %a, i32 %b, i32 %c) {
-entry:
- %mul = mul nsw i32 %a, %b
- %add = add nsw i32 %c, %mul
-
- ret i32 %add
-}
-
; ALL-LABEL: msub1:
; 32-DAG: sra $[[T0:[0-9]+]], $6, 31
@@ -159,13 +148,13 @@ entry:
; DSP-DAG: mfhi $2, $[[AC]]
; DSP-DAG: mflo $3, $[[AC]]
-; 32R6-DAG: mul $[[T0:[0-9]+]], ${{[45]}}, ${{[45]}}
-; 32R6-DAG: sltu $[[T1:[0-9]+]], $6, $[[T0]]
-; 32R6-DAG: muh $[[T2:[0-9]+]], ${{[45]}}, ${{[45]}}
-; 32R6-DAG: sra $[[T3:[0-9]+]], $6, 31
-; 32R6-DAG: subu $[[T4:[0-9]+]], $[[T3]], $[[T2]]
-; 32R6-DAG: subu $2, $[[T4]], $[[T1]]
-; 32R6-DAG: subu $3, $6, $[[T0]]
+; 32R6-DAG: muh $[[T0:[0-9]+]], ${{[45]}}, ${{[45]}}
+; 32R6-DAG: mul $[[T1:[0-9]+]], ${{[45]}}, ${{[45]}}
+; 32R6-DAG: sltu $[[T3:[0-9]+]], $6, $[[T1]]
+; 32R6-DAG: addu $[[T4:[0-9]+]], $[[T3]], $[[T0]]
+; 32R6-DAG: sra $[[T5:[0-9]+]], $6, 31
+; 32R6-DAG: subu $2, $[[T5]], $[[T4]]
+; 32R6-DAG: subu $3, $6, $[[T1]]
; 64-DAG: sll $[[T0:[0-9]+]], $4, 0
; 64-DAG: sll $[[T1:[0-9]+]], $5, 0
@@ -205,12 +194,13 @@ entry:
; DSP-DAG: mfhi $2, $[[AC]]
; DSP-DAG: mflo $3, $[[AC]]
-; 32R6-DAG: mul $[[T0:[0-9]+]], ${{[45]}}, ${{[45]}}
-; 32R6-DAG: sltu $[[T1:[0-9]+]], $6, $[[T0]]
-; 32R6-DAG: muhu $[[T2:[0-9]+]], ${{[45]}}, ${{[45]}}
-; 32R6-DAG: negu $[[T3:[0-9]+]], $[[T2]]
-; 32R6-DAG: subu $2, $[[T3]], $[[T1]]
-; 32R6-DAG: subu $3, $6, $[[T0]]
+; 32R6-DAG: muhu $[[T0:[0-9]+]], ${{[45]}}, ${{[45]}}
+; 32R6-DAG: mul $[[T1:[0-9]+]], ${{[45]}}, ${{[45]}}
+
+; 32R6-DAG: sltu $[[T2:[0-9]+]], $6, $[[T1]]
+; 32R6-DAG: addu $[[T3:[0-9]+]], $[[T2]], $[[T0]]
+; 32R6-DAG: negu $2, $[[T3]]
+; 32R6-DAG: subu $3, $6, $[[T1]]
; 64-DAG: d[[m:m]]ult $5, $4
; 64-DAG: [[m]]flo $[[T0:[0-9]+]]
@@ -244,12 +234,12 @@ entry:
; DSP-DAG: mfhi $2, $[[AC]]
; DSP-DAG: mflo $3, $[[AC]]
-; 32R6-DAG: mul $[[T0:[0-9]+]], ${{[45]}}, ${{[45]}}
-; 32R6-DAG: sltu $[[T1:[0-9]+]], $7, $[[T0]]
-; 32R6-DAG: muh $[[T2:[0-9]+]], ${{[45]}}, ${{[45]}}
-; 32R6-DAG: subu $[[T3:[0-9]+]], $6, $[[T2]]
-; 32R6-DAG: subu $2, $[[T3]], $[[T1]]
-; 32R6-DAG: subu $3, $7, $[[T0]]
+; 32R6-DAG: muh $[[T0:[0-9]+]], ${{[45]}}, ${{[45]}}
+; 32R6-DAG: mul $[[T1:[0-9]+]], ${{[45]}}, ${{[45]}}
+; 32R6-DAG: sltu $[[T2:[0-9]+]], $7, $[[T1]]
+; 32R6-DAG: addu $[[T3:[0-9]+]], $[[T2]], $[[T0]]
+; 32R6-DAG: subu $2, $6, $[[T3]]
+; 32R6-DAG: subu $3, $7, $[[T1]]
; 64-DAG: sll $[[T0:[0-9]+]], $4, 0
; 64-DAG: sll $[[T1:[0-9]+]], $5, 0
@@ -270,14 +260,3 @@ entry:
%sub = sub nsw i64 %c, %mul
ret i64 %sub
}
-
-; ALL-LABEL: msub4
-; ALL-NOT: msub ${{[0-9]+}}, ${{[0-9]+}}
-
-define i32 @msub4(i32 %a, i32 %b, i32 %c) {
-entry:
- %mul = mul nsw i32 %a, %b
- %sub = sub nsw i32 %c, %mul
-
- ret i32 %sub
-}
diff --git a/test/CodeGen/NVPTX/lower-aggr-copies.ll b/test/CodeGen/NVPTX/lower-aggr-copies.ll
index 192d4becb059..f522c6722ee6 100644
--- a/test/CodeGen/NVPTX/lower-aggr-copies.ll
+++ b/test/CodeGen/NVPTX/lower-aggr-copies.ll
@@ -17,6 +17,8 @@ entry:
ret i8* %dst
; IR-LABEL: @memcpy_caller
+; IR: [[CMPREG:%[0-9]+]] = icmp eq i64 0, %n
+; IR: br i1 [[CMPREG]], label %split, label %loadstoreloop
; IR: loadstoreloop:
; IR: [[LOADPTR:%[0-9]+]] = getelementptr inbounds i8, i8* %src, i64
; IR-NEXT: [[VAL:%[0-9]+]] = load i8, i8* [[LOADPTR]]
@@ -73,6 +75,8 @@ entry:
; IR-LABEL: @memset_caller
; IR: [[VAL:%[0-9]+]] = trunc i32 %c to i8
+; IR: [[CMPREG:%[0-9]+]] = icmp eq i64 0, %n
+; IR: br i1 [[CMPREG]], label %split, label %loadstoreloop
; IR: loadstoreloop:
; IR: [[STOREPTR:%[0-9]+]] = getelementptr inbounds i8, i8* %dst, i64
; IR-NEXT: store i8 [[VAL]], i8* [[STOREPTR]]
diff --git a/test/CodeGen/PowerPC/anon_aggr.ll b/test/CodeGen/PowerPC/anon_aggr.ll
index 9b32a8f55f34..2c1735844477 100644
--- a/test/CodeGen/PowerPC/anon_aggr.ll
+++ b/test/CodeGen/PowerPC/anon_aggr.ll
@@ -1,6 +1,6 @@
; RUN: llc -verify-machineinstrs -O0 -mcpu=ppc64 -mtriple=powerpc64-unknown-linux-gnu -fast-isel=false < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -O0 -mcpu=g4 -mtriple=powerpc-apple-darwin8 < %s | FileCheck -check-prefix=DARWIN32 %s
-; RUN: llc -verify-machineinstrs -O0 -mcpu=ppc970 -mtriple=powerpc64-apple-darwin8 < %s | FileCheck -check-prefix=DARWIN64 %s
+; RUN: llc -verify-machineinstrs -O0 -mcpu=970 -mtriple=powerpc64-apple-darwin8 < %s | FileCheck -check-prefix=DARWIN64 %s
; Test case for PR 14779: anonymous aggregates are not handled correctly.
; Darwin bug report PR 15821 is similar.
@@ -22,7 +22,7 @@ unequal:
; CHECK-LABEL: func1:
; CHECK: cmpld {{([0-9]+,)?}}4, 5
-; CHECK-DAG: std 4, -[[OFFSET1:[0-9]+]]
+; CHECK-DAG: std 3, -[[OFFSET1:[0-9]+]]
; CHECK-DAG: std 5, -[[OFFSET2:[0-9]+]]
; CHECK: ld 3, -[[OFFSET1]](1)
; CHECK: ld 3, -[[OFFSET2]](1)
@@ -31,19 +31,19 @@ unequal:
; DARWIN32: mr
; DARWIN32: mr r[[REG1:[0-9]+]], r[[REGA:[0-9]+]]
; DARWIN32: mr r[[REG2:[0-9]+]], r[[REGB:[0-9]+]]
-; DARWIN32: cmplw {{(cr[0-9]+,)?}}r[[REGA]], r[[REGB]]
+; DARWIN32: cmplw {{(cr[0-9]+,)?}}r[[REGB]], r[[REGA]]
; DARWIN32: stw r[[REG1]], -[[OFFSET1:[0-9]+]]
; DARWIN32: stw r[[REG2]], -[[OFFSET2:[0-9]+]]
-; DARWIN32: lwz r3, -[[OFFSET1]]
; DARWIN32: lwz r3, -[[OFFSET2]]
+; DARWIN32: lwz r3, -[[OFFSET1]]
; DARWIN64: _func1:
; DARWIN64: mr
; DARWIN64: mr r[[REG1:[0-9]+]], r[[REGA:[0-9]+]]
; DARWIN64: mr r[[REG2:[0-9]+]], r[[REGB:[0-9]+]]
-; DARWIN64: cmpld {{(cr[0-9]+,)?}}r[[REGA]], r[[REGB]]
-; DARWIN64: std r[[REG1]], -[[OFFSET1:[0-9]+]]
-; DARWIN64: std r[[REG2]], -[[OFFSET2:[0-9]+]]
+; DARWIN64: cmpld {{(cr[0-9]+,)?}}r[[REGB]], r[[REGA]]
+; DARWIN64: std r[[REG1]], -[[OFFSET2:[0-9]+]]
+; DARWIN64: std r[[REG2]], -[[OFFSET1:[0-9]+]]
; DARWIN64: ld r3, -[[OFFSET1]]
; DARWIN64: ld r3, -[[OFFSET2]]
@@ -61,19 +61,19 @@ unequal:
ret i8* %array2_ptr
}
; CHECK-LABEL: func2:
-; CHECK: cmpld {{([0-9]+,)?}}4, 6
+; CHECK-DAG: cmpld {{([0-9]+,)?}}4, 6
; CHECK-DAG: std 6, 72(1)
; CHECK-DAG: std 5, 64(1)
; CHECK-DAG: std 6, -[[OFFSET1:[0-9]+]]
-; CHECK-DAG: std 4, -[[OFFSET2:[0-9]+]]
+; CHECK-DAG: std 5, -[[OFFSET2:[0-9]+]]
; CHECK: ld 3, -[[OFFSET2]](1)
; CHECK: ld 3, -[[OFFSET1]](1)
; DARWIN32-LABEL: _func2
-; DARWIN32-DAG: addi r[[REG8:[0-9]+]], r[[REGSP:[0-9]+]], 36
-; DARWIN32-DAG: lwz r[[REG2:[0-9]+]], 44(r[[REGSP]])
; DARWIN32: mr
+; DARWIN32: addi r[[REG8:[0-9]+]], r[[REGSP:[0-9]+]], 36
; DARWIN32: mr r[[REG7:[0-9]+]], r5
+; DARWIN32: lwz r[[REG2:[0-9]+]], 44(r[[REGSP]])
; DARWIN32-DAG: cmplw {{(cr[0-9]+,)?}}r5, r[[REG2]]
; DARWIN32-DAG: stw r[[REG7]], -[[OFFSET1:[0-9]+]]
; DARWIN32-DAG: stw r[[REG2]], -[[OFFSET2:[0-9]+]]
@@ -82,9 +82,9 @@ unequal:
; DARWIN64: _func2:
-; DARWIN64: ld r[[REG2:[0-9]+]], 72(r1)
; DARWIN64: mr
; DARWIN64: mr r[[REG3:[0-9]+]], r[[REGA:[0-9]+]]
+; DARWIN64: ld r[[REG2:[0-9]+]], 72(r1)
; DARWIN64: cmpld {{(cr[0-9]+,)?}}r[[REGA]], r[[REG2]]
; DARWIN64: std r[[REG2]], -[[OFFSET2:[0-9]+]]
; DARWIN64: std r[[REG3]], -[[OFFSET1:[0-9]+]]
@@ -107,9 +107,9 @@ unequal:
}
; CHECK-LABEL: func3:
-; CHECK: cmpld {{([0-9]+,)?}}4, 6
-; CHECK-DAG: std 4, -[[OFFSET2:[0-9]+]](1)
-; CHECK-DAG: std 6, -[[OFFSET1:[0-9]+]](1)
+; CHECK-DAG: cmpld {{([0-9]+,)?}}3, 4
+; CHECK-DAG: std 3, -[[OFFSET2:[0-9]+]](1)
+; CHECK-DAG: std 4, -[[OFFSET1:[0-9]+]](1)
; CHECK: ld 3, -[[OFFSET2]](1)
; CHECK: ld 3, -[[OFFSET1]](1)
@@ -127,13 +127,13 @@ unequal:
; DARWIN32-DAG: lwz r3, -[[OFFSET2:[0-9]+]]
; DARWIN64: _func3:
-; DARWIN64: ld r[[REG3:[0-9]+]], 72(r1)
-; DARWIN64: ld r[[REG4:[0-9]+]], 56(r1)
+; DARWIN64-DAG: ld r[[REG3:[0-9]+]], 72(r1)
+; DARWIN64-DAG: ld r[[REG4:[0-9]+]], 56(r1)
; DARWIN64: cmpld {{(cr[0-9]+,)?}}r[[REG4]], r[[REG3]]
-; DARWIN64: std r[[REG3]], -[[OFFSET1:[0-9]+]]
-; DARWIN64: std r[[REG4]], -[[OFFSET2:[0-9]+]]
-; DARWIN64: ld r3, -[[OFFSET2]]
+; DARWIN64: std r[[REG4]], -[[OFFSET1:[0-9]+]]
+; DARWIN64: std r[[REG3]], -[[OFFSET2:[0-9]+]]
; DARWIN64: ld r3, -[[OFFSET1]]
+; DARWIN64: ld r3, -[[OFFSET2]]
define i8* @func4(i64 %p1, i64 %p2, i64 %p3, i64 %p4,
@@ -152,31 +152,31 @@ unequal:
}
; CHECK-LABEL: func4:
-; CHECK: ld [[REG3:[0-9]+]], 136(1)
-; CHECK: ld [[REG2:[0-9]+]], 120(1)
-; CHECK: cmpld {{([0-9]+,)?}}[[REG2]], [[REG3]]
-; CHECK: std [[REG3]], -[[OFFSET2:[0-9]+]](1)
+; CHECK-DAG: ld [[REG2:[0-9]+]], 120(1)
+; CHECK-DAG: ld [[REG3:[0-9]+]], 136(1)
+; CHECK-DAG: cmpld {{([0-9]+,)?}}[[REG2]], [[REG3]]
; CHECK: std [[REG2]], -[[OFFSET1:[0-9]+]](1)
+; CHECK: std [[REG3]], -[[OFFSET2:[0-9]+]](1)
; CHECK: ld 3, -[[OFFSET1]](1)
; CHECK: ld 3, -[[OFFSET2]](1)
; DARWIN32: _func4:
; DARWIN32: lwz r[[REG4:[0-9]+]], 96(r1)
; DARWIN32: addi r[[REG1:[0-9]+]], r1, 100
-; DARWIN32: lwz r[[REG3:[0-9]+]], 108(r1)
; DARWIN32: mr r[[REG2:[0-9]+]], r[[REG4]]
+; DARWIN32: lwz r[[REG3:[0-9]+]], 108(r1)
; DARWIN32: cmplw {{(cr[0-9]+,)?}}r[[REG4]], r[[REG3]]
-; DARWIN32: stw r[[REG2]], -[[OFFSET1:[0-9]+]]
-; DARWIN32: stw r[[REG3]], -[[OFFSET2:[0-9]+]]
-; DARWIN32: lwz r[[REG1]], -[[OFFSET1]]
-; DARWIN32: lwz r[[REG1]], -[[OFFSET2]]
+; DARWIN32-DAG: stw r[[REG2]], -[[OFFSET1:[0-9]+]]
+; DARWIN32-DAG: stw r[[REG3]], -[[OFFSET2:[0-9]+]]
+; DARWIN32: lwz r3, -[[OFFSET1]]
+; DARWIN32: lwz r3, -[[OFFSET2]]
; DARWIN64: _func4:
; DARWIN64: ld r[[REG2:[0-9]+]], 120(r1)
-; DARWIN64: ld r[[REG3:[0-9]+]], 136(r1)
-; DARWIN64: mr r[[REG4:[0-9]+]], r[[REG2]]
+; DARWIN64-DAG: ld r[[REG3:[0-9]+]], 136(r1)
+; DARWIN64-DAG: mr r[[REG4:[0-9]+]], r[[REG2]]
; DARWIN64: cmpld {{(cr[0-9]+,)?}}r[[REG2]], r[[REG3]]
-; DARWIN64: std r[[REG4]], -[[OFFSET1:[0-9]+]]
; DARWIN64: std r[[REG3]], -[[OFFSET2:[0-9]+]]
+; DARWIN64: std r[[REG4]], -[[OFFSET1:[0-9]+]]
; DARWIN64: ld r3, -[[OFFSET1]]
; DARWIN64: ld r3, -[[OFFSET2]]
diff --git a/test/CodeGen/PowerPC/floatPSA.ll b/test/CodeGen/PowerPC/floatPSA.ll
index ccda9d56a147..73dea19adbd5 100644
--- a/test/CodeGen/PowerPC/floatPSA.ll
+++ b/test/CodeGen/PowerPC/floatPSA.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -O0 -mtriple=powerpc64-unknown-linux-gnu -fast-isel=false < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -O2 -mtriple=powerpc64-unknown-linux-gnu -fast-isel=false < %s | FileCheck %s
; This verifies that single-precision floating point values that can't
; be passed in registers are stored in the rightmost word of the parameter
diff --git a/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll b/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
index d398dfe7fc92..059665adc351 100644
--- a/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
+++ b/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
@@ -100,28 +100,26 @@ define signext i32 @zeroEqualityTest04() {
; CHECK-NEXT: addi 5, 4, .LzeroEqualityTest02.buffer2@toc@l
; CHECK-NEXT: ldbrx 3, 0, 6
; CHECK-NEXT: ldbrx 4, 0, 5
-; CHECK-NEXT: subf. 7, 4, 3
+; CHECK-NEXT: cmpld 3, 4
; CHECK-NEXT: bne 0, .LBB3_2
; CHECK-NEXT: # BB#1: # %loadbb1
; CHECK-NEXT: li 4, 8
; CHECK-NEXT: ldbrx 3, 6, 4
; CHECK-NEXT: ldbrx 4, 5, 4
-; CHECK-NEXT: subf. 5, 4, 3
-; CHECK-NEXT: beq 0, .LBB3_4
+; CHECK-NEXT: li 5, 0
+; CHECK-NEXT: cmpld 3, 4
+; CHECK-NEXT: beq 0, .LBB3_3
; CHECK-NEXT: .LBB3_2: # %res_block
; CHECK-NEXT: cmpld 3, 4
-; CHECK-NEXT: li 3, 1
+; CHECK-NEXT: li 11, 1
; CHECK-NEXT: li 12, -1
-; CHECK-NEXT: isel 3, 12, 3, 0
+; CHECK-NEXT: isel 5, 12, 11, 0
; CHECK-NEXT: .LBB3_3: # %endblock
-; CHECK-NEXT: cmpwi 3, 1
+; CHECK-NEXT: cmpwi 5, 1
; CHECK-NEXT: li 3, 0
; CHECK-NEXT: li 4, 1
; CHECK-NEXT: isel 3, 4, 3, 0
; CHECK-NEXT: blr
-; CHECK-NEXT: .LBB3_4:
-; CHECK-NEXT: li 3, 0
-; CHECK-NEXT: b .LBB3_3
%call = tail call signext i32 @memcmp(i8* bitcast ([4 x i32]* @zeroEqualityTest02.buffer1 to i8*), i8* bitcast ([4 x i32]* @zeroEqualityTest02.buffer2 to i8*), i64 16)
%not.cmp = icmp slt i32 %call, 1
%. = zext i1 %not.cmp to i32
@@ -138,27 +136,25 @@ define signext i32 @zeroEqualityTest05() {
; CHECK-NEXT: addi 5, 4, .LzeroEqualityTest03.buffer2@toc@l
; CHECK-NEXT: ldbrx 3, 0, 6
; CHECK-NEXT: ldbrx 4, 0, 5
-; CHECK-NEXT: subf. 7, 4, 3
+; CHECK-NEXT: cmpld 3, 4
; CHECK-NEXT: bne 0, .LBB4_2
; CHECK-NEXT: # BB#1: # %loadbb1
; CHECK-NEXT: li 4, 8
; CHECK-NEXT: ldbrx 3, 6, 4
; CHECK-NEXT: ldbrx 4, 5, 4
-; CHECK-NEXT: subf. 5, 4, 3
-; CHECK-NEXT: beq 0, .LBB4_4
+; CHECK-NEXT: li 5, 0
+; CHECK-NEXT: cmpld 3, 4
+; CHECK-NEXT: beq 0, .LBB4_3
; CHECK-NEXT: .LBB4_2: # %res_block
; CHECK-NEXT: cmpld 3, 4
-; CHECK-NEXT: li 3, 1
+; CHECK-NEXT: li 11, 1
; CHECK-NEXT: li 12, -1
-; CHECK-NEXT: isel 3, 12, 3, 0
+; CHECK-NEXT: isel 5, 12, 11, 0
; CHECK-NEXT: .LBB4_3: # %endblock
-; CHECK-NEXT: srwi 3, 3, 31
+; CHECK-NEXT: srwi 3, 5, 31
; CHECK-NEXT: xori 3, 3, 1
; CHECK-NEXT: clrldi 3, 3, 32
; CHECK-NEXT: blr
-; CHECK-NEXT: .LBB4_4:
-; CHECK-NEXT: li 3, 0
-; CHECK-NEXT: b .LBB4_3
%call = tail call signext i32 @memcmp(i8* bitcast ([4 x i32]* @zeroEqualityTest03.buffer1 to i8*), i8* bitcast ([4 x i32]* @zeroEqualityTest03.buffer2 to i8*), i64 16)
%call.lobit = lshr i32 %call, 31
%call.lobit.not = xor i32 %call.lobit, 1
diff --git a/test/CodeGen/PowerPC/memcmp.ll b/test/CodeGen/PowerPC/memcmp.ll
index bae713cb2072..fbaaa8bb74c9 100644
--- a/test/CodeGen/PowerPC/memcmp.ll
+++ b/test/CodeGen/PowerPC/memcmp.ll
@@ -1,87 +1,72 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-gnu-linux < %s | FileCheck %s -check-prefix=CHECK
-; Check size 8
-; Function Attrs: nounwind readonly
-define signext i32 @test1(i32* nocapture readonly %buffer1, i32* nocapture readonly %buffer2) local_unnamed_addr #0 {
-entry:
- %0 = bitcast i32* %buffer1 to i8*
- %1 = bitcast i32* %buffer2 to i8*
- %call = tail call signext i32 @memcmp(i8* %0, i8* %1, i64 8) #2
+define signext i32 @memcmp8(i32* nocapture readonly %buffer1, i32* nocapture readonly %buffer2) {
+; CHECK-LABEL: memcmp8:
+; CHECK: # BB#0:
+; CHECK-NEXT: ldbrx 3, 0, 3
+; CHECK-NEXT: ldbrx 4, 0, 4
+; CHECK-NEXT: li 5, 1
+; CHECK-NEXT: li 12, -1
+; CHECK-NEXT: cmpld 3, 4
+; CHECK-NEXT: isel 3, 12, 5, 0
+; CHECK-NEXT: isel 3, 0, 3, 2
+; CHECK-NEXT: blr
+ %t0 = bitcast i32* %buffer1 to i8*
+ %t1 = bitcast i32* %buffer2 to i8*
+ %call = tail call signext i32 @memcmp(i8* %t0, i8* %t1, i64 8)
ret i32 %call
-
-; CHECK-LABEL: @test1
-; CHECK: ldbrx [[LOAD1:[0-9]+]]
-; CHECK-NEXT: ldbrx [[LOAD2:[0-9]+]]
-; CHECK-NEXT: li [[LI:[0-9]+]], 1
-; CHECK-NEXT: cmpld [[CMPLD:[0-9]+]], [[LOAD1]], [[LOAD2]]
-; CHECK-NEXT: subf. [[SUB:[0-9]+]], [[LOAD2]], [[LOAD1]]
-; CHECK-NEXT: li [[LI2:[0-9]+]], -1
-; CHECK-NEXT: isel [[ISEL:[0-9]+]], [[LI2]], [[LI]], 4
-; CHECK-NEXT: isel [[ISEL2:[0-9]+]], 0, [[ISEL]], 2
-; CHECK-NEXT: extsw 3, [[ISEL2]]
-; CHECK-NEXT: blr
}
-; Check size 4
-; Function Attrs: nounwind readonly
-define signext i32 @test2(i32* nocapture readonly %buffer1, i32* nocapture readonly %buffer2) local_unnamed_addr #0 {
-entry:
- %0 = bitcast i32* %buffer1 to i8*
- %1 = bitcast i32* %buffer2 to i8*
- %call = tail call signext i32 @memcmp(i8* %0, i8* %1, i64 4) #2
+define signext i32 @memcmp4(i32* nocapture readonly %buffer1, i32* nocapture readonly %buffer2) {
+; CHECK-LABEL: memcmp4:
+; CHECK: # BB#0:
+; CHECK-NEXT: lwbrx 3, 0, 3
+; CHECK-NEXT: lwbrx 4, 0, 4
+; CHECK-NEXT: li 5, 1
+; CHECK-NEXT: li 12, -1
+; CHECK-NEXT: cmplw 3, 4
+; CHECK-NEXT: isel 3, 12, 5, 0
+; CHECK-NEXT: isel 3, 0, 3, 2
+; CHECK-NEXT: blr
+ %t0 = bitcast i32* %buffer1 to i8*
+ %t1 = bitcast i32* %buffer2 to i8*
+ %call = tail call signext i32 @memcmp(i8* %t0, i8* %t1, i64 4)
ret i32 %call
-
-; CHECK-LABEL: @test2
-; CHECK: lwbrx [[LOAD1:[0-9]+]]
-; CHECK-NEXT: lwbrx [[LOAD2:[0-9]+]]
-; CHECK-NEXT: li [[LI:[0-9]+]], 1
-; CHECK-NEXT: cmpld [[CMPLD:[0-9]+]], [[LOAD1]], [[LOAD2]]
-; CHECK-NEXT: subf. [[SUB:[0-9]+]], [[LOAD2]], [[LOAD1]]
-; CHECK-NEXT: li [[LI2:[0-9]+]], -1
-; CHECK-NEXT: isel [[ISEL:[0-9]+]], [[LI2]], [[LI]], 4
-; CHECK-NEXT: isel [[ISEL2:[0-9]+]], 0, [[ISEL]], 2
-; CHECK-NEXT: extsw 3, [[ISEL2]]
-; CHECK-NEXT: blr
}
-; Check size 2
-; Function Attrs: nounwind readonly
-define signext i32 @test3(i32* nocapture readonly %buffer1, i32* nocapture readonly %buffer2) local_unnamed_addr #0 {
-entry:
- %0 = bitcast i32* %buffer1 to i8*
- %1 = bitcast i32* %buffer2 to i8*
- %call = tail call signext i32 @memcmp(i8* %0, i8* %1, i64 2) #2
+define signext i32 @memcmp2(i32* nocapture readonly %buffer1, i32* nocapture readonly %buffer2) {
+; CHECK-LABEL: memcmp2:
+; CHECK: # BB#0:
+; CHECK-NEXT: lhbrx 3, 0, 3
+; CHECK-NEXT: lhbrx 4, 0, 4
+; CHECK-NEXT: li 5, 1
+; CHECK-NEXT: li 12, -1
+; CHECK-NEXT: cmplw 3, 4
+; CHECK-NEXT: isel 3, 12, 5, 0
+; CHECK-NEXT: isel 3, 0, 3, 2
+; CHECK-NEXT: blr
+ %t0 = bitcast i32* %buffer1 to i8*
+ %t1 = bitcast i32* %buffer2 to i8*
+ %call = tail call signext i32 @memcmp(i8* %t0, i8* %t1, i64 2)
ret i32 %call
-
-; CHECK-LABEL: @test3
-; CHECK: lhbrx [[LOAD1:[0-9]+]]
-; CHECK-NEXT: lhbrx [[LOAD2:[0-9]+]]
-; CHECK-NEXT: li [[LI:[0-9]+]], 1
-; CHECK-NEXT: cmpld [[CMPLD:[0-9]+]], [[LOAD1]], [[LOAD2]]
-; CHECK-NEXT: subf. [[SUB:[0-9]+]], [[LOAD2]], [[LOAD1]]
-; CHECK-NEXT: li [[LI2:[0-9]+]], -1
-; CHECK-NEXT: isel [[ISEL:[0-9]+]], [[LI2]], [[LI]], 4
-; CHECK-NEXT: isel [[ISEL2:[0-9]+]], 0, [[ISEL]], 2
-; CHECK-NEXT: extsw 3, [[ISEL2]]
-; CHECK-NEXT: blr
}
-; Check size 1
-; Function Attrs: nounwind readonly
-define signext i32 @test4(i32* nocapture readonly %buffer1, i32* nocapture readonly %buffer2) local_unnamed_addr #0 {
-entry:
- %0 = bitcast i32* %buffer1 to i8*
- %1 = bitcast i32* %buffer2 to i8*
- %call = tail call signext i32 @memcmp(i8* %0, i8* %1, i64 1) #2
+define signext i32 @memcmp1(i32* nocapture readonly %buffer1, i32* nocapture readonly %buffer2) {
+; CHECK-LABEL: memcmp1:
+; CHECK: # BB#0:
+; CHECK-NEXT: lbz 3, 0(3)
+; CHECK-NEXT: lbz 4, 0(4)
+; CHECK-NEXT: li 5, 1
+; CHECK-NEXT: li 12, -1
+; CHECK-NEXT: cmplw 3, 4
+; CHECK-NEXT: isel 3, 12, 5, 0
+; CHECK-NEXT: isel 3, 0, 3, 2
+; CHECK-NEXT: blr
+ %t0 = bitcast i32* %buffer1 to i8*
+ %t1 = bitcast i32* %buffer2 to i8*
+ %call = tail call signext i32 @memcmp(i8* %t0, i8* %t1, i64 1) #2
ret i32 %call
-
-; CHECK-LABEL: @test4
-; CHECK: lbz [[LOAD1:[0-9]+]]
-; CHECK-NEXT: lbz [[LOAD2:[0-9]+]]
-; CHECK-NEXT: subf [[SUB:[0-9]+]], [[LOAD2]], [[LOAD1]]
-; CHECK-NEXT: extsw 3, [[SUB]]
-; CHECK-NEXT: blr
}
-; Function Attrs: nounwind readonly
-declare signext i32 @memcmp(i8*, i8*, i64) #1
+declare signext i32 @memcmp(i8*, i8*, i64)
diff --git a/test/CodeGen/PowerPC/memcmpIR.ll b/test/CodeGen/PowerPC/memcmpIR.ll
index f052cc258df8..55f48ad19a63 100644
--- a/test/CodeGen/PowerPC/memcmpIR.ll
+++ b/test/CodeGen/PowerPC/memcmpIR.ll
@@ -3,48 +3,47 @@
define signext i32 @test1(i32* nocapture readonly %buffer1, i32* nocapture readonly %buffer2) {
entry:
+ ; CHECK-LABEL: @test1(
; CHECK: [[LOAD1:%[0-9]+]] = load i64, i64*
; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64*
; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD1]])
; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD2]])
- ; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i64 [[BSWAP1]], [[BSWAP2]]
- ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0
- ; CHECK-NEXT: br i1 [[ICMP]], label %res_block, label
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[BSWAP1]], [[BSWAP2]]
+ ; CHECK-NEXT: br i1 [[ICMP]], label %loadbb1, label %res_block
; CHECK-LABEL: res_block:{{.*}}
; CHECK: [[ICMP2:%[0-9]+]] = icmp ult i64
; CHECK-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1
; CHECK-NEXT: br label %endblock
+ ; CHECK-LABEL: loadbb1:{{.*}}
; CHECK: [[GEP1:%[0-9]+]] = getelementptr i64, i64* {{.*}}, i64 1
; CHECK-NEXT: [[GEP2:%[0-9]+]] = getelementptr i64, i64* {{.*}}, i64 1
; CHECK-NEXT: [[LOAD1:%[0-9]+]] = load i64, i64* [[GEP1]]
; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* [[GEP2]]
; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD1]])
; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD2]])
- ; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i64 [[BSWAP1]], [[BSWAP2]]
- ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0
- ; CHECK-NEXT: br i1 [[ICMP]], label %res_block, label %endblock
-
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[BSWAP1]], [[BSWAP2]]
+ ; CHECK-NEXT: br i1 [[ICMP]], label %endblock, label %res_block
+ ; CHECK-BE-LABEL: @test1(
; CHECK-BE: [[LOAD1:%[0-9]+]] = load i64, i64*
; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64*
- ; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i64 [[LOAD1]], [[LOAD2]]
- ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0
- ; CHECK-BE-NEXT: br i1 [[ICMP]], label %res_block, label
+ ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[LOAD1]], [[LOAD2]]
+ ; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb1, label %res_block
; CHECK-BE-LABEL: res_block:{{.*}}
; CHECK-BE: [[ICMP2:%[0-9]+]] = icmp ult i64
; CHECK-BE-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1
; CHECK-BE-NEXT: br label %endblock
+ ; CHECK-BE-LABEL: loadbb1:{{.*}}
; CHECK-BE: [[GEP1:%[0-9]+]] = getelementptr i64, i64* {{.*}}, i64 1
; CHECK-BE-NEXT: [[GEP2:%[0-9]+]] = getelementptr i64, i64* {{.*}}, i64 1
; CHECK-BE-NEXT: [[LOAD1:%[0-9]+]] = load i64, i64* [[GEP1]]
; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* [[GEP2]]
- ; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i64 [[LOAD1]], [[LOAD2]]
- ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0
- ; CHECK-BE-NEXT: br i1 [[ICMP]], label %res_block, label %endblock
+ ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[LOAD1]], [[LOAD2]]
+ ; CHECK-BE-NEXT: br i1 [[ICMP]], label %endblock, label %res_block
%0 = bitcast i32* %buffer1 to i8*
%1 = bitcast i32* %buffer2 to i8*
@@ -55,33 +54,25 @@ entry:
declare signext i32 @memcmp(i8* nocapture, i8* nocapture, i64) local_unnamed_addr #1
define signext i32 @test2(i32* nocapture readonly %buffer1, i32* nocapture readonly %buffer2) {
+ ; CHECK-LABEL: @test2(
; CHECK: [[LOAD1:%[0-9]+]] = load i32, i32*
; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i32, i32*
; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD1]])
; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD2]])
- ; CHECK-NEXT: [[ZEXT1:%[0-9]+]] = zext i32 [[BSWAP1]] to i64
- ; CHECK-NEXT: [[ZEXT2:%[0-9]+]] = zext i32 [[BSWAP2]] to i64
- ; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i64 [[ZEXT1]], [[ZEXT2]]
- ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0
- ; CHECK-NEXT: br i1 [[ICMP]], label %res_block, label %endblock
-
- ; CHECK-LABEL: res_block:{{.*}}
- ; CHECK: [[ICMP2:%[0-9]+]] = icmp ult i64
- ; CHECK-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1
- ; CHECK-NEXT: br label %endblock
+ ; CHECK-NEXT: [[CMP1:%[0-9]+]] = icmp ne i32 [[BSWAP1]], [[BSWAP2]]
+ ; CHECK-NEXT: [[CMP2:%[0-9]+]] = icmp ult i32 [[BSWAP1]], [[BSWAP2]]
+ ; CHECK-NEXT: [[SELECT1:%[0-9]+]] = select i1 [[CMP2]], i32 -1, i32 1
+ ; CHECK-NEXT: [[SELECT2:%[0-9]+]] = select i1 [[CMP1]], i32 [[SELECT1]], i32 0
+ ; CHECK-NEXT: ret i32 [[SELECT2]]
+ ; CHECK-BE-LABEL: @test2(
; CHECK-BE: [[LOAD1:%[0-9]+]] = load i32, i32*
; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i32, i32*
- ; CHECK-BE-NEXT: [[ZEXT1:%[0-9]+]] = zext i32 [[LOAD1]] to i64
- ; CHECK-BE-NEXT: [[ZEXT2:%[0-9]+]] = zext i32 [[LOAD2]] to i64
- ; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i64 [[ZEXT1]], [[ZEXT2]]
- ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0
- ; CHECK-BE-NEXT: br i1 [[ICMP]], label %res_block, label %endblock
-
- ; CHECK-BE-LABEL: res_block:{{.*}}
- ; CHECK-BE: [[ICMP2:%[0-9]+]] = icmp ult i64
- ; CHECK-BE-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1
- ; CHECK-BE-NEXT: br label %endblock
+ ; CHECK-BE-NEXT: [[CMP1:%[0-9]+]] = icmp ne i32 [[LOAD1]], [[LOAD2]]
+ ; CHECK-BE-NEXT: [[CMP2:%[0-9]+]] = icmp ult i32 [[LOAD1]], [[LOAD2]]
+ ; CHECK-BE-NEXT: [[SELECT1:%[0-9]+]] = select i1 [[CMP2]], i32 -1, i32 1
+ ; CHECK-BE-NEXT: [[SELECT2:%[0-9]+]] = select i1 [[CMP1]], i32 [[SELECT1]], i32 0
+ ; CHECK-BE-NEXT: ret i32 [[SELECT2]]
entry:
%0 = bitcast i32* %buffer1 to i8*
@@ -95,35 +86,35 @@ define signext i32 @test3(i32* nocapture readonly %buffer1, i32* nocapture reado
; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64*
; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD1]])
; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD2]])
- ; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i64 [[BSWAP1]], [[BSWAP2]]
- ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0
- ; CHECK-NEXT: br i1 [[ICMP]], label %res_block, label
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[BSWAP1]], [[BSWAP2]]
+ ; CHECK-NEXT: br i1 [[ICMP]], label %loadbb1, label %res_block
; CHECK-LABEL: res_block:{{.*}}
; CHECK: [[ICMP2:%[0-9]+]] = icmp ult i64
; CHECK-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1
; CHECK-NEXT: br label %endblock
+ ; CHECK-LABEL: loadbb1:{{.*}}
; CHECK: [[LOAD1:%[0-9]+]] = load i32, i32*
; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i32, i32*
; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD1]])
; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD2]])
; CHECK-NEXT: [[ZEXT1:%[0-9]+]] = zext i32 [[BSWAP1]] to i64
; CHECK-NEXT: [[ZEXT2:%[0-9]+]] = zext i32 [[BSWAP2]] to i64
- ; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i64 [[ZEXT1]], [[ZEXT2]]
- ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0
- ; CHECK-NEXT: br i1 [[ICMP]], label %res_block, label
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[ZEXT1]], [[ZEXT2]]
+ ; CHECK-NEXT: br i1 [[ICMP]], label %loadbb2, label %res_block
+ ; CHECK-LABEL: loadbb2:{{.*}}
; CHECK: [[LOAD1:%[0-9]+]] = load i16, i16*
; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i16, i16*
; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i16 @llvm.bswap.i16(i16 [[LOAD1]])
; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i16 @llvm.bswap.i16(i16 [[LOAD2]])
; CHECK-NEXT: [[ZEXT1:%[0-9]+]] = zext i16 [[BSWAP1]] to i64
; CHECK-NEXT: [[ZEXT2:%[0-9]+]] = zext i16 [[BSWAP2]] to i64
- ; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i64 [[ZEXT1]], [[ZEXT2]]
- ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0
- ; CHECK-NEXT: br i1 [[ICMP]], label %res_block, label
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[ZEXT1]], [[ZEXT2]]
+ ; CHECK-NEXT: br i1 [[ICMP]], label %loadbb3, label %res_block
+ ; CHECK-LABEL: loadbb3:{{.*}}
; CHECK: [[LOAD1:%[0-9]+]] = load i8, i8*
; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i8, i8*
; CHECK-NEXT: [[ZEXT1:%[0-9]+]] = zext i8 [[LOAD1]] to i32
@@ -133,9 +124,8 @@ define signext i32 @test3(i32* nocapture readonly %buffer1, i32* nocapture reado
; CHECK-BE: [[LOAD1:%[0-9]+]] = load i64, i64*
; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64*
- ; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i64 [[LOAD1]], [[LOAD2]]
- ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0
- ; CHECK-BE-NEXT: br i1 [[ICMP]], label %res_block, label
+ ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[LOAD1]], [[LOAD2]]
+ ; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb1, label %res_block
; CHECK-BE-LABEL: res_block:{{.*}}
; CHECK-BE: [[ICMP2:%[0-9]+]] = icmp ult i64
@@ -146,17 +136,15 @@ define signext i32 @test3(i32* nocapture readonly %buffer1, i32* nocapture reado
; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i32, i32*
; CHECK-BE-NEXT: [[ZEXT1:%[0-9]+]] = zext i32 [[LOAD1]] to i64
; CHECK-BE-NEXT: [[ZEXT2:%[0-9]+]] = zext i32 [[LOAD2]] to i64
- ; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i64 [[ZEXT1]], [[ZEXT2]]
- ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0
- ; CHECK-BE-NEXT: br i1 [[ICMP]], label %res_block, label
+ ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[ZEXT1]], [[ZEXT2]]
+ ; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb2, label %res_block
; CHECK-BE: [[LOAD1:%[0-9]+]] = load i16, i16*
; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i16, i16*
; CHECK-BE-NEXT: [[ZEXT1:%[0-9]+]] = zext i16 [[LOAD1]] to i64
; CHECK-BE-NEXT: [[ZEXT2:%[0-9]+]] = zext i16 [[LOAD2]] to i64
- ; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i64 [[ZEXT1]], [[ZEXT2]]
- ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0
- ; CHECK-BE-NEXT: br i1 [[ICMP]], label %res_block, label
+ ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[ZEXT1]], [[ZEXT2]]
+ ; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb3, label %res_block
; CHECK-BE: [[LOAD1:%[0-9]+]] = load i8, i8*
; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i8, i8*
diff --git a/test/CodeGen/PowerPC/merge_stores_dereferenceable.ll b/test/CodeGen/PowerPC/merge_stores_dereferenceable.ll
new file mode 100644
index 000000000000..29aee7a3825f
--- /dev/null
+++ b/test/CodeGen/PowerPC/merge_stores_dereferenceable.ll
@@ -0,0 +1,24 @@
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+
+; This code causes an assertion failure if the dereferenceable flag is not properly set when merging consecutive stores
+; CHECK-LABEL: func:
+; CHECK: lxvd2x [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK-NOT: lxvd2x
+; CHECK: stxvd2x [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+
+define <2 x i64> @func(i64* %pdst) {
+entry:
+ %a = alloca [4 x i64], align 8
+ %psrc0 = bitcast [4 x i64]* %a to i64*
+ %psrc1 = getelementptr inbounds i64, i64* %psrc0, i64 1
+ %d0 = load i64, i64* %psrc0
+ %d1 = load i64, i64* %psrc1
+ %pdst0 = getelementptr inbounds i64, i64* %pdst, i64 0
+ %pdst1 = getelementptr inbounds i64, i64* %pdst, i64 1
+ store i64 %d0, i64* %pdst0, align 8
+ store i64 %d1, i64* %pdst1, align 8
+ %psrcd = bitcast [4 x i64]* %a to <2 x i64>*
+ %vec = load <2 x i64>, <2 x i64>* %psrcd
+ ret <2 x i64> %vec
+}
+
diff --git a/test/CodeGen/PowerPC/ppc64-align-long-double.ll b/test/CodeGen/PowerPC/ppc64-align-long-double.ll
index d59dc64dcf85..ba56dbaa83d0 100644
--- a/test/CodeGen/PowerPC/ppc64-align-long-double.ll
+++ b/test/CodeGen/PowerPC/ppc64-align-long-double.ll
@@ -1,6 +1,6 @@
-; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O0 -fast-isel=false -mattr=-vsx < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O0 -fast-isel=false -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-VSX %s
-; RUN: llc -verify-machineinstrs -mcpu=pwr9 -O0 -fast-isel=false -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-P9 %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O2 -fast-isel=false -mattr=-vsx < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O2 -fast-isel=false -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-VSX %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -O2 -fast-isel=false -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-P9 %s
; Verify internal alignment of long double in a struct. The double
; argument comes in in GPR3; GPR4 is skipped; GPRs 5 and 6 contain
@@ -44,9 +44,9 @@ entry:
; CHECK-VSX-DAG: std 3, 48(1)
; CHECK-VSX-DAG: std 5, -16(1)
; CHECK-VSX-DAG: std 6, -8(1)
-; CHECK-VSX: addi 3, 1, -16
-; CHECK-VSX: lxsdx 1, 0, 3
-; CHECK-VSX: addi 3, 1, -8
+; CHECK-VSX-DAG: addi [[REG1:[0-9]+]], 1, -16
+; CHECK-VSX-DAG: addi 3, 1, -8
+; CHECK-VSX: lxsdx 1, 0, [[REG1]]
; CHECK-VSX: lxsdx 2, 0, 3
; FIXME-VSX: addi 4, 1, 48
@@ -54,9 +54,9 @@ entry:
; FIXME-VSX: li 3, 24
; FIXME-VSX: lxsdx 2, 4, 3
-; CHECK-P9: std 6, 72(1)
-; CHECK-P9: std 5, 64(1)
-; CHECK-P9: std 4, 56(1)
-; CHECK-P9: std 3, 48(1)
-; CHECK-P9: mtvsrd 1, 5
-; CHECK-P9: mtvsrd 2, 6
+; CHECK-P9-DAG: std 6, 72(1)
+; CHECK-P9-DAG: std 5, 64(1)
+; CHECK-P9-DAG: std 4, 56(1)
+; CHECK-P9-DAG: std 3, 48(1)
+; CHECK-P9-DAG: mtvsrd 1, 5
+; CHECK-P9-DAG: mtvsrd 2, 6
diff --git a/test/CodeGen/PowerPC/tls.ll b/test/CodeGen/PowerPC/tls.ll
index 55df71b53761..63f498c1662c 100644
--- a/test/CodeGen/PowerPC/tls.ll
+++ b/test/CodeGen/PowerPC/tls.ll
@@ -11,8 +11,8 @@ target triple = "powerpc64-unknown-linux-gnu"
define i32 @localexec() nounwind {
entry:
;OPT0: addis [[REG1:[0-9]+]], 13, a@tprel@ha
-;OPT0-NEXT: li [[REG2:[0-9]+]], 42
;OPT0-NEXT: addi [[REG1]], [[REG1]], a@tprel@l
+;OPT0-NEXT: li [[REG2:[0-9]+]], 42
;OPT0: stw [[REG2]], 0([[REG1]])
;OPT1: addis [[REG1:[0-9]+]], 13, a@tprel@ha
;OPT1-NEXT: li [[REG2:[0-9]+]], 42
diff --git a/test/CodeGen/PowerPC/tls_get_addr_fence1.mir b/test/CodeGen/PowerPC/tls_get_addr_fence1.mir
new file mode 100644
index 000000000000..fa8e73e321dd
--- /dev/null
+++ b/test/CodeGen/PowerPC/tls_get_addr_fence1.mir
@@ -0,0 +1,66 @@
+# ADJCALLSTACKDOWN and ADJCALLSTACKUP must be generated around TLS pseudo code as a scheduling fence (PR25839).
+# RUN: llc -mtriple=powerpc64le-linux-gnu -run-pass=ppc-tls-dynamic-call -verify-machineinstrs -o - %s | FileCheck %s
+
+--- |
+ target datalayout = "e-m:e-i64:64-n32:64"
+ target triple = "powerpc64le-unknown-linux-gnu"
+
+ @tls_var = external thread_local local_unnamed_addr global i32
+
+ define i32 @tls_func() local_unnamed_addr {
+ entry:
+ %0 = load i32, i32* @tls_var
+ ret i32 %0
+ }
+
+...
+---
+name: tls_func
+alignment: 4
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+ - { id: 1, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+ - { id: 2, class: g8rc, preferred-register: '' }
+liveins:
+ - { reg: '%x2' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ maxCallFrameSize: 4294967295
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ savePoint: ''
+ restorePoint: ''
+fixedStack:
+stack:
+constants:
+body: |
+ bb.0.entry:
+ liveins: %x2
+ %0 = ADDIStlsgdHA %x2, @tls_var
+ %1 = ADDItlsgdLADDR killed %0, @tls_var, @tls_var, implicit-def dead %x0, implicit-def dead %x3, implicit-def dead %x4, implicit-def dead %x5, implicit-def dead %x6, implicit-def dead %x7, implicit-def dead %x8, implicit-def dead %x9, implicit-def dead %x10, implicit-def dead %x11, implicit-def dead %x12, implicit-def dead %lr8, implicit-def dead %ctr8, implicit-def dead %cr0, implicit-def dead %cr1, implicit-def dead %cr5, implicit-def dead %cr6, implicit-def dead %cr7
+ %2 = LWZ8 0, killed %1 :: (dereferenceable load 4 from @tls_var)
+ %x3 = COPY %2
+ BLR8 implicit %lr8, implicit %rm, implicit %x3
+ ; CHECK-LABEL: bb.0.entry
+ ; CHECK: %[[reg1:[0-9]+]] = ADDIStlsgdHA %x2, @tls_var
+ ; CHECK: ADJCALLSTACKDOWN 0, 0
+ ; CHECK: %x3 = ADDItlsgdL %[[reg1]], @tls_var
+ ; CHECK: %x3 = GETtlsADDR %x3, @tls_var
+ ; CHECK: ADJCALLSTACKUP 0, 0
+ ; CHECK: BLR8
+...
diff --git a/test/CodeGen/PowerPC/tls_get_addr_fence2.mir b/test/CodeGen/PowerPC/tls_get_addr_fence2.mir
new file mode 100644
index 000000000000..2bb88147fcf4
--- /dev/null
+++ b/test/CodeGen/PowerPC/tls_get_addr_fence2.mir
@@ -0,0 +1,65 @@
+# ADJCALLSTACKDOWN and ADJCALLSTACKUP should not be generated around TLS pseudo code if it is located within an existing ADJCALLSTACKDOWN/ADJCALLSTACKUP pair.
+# RUN: llc -mtriple=powerpc64le-linux-gnu -run-pass=ppc-tls-dynamic-call -verify-machineinstrs -o - %s | FileCheck %s
+
+--- |
+ target datalayout = "e-m:e-i64:64-n32:64"
+ target triple = "powerpc64le-unknown-linux-gnu"
+
+ @tls_var = external thread_local local_unnamed_addr global i32
+
+ define i32 @tls_func() local_unnamed_addr {
+ entry:
+ %0 = load i32, i32* @tls_var
+ ret i32 %0
+ }
+
+...
+---
+name: tls_func
+alignment: 4
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+ - { id: 1, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+ - { id: 2, class: g8rc, preferred-register: '' }
+liveins:
+ - { reg: '%x2' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ maxCallFrameSize: 4294967295
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ savePoint: ''
+ restorePoint: ''
+fixedStack:
+stack:
+constants:
+body: |
+ bb.0.entry:
+ liveins: %x2
+ ADJCALLSTACKDOWN 32, 0, implicit-def %r1, implicit %r1
+ %0 = ADDIStlsgdHA %x2, @tls_var
+ %1 = ADDItlsgdLADDR killed %0, @tls_var, @tls_var, implicit-def dead %x0, implicit-def dead %x3, implicit-def dead %x4, implicit-def dead %x5, implicit-def dead %x6, implicit-def dead %x7, implicit-def dead %x8, implicit-def dead %x9, implicit-def dead %x10, implicit-def dead %x11, implicit-def dead %x12, implicit-def dead %lr8, implicit-def dead %ctr8, implicit-def dead %cr0, implicit-def dead %cr1, implicit-def dead %cr5, implicit-def dead %cr6, implicit-def dead %cr7
+ %2 = LWZ8 0, killed %1 :: (dereferenceable load 4 from @tls_var)
+ %x3 = COPY %2
+ ADJCALLSTACKUP 32, 0, implicit-def %r1, implicit %r1
+ BLR8 implicit %lr8, implicit %rm, implicit %x3
+ ; CHECK-LABEL: bb.0.entry
+ ; CHECK-NOT: ADJCALLSTACKDOWN 0, 0
+ ; CHECK-NOT: ADJCALLSTACKUP 0, 0
+ ; CHECK: BLR8
+...
diff --git a/test/CodeGen/Thumb/long-setcc.ll b/test/CodeGen/Thumb/long-setcc.ll
index 3460edb96f0d..7db06d0ae35e 100644
--- a/test/CodeGen/Thumb/long-setcc.ll
+++ b/test/CodeGen/Thumb/long-setcc.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=thumb-eabi %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi < %s | FileCheck %s
define i1 @t1(i64 %x) {
%B = icmp slt i64 %x, 0
diff --git a/test/CodeGen/Thumb2/constant-islands-new-island.ll b/test/CodeGen/Thumb2/constant-islands-new-island.ll
index 8ed657ef1f2a..de7b0cce3792 100644
--- a/test/CodeGen/Thumb2/constant-islands-new-island.ll
+++ b/test/CodeGen/Thumb2/constant-islands-new-island.ll
@@ -1,25 +1,25 @@
; RUN: llc < %s -mtriple=thumbv7-linux-gnueabihf %s -o - | FileCheck %s
-; Check that new water is created by splitting the basic block right after the
+; Check that new water is created by splitting the basic block after the
; load instruction. Previously, new water was created before the load
; instruction, which caused the pass to fail to converge.
define void @test(i1 %tst) {
; CHECK-LABEL: test:
; CHECK: vldr {{s[0-9]+}}, [[CONST:\.LCPI[0-9]+_[0-9]+]]
-; CHECK-NEXT: b.w [[CONTINUE:\.LBB[0-9]+_[0-9]+]]
+; CHECK: b.w [[CONTINUE:\.LBB[0-9]+_[0-9]+]]
; CHECK: [[CONST]]:
; CHECK-NEXT: .long
; CHECK: [[CONTINUE]]:
entry:
- call i32 @llvm.arm.space(i32 2000, i32 undef)
br i1 %tst, label %true, label %false
true:
%val = phi float [12345.0, %entry], [undef, %false]
+ call i32 @llvm.arm.space(i32 2000, i32 undef)
call void @bar(float %val)
ret void
diff --git a/test/CodeGen/Thumb2/ifcvt-no-branch-predictor.ll b/test/CodeGen/Thumb2/ifcvt-no-branch-predictor.ll
new file mode 100644
index 000000000000..9fcc0f5d617b
--- /dev/null
+++ b/test/CodeGen/Thumb2/ifcvt-no-branch-predictor.ll
@@ -0,0 +1,154 @@
+; RUN: llc < %s -mtriple=thumbv7m -mcpu=cortex-m7 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BP
+; RUN: llc < %s -mtriple=thumbv7m -mcpu=cortex-m3 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOBP
+
+declare void @otherfn()
+
+; CHECK-LABEL: triangle1:
+; CHECK: itt ne
+; CHECK: movne
+; CHECK: strne
+define i32 @triangle1(i32 %n, i32* %p) {
+entry:
+ %tobool = icmp eq i32 %n, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then:
+ store i32 1, i32* %p, align 4
+ br label %if.end
+
+if.end:
+ tail call void @otherfn()
+ ret i32 0
+}
+
+; CHECK-LABEL: triangle2:
+; CHECK-BP: itttt ne
+; CHECK-BP: movne
+; CHECK-BP: strne
+; CHECK-BP: movne
+; CHECK-BP: strne
+; CHECK-NOBP: cbz
+; CHECK-NOBP: movs
+; CHECK-NOBP: str
+; CHECK-NOBP: movs
+; CHECK-NOBP: str
+define i32 @triangle2(i32 %n, i32* %p, i32* %q) {
+entry:
+ %tobool = icmp eq i32 %n, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then:
+ store i32 1, i32* %p, align 4
+ store i32 2, i32* %q, align 4
+ br label %if.end
+
+if.end:
+ tail call void @otherfn()
+ ret i32 0
+}
+
+; CHECK-LABEL: triangle3:
+; CHECK: cbz
+; CHECK: movs
+; CHECK: str
+; CHECK: movs
+; CHECK: str
+; CHECK: movs
+; CHECK: str
+define i32 @triangle3(i32 %n, i32* %p, i32* %q, i32* %r) {
+entry:
+ %tobool = icmp eq i32 %n, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then:
+ store i32 1, i32* %p, align 4
+ store i32 2, i32* %q, align 4
+ store i32 3, i32* %r, align 4
+ br label %if.end
+
+if.end:
+ tail call void @otherfn()
+ ret i32 0
+}
+
+; CHECK-LABEL: diamond1:
+; CHECK: ite eq
+; CHECK: ldreq
+; CHECK: strne
+define i32 @diamond1(i32 %n, i32* %p) {
+entry:
+ %tobool = icmp eq i32 %n, 0
+ br i1 %tobool, label %if.else, label %if.then
+
+if.then:
+ store i32 %n, i32* %p, align 4
+ br label %if.end
+
+if.else:
+ %0 = load i32, i32* %p, align 4
+ br label %if.end
+
+if.end:
+ %n.addr.0 = phi i32 [ %n, %if.then ], [ %0, %if.else ]
+ tail call void @otherfn()
+ ret i32 %n.addr.0
+}
+
+; CHECK-LABEL: diamond2:
+; CHECK-BP: itte
+; CHECK-BP: streq
+; CHECK-BP: ldreq
+; CHECK-BP: strne
+; CHECK-NOBP: cbz
+; CHECK-NOBP: str
+; CHECK-NOBP: b
+; CHECK-NOBP: str
+; CHECK-NOBP: ldr
+define i32 @diamond2(i32 %n, i32 %m, i32* %p, i32* %q) {
+entry:
+ %tobool = icmp eq i32 %n, 0
+ br i1 %tobool, label %if.else, label %if.then
+
+if.then:
+ store i32 %n, i32* %p, align 4
+ br label %if.end
+
+if.else:
+ store i32 %m, i32* %q, align 4
+ %0 = load i32, i32* %p, align 4
+ br label %if.end
+
+if.end:
+ %n.addr.0 = phi i32 [ %n, %if.then ], [ %0, %if.else ]
+ tail call void @otherfn()
+ ret i32 %n.addr.0
+}
+
+; CHECK-LABEL: diamond3:
+; CHECK: cbz
+; CHECK: movs
+; CHECK: str
+; CHECK: b
+; CHECK: ldr
+; CHECK: ldr
+; CHECK: adds
+define i32 @diamond3(i32 %n, i32* %p, i32* %q) {
+entry:
+ %tobool = icmp eq i32 %n, 0
+ br i1 %tobool, label %if.else, label %if.then
+
+if.then:
+ store i32 1, i32* %p, align 4
+ br label %if.end
+
+if.else:
+ %0 = load i32, i32* %p, align 4
+ %1 = load i32, i32* %q, align 4
+ %add = add nsw i32 %1, %0
+ br label %if.end
+
+if.end:
+ %n.addr.0 = phi i32 [ %n, %if.then ], [ %add, %if.else ]
+ tail call void @otherfn()
+ ret i32 %n.addr.0
+}
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt2.ll b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
index 4a76e100b658..3c74dde11148 100644
--- a/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
@@ -35,9 +35,6 @@ entry:
; CHECK: cmp
; CHECK: it eq
; CHECK: cmpeq
-; CHECK: itt eq
-; CHECK: moveq
-; CHECK: popeq
br label %tailrecurse
tailrecurse: ; preds = %bb, %entry
diff --git a/test/CodeGen/WebAssembly/exception.ll b/test/CodeGen/WebAssembly/exception.ll
new file mode 100644
index 000000000000..eedb5c78b241
--- /dev/null
+++ b/test/CodeGen/WebAssembly/exception.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -disable-wasm-explicit-locals | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown-wasm"
+
+declare void @llvm.wasm.throw(i32, i8*)
+declare void @llvm.wasm.rethrow()
+
+; CHECK-LABEL: throw:
+; CHECK-NEXT: i32.const $push0=, 0
+; CHECK-NEXT: throw 0, $pop0
+define void @throw() {
+ call void @llvm.wasm.throw(i32 0, i8* null)
+ ret void
+}
+
+; CHECK-LABEL: rethrow:
+; CHECK-NEXT: rethrow 0
+define void @rethrow() {
+ call void @llvm.wasm.rethrow()
+ ret void
+}
diff --git a/test/CodeGen/X86/GlobalISel/and-scalar.ll b/test/CodeGen/X86/GlobalISel/and-scalar.ll
new file mode 100644
index 000000000000..b19321421087
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/and-scalar.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=ALL
+
+define i8 @test_and_i8(i8 %arg1, i8 %arg2) {
+; ALL-LABEL: test_and_i8:
+; ALL: # BB#0:
+; ALL-NEXT: andb %dil, %sil
+; ALL-NEXT: movl %esi, %eax
+; ALL-NEXT: retq
+ %ret = and i8 %arg1, %arg2
+ ret i8 %ret
+}
+
+define i16 @test_and_i16(i16 %arg1, i16 %arg2) {
+; ALL-LABEL: test_and_i16:
+; ALL: # BB#0:
+; ALL-NEXT: andw %di, %si
+; ALL-NEXT: movl %esi, %eax
+; ALL-NEXT: retq
+ %ret = and i16 %arg1, %arg2
+ ret i16 %ret
+}
+
+define i32 @test_and_i32(i32 %arg1, i32 %arg2) {
+; ALL-LABEL: test_and_i32:
+; ALL: # BB#0:
+; ALL-NEXT: andl %edi, %esi
+; ALL-NEXT: movl %esi, %eax
+; ALL-NEXT: retq
+ %ret = and i32 %arg1, %arg2
+ ret i32 %ret
+}
+
+define i64 @test_and_i64(i64 %arg1, i64 %arg2) {
+; ALL-LABEL: test_and_i64:
+; ALL: # BB#0:
+; ALL-NEXT: andq %rdi, %rsi
+; ALL-NEXT: movq %rsi, %rax
+; ALL-NEXT: retq
+ %ret = and i64 %arg1, %arg2
+ ret i64 %ret
+}
+
diff --git a/test/CodeGen/X86/GlobalISel/fadd-scalar.ll b/test/CodeGen/X86/GlobalISel/fadd-scalar.ll
new file mode 100644
index 000000000000..6aee06a75f6a
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/fadd-scalar.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X64
+define float @test_fadd_float(float %arg1, float %arg2) {
+; ALL-LABEL: test_fadd_float:
+; ALL: # BB#0:
+; ALL-NEXT: addss %xmm1, %xmm0
+; ALL-NEXT: retq
+ %ret = fadd float %arg1, %arg2
+ ret float %ret
+}
+
+define double @test_fadd_double(double %arg1, double %arg2) {
+; ALL-LABEL: test_fadd_double:
+; ALL: # BB#0:
+; ALL-NEXT: addsd %xmm1, %xmm0
+; ALL-NEXT: retq
+ %ret = fadd double %arg1, %arg2
+ ret double %ret
+}
+
diff --git a/test/CodeGen/X86/GlobalISel/fdiv-scalar.ll b/test/CodeGen/X86/GlobalISel/fdiv-scalar.ll
new file mode 100644
index 000000000000..268802dc06aa
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/fdiv-scalar.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X64
+define float @test_fdiv_float(float %arg1, float %arg2) {
+; ALL-LABEL: test_fdiv_float:
+; ALL: # BB#0:
+; ALL-NEXT: divss %xmm1, %xmm0
+; ALL-NEXT: retq
+ %ret = fdiv float %arg1, %arg2
+ ret float %ret
+}
+
+define double @test_fdiv_double(double %arg1, double %arg2) {
+; ALL-LABEL: test_fdiv_double:
+; ALL: # BB#0:
+; ALL-NEXT: divsd %xmm1, %xmm0
+; ALL-NEXT: retq
+ %ret = fdiv double %arg1, %arg2
+ ret double %ret
+}
+
diff --git a/test/CodeGen/X86/GlobalISel/fmul-scalar.ll b/test/CodeGen/X86/GlobalISel/fmul-scalar.ll
new file mode 100644
index 000000000000..c7a37a14c33c
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/fmul-scalar.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X64
+define float @test_fmul_float(float %arg1, float %arg2) {
+; ALL-LABEL: test_fmul_float:
+; ALL: # BB#0:
+; ALL-NEXT: mulss %xmm1, %xmm0
+; ALL-NEXT: retq
+ %ret = fmul float %arg1, %arg2
+ ret float %ret
+}
+
+define double @test_fmul_double(double %arg1, double %arg2) {
+; ALL-LABEL: test_fmul_double:
+; ALL: # BB#0:
+; ALL-NEXT: mulsd %xmm1, %xmm0
+; ALL-NEXT: retq
+ %ret = fmul double %arg1, %arg2
+ ret double %ret
+}
+
diff --git a/test/CodeGen/X86/GlobalISel/fsub-scalar.ll b/test/CodeGen/X86/GlobalISel/fsub-scalar.ll
new file mode 100644
index 000000000000..32c25a3a0822
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/fsub-scalar.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X64
+define float @test_fsub_float(float %arg1, float %arg2) {
+; ALL-LABEL: test_fsub_float:
+; ALL: # BB#0:
+; ALL-NEXT: subss %xmm1, %xmm0
+; ALL-NEXT: retq
+ %ret = fsub float %arg1, %arg2
+ ret float %ret
+}
+
+define double @test_fsub_double(double %arg1, double %arg2) {
+; ALL-LABEL: test_fsub_double:
+; ALL: # BB#0:
+; ALL-NEXT: subsd %xmm1, %xmm0
+; ALL-NEXT: retq
+ %ret = fsub double %arg1, %arg2
+ ret double %ret
+}
+
diff --git a/test/CodeGen/X86/GlobalISel/legalize-and-scalar.mir b/test/CodeGen/X86/GlobalISel/legalize-and-scalar.mir
new file mode 100644
index 000000000000..b57db15d4646
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/legalize-and-scalar.mir
@@ -0,0 +1,124 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=legalizer %s -o - | FileCheck %s
+
+--- |
+ define i8 @test_and_i8() {
+ %ret = and i8 undef, undef
+ ret i8 %ret
+ }
+
+ define i16 @test_and_i16() {
+ %ret = and i16 undef, undef
+ ret i16 %ret
+ }
+
+ define i32 @test_and_i32() {
+ %ret = and i32 undef, undef
+ ret i32 %ret
+ }
+
+ define i64 @test_and_i64() {
+ %ret = and i64 undef, undef
+ ret i64 %ret
+ }
+
+...
+---
+name: test_and_i8
+# CHECK-LABEL: name: test_and_i8
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _, preferred-register: '' }
+ - { id: 1, class: _, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# CHECK: %0(s8) = IMPLICIT_DEF
+# CHECK-NEXT: %1(s8) = G_AND %0, %0
+# CHECK-NEXT: %al = COPY %1(s8)
+# CHECK-NEXT: RET 0, implicit %al
+body: |
+ bb.1 (%ir-block.0):
+ %0(s8) = IMPLICIT_DEF
+ %1(s8) = G_AND %0, %0
+ %al = COPY %1(s8)
+ RET 0, implicit %al
+
+...
+---
+name: test_and_i16
+# CHECK-LABEL: name: test_and_i16
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _, preferred-register: '' }
+ - { id: 1, class: _, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# CHECK: %0(s16) = IMPLICIT_DEF
+# CHECK-NEXT: %1(s16) = G_AND %0, %0
+# CHECK-NEXT: %ax = COPY %1(s16)
+# CHECK-NEXT: RET 0, implicit %ax
+body: |
+ bb.1 (%ir-block.0):
+ %0(s16) = IMPLICIT_DEF
+ %1(s16) = G_AND %0, %0
+ %ax = COPY %1(s16)
+ RET 0, implicit %ax
+
+...
+---
+name: test_and_i32
+# CHECK-LABEL: name: test_and_i32
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _, preferred-register: '' }
+ - { id: 1, class: _, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# CHECK: %0(s32) = IMPLICIT_DEF
+# CHECK-NEXT: %1(s32) = G_AND %0, %0
+# CHECK-NEXT: %eax = COPY %1(s32)
+# CHECK-NEXT: RET 0, implicit %eax
+body: |
+ bb.1 (%ir-block.0):
+ %0(s32) = IMPLICIT_DEF
+ %1(s32) = G_AND %0, %0
+ %eax = COPY %1(s32)
+ RET 0, implicit %eax
+
+...
+---
+name: test_and_i64
+# CHECK-LABEL: name: test_and_i64
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _, preferred-register: '' }
+ - { id: 1, class: _, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# CHECK: %0(s64) = IMPLICIT_DEF
+# CHECK-NEXT: %1(s64) = G_AND %0, %0
+# CHECK-NEXT: %rax = COPY %1(s64)
+# CHECK-NEXT: RET 0, implicit %rax
+body: |
+ bb.1 (%ir-block.0):
+ %0(s64) = IMPLICIT_DEF
+ %1(s64) = G_AND %0, %0
+ %rax = COPY %1(s64)
+ RET 0, implicit %rax
+
+...
diff --git a/test/CodeGen/X86/GlobalISel/legalize-fadd-scalar.mir b/test/CodeGen/X86/GlobalISel/legalize-fadd-scalar.mir
new file mode 100644
index 000000000000..353a26ca2c8a
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/legalize-fadd-scalar.mir
@@ -0,0 +1,74 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=legalizer %s -o - | FileCheck %s
+--- |
+
+ define float @test_fadd_float(float %arg1, float %arg2) {
+ %ret = fadd float %arg1, %arg2
+ ret float %ret
+ }
+
+ define double @test_fadd_double(double %arg1, double %arg2) {
+ %ret = fadd double %arg1, %arg2
+ ret double %ret
+ }
+
+...
+---
+name: test_fadd_float
+# CHECK-LABEL: name: test_fadd_float
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _, preferred-register: '' }
+ - { id: 1, class: _, preferred-register: '' }
+ - { id: 2, class: _, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# CHECK: %0(s32) = COPY %xmm0
+# CHECK-NEXT: %1(s32) = COPY %xmm1
+# CHECK-NEXT: %2(s32) = G_FADD %0, %1
+# CHECK-NEXT: %xmm0 = COPY %2(s32)
+# CHECK-NEXT: RET 0, implicit %xmm0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(s32) = COPY %xmm0
+ %1(s32) = COPY %xmm1
+ %2(s32) = G_FADD %0, %1
+ %xmm0 = COPY %2(s32)
+ RET 0, implicit %xmm0
+
+...
+---
+name: test_fadd_double
+# CHECK-LABEL: name: test_fadd_double
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _, preferred-register: '' }
+ - { id: 1, class: _, preferred-register: '' }
+ - { id: 2, class: _, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# CHECK: %0(s64) = COPY %xmm0
+# CHECK-NEXT: %1(s64) = COPY %xmm1
+# CHECK-NEXT: %2(s64) = G_FADD %0, %1
+# CHECK-NEXT: %xmm0 = COPY %2(s64)
+# CHECK-NEXT: RET 0, implicit %xmm0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(s64) = COPY %xmm0
+ %1(s64) = COPY %xmm1
+ %2(s64) = G_FADD %0, %1
+ %xmm0 = COPY %2(s64)
+ RET 0, implicit %xmm0
+
+...
diff --git a/test/CodeGen/X86/GlobalISel/legalize-fdiv-scalar.mir b/test/CodeGen/X86/GlobalISel/legalize-fdiv-scalar.mir
new file mode 100644
index 000000000000..102d95c6390c
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/legalize-fdiv-scalar.mir
@@ -0,0 +1,74 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=legalizer %s -o - | FileCheck %s
+--- |
+
+ define float @test_fdiv_float(float %arg1, float %arg2) {
+ %ret = fdiv float %arg1, %arg2
+ ret float %ret
+ }
+
+ define double @test_fdiv_double(double %arg1, double %arg2) {
+ %ret = fdiv double %arg1, %arg2
+ ret double %ret
+ }
+
+...
+---
+name: test_fdiv_float
+# CHECK-LABEL: name: test_fdiv_float
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _, preferred-register: '' }
+ - { id: 1, class: _, preferred-register: '' }
+ - { id: 2, class: _, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# CHECK: %0(s32) = COPY %xmm0
+# CHECK-NEXT: %1(s32) = COPY %xmm1
+# CHECK-NEXT: %2(s32) = G_FDIV %0, %1
+# CHECK-NEXT: %xmm0 = COPY %2(s32)
+# CHECK-NEXT: RET 0, implicit %xmm0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(s32) = COPY %xmm0
+ %1(s32) = COPY %xmm1
+ %2(s32) = G_FDIV %0, %1
+ %xmm0 = COPY %2(s32)
+ RET 0, implicit %xmm0
+
+...
+---
+name: test_fdiv_double
+# CHECK-LABEL: name: test_fdiv_double
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _, preferred-register: '' }
+ - { id: 1, class: _, preferred-register: '' }
+ - { id: 2, class: _, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# CHECK: %0(s64) = COPY %xmm0
+# CHECK-NEXT: %1(s64) = COPY %xmm1
+# CHECK-NEXT: %2(s64) = G_FDIV %0, %1
+# CHECK-NEXT: %xmm0 = COPY %2(s64)
+# CHECK-NEXT: RET 0, implicit %xmm0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(s64) = COPY %xmm0
+ %1(s64) = COPY %xmm1
+ %2(s64) = G_FDIV %0, %1
+ %xmm0 = COPY %2(s64)
+ RET 0, implicit %xmm0
+
+...
diff --git a/test/CodeGen/X86/GlobalISel/legalize-fmul-scalar.mir b/test/CodeGen/X86/GlobalISel/legalize-fmul-scalar.mir
new file mode 100644
index 000000000000..eeacbfcf07b2
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/legalize-fmul-scalar.mir
@@ -0,0 +1,74 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=legalizer %s -o - | FileCheck %s
+--- |
+
+ define float @test_fmul_float(float %arg1, float %arg2) {
+ %ret = fmul float %arg1, %arg2
+ ret float %ret
+ }
+
+ define double @test_fmul_double(double %arg1, double %arg2) {
+ %ret = fmul double %arg1, %arg2
+ ret double %ret
+ }
+
+...
+---
+name: test_fmul_float
+# CHECK-LABEL: name: test_fmul_float
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _, preferred-register: '' }
+ - { id: 1, class: _, preferred-register: '' }
+ - { id: 2, class: _, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# CHECK: %0(s32) = COPY %xmm0
+# CHECK-NEXT: %1(s32) = COPY %xmm1
+# CHECK-NEXT: %2(s32) = G_FMUL %0, %1
+# CHECK-NEXT: %xmm0 = COPY %2(s32)
+# CHECK-NEXT: RET 0, implicit %xmm0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(s32) = COPY %xmm0
+ %1(s32) = COPY %xmm1
+ %2(s32) = G_FMUL %0, %1
+ %xmm0 = COPY %2(s32)
+ RET 0, implicit %xmm0
+
+...
+---
+name: test_fmul_double
+# CHECK-LABEL: name: test_fmul_double
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _, preferred-register: '' }
+ - { id: 1, class: _, preferred-register: '' }
+ - { id: 2, class: _, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# CHECK: %0(s64) = COPY %xmm0
+# CHECK-NEXT: %1(s64) = COPY %xmm1
+# CHECK-NEXT: %2(s64) = G_FMUL %0, %1
+# CHECK-NEXT: %xmm0 = COPY %2(s64)
+# CHECK-NEXT: RET 0, implicit %xmm0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(s64) = COPY %xmm0
+ %1(s64) = COPY %xmm1
+ %2(s64) = G_FMUL %0, %1
+ %xmm0 = COPY %2(s64)
+ RET 0, implicit %xmm0
+
+...
diff --git a/test/CodeGen/X86/GlobalISel/legalize-fsub-scalar.mir b/test/CodeGen/X86/GlobalISel/legalize-fsub-scalar.mir
new file mode 100644
index 000000000000..3b3ee4aa0afb
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/legalize-fsub-scalar.mir
@@ -0,0 +1,74 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=legalizer %s -o - | FileCheck %s
+--- |
+
+ define float @test_fsub_float(float %arg1, float %arg2) {
+ %ret = fsub float %arg1, %arg2
+ ret float %ret
+ }
+
+ define double @test_fsub_double(double %arg1, double %arg2) {
+ %ret = fsub double %arg1, %arg2
+ ret double %ret
+ }
+
+...
+---
+name: test_fsub_float
+# CHECK-LABEL: name: test_fsub_float
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _, preferred-register: '' }
+ - { id: 1, class: _, preferred-register: '' }
+ - { id: 2, class: _, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# CHECK: %0(s32) = COPY %xmm0
+# CHECK-NEXT: %1(s32) = COPY %xmm1
+# CHECK-NEXT: %2(s32) = G_FSUB %0, %1
+# CHECK-NEXT: %xmm0 = COPY %2(s32)
+# CHECK-NEXT: RET 0, implicit %xmm0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(s32) = COPY %xmm0
+ %1(s32) = COPY %xmm1
+ %2(s32) = G_FSUB %0, %1
+ %xmm0 = COPY %2(s32)
+ RET 0, implicit %xmm0
+
+...
+---
+name: test_fsub_double
+# CHECK-LABEL: name: test_fsub_double
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _, preferred-register: '' }
+ - { id: 1, class: _, preferred-register: '' }
+ - { id: 2, class: _, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# CHECK: %0(s64) = COPY %xmm0
+# CHECK-NEXT: %1(s64) = COPY %xmm1
+# CHECK-NEXT: %2(s64) = G_FSUB %0, %1
+# CHECK-NEXT: %xmm0 = COPY %2(s64)
+# CHECK-NEXT: RET 0, implicit %xmm0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(s64) = COPY %xmm0
+ %1(s64) = COPY %xmm1
+ %2(s64) = G_FSUB %0, %1
+ %xmm0 = COPY %2(s64)
+ RET 0, implicit %xmm0
+
+...
diff --git a/test/CodeGen/X86/GlobalISel/legalize-or-scalar.mir b/test/CodeGen/X86/GlobalISel/legalize-or-scalar.mir
new file mode 100644
index 000000000000..a014f56a3588
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/legalize-or-scalar.mir
@@ -0,0 +1,124 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=legalizer %s -o - | FileCheck %s
+
+--- |
+ define i8 @test_or_i8() {
+ %ret = or i8 undef, undef
+ ret i8 %ret
+ }
+
+ define i16 @test_or_i16() {
+ %ret = or i16 undef, undef
+ ret i16 %ret
+ }
+
+ define i32 @test_or_i32() {
+ %ret = or i32 undef, undef
+ ret i32 %ret
+ }
+
+ define i64 @test_or_i64() {
+ %ret = or i64 undef, undef
+ ret i64 %ret
+ }
+
+...
+---
+name: test_or_i8
+# CHECK-LABEL: name: test_or_i8
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _, preferred-register: '' }
+ - { id: 1, class: _, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# CHECK: %0(s8) = IMPLICIT_DEF
+# CHECK-NEXT: %1(s8) = G_OR %0, %0
+# CHECK-NEXT: %al = COPY %1(s8)
+# CHECK-NEXT: RET 0, implicit %al
+body: |
+ bb.1 (%ir-block.0):
+ %0(s8) = IMPLICIT_DEF
+ %1(s8) = G_OR %0, %0
+ %al = COPY %1(s8)
+ RET 0, implicit %al
+
+...
+---
+name: test_or_i16
+# CHECK-LABEL: name: test_or_i16
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _, preferred-register: '' }
+ - { id: 1, class: _, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# CHECK: %0(s16) = IMPLICIT_DEF
+# CHECK-NEXT: %1(s16) = G_OR %0, %0
+# CHECK-NEXT: %ax = COPY %1(s16)
+# CHECK-NEXT: RET 0, implicit %ax
+body: |
+ bb.1 (%ir-block.0):
+ %0(s16) = IMPLICIT_DEF
+ %1(s16) = G_OR %0, %0
+ %ax = COPY %1(s16)
+ RET 0, implicit %ax
+
+...
+---
+name: test_or_i32
+# CHECK-LABEL: name: test_or_i32
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _, preferred-register: '' }
+ - { id: 1, class: _, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# CHECK: %0(s32) = IMPLICIT_DEF
+# CHECK-NEXT: %1(s32) = G_OR %0, %0
+# CHECK-NEXT: %eax = COPY %1(s32)
+# CHECK-NEXT: RET 0, implicit %eax
+body: |
+ bb.1 (%ir-block.0):
+ %0(s32) = IMPLICIT_DEF
+ %1(s32) = G_OR %0, %0
+ %eax = COPY %1(s32)
+ RET 0, implicit %eax
+
+...
+---
+name: test_or_i64
+# CHECK-LABEL: name: test_or_i64
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _, preferred-register: '' }
+ - { id: 1, class: _, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# CHECK: %0(s64) = IMPLICIT_DEF
+# CHECK-NEXT: %1(s64) = G_OR %0, %0
+# CHECK-NEXT: %rax = COPY %1(s64)
+# CHECK-NEXT: RET 0, implicit %rax
+body: |
+ bb.1 (%ir-block.0):
+ %0(s64) = IMPLICIT_DEF
+ %1(s64) = G_OR %0, %0
+ %rax = COPY %1(s64)
+ RET 0, implicit %rax
+
+...
diff --git a/test/CodeGen/X86/GlobalISel/legalize-xor-scalar.mir b/test/CodeGen/X86/GlobalISel/legalize-xor-scalar.mir
new file mode 100644
index 000000000000..e2af91283026
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/legalize-xor-scalar.mir
@@ -0,0 +1,124 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=legalizer %s -o - | FileCheck %s
+
+--- |
+ define i8 @test_xor_i8() {
+ %ret = xor i8 undef, undef
+ ret i8 %ret
+ }
+
+ define i16 @test_xor_i16() {
+ %ret = xor i16 undef, undef
+ ret i16 %ret
+ }
+
+ define i32 @test_xor_i32() {
+ %ret = xor i32 undef, undef
+ ret i32 %ret
+ }
+
+ define i64 @test_xor_i64() {
+ %ret = xor i64 undef, undef
+ ret i64 %ret
+ }
+
+...
+---
+name: test_xor_i8
+# CHECK-LABEL: name: test_xor_i8
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _, preferred-register: '' }
+ - { id: 1, class: _, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# CHECK: %0(s8) = IMPLICIT_DEF
+# CHECK-NEXT: %1(s8) = G_XOR %0, %0
+# CHECK-NEXT: %al = COPY %1(s8)
+# CHECK-NEXT: RET 0, implicit %al
+body: |
+ bb.1 (%ir-block.0):
+ %0(s8) = IMPLICIT_DEF
+ %1(s8) = G_XOR %0, %0
+ %al = COPY %1(s8)
+ RET 0, implicit %al
+
+...
+---
+name: test_xor_i16
+# CHECK-LABEL: name: test_xor_i16
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _, preferred-register: '' }
+ - { id: 1, class: _, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# CHECK: %0(s16) = IMPLICIT_DEF
+# CHECK-NEXT: %1(s16) = G_XOR %0, %0
+# CHECK-NEXT: %ax = COPY %1(s16)
+# CHECK-NEXT: RET 0, implicit %ax
+body: |
+ bb.1 (%ir-block.0):
+ %0(s16) = IMPLICIT_DEF
+ %1(s16) = G_XOR %0, %0
+ %ax = COPY %1(s16)
+ RET 0, implicit %ax
+
+...
+---
+name: test_xor_i32
+# CHECK-LABEL: name: test_xor_i32
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _, preferred-register: '' }
+ - { id: 1, class: _, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# CHECK: %0(s32) = IMPLICIT_DEF
+# CHECK-NEXT: %1(s32) = G_XOR %0, %0
+# CHECK-NEXT: %eax = COPY %1(s32)
+# CHECK-NEXT: RET 0, implicit %eax
+body: |
+ bb.1 (%ir-block.0):
+ %0(s32) = IMPLICIT_DEF
+ %1(s32) = G_XOR %0, %0
+ %eax = COPY %1(s32)
+ RET 0, implicit %eax
+
+...
+---
+name: test_xor_i64
+# CHECK-LABEL: name: test_xor_i64
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _, preferred-register: '' }
+ - { id: 1, class: _, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# CHECK: %0(s64) = IMPLICIT_DEF
+# CHECK-NEXT: %1(s64) = G_XOR %0, %0
+# CHECK-NEXT: %rax = COPY %1(s64)
+# CHECK-NEXT: RET 0, implicit %rax
+body: |
+ bb.1 (%ir-block.0):
+ %0(s64) = IMPLICIT_DEF
+ %1(s64) = G_XOR %0, %0
+ %rax = COPY %1(s64)
+ RET 0, implicit %rax
+
+...
diff --git a/test/CodeGen/X86/GlobalISel/or-scalar.ll b/test/CodeGen/X86/GlobalISel/or-scalar.ll
new file mode 100644
index 000000000000..b0371457f76e
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/or-scalar.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=ALL
+
+define i8 @test_or_i8(i8 %arg1, i8 %arg2) {
+; ALL-LABEL: test_or_i8:
+; ALL: # BB#0:
+; ALL-NEXT: orb %dil, %sil
+; ALL-NEXT: movl %esi, %eax
+; ALL-NEXT: retq
+ %ret = or i8 %arg1, %arg2
+ ret i8 %ret
+}
+
+define i16 @test_or_i16(i16 %arg1, i16 %arg2) {
+; ALL-LABEL: test_or_i16:
+; ALL: # BB#0:
+; ALL-NEXT: orw %di, %si
+; ALL-NEXT: movl %esi, %eax
+; ALL-NEXT: retq
+ %ret = or i16 %arg1, %arg2
+ ret i16 %ret
+}
+
+define i32 @test_or_i32(i32 %arg1, i32 %arg2) {
+; ALL-LABEL: test_or_i32:
+; ALL: # BB#0:
+; ALL-NEXT: orl %edi, %esi
+; ALL-NEXT: movl %esi, %eax
+; ALL-NEXT: retq
+ %ret = or i32 %arg1, %arg2
+ ret i32 %ret
+}
+
+define i64 @test_or_i64(i64 %arg1, i64 %arg2) {
+; ALL-LABEL: test_or_i64:
+; ALL: # BB#0:
+; ALL-NEXT: orq %rdi, %rsi
+; ALL-NEXT: movq %rsi, %rax
+; ALL-NEXT: retq
+ %ret = or i64 %arg1, %arg2
+ ret i64 %ret
+}
+
diff --git a/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir b/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir
index 7bcc57aef4ac..3658bc9af957 100644
--- a/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir
+++ b/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir
@@ -35,6 +35,25 @@
%ret = fadd double %arg1, %arg2
ret double %ret
}
+
+ define void @test_fsub_float() {
+ %ret1 = fsub float undef, undef
+ %ret2 = fsub double undef, undef
+ ret void
+ }
+
+ define void @test_fmul_float() {
+ %ret1 = fmul float undef, undef
+ %ret2 = fmul double undef, undef
+ ret void
+ }
+
+ define void @test_fdiv_float() {
+ %ret1 = fdiv float undef, undef
+ %ret2 = fdiv double undef, undef
+ ret void
+ }
+
 
 define <4 x i32> @test_add_v4i32(<4 x i32> %arg1, <4 x i32> %arg2) {
%ret = add <4 x i32> %arg1, %arg2
@@ -135,6 +154,26 @@
ret i1 %r
}
 
+ define i8 @test_xor_i8() {
+ %ret = xor i8 undef, undef
+ ret i8 %ret
+ }
+
+ define i16 @test_or_i16() {
+ %ret = or i16 undef, undef
+ ret i16 %ret
+ }
+
+ define i32 @test_and_i32() {
+ %ret = and i32 undef, undef
+ ret i32 %ret
+ }
+
+ define i64 @test_and_i64() {
+ %ret = and i64 undef, undef
+ ret i64 %ret
+ }
+
...
---
name: test_add_i8
@@ -338,6 +377,105 @@ body: |
 
 ...
---
+name: test_fsub_float
+# CHECK-LABEL: name: test_fsub_float
+alignment: 4
+legalized: true
+regBankSelected: false
+# CHECK: registers:
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vecr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: vecr, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: vecr, preferred-register: '' }
+# CHECK-NEXT: - { id: 5, class: vecr, preferred-register: '' }
+# CHECK-NEXT: - { id: 6, class: vecr, preferred-register: '' }
+# CHECK-NEXT: - { id: 7, class: vecr, preferred-register: '' }
+registers:
+ - { id: 0, class: _, preferred-register: '' }
+ - { id: 1, class: _, preferred-register: '' }
+ - { id: 2, class: _, preferred-register: '' }
+ - { id: 3, class: _, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+body: |
+ bb.1 (%ir-block.0):
+ %0(s32) = IMPLICIT_DEF
+ %2(s64) = IMPLICIT_DEF
+ %1(s32) = G_FSUB %0, %0
+ %3(s64) = G_FSUB %2, %2
+ RET 0
+
+...
+---
+name: test_fmul_float
+# CHECK-LABEL: name: test_fmul_float
+alignment: 4
+legalized: true
+regBankSelected: false
+# CHECK: registers:
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vecr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: vecr, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: vecr, preferred-register: '' }
+# CHECK-NEXT: - { id: 5, class: vecr, preferred-register: '' }
+# CHECK-NEXT: - { id: 6, class: vecr, preferred-register: '' }
+# CHECK-NEXT: - { id: 7, class: vecr, preferred-register: '' }
+registers:
+ - { id: 0, class: _, preferred-register: '' }
+ - { id: 1, class: _, preferred-register: '' }
+ - { id: 2, class: _, preferred-register: '' }
+ - { id: 3, class: _, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+body: |
+ bb.1 (%ir-block.0):
+ %0(s32) = IMPLICIT_DEF
+ %2(s64) = IMPLICIT_DEF
+ %1(s32) = G_FMUL %0, %0
+ %3(s64) = G_FMUL %2, %2
+ RET 0
+
+...
+---
+name: test_fdiv_float
+# CHECK-LABEL: name: test_fdiv_float
+alignment: 4
+legalized: true
+regBankSelected: false
+# CHECK: registers:
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vecr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: vecr, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: vecr, preferred-register: '' }
+# CHECK-NEXT: - { id: 5, class: vecr, preferred-register: '' }
+# CHECK-NEXT: - { id: 6, class: vecr, preferred-register: '' }
+# CHECK-NEXT: - { id: 7, class: vecr, preferred-register: '' }
+registers:
+ - { id: 0, class: _, preferred-register: '' }
+ - { id: 1, class: _, preferred-register: '' }
+ - { id: 2, class: _, preferred-register: '' }
+ - { id: 3, class: _, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+body: |
+ bb.1 (%ir-block.0):
+ %0(s32) = IMPLICIT_DEF
+ %2(s64) = IMPLICIT_DEF
+ %1(s32) = G_FDIV %0, %0
+ %3(s64) = G_FDIV %2, %2
+ RET 0
+
+...
+---
name: test_add_v4i32
alignment: 4
legalized: true
@@ -850,3 +988,100 @@ body: |
RET 0, implicit %al
 
 ...
+---
+name: test_xor_i8
+# CHECK-LABEL: name: test_xor_i8
+alignment: 4
+legalized: true
+regBankSelected: false
+# CHECK: registers:
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+registers:
+ - { id: 0, class: _, preferred-register: '' }
+ - { id: 1, class: _, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+body: |
+ bb.1 (%ir-block.0):
+ %0(s8) = IMPLICIT_DEF
+ %1(s8) = G_XOR %0, %0
+ %al = COPY %1(s8)
+ RET 0, implicit %al
+
+...
+---
+name: test_or_i16
+# CHECK-LABEL: name: test_or_i16
+alignment: 4
+legalized: true
+regBankSelected: false
+# CHECK: registers:
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+registers:
+ - { id: 0, class: _, preferred-register: '' }
+ - { id: 1, class: _, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+body: |
+ bb.1 (%ir-block.0):
+ %0(s16) = IMPLICIT_DEF
+ %1(s16) = G_OR %0, %0
+ %ax = COPY %1(s16)
+ RET 0, implicit %ax
+
+...
+---
+name: test_and_i32
+# CHECK-LABEL: name: test_and_i32
+alignment: 4
+legalized: true
+regBankSelected: false
+# CHECK: registers:
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+registers:
+ - { id: 0, class: _, preferred-register: '' }
+ - { id: 1, class: _, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+body: |
+ bb.1 (%ir-block.0):
+ %0(s32) = IMPLICIT_DEF
+ %1(s32) = G_AND %0, %0
+ %eax = COPY %1(s32)
+ RET 0, implicit %eax
+
+...
+---
+name: test_and_i64
+# CHECK-LABEL: name: test_and_i64
+alignment: 4
+legalized: true
+regBankSelected: false
+# CHECK: registers:
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+registers:
+ - { id: 0, class: _, preferred-register: '' }
+ - { id: 1, class: _, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+body: |
+ bb.1 (%ir-block.0):
+ %0(s64) = IMPLICIT_DEF
+ %1(s64) = G_AND %0, %0
+ %rax = COPY %1(s64)
+ RET 0, implicit %rax
+
+...
+
diff --git a/test/CodeGen/X86/GlobalISel/select-add.mir b/test/CodeGen/X86/GlobalISel/select-add.mir
index 78e6bb6913a4..45811c5cdc26 100644
--- a/test/CodeGen/X86/GlobalISel/select-add.mir
+++ b/test/CodeGen/X86/GlobalISel/select-add.mir
@@ -24,16 +24,6 @@
ret i8 %ret
}
 
- define float @test_add_float(float %arg1, float %arg2) {
- %ret = fadd float %arg1, %arg2
- ret float %ret
- }
-
- define double @test_add_double(double %arg1, double %arg2) {
- %ret = fadd double %arg1, %arg2
- ret double %ret
- }
-
define <4 x i32> @test_add_v4i32(<4 x i32> %arg1, <4 x i32> %arg2) {
%ret = add <4 x i32> %arg1, %arg2
ret <4 x i32> %ret
@@ -157,76 +147,6 @@ body: |
 
 ...
---
-name: test_add_float
-# ALL-LABEL: name: test_add_float
-alignment: 4
-legalized: true
-regBankSelected: true
-selected: false
-tracksRegLiveness: true
-# ALL: registers:
-# NO_AVX512F-NEXT: - { id: 0, class: fr32, preferred-register: '' }
-# NO_AVX512F-NEXT: - { id: 1, class: fr32, preferred-register: '' }
-# NO_AVX512F-NEXT: - { id: 2, class: fr32, preferred-register: '' }
-# AVX512ALL-NEXT: - { id: 0, class: fr32x, preferred-register: '' }
-# AVX512ALL-NEXT: - { id: 1, class: fr32x, preferred-register: '' }
-# AVX512ALL-NEXT: - { id: 2, class: fr32x, preferred-register: '' }
-registers:
- - { id: 0, class: vecr }
- - { id: 1, class: vecr }
- - { id: 2, class: vecr }
-# ALL: %0 = COPY %xmm0
-# ALL-NEXT: %1 = COPY %xmm1
-# SSE-NEXT: %2 = ADDSSrr %0, %1
-# AVX-NEXT: %2 = VADDSSrr %0, %1
-# AVX512F-NEXT: %2 = VADDSSZrr %0, %1
-body: |
- bb.1 (%ir-block.0):
- liveins: %xmm0, %xmm1
-
- %0(s32) = COPY %xmm0
- %1(s32) = COPY %xmm1
- %2(s32) = G_FADD %0, %1
- %xmm0 = COPY %2(s32)
- RET 0, implicit %xmm0
-
-...
----
-name: test_add_double
-# ALL-LABEL: name: test_add_double
-alignment: 4
-legalized: true
-regBankSelected: true
-selected: false
-tracksRegLiveness: true
-# ALL: registers:
-# NO_AVX512F-NEXT: - { id: 0, class: fr64, preferred-register: '' }
-# NO_AVX512F-NEXT: - { id: 1, class: fr64, preferred-register: '' }
-# NO_AVX512F-NEXT: - { id: 2, class: fr64, preferred-register: '' }
-# AVX512ALL-NEXT: - { id: 0, class: fr64x, preferred-register: '' }
-# AVX512ALL-NEXT: - { id: 1, class: fr64x, preferred-register: '' }
-# AVX512ALL-NEXT: - { id: 2, class: fr64x, preferred-register: '' }
-registers:
- - { id: 0, class: vecr }
- - { id: 1, class: vecr }
- - { id: 2, class: vecr }
-# ALL: %0 = COPY %xmm0
-# ALL-NEXT: %1 = COPY %xmm1
-# SSE-NEXT: %2 = ADDSDrr %0, %1
-# AVX-NEXT: %2 = VADDSDrr %0, %1
-# AVX512F-NEXT: %2 = VADDSDZrr %0, %1
-body: |
- bb.1 (%ir-block.0):
- liveins: %xmm0, %xmm1
-
- %0(s64) = COPY %xmm0
- %1(s64) = COPY %xmm1
- %2(s64) = G_FADD %0, %1
- %xmm0 = COPY %2(s64)
- RET 0, implicit %xmm0
-
-...
----
name: test_add_v4i32
# ALL-LABEL: name: test_add_v4i32
alignment: 4
diff --git a/test/CodeGen/X86/GlobalISel/select-and-scalar.mir b/test/CodeGen/X86/GlobalISel/select-and-scalar.mir
new file mode 100644
index 000000000000..c40cc224d50e
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/select-and-scalar.mir
@@ -0,0 +1,160 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL
+
+--- |
+ define i8 @test_and_i8(i8 %arg1, i8 %arg2) {
+ %ret = and i8 %arg1, %arg2
+ ret i8 %ret
+ }
+
+ define i16 @test_and_i16(i16 %arg1, i16 %arg2) {
+ %ret = and i16 %arg1, %arg2
+ ret i16 %ret
+ }
+
+ define i32 @test_and_i32(i32 %arg1, i32 %arg2) {
+ %ret = and i32 %arg1, %arg2
+ ret i32 %ret
+ }
+
+ define i64 @test_and_i64(i64 %arg1, i64 %arg2) {
+ %ret = and i64 %arg1, %arg2
+ ret i64 %ret
+ }
+
+...
+---
+name: test_and_i8
+# ALL-LABEL: name: test_and_i8
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: gr8, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr8, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr8, preferred-register: '' }
+registers:
+ - { id: 0, class: gpr, preferred-register: '' }
+ - { id: 1, class: gpr, preferred-register: '' }
+ - { id: 2, class: gpr, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# ALL: %0 = COPY %dil
+# ALL-NEXT: %1 = COPY %sil
+# ALL-NEXT: %2 = AND8rr %0, %1, implicit-def %eflags
+# ALL-NEXT: %al = COPY %2
+# ALL-NEXT: RET 0, implicit %al
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %edi, %esi
+
+ %0(s8) = COPY %edi
+ %1(s8) = COPY %esi
+ %2(s8) = G_AND %0, %1
+ %al = COPY %2(s8)
+ RET 0, implicit %al
+
+...
+---
+name: test_and_i16
+# ALL-LABEL: name: test_and_i16
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: gr16, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr16, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr16, preferred-register: '' }
+registers:
+ - { id: 0, class: gpr, preferred-register: '' }
+ - { id: 1, class: gpr, preferred-register: '' }
+ - { id: 2, class: gpr, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# ALL: %0 = COPY %di
+# ALL-NEXT: %1 = COPY %si
+# ALL-NEXT: %2 = AND16rr %0, %1, implicit-def %eflags
+# ALL-NEXT: %ax = COPY %2
+# ALL-NEXT: RET 0, implicit %ax
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %edi, %esi
+
+ %0(s16) = COPY %edi
+ %1(s16) = COPY %esi
+ %2(s16) = G_AND %0, %1
+ %ax = COPY %2(s16)
+ RET 0, implicit %ax
+
+...
+---
+name: test_and_i32
+# ALL-LABEL: name: test_and_i32
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' }
+registers:
+ - { id: 0, class: gpr, preferred-register: '' }
+ - { id: 1, class: gpr, preferred-register: '' }
+ - { id: 2, class: gpr, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# ALL: %0 = COPY %edi
+# ALL-NEXT: %1 = COPY %esi
+# ALL-NEXT: %2 = AND32rr %0, %1, implicit-def %eflags
+# ALL-NEXT: %eax = COPY %2
+# ALL-NEXT: RET 0, implicit %eax
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %edi, %esi
+
+ %0(s32) = COPY %edi
+ %1(s32) = COPY %esi
+ %2(s32) = G_AND %0, %1
+ %eax = COPY %2(s32)
+ RET 0, implicit %eax
+
+...
+---
+name: test_and_i64
+# ALL-LABEL: name: test_and_i64
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: gr64, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr64, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr64, preferred-register: '' }
+registers:
+ - { id: 0, class: gpr, preferred-register: '' }
+ - { id: 1, class: gpr, preferred-register: '' }
+ - { id: 2, class: gpr, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# ALL: %0 = COPY %rdi
+# ALL-NEXT: %1 = COPY %rsi
+# ALL-NEXT: %2 = AND64rr %0, %1, implicit-def %eflags
+# ALL-NEXT: %rax = COPY %2
+# ALL-NEXT: RET 0, implicit %rax
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %rdi, %rsi
+
+ %0(s64) = COPY %rdi
+ %1(s64) = COPY %rsi
+ %2(s64) = G_AND %0, %1
+ %rax = COPY %2(s64)
+ RET 0, implicit %rax
+
+...
diff --git a/test/CodeGen/X86/GlobalISel/select-constant.mir b/test/CodeGen/X86/GlobalISel/select-constant.mir
index 7902a5084ce6..4b91b5f9f098 100644
--- a/test/CodeGen/X86/GlobalISel/select-constant.mir
+++ b/test/CodeGen/X86/GlobalISel/select-constant.mir
@@ -13,6 +13,10 @@
ret i32 4
}
 
+ define i32 @const_i32_0() {
+ ret i32 0
+ }
+
define i64 @const_i64() {
ret i64 68719476720
}
@@ -84,6 +88,23 @@ body: |
 
 ...
---
+name: const_i32_0
+# CHECK-LABEL: name: const_i32_0
+legalized: true
+regBankSelected: true
+# CHECK: registers:
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+registers:
+ - { id: 0, class: gpr }
+# CHECK: %0 = MOV32r0 implicit-def %eflags
+body: |
+ bb.1 (%ir-block.0):
+ %0(s32) = G_CONSTANT i32 0
+ %eax = COPY %0(s32)
+ RET 0, implicit %eax
+
+...
+---
name: const_i64
legalized: true
regBankSelected: true
diff --git a/test/CodeGen/X86/GlobalISel/select-fadd-scalar.mir b/test/CodeGen/X86/GlobalISel/select-fadd-scalar.mir
new file mode 100644
index 000000000000..fa4c529982cc
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/select-fadd-scalar.mir
@@ -0,0 +1,119 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NO_AVX512VL --check-prefix=NO_AVX512F --check-prefix=SSE
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NO_AVX512VL --check-prefix=NO_AVX512F --check-prefix=AVX
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NO_AVX512VL --check-prefix=AVX512ALL --check-prefix=AVX512F
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -mattr=+avx512vl -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512ALL --check-prefix=AVX512VL
+--- |
+
+ define float @test_fadd_float(float %arg1, float %arg2) {
+ %ret = fadd float %arg1, %arg2
+ ret float %ret
+ }
+
+ define double @test_fadd_double(double %arg1, double %arg2) {
+ %ret = fadd double %arg1, %arg2
+ ret double %ret
+ }
+
+...
+---
+name: test_fadd_float
+# ALL-LABEL: name: test_fadd_float
+alignment: 4
+legalized: true
+regBankSelected: true
+# NO_AVX512F: registers:
+# NO_AVX512F-NEXT: - { id: 0, class: fr32, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 1, class: fr32, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 2, class: fr32, preferred-register: '' }
+#
+# AVX512ALL: registers:
+# AVX512ALL-NEXT: - { id: 0, class: fr32x, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 1, class: fr32x, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 2, class: fr32x, preferred-register: '' }
+registers:
+ - { id: 0, class: vecr, preferred-register: '' }
+ - { id: 1, class: vecr, preferred-register: '' }
+ - { id: 2, class: vecr, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# SSE: %0 = COPY %xmm0
+# SSE-NEXT: %1 = COPY %xmm1
+# SSE-NEXT: %2 = ADDSSrr %0, %1
+# SSE-NEXT: %xmm0 = COPY %2
+# SSE-NEXT: RET 0, implicit %xmm0
+#
+# AVX: %0 = COPY %xmm0
+# AVX-NEXT: %1 = COPY %xmm1
+# AVX-NEXT: %2 = VADDSSrr %0, %1
+# AVX-NEXT: %xmm0 = COPY %2
+# AVX-NEXT: RET 0, implicit %xmm0
+#
+# AVX512ALL: %0 = COPY %xmm0
+# AVX512ALL-NEXT: %1 = COPY %xmm1
+# AVX512ALL-NEXT: %2 = VADDSSZrr %0, %1
+# AVX512ALL-NEXT: %xmm0 = COPY %2
+# AVX512ALL-NEXT: RET 0, implicit %xmm0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(s32) = COPY %xmm0
+ %1(s32) = COPY %xmm1
+ %2(s32) = G_FADD %0, %1
+ %xmm0 = COPY %2(s32)
+ RET 0, implicit %xmm0
+
+...
+---
+name: test_fadd_double
+# ALL-LABEL: name: test_fadd_double
+alignment: 4
+legalized: true
+regBankSelected: true
+# NO_AVX512F: registers:
+# NO_AVX512F-NEXT: - { id: 0, class: fr64, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 1, class: fr64, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 2, class: fr64, preferred-register: '' }
+#
+# AVX512ALL: registers:
+# AVX512ALL-NEXT: - { id: 0, class: fr64x, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 1, class: fr64x, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 2, class: fr64x, preferred-register: '' }
+registers:
+ - { id: 0, class: vecr, preferred-register: '' }
+ - { id: 1, class: vecr, preferred-register: '' }
+ - { id: 2, class: vecr, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# SSE: %0 = COPY %xmm0
+# SSE-NEXT: %1 = COPY %xmm1
+# SSE-NEXT: %2 = ADDSDrr %0, %1
+# SSE-NEXT: %xmm0 = COPY %2
+# SSE-NEXT: RET 0, implicit %xmm0
+#
+# AVX: %0 = COPY %xmm0
+# AVX-NEXT: %1 = COPY %xmm1
+# AVX-NEXT: %2 = VADDSDrr %0, %1
+# AVX-NEXT: %xmm0 = COPY %2
+# AVX-NEXT: RET 0, implicit %xmm0
+#
+# AVX512ALL: %0 = COPY %xmm0
+# AVX512ALL-NEXT: %1 = COPY %xmm1
+# AVX512ALL-NEXT: %2 = VADDSDZrr %0, %1
+# AVX512ALL-NEXT: %xmm0 = COPY %2
+# AVX512ALL-NEXT: RET 0, implicit %xmm0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(s64) = COPY %xmm0
+ %1(s64) = COPY %xmm1
+ %2(s64) = G_FADD %0, %1
+ %xmm0 = COPY %2(s64)
+ RET 0, implicit %xmm0
+
+...
diff --git a/test/CodeGen/X86/GlobalISel/select-fdiv-scalar.mir b/test/CodeGen/X86/GlobalISel/select-fdiv-scalar.mir
new file mode 100644
index 000000000000..d2c1d1528652
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/select-fdiv-scalar.mir
@@ -0,0 +1,119 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NO_AVX512VL --check-prefix=NO_AVX512F --check-prefix=SSE
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NO_AVX512VL --check-prefix=NO_AVX512F --check-prefix=AVX
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NO_AVX512VL --check-prefix=AVX512ALL --check-prefix=AVX512F
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -mattr=+avx512vl -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512ALL --check-prefix=AVX512VL
+--- |
+
+ define float @test_fdiv_float(float %arg1, float %arg2) {
+ %ret = fdiv float %arg1, %arg2
+ ret float %ret
+ }
+
+ define double @test_fdiv_double(double %arg1, double %arg2) {
+ %ret = fdiv double %arg1, %arg2
+ ret double %ret
+ }
+
+...
+---
+name: test_fdiv_float
+# ALL-LABEL: name: test_fdiv_float
+alignment: 4
+legalized: true
+regBankSelected: true
+# NO_AVX512F: registers:
+# NO_AVX512F-NEXT: - { id: 0, class: fr32, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 1, class: fr32, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 2, class: fr32, preferred-register: '' }
+#
+# AVX512ALL: registers:
+# AVX512ALL-NEXT: - { id: 0, class: fr32x, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 1, class: fr32x, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 2, class: fr32x, preferred-register: '' }
+registers:
+ - { id: 0, class: vecr, preferred-register: '' }
+ - { id: 1, class: vecr, preferred-register: '' }
+ - { id: 2, class: vecr, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# SSE: %0 = COPY %xmm0
+# SSE-NEXT: %1 = COPY %xmm1
+# SSE-NEXT: %2 = DIVSSrr %0, %1
+# SSE-NEXT: %xmm0 = COPY %2
+# SSE-NEXT: RET 0, implicit %xmm0
+#
+# AVX: %0 = COPY %xmm0
+# AVX-NEXT: %1 = COPY %xmm1
+# AVX-NEXT: %2 = VDIVSSrr %0, %1
+# AVX-NEXT: %xmm0 = COPY %2
+# AVX-NEXT: RET 0, implicit %xmm0
+#
+# AVX512ALL: %0 = COPY %xmm0
+# AVX512ALL-NEXT: %1 = COPY %xmm1
+# AVX512ALL-NEXT: %2 = VDIVSSZrr %0, %1
+# AVX512ALL-NEXT: %xmm0 = COPY %2
+# AVX512ALL-NEXT: RET 0, implicit %xmm0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(s32) = COPY %xmm0
+ %1(s32) = COPY %xmm1
+ %2(s32) = G_FDIV %0, %1
+ %xmm0 = COPY %2(s32)
+ RET 0, implicit %xmm0
+
+...
+---
+name: test_fdiv_double
+# ALL-LABEL: name: test_fdiv_double
+alignment: 4
+legalized: true
+regBankSelected: true
+# NO_AVX512F: registers:
+# NO_AVX512F-NEXT: - { id: 0, class: fr64, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 1, class: fr64, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 2, class: fr64, preferred-register: '' }
+#
+# AVX512ALL: registers:
+# AVX512ALL-NEXT: - { id: 0, class: fr64x, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 1, class: fr64x, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 2, class: fr64x, preferred-register: '' }
+registers:
+ - { id: 0, class: vecr, preferred-register: '' }
+ - { id: 1, class: vecr, preferred-register: '' }
+ - { id: 2, class: vecr, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# SSE: %0 = COPY %xmm0
+# SSE-NEXT: %1 = COPY %xmm1
+# SSE-NEXT: %2 = DIVSDrr %0, %1
+# SSE-NEXT: %xmm0 = COPY %2
+# SSE-NEXT: RET 0, implicit %xmm0
+#
+# AVX: %0 = COPY %xmm0
+# AVX-NEXT: %1 = COPY %xmm1
+# AVX-NEXT: %2 = VDIVSDrr %0, %1
+# AVX-NEXT: %xmm0 = COPY %2
+# AVX-NEXT: RET 0, implicit %xmm0
+#
+# AVX512ALL: %0 = COPY %xmm0
+# AVX512ALL-NEXT: %1 = COPY %xmm1
+# AVX512ALL-NEXT: %2 = VDIVSDZrr %0, %1
+# AVX512ALL-NEXT: %xmm0 = COPY %2
+# AVX512ALL-NEXT: RET 0, implicit %xmm0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(s64) = COPY %xmm0
+ %1(s64) = COPY %xmm1
+ %2(s64) = G_FDIV %0, %1
+ %xmm0 = COPY %2(s64)
+ RET 0, implicit %xmm0
+
+...
diff --git a/test/CodeGen/X86/GlobalISel/select-fmul-scalar.mir b/test/CodeGen/X86/GlobalISel/select-fmul-scalar.mir
new file mode 100644
index 000000000000..98e5d303d7b1
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/select-fmul-scalar.mir
@@ -0,0 +1,119 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NO_AVX512VL --check-prefix=NO_AVX512F --check-prefix=SSE
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NO_AVX512VL --check-prefix=NO_AVX512F --check-prefix=AVX
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NO_AVX512VL --check-prefix=AVX512ALL --check-prefix=AVX512F
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -mattr=+avx512vl -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512ALL --check-prefix=AVX512VL
+--- |
+
+ define float @test_fmul_float(float %arg1, float %arg2) {
+ %ret = fmul float %arg1, %arg2
+ ret float %ret
+ }
+
+ define double @test_fmul_double(double %arg1, double %arg2) {
+ %ret = fmul double %arg1, %arg2
+ ret double %ret
+ }
+
+...
+---
+name: test_fmul_float
+# ALL-LABEL: name: test_fmul_float
+alignment: 4
+legalized: true
+regBankSelected: true
+# NO_AVX512F: registers:
+# NO_AVX512F-NEXT: - { id: 0, class: fr32, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 1, class: fr32, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 2, class: fr32, preferred-register: '' }
+#
+# AVX512ALL: registers:
+# AVX512ALL-NEXT: - { id: 0, class: fr32x, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 1, class: fr32x, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 2, class: fr32x, preferred-register: '' }
+registers:
+ - { id: 0, class: vecr, preferred-register: '' }
+ - { id: 1, class: vecr, preferred-register: '' }
+ - { id: 2, class: vecr, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# SSE: %0 = COPY %xmm0
+# SSE-NEXT: %1 = COPY %xmm1
+# SSE-NEXT: %2 = MULSSrr %0, %1
+# SSE-NEXT: %xmm0 = COPY %2
+# SSE-NEXT: RET 0, implicit %xmm0
+#
+# AVX: %0 = COPY %xmm0
+# AVX-NEXT: %1 = COPY %xmm1
+# AVX-NEXT: %2 = VMULSSrr %0, %1
+# AVX-NEXT: %xmm0 = COPY %2
+# AVX-NEXT: RET 0, implicit %xmm0
+#
+# AVX512ALL: %0 = COPY %xmm0
+# AVX512ALL-NEXT: %1 = COPY %xmm1
+# AVX512ALL-NEXT: %2 = VMULSSZrr %0, %1
+# AVX512ALL-NEXT: %xmm0 = COPY %2
+# AVX512ALL-NEXT: RET 0, implicit %xmm0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(s32) = COPY %xmm0
+ %1(s32) = COPY %xmm1
+ %2(s32) = G_FMUL %0, %1
+ %xmm0 = COPY %2(s32)
+ RET 0, implicit %xmm0
+
+...
+---
+name: test_fmul_double
+# ALL-LABEL: name: test_fmul_double
+alignment: 4
+legalized: true
+regBankSelected: true
+# NO_AVX512F: registers:
+# NO_AVX512F-NEXT: - { id: 0, class: fr64, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 1, class: fr64, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 2, class: fr64, preferred-register: '' }
+#
+# AVX512ALL: registers:
+# AVX512ALL-NEXT: - { id: 0, class: fr64x, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 1, class: fr64x, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 2, class: fr64x, preferred-register: '' }
+registers:
+ - { id: 0, class: vecr, preferred-register: '' }
+ - { id: 1, class: vecr, preferred-register: '' }
+ - { id: 2, class: vecr, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# SSE: %0 = COPY %xmm0
+# SSE-NEXT: %1 = COPY %xmm1
+# SSE-NEXT: %2 = MULSDrr %0, %1
+# SSE-NEXT: %xmm0 = COPY %2
+# SSE-NEXT: RET 0, implicit %xmm0
+#
+# AVX: %0 = COPY %xmm0
+# AVX-NEXT: %1 = COPY %xmm1
+# AVX-NEXT: %2 = VMULSDrr %0, %1
+# AVX-NEXT: %xmm0 = COPY %2
+# AVX-NEXT: RET 0, implicit %xmm0
+#
+# AVX512ALL: %0 = COPY %xmm0
+# AVX512ALL-NEXT: %1 = COPY %xmm1
+# AVX512ALL-NEXT: %2 = VMULSDZrr %0, %1
+# AVX512ALL-NEXT: %xmm0 = COPY %2
+# AVX512ALL-NEXT: RET 0, implicit %xmm0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(s64) = COPY %xmm0
+ %1(s64) = COPY %xmm1
+ %2(s64) = G_FMUL %0, %1
+ %xmm0 = COPY %2(s64)
+ RET 0, implicit %xmm0
+
+...
diff --git a/test/CodeGen/X86/GlobalISel/select-fsub-scalar.mir b/test/CodeGen/X86/GlobalISel/select-fsub-scalar.mir
new file mode 100644
index 000000000000..9f58327d9bb6
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/select-fsub-scalar.mir
@@ -0,0 +1,119 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NO_AVX512VL --check-prefix=NO_AVX512F --check-prefix=SSE
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NO_AVX512VL --check-prefix=NO_AVX512F --check-prefix=AVX
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NO_AVX512VL --check-prefix=AVX512ALL --check-prefix=AVX512F
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -mattr=+avx512vl -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512ALL --check-prefix=AVX512VL
+--- |
+
+ define float @test_fsub_float(float %arg1, float %arg2) {
+ %ret = fsub float %arg1, %arg2
+ ret float %ret
+ }
+
+ define double @test_fsub_double(double %arg1, double %arg2) {
+ %ret = fsub double %arg1, %arg2
+ ret double %ret
+ }
+
+...
+---
+name: test_fsub_float
+# ALL-LABEL: name: test_fsub_float
+alignment: 4
+legalized: true
+regBankSelected: true
+# NO_AVX512F: registers:
+# NO_AVX512F-NEXT: - { id: 0, class: fr32, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 1, class: fr32, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 2, class: fr32, preferred-register: '' }
+#
+# AVX512ALL: registers:
+# AVX512ALL-NEXT: - { id: 0, class: fr32x, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 1, class: fr32x, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 2, class: fr32x, preferred-register: '' }
+registers:
+ - { id: 0, class: vecr, preferred-register: '' }
+ - { id: 1, class: vecr, preferred-register: '' }
+ - { id: 2, class: vecr, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# SSE: %0 = COPY %xmm0
+# SSE-NEXT: %1 = COPY %xmm1
+# SSE-NEXT: %2 = SUBSSrr %0, %1
+# SSE-NEXT: %xmm0 = COPY %2
+# SSE-NEXT: RET 0, implicit %xmm0
+#
+# AVX: %0 = COPY %xmm0
+# AVX-NEXT: %1 = COPY %xmm1
+# AVX-NEXT: %2 = VSUBSSrr %0, %1
+# AVX-NEXT: %xmm0 = COPY %2
+# AVX-NEXT: RET 0, implicit %xmm0
+#
+# AVX512ALL: %0 = COPY %xmm0
+# AVX512ALL-NEXT: %1 = COPY %xmm1
+# AVX512ALL-NEXT: %2 = VSUBSSZrr %0, %1
+# AVX512ALL-NEXT: %xmm0 = COPY %2
+# AVX512ALL-NEXT: RET 0, implicit %xmm0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(s32) = COPY %xmm0
+ %1(s32) = COPY %xmm1
+ %2(s32) = G_FSUB %0, %1
+ %xmm0 = COPY %2(s32)
+ RET 0, implicit %xmm0
+
+...
+---
+name: test_fsub_double
+# ALL-LABEL: name: test_fsub_double
+alignment: 4
+legalized: true
+regBankSelected: true
+# NO_AVX512F: registers:
+# NO_AVX512F-NEXT: - { id: 0, class: fr64, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 1, class: fr64, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 2, class: fr64, preferred-register: '' }
+#
+# AVX512ALL: registers:
+# AVX512ALL-NEXT: - { id: 0, class: fr64x, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 1, class: fr64x, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 2, class: fr64x, preferred-register: '' }
+registers:
+ - { id: 0, class: vecr, preferred-register: '' }
+ - { id: 1, class: vecr, preferred-register: '' }
+ - { id: 2, class: vecr, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# SSE: %0 = COPY %xmm0
+# SSE-NEXT: %1 = COPY %xmm1
+# SSE-NEXT: %2 = SUBSDrr %0, %1
+# SSE-NEXT: %xmm0 = COPY %2
+# SSE-NEXT: RET 0, implicit %xmm0
+#
+# AVX: %0 = COPY %xmm0
+# AVX-NEXT: %1 = COPY %xmm1
+# AVX-NEXT: %2 = VSUBSDrr %0, %1
+# AVX-NEXT: %xmm0 = COPY %2
+# AVX-NEXT: RET 0, implicit %xmm0
+#
+# AVX512ALL: %0 = COPY %xmm0
+# AVX512ALL-NEXT: %1 = COPY %xmm1
+# AVX512ALL-NEXT: %2 = VSUBSDZrr %0, %1
+# AVX512ALL-NEXT: %xmm0 = COPY %2
+# AVX512ALL-NEXT: RET 0, implicit %xmm0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(s64) = COPY %xmm0
+ %1(s64) = COPY %xmm1
+ %2(s64) = G_FSUB %0, %1
+ %xmm0 = COPY %2(s64)
+ RET 0, implicit %xmm0
+
+...
diff --git a/test/CodeGen/X86/GlobalISel/select-merge-vec256.mir b/test/CodeGen/X86/GlobalISel/select-merge-vec256.mir
new file mode 100644
index 000000000000..8e31a904e360
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/select-merge-vec256.mir
@@ -0,0 +1,52 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=AVX
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f,+avx512vl -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=AVX512VL
+--- |
+ define void @test_merge() {
+ ret void
+ }
+...
+---
+name: test_merge
+# AVX-LABEL: name: test_merge
+#
+# AVX512VL-LABEL: name: test_merge
+alignment: 4
+legalized: true
+regBankSelected: true
+# AVX: registers:
+# AVX-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# AVX-NEXT: - { id: 1, class: vr256, preferred-register: '' }
+# AVX-NEXT: - { id: 2, class: vr256, preferred-register: '' }
+# AVX-NEXT: - { id: 3, class: vr256, preferred-register: '' }
+#
+# AVX512VL: registers:
+# AVX512VL-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr256x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr256x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 3, class: vr256x, preferred-register: '' }
+registers:
+ - { id: 0, class: vecr }
+ - { id: 1, class: vecr }
+# AVX: %0 = IMPLICIT_DEF
+# AVX-NEXT: undef %2.sub_xmm = COPY %0
+# AVX-NEXT: %3 = VINSERTF128rr %2, %0, 1
+# AVX-NEXT: %1 = COPY %3
+# AVX-NEXT: %ymm0 = COPY %1
+# AVX-NEXT: RET 0, implicit %ymm0
+#
+# AVX512VL: %0 = IMPLICIT_DEF
+# AVX512VL-NEXT: undef %2.sub_xmm = COPY %0
+# AVX512VL-NEXT: %3 = VINSERTF32x4Z256rr %2, %0, 1
+# AVX512VL-NEXT: %1 = COPY %3
+# AVX512VL-NEXT: %ymm0 = COPY %1
+# AVX512VL-NEXT: RET 0, implicit %ymm0
+body: |
+ bb.1 (%ir-block.0):
+
+ %0(<4 x s32>) = IMPLICIT_DEF
+ %1(<8 x s32>) = G_MERGE_VALUES %0(<4 x s32>), %0(<4 x s32>)
+ %ymm0 = COPY %1(<8 x s32>)
+ RET 0, implicit %ymm0
+
+...
+
diff --git a/test/CodeGen/X86/GlobalISel/select-merge-vec512.mir b/test/CodeGen/X86/GlobalISel/select-merge-vec512.mir
new file mode 100644
index 000000000000..a072d582e505
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/select-merge-vec512.mir
@@ -0,0 +1,74 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL
+--- |
+ define void @test_merge_v128() {
+ ret void
+ }
+
+ define void @test_merge_v256() {
+ ret void
+ }
+
+...
+---
+name: test_merge_v128
+# ALL-LABEL: name: test_merge_v128
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 3, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 4, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 5, class: vr512, preferred-register: '' }
+registers:
+ - { id: 0, class: vecr }
+ - { id: 1, class: vecr }
+# ALL: %0 = IMPLICIT_DEF
+# ALL-NEXT: undef %2.sub_xmm = COPY %0
+# ALL-NEXT: %3 = VINSERTF32x4Zrr %2, %0, 1
+# ALL-NEXT: %4 = VINSERTF32x4Zrr %3, %0, 2
+# ALL-NEXT: %5 = VINSERTF32x4Zrr %4, %0, 3
+# ALL-NEXT: %1 = COPY %5
+# ALL-NEXT: %zmm0 = COPY %1
+# ALL-NEXT: RET 0, implicit %zmm0
+body: |
+ bb.1 (%ir-block.0):
+
+ %0(<4 x s32>) = IMPLICIT_DEF
+ %1(<16 x s32>) = G_MERGE_VALUES %0(<4 x s32>), %0(<4 x s32>), %0(<4 x s32>), %0(<4 x s32>)
+ %zmm0 = COPY %1(<16 x s32>)
+ RET 0, implicit %zmm0
+
+...
+---
+name: test_merge_v256
+# ALL-LABEL: name: test_merge_v256
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: vr256x, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 3, class: vr512, preferred-register: '' }
+registers:
+ - { id: 0, class: vecr }
+ - { id: 1, class: vecr }
+# ALL: %0 = IMPLICIT_DEF
+# ALL-NEXT: undef %2.sub_ymm = COPY %0
+# ALL-NEXT: %3 = VINSERTF64x4Zrr %2, %0, 1
+# ALL-NEXT: %1 = COPY %3
+# ALL-NEXT: %zmm0 = COPY %1
+# ALL-NEXT: RET 0, implicit %zmm0
+body: |
+ bb.1 (%ir-block.0):
+
+ %0(<8 x s32>) = IMPLICIT_DEF
+ %1(<16 x s32>) = G_MERGE_VALUES %0(<8 x s32>), %0(<8 x s32>)
+ %zmm0 = COPY %1(<16 x s32>)
+ RET 0, implicit %zmm0
+
+...
+
diff --git a/test/CodeGen/X86/GlobalISel/select-or-scalar.mir b/test/CodeGen/X86/GlobalISel/select-or-scalar.mir
new file mode 100644
index 000000000000..4f7e48207838
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/select-or-scalar.mir
@@ -0,0 +1,160 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL
+
+--- |
+ define i8 @test_or_i8(i8 %arg1, i8 %arg2) {
+ %ret = or i8 %arg1, %arg2
+ ret i8 %ret
+ }
+
+ define i16 @test_or_i16(i16 %arg1, i16 %arg2) {
+ %ret = or i16 %arg1, %arg2
+ ret i16 %ret
+ }
+
+ define i32 @test_or_i32(i32 %arg1, i32 %arg2) {
+ %ret = or i32 %arg1, %arg2
+ ret i32 %ret
+ }
+
+ define i64 @test_or_i64(i64 %arg1, i64 %arg2) {
+ %ret = or i64 %arg1, %arg2
+ ret i64 %ret
+ }
+
+...
+---
+name: test_or_i8
+# ALL-LABEL: name: test_or_i8
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: gr8, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr8, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr8, preferred-register: '' }
+registers:
+ - { id: 0, class: gpr, preferred-register: '' }
+ - { id: 1, class: gpr, preferred-register: '' }
+ - { id: 2, class: gpr, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# ALL: %0 = COPY %dil
+# ALL-NEXT: %1 = COPY %sil
+# ALL-NEXT: %2 = OR8rr %0, %1, implicit-def %eflags
+# ALL-NEXT: %al = COPY %2
+# ALL-NEXT: RET 0, implicit %al
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %edi, %esi
+
+ %0(s8) = COPY %edi
+ %1(s8) = COPY %esi
+ %2(s8) = G_OR %0, %1
+ %al = COPY %2(s8)
+ RET 0, implicit %al
+
+...
+---
+name: test_or_i16
+# ALL-LABEL: name: test_or_i16
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: gr16, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr16, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr16, preferred-register: '' }
+registers:
+ - { id: 0, class: gpr, preferred-register: '' }
+ - { id: 1, class: gpr, preferred-register: '' }
+ - { id: 2, class: gpr, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# ALL: %0 = COPY %di
+# ALL-NEXT: %1 = COPY %si
+# ALL-NEXT: %2 = OR16rr %0, %1, implicit-def %eflags
+# ALL-NEXT: %ax = COPY %2
+# ALL-NEXT: RET 0, implicit %ax
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %edi, %esi
+
+ %0(s16) = COPY %edi
+ %1(s16) = COPY %esi
+ %2(s16) = G_OR %0, %1
+ %ax = COPY %2(s16)
+ RET 0, implicit %ax
+
+...
+---
+name: test_or_i32
+# ALL-LABEL: name: test_or_i32
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' }
+registers:
+ - { id: 0, class: gpr, preferred-register: '' }
+ - { id: 1, class: gpr, preferred-register: '' }
+ - { id: 2, class: gpr, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# ALL: %0 = COPY %edi
+# ALL-NEXT: %1 = COPY %esi
+# ALL-NEXT: %2 = OR32rr %0, %1, implicit-def %eflags
+# ALL-NEXT: %eax = COPY %2
+# ALL-NEXT: RET 0, implicit %eax
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %edi, %esi
+
+ %0(s32) = COPY %edi
+ %1(s32) = COPY %esi
+ %2(s32) = G_OR %0, %1
+ %eax = COPY %2(s32)
+ RET 0, implicit %eax
+
+...
+---
+name: test_or_i64
+# ALL-LABEL: name: test_or_i64
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: gr64, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr64, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr64, preferred-register: '' }
+registers:
+ - { id: 0, class: gpr, preferred-register: '' }
+ - { id: 1, class: gpr, preferred-register: '' }
+ - { id: 2, class: gpr, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# ALL: %0 = COPY %rdi
+# ALL-NEXT: %1 = COPY %rsi
+# ALL-NEXT: %2 = OR64rr %0, %1, implicit-def %eflags
+# ALL-NEXT: %rax = COPY %2
+# ALL-NEXT: RET 0, implicit %rax
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %rdi, %rsi
+
+ %0(s64) = COPY %rdi
+ %1(s64) = COPY %rsi
+ %2(s64) = G_OR %0, %1
+ %rax = COPY %2(s64)
+ RET 0, implicit %rax
+
+...
diff --git a/test/CodeGen/X86/GlobalISel/select-sub.mir b/test/CodeGen/X86/GlobalISel/select-sub.mir
index 4768a2d93222..d47f77828c9b 100644
--- a/test/CodeGen/X86/GlobalISel/select-sub.mir
+++ b/test/CodeGen/X86/GlobalISel/select-sub.mir
@@ -14,16 +14,6 @@
ret i32 %ret
}
 
- define float @test_sub_float(float %arg1, float %arg2) {
- %ret = fsub float %arg1, %arg2
- ret float %ret
- }
-
- define double @test_sub_double(double %arg1, double %arg2) {
- %ret = fsub double %arg1, %arg2
- ret double %ret
- }
-
define <4 x i32> @test_sub_v4i32(<4 x i32> %arg1, <4 x i32> %arg2) {
%ret = sub <4 x i32> %arg1, %arg2
ret <4 x i32> %ret
@@ -87,73 +77,6 @@ body: |
 
 ...
---
-name: test_sub_float
-alignment: 4
-legalized: true
-regBankSelected: true
-selected: false
-tracksRegLiveness: true
-# ALL: registers:
-# NO_AVX512F-NEXT: - { id: 0, class: fr32, preferred-register: '' }
-# NO_AVX512F-NEXT: - { id: 1, class: fr32, preferred-register: '' }
-# NO_AVX512F-NEXT: - { id: 2, class: fr32, preferred-register: '' }
-# AVX512ALL-NEXT: - { id: 0, class: fr32x, preferred-register: '' }
-# AVX512ALL-NEXT: - { id: 1, class: fr32x, preferred-register: '' }
-# AVX512ALL-NEXT: - { id: 2, class: fr32x, preferred-register: '' }
-registers:
- - { id: 0, class: vecr }
- - { id: 1, class: vecr }
- - { id: 2, class: vecr }
-# ALL: %0 = COPY %xmm0
-# ALL-NEXT: %1 = COPY %xmm1
-# SSE-NEXT: %2 = SUBSSrr %0, %1
-# AVX-NEXT: %2 = VSUBSSrr %0, %1
-# AVX512F-NEXT: %2 = VSUBSSZrr %0, %1
-body: |
- bb.1 (%ir-block.0):
- liveins: %xmm0, %xmm1
-
- %0(s32) = COPY %xmm0
- %1(s32) = COPY %xmm1
- %2(s32) = G_FSUB %0, %1
- %xmm0 = COPY %2(s32)
- RET 0, implicit %xmm0
-
-...
----
-name: test_sub_double
-alignment: 4
-legalized: true
-regBankSelected: true
-selected: false
-tracksRegLiveness: true
-# ALL: registers:
-# NO_AVX512F-NEXT: - { id: 0, class: fr64, preferred-register: '' }
-# NO_AVX512F-NEXT: - { id: 1, class: fr64, preferred-register: '' }
-# NO_AVX512F-NEXT: - { id: 2, class: fr64, preferred-register: '' }
-# AVX512ALL-NEXT: - { id: 0, class: fr64x, preferred-register: '' }
-# AVX512ALL-NEXT: - { id: 1, class: fr64x, preferred-register: '' }
-# AVX512ALL-NEXT: - { id: 2, class: fr64x, preferred-register: '' }
-registers:
- - { id: 0, class: vecr }
- - { id: 1, class: vecr }
- - { id: 2, class: vecr }
-# ALL: %0 = COPY %xmm0
-# ALL-NEXT: %1 = COPY %xmm1
-# SSE-NEXT: %2 = SUBSDrr %0, %1
-# AVX-NEXT: %2 = VSUBSDrr %0, %1
-# AVX512F-NEXT: %2 = VSUBSDZrr %0, %1
-body: |
- bb.1 (%ir-block.0):
- liveins: %xmm0, %xmm1
-
- %0(s64) = COPY %xmm0
- %1(s64) = COPY %xmm1
- %2(s64) = G_FSUB %0, %1
- %xmm0 = COPY %2(s64)
- RET 0, implicit %xmm0
-...
----
name: test_sub_v4i32
alignment: 4
legalized: true
diff --git a/test/CodeGen/X86/GlobalISel/select-xor-scalar.mir b/test/CodeGen/X86/GlobalISel/select-xor-scalar.mir
new file mode 100644
index 000000000000..9d03c6a3f1a8
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/select-xor-scalar.mir
@@ -0,0 +1,160 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL
+
+--- |
+ define i8 @test_xor_i8(i8 %arg1, i8 %arg2) {
+ %ret = xor i8 %arg1, %arg2
+ ret i8 %ret
+ }
+
+ define i16 @test_xor_i16(i16 %arg1, i16 %arg2) {
+ %ret = xor i16 %arg1, %arg2
+ ret i16 %ret
+ }
+
+ define i32 @test_xor_i32(i32 %arg1, i32 %arg2) {
+ %ret = xor i32 %arg1, %arg2
+ ret i32 %ret
+ }
+
+ define i64 @test_xor_i64(i64 %arg1, i64 %arg2) {
+ %ret = xor i64 %arg1, %arg2
+ ret i64 %ret
+ }
+
+...
+---
+name: test_xor_i8
+# ALL-LABEL: name: test_xor_i8
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: gr8, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr8, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr8, preferred-register: '' }
+registers:
+ - { id: 0, class: gpr, preferred-register: '' }
+ - { id: 1, class: gpr, preferred-register: '' }
+ - { id: 2, class: gpr, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# ALL: %0 = COPY %dil
+# ALL-NEXT: %1 = COPY %sil
+# ALL-NEXT: %2 = XOR8rr %0, %1, implicit-def %eflags
+# ALL-NEXT: %al = COPY %2
+# ALL-NEXT: RET 0, implicit %al
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %edi, %esi
+
+ %0(s8) = COPY %edi
+ %1(s8) = COPY %esi
+ %2(s8) = G_XOR %0, %1
+ %al = COPY %2(s8)
+ RET 0, implicit %al
+
+...
+---
+name: test_xor_i16
+# ALL-LABEL: name: test_xor_i16
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: gr16, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr16, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr16, preferred-register: '' }
+registers:
+ - { id: 0, class: gpr, preferred-register: '' }
+ - { id: 1, class: gpr, preferred-register: '' }
+ - { id: 2, class: gpr, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# ALL: %0 = COPY %di
+# ALL-NEXT: %1 = COPY %si
+# ALL-NEXT: %2 = XOR16rr %0, %1, implicit-def %eflags
+# ALL-NEXT: %ax = COPY %2
+# ALL-NEXT: RET 0, implicit %ax
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %edi, %esi
+
+ %0(s16) = COPY %edi
+ %1(s16) = COPY %esi
+ %2(s16) = G_XOR %0, %1
+ %ax = COPY %2(s16)
+ RET 0, implicit %ax
+
+...
+---
+name: test_xor_i32
+# ALL-LABEL: name: test_xor_i32
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' }
+registers:
+ - { id: 0, class: gpr, preferred-register: '' }
+ - { id: 1, class: gpr, preferred-register: '' }
+ - { id: 2, class: gpr, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# ALL: %0 = COPY %edi
+# ALL-NEXT: %1 = COPY %esi
+# ALL-NEXT: %2 = XOR32rr %0, %1, implicit-def %eflags
+# ALL-NEXT: %eax = COPY %2
+# ALL-NEXT: RET 0, implicit %eax
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %edi, %esi
+
+ %0(s32) = COPY %edi
+ %1(s32) = COPY %esi
+ %2(s32) = G_XOR %0, %1
+ %eax = COPY %2(s32)
+ RET 0, implicit %eax
+
+...
+---
+name: test_xor_i64
+# ALL-LABEL: name: test_xor_i64
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: gr64, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr64, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr64, preferred-register: '' }
+registers:
+ - { id: 0, class: gpr, preferred-register: '' }
+ - { id: 1, class: gpr, preferred-register: '' }
+ - { id: 2, class: gpr, preferred-register: '' }
+liveins:
+fixedStack:
+stack:
+constants:
+# ALL: %0 = COPY %rdi
+# ALL-NEXT: %1 = COPY %rsi
+# ALL-NEXT: %2 = XOR64rr %0, %1, implicit-def %eflags
+# ALL-NEXT: %rax = COPY %2
+# ALL-NEXT: RET 0, implicit %rax
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %rdi, %rsi
+
+ %0(s64) = COPY %rdi
+ %1(s64) = COPY %rsi
+ %2(s64) = G_XOR %0, %1
+ %rax = COPY %2(s64)
+ RET 0, implicit %rax
+
+...
diff --git a/test/CodeGen/X86/GlobalISel/xor-scalar.ll b/test/CodeGen/X86/GlobalISel/xor-scalar.ll
new file mode 100644
index 000000000000..9941db8abd9c
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/xor-scalar.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=ALL
+
+define i8 @test_xor_i8(i8 %arg1, i8 %arg2) {
+; ALL-LABEL: test_xor_i8:
+; ALL: # BB#0:
+; ALL-NEXT: xorb %dil, %sil
+; ALL-NEXT: movl %esi, %eax
+; ALL-NEXT: retq
+ %ret = xor i8 %arg1, %arg2
+ ret i8 %ret
+}
+
+define i16 @test_xor_i16(i16 %arg1, i16 %arg2) {
+; ALL-LABEL: test_xor_i16:
+; ALL: # BB#0:
+; ALL-NEXT: xorw %di, %si
+; ALL-NEXT: movl %esi, %eax
+; ALL-NEXT: retq
+ %ret = xor i16 %arg1, %arg2
+ ret i16 %ret
+}
+
+define i32 @test_xor_i32(i32 %arg1, i32 %arg2) {
+; ALL-LABEL: test_xor_i32:
+; ALL: # BB#0:
+; ALL-NEXT: xorl %edi, %esi
+; ALL-NEXT: movl %esi, %eax
+; ALL-NEXT: retq
+ %ret = xor i32 %arg1, %arg2
+ ret i32 %ret
+}
+
+define i64 @test_xor_i64(i64 %arg1, i64 %arg2) {
+; ALL-LABEL: test_xor_i64:
+; ALL: # BB#0:
+; ALL-NEXT: xorq %rdi, %rsi
+; ALL-NEXT: movq %rsi, %rax
+; ALL-NEXT: retq
+ %ret = xor i64 %arg1, %arg2
+ ret i64 %ret
+}
+
diff --git a/test/CodeGen/X86/atom-call-reg-indirect.ll b/test/CodeGen/X86/atom-call-reg-indirect.ll
index 663b6f1eee51..8045abc7bad6 100644
--- a/test/CodeGen/X86/atom-call-reg-indirect.ll
+++ b/test/CodeGen/X86/atom-call-reg-indirect.ll
@@ -4,6 +4,8 @@
; RUN: llc < %s -mcpu=core2 -mtriple=x86_64-linux | FileCheck -check-prefix=ATOM-NOT64 %s
; RUN: llc < %s -mcpu=slm -mtriple=i686-linux | FileCheck -check-prefix=SLM32 %s
; RUN: llc < %s -mcpu=slm -mtriple=x86_64-linux | FileCheck -check-prefix=SLM64 %s
+; RUN: llc < %s -mcpu=goldmont -mtriple=i686-linux | FileCheck -check-prefix=SLM32 %s
+; RUN: llc < %s -mcpu=goldmont -mtriple=x86_64-linux | FileCheck -check-prefix=SLM64 %s
 
 ; fn_ptr.ll
 
diff --git a/test/CodeGen/X86/atom-fixup-lea2.ll b/test/CodeGen/X86/atom-fixup-lea2.ll
index ec8261388734..68b376ea5cc2 100644
--- a/test/CodeGen/X86/atom-fixup-lea2.ll
+++ b/test/CodeGen/X86/atom-fixup-lea2.ll
@@ -1,4 +1,6 @@
; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s
+; RUN: llc < %s -mcpu=goldmont -mtriple=i686-linux | FileCheck %s
+
; CHECK:BB#5
; CHECK-NEXT:leal
; CHECK-NEXT:leal
diff --git a/test/CodeGen/X86/atom-sched.ll b/test/CodeGen/X86/atom-sched.ll
index b81359e2832b..bddb015a0dd5 100644
--- a/test/CodeGen/X86/atom-sched.ll
+++ b/test/CodeGen/X86/atom-sched.ll
@@ -1,5 +1,6 @@
; RUN: llc <%s -O2 -mcpu=atom -march=x86 -relocation-model=static | FileCheck -check-prefix=atom %s
; RUN: llc <%s -O2 -mcpu=slm -march=x86 -relocation-model=static | FileCheck -check-prefix=slm %s
+; RUN: llc <%s -O2 -mcpu=goldmont -march=x86 -relocation-model=static | FileCheck -check-prefix=slm %s
; RUN: llc <%s -O2 -mcpu=core2 -march=x86 -relocation-model=static | FileCheck %s
;
diff --git a/test/CodeGen/X86/avx2-arith.ll b/test/CodeGen/X86/avx2-arith.ll
index aec74424b9b2..017f54b40b2d 100644
--- a/test/CodeGen/X86/avx2-arith.ll
+++ b/test/CodeGen/X86/avx2-arith.ll
@@ -1,15 +1,15 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64
define <4 x i64> @test_vpaddq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
; X32-LABEL: test_vpaddq:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_vpaddq:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%x = add <4 x i64> %i, %j
@@ -18,12 +18,12 @@ define <4 x i64> @test_vpaddq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
define <8 x i32> @test_vpaddd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
; X32-LABEL: test_vpaddd:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_vpaddd:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%x = add <8 x i32> %i, %j
@@ -32,12 +32,12 @@ define <8 x i32> @test_vpaddd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
define <16 x i16> @test_vpaddw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
; X32-LABEL: test_vpaddw:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_vpaddw:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%x = add <16 x i16> %i, %j
@@ -46,12 +46,12 @@ define <16 x i16> @test_vpaddw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
define <32 x i8> @test_vpaddb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
; X32-LABEL: test_vpaddb:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_vpaddb:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%x = add <32 x i8> %i, %j
@@ -60,12 +60,12 @@ define <32 x i8> @test_vpaddb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
define <4 x i64> @test_vpsubq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
; X32-LABEL: test_vpsubq:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpsubq %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_vpsubq:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsubq %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%x = sub <4 x i64> %i, %j
@@ -74,12 +74,12 @@ define <4 x i64> @test_vpsubq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
define <8 x i32> @test_vpsubd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
; X32-LABEL: test_vpsubd:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpsubd %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_vpsubd:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsubd %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%x = sub <8 x i32> %i, %j
@@ -88,12 +88,12 @@ define <8 x i32> @test_vpsubd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
define <16 x i16> @test_vpsubw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
; X32-LABEL: test_vpsubw:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpsubw %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_vpsubw:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsubw %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%x = sub <16 x i16> %i, %j
@@ -102,12 +102,12 @@ define <16 x i16> @test_vpsubw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
define <32 x i8> @test_vpsubb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
; X32-LABEL: test_vpsubb:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpsubb %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_vpsubb:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsubb %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%x = sub <32 x i8> %i, %j
@@ -116,12 +116,12 @@ define <32 x i8> @test_vpsubb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
define <8 x i32> @test_vpmulld(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
; X32-LABEL: test_vpmulld:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpmulld %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_vpmulld:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpmulld %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%x = mul <8 x i32> %i, %j
@@ -130,12 +130,12 @@ define <8 x i32> @test_vpmulld(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
define <16 x i16> @test_vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
; X32-LABEL: test_vpmullw:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_vpmullw:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%x = mul <16 x i16> %i, %j
@@ -144,7 +144,7 @@ define <16 x i16> @test_vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone
define <16 x i8> @mul_v16i8(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
; X32-LABEL: mul_v16i8:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpmovsxbw %xmm1, %ymm1
; X32-NEXT: vpmovsxbw %xmm0, %ymm0
; X32-NEXT: vpmullw %ymm1, %ymm0, %ymm0
@@ -157,7 +157,7 @@ define <16 x i8> @mul_v16i8(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
; X32-NEXT: retl
;
; X64-LABEL: mul_v16i8:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpmovsxbw %xmm1, %ymm1
; X64-NEXT: vpmovsxbw %xmm0, %ymm0
; X64-NEXT: vpmullw %ymm1, %ymm0, %ymm0
@@ -174,7 +174,7 @@ define <16 x i8> @mul_v16i8(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
define <32 x i8> @mul_v32i8(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
; X32-LABEL: mul_v32i8:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vextracti128 $1, %ymm1, %xmm2
; X32-NEXT: vpmovsxbw %xmm2, %ymm2
; X32-NEXT: vextracti128 $1, %ymm0, %xmm3
@@ -196,7 +196,7 @@ define <32 x i8> @mul_v32i8(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
; X32-NEXT: retl
;
; X64-LABEL: mul_v32i8:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vextracti128 $1, %ymm1, %xmm2
; X64-NEXT: vpmovsxbw %xmm2, %ymm2
; X64-NEXT: vextracti128 $1, %ymm0, %xmm3
@@ -222,7 +222,7 @@ define <32 x i8> @mul_v32i8(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
define <4 x i64> @mul_v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
; X32-LABEL: mul_v4i64:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpsrlq $32, %ymm0, %ymm2
; X32-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
; X32-NEXT: vpsrlq $32, %ymm1, %ymm3
@@ -234,7 +234,7 @@ define <4 x i64> @mul_v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
; X32-NEXT: retl
;
; X64-LABEL: mul_v4i64:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsrlq $32, %ymm0, %ymm2
; X64-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
; X64-NEXT: vpsrlq $32, %ymm1, %ymm3
@@ -250,12 +250,12 @@ define <4 x i64> @mul_v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
define <8 x i32> @mul_const1(<8 x i32> %x) {
; X32-LABEL: mul_const1:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpaddd %ymm0, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: mul_const1:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpaddd %ymm0, %ymm0, %ymm0
; X64-NEXT: retq
%y = mul <8 x i32> %x, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
@@ -264,12 +264,12 @@ define <8 x i32> @mul_const1(<8 x i32> %x) {
define <4 x i64> @mul_const2(<4 x i64> %x) {
; X32-LABEL: mul_const2:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpsllq $2, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: mul_const2:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsllq $2, %ymm0, %ymm0
; X64-NEXT: retq
%y = mul <4 x i64> %x, <i64 4, i64 4, i64 4, i64 4>
@@ -278,12 +278,12 @@ define <4 x i64> @mul_const2(<4 x i64> %x) {
define <16 x i16> @mul_const3(<16 x i16> %x) {
; X32-LABEL: mul_const3:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpsllw $3, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: mul_const3:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsllw $3, %ymm0, %ymm0
; X64-NEXT: retq
%y = mul <16 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -292,13 +292,13 @@ define <16 x i16> @mul_const3(<16 x i16> %x) {
define <4 x i64> @mul_const4(<4 x i64> %x) {
; X32-LABEL: mul_const4:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpxor %ymm1, %ymm1, %ymm1
; X32-NEXT: vpsubq %ymm0, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: mul_const4:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpxor %ymm1, %ymm1, %ymm1
; X64-NEXT: vpsubq %ymm0, %ymm1, %ymm0
; X64-NEXT: retq
@@ -308,12 +308,12 @@ define <4 x i64> @mul_const4(<4 x i64> %x) {
define <8 x i32> @mul_const5(<8 x i32> %x) {
; X32-LABEL: mul_const5:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vxorps %ymm0, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: mul_const5:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vxorps %ymm0, %ymm0, %ymm0
; X64-NEXT: retq
%y = mul <8 x i32> %x, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -322,12 +322,12 @@ define <8 x i32> @mul_const5(<8 x i32> %x) {
define <8 x i32> @mul_const6(<8 x i32> %x) {
; X32-LABEL: mul_const6:
-; X32: ## BB#0:
-; X32-NEXT: vpmulld LCPI18_0, %ymm0, %ymm0
+; X32: # BB#0:
+; X32-NEXT: vpmulld {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: mul_const6:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpmulld {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT: retq
%y = mul <8 x i32> %x, <i32 0, i32 0, i32 0, i32 2, i32 0, i32 2, i32 0, i32 0>
@@ -336,13 +336,13 @@ define <8 x i32> @mul_const6(<8 x i32> %x) {
define <8 x i64> @mul_const7(<8 x i64> %x) {
; X32-LABEL: mul_const7:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpaddq %ymm0, %ymm0, %ymm0
; X32-NEXT: vpaddq %ymm1, %ymm1, %ymm1
; X32-NEXT: retl
;
; X64-LABEL: mul_const7:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpaddq %ymm0, %ymm0, %ymm0
; X64-NEXT: vpaddq %ymm1, %ymm1, %ymm1
; X64-NEXT: retq
@@ -352,12 +352,12 @@ define <8 x i64> @mul_const7(<8 x i64> %x) {
define <8 x i16> @mul_const8(<8 x i16> %x) {
; X32-LABEL: mul_const8:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpsllw $3, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: mul_const8:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsllw $3, %xmm0, %xmm0
; X64-NEXT: retq
%y = mul <8 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -366,14 +366,14 @@ define <8 x i16> @mul_const8(<8 x i16> %x) {
define <8 x i32> @mul_const9(<8 x i32> %x) {
; X32-LABEL: mul_const9:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: movl $2, %eax
; X32-NEXT: vmovd %eax, %xmm1
; X32-NEXT: vpmulld %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: mul_const9:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: movl $2, %eax
; X64-NEXT: vmovd %eax, %xmm1
; X64-NEXT: vpmulld %ymm1, %ymm0, %ymm0
@@ -385,13 +385,13 @@ define <8 x i32> @mul_const9(<8 x i32> %x) {
; %x * 0x01010101
define <4 x i32> @mul_const10(<4 x i32> %x) {
; X32-LABEL: mul_const10:
-; X32: ## BB#0:
-; X32-NEXT: vpbroadcastd LCPI22_0, %xmm1
+; X32: # BB#0:
+; X32-NEXT: vpbroadcastd {{\.LCPI.*}}, %xmm1
; X32-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: mul_const10:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
; X64-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; X64-NEXT: retq
@@ -402,13 +402,13 @@ define <4 x i32> @mul_const10(<4 x i32> %x) {
; %x * 0x80808080
define <4 x i32> @mul_const11(<4 x i32> %x) {
; X32-LABEL: mul_const11:
-; X32: ## BB#0:
-; X32-NEXT: vpbroadcastd LCPI23_0, %xmm1
+; X32: # BB#0:
+; X32-NEXT: vpbroadcastd {{\.LCPI.*}}, %xmm1
; X32-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: mul_const11:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
; X64-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; X64-NEXT: retq
diff --git a/test/CodeGen/X86/avx2-cmp.ll b/test/CodeGen/X86/avx2-cmp.ll
index e2b550383c8d..2369aa5ac9a0 100644
--- a/test/CodeGen/X86/avx2-cmp.ll
+++ b/test/CodeGen/X86/avx2-cmp.ll
@@ -1,15 +1,15 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64
define <8 x i32> @v8i32_cmpgt(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
; X32-LABEL: v8i32_cmpgt:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: v8i32_cmpgt:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0
; X64-NEXT: retq
%bincmp = icmp slt <8 x i32> %i, %j
@@ -19,12 +19,12 @@ define <8 x i32> @v8i32_cmpgt(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
define <4 x i64> @v4i64_cmpgt(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
; X32-LABEL: v4i64_cmpgt:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: v4i64_cmpgt:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; X64-NEXT: retq
%bincmp = icmp slt <4 x i64> %i, %j
@@ -34,12 +34,12 @@ define <4 x i64> @v4i64_cmpgt(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
define <16 x i16> @v16i16_cmpgt(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
; X32-LABEL: v16i16_cmpgt:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: v16i16_cmpgt:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; X64-NEXT: retq
%bincmp = icmp slt <16 x i16> %i, %j
@@ -49,12 +49,12 @@ define <16 x i16> @v16i16_cmpgt(<16 x i16> %i, <16 x i16> %j) nounwind readnone
define <32 x i8> @v32i8_cmpgt(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
; X32-LABEL: v32i8_cmpgt:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: v32i8_cmpgt:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; X64-NEXT: retq
%bincmp = icmp slt <32 x i8> %i, %j
@@ -64,12 +64,12 @@ define <32 x i8> @v32i8_cmpgt(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
define <8 x i32> @int256_cmpeq(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
; X32-LABEL: int256_cmpeq:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: int256_cmpeq:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%bincmp = icmp eq <8 x i32> %i, %j
@@ -79,12 +79,12 @@ define <8 x i32> @int256_cmpeq(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
define <4 x i64> @v4i64_cmpeq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
; X32-LABEL: v4i64_cmpeq:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: v4i64_cmpeq:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%bincmp = icmp eq <4 x i64> %i, %j
@@ -94,12 +94,12 @@ define <4 x i64> @v4i64_cmpeq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
define <16 x i16> @v16i16_cmpeq(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
; X32-LABEL: v16i16_cmpeq:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: v16i16_cmpeq:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%bincmp = icmp eq <16 x i16> %i, %j
@@ -109,12 +109,12 @@ define <16 x i16> @v16i16_cmpeq(<16 x i16> %i, <16 x i16> %j) nounwind readnone
define <32 x i8> @v32i8_cmpeq(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
; X32-LABEL: v32i8_cmpeq:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: v32i8_cmpeq:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%bincmp = icmp eq <32 x i8> %i, %j
diff --git a/test/CodeGen/X86/avx2-conversions.ll b/test/CodeGen/X86/avx2-conversions.ll
index 26edafbdb64f..60cc2cf199e6 100755
--- a/test/CodeGen/X86/avx2-conversions.ll
+++ b/test/CodeGen/X86/avx2-conversions.ll
@@ -1,21 +1,21 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64
define <4 x i32> @trunc4(<4 x i64> %A) nounwind {
; X32-LABEL: trunc4:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; X32-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: trunc4:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; X64-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X64-NEXT: vzeroupper
; X64-NEXT: retq
%B = trunc <4 x i64> %A to <4 x i32>
@@ -24,18 +24,18 @@ define <4 x i32> @trunc4(<4 x i64> %A) nounwind {
define <8 x i16> @trunc8(<8 x i32> %A) nounwind {
; X32-LABEL: trunc8:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; X32-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: trunc8:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; X64-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X64-NEXT: vzeroupper
; X64-NEXT: retq
%B = trunc <8 x i32> %A to <8 x i16>
@@ -44,12 +44,12 @@ define <8 x i16> @trunc8(<8 x i32> %A) nounwind {
define <4 x i64> @sext4(<4 x i32> %A) nounwind {
; X32-LABEL: sext4:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpmovsxdq %xmm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: sext4:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpmovsxdq %xmm0, %ymm0
; X64-NEXT: retq
%B = sext <4 x i32> %A to <4 x i64>
@@ -58,12 +58,12 @@ define <4 x i64> @sext4(<4 x i32> %A) nounwind {
define <8 x i32> @sext8(<8 x i16> %A) nounwind {
; X32-LABEL: sext8:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpmovsxwd %xmm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: sext8:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpmovsxwd %xmm0, %ymm0
; X64-NEXT: retq
%B = sext <8 x i16> %A to <8 x i32>
@@ -72,12 +72,12 @@ define <8 x i32> @sext8(<8 x i16> %A) nounwind {
define <4 x i64> @zext4(<4 x i32> %A) nounwind {
; X32-LABEL: zext4:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X32-NEXT: retl
;
; X64-LABEL: zext4:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-NEXT: retq
%B = zext <4 x i32> %A to <4 x i64>
@@ -86,12 +86,12 @@ define <4 x i64> @zext4(<4 x i32> %A) nounwind {
define <8 x i32> @zext8(<8 x i16> %A) nounwind {
; X32-LABEL: zext8:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X32-NEXT: retl
;
; X64-LABEL: zext8:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT: retq
%B = zext <8 x i16> %A to <8 x i32>
@@ -100,13 +100,13 @@ define <8 x i32> @zext8(<8 x i16> %A) nounwind {
define <8 x i32> @zext_8i8_8i32(<8 x i8> %A) nounwind {
; X32-LABEL: zext_8i8_8i32:
-; X32: ## BB#0:
-; X32-NEXT: vpand LCPI6_0, %xmm0, %xmm0
+; X32: # BB#0:
+; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X32-NEXT: retl
;
; X64-LABEL: zext_8i8_8i32:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT: retq
@@ -116,12 +116,12 @@ define <8 x i32> @zext_8i8_8i32(<8 x i8> %A) nounwind {
define <16 x i16> @zext_16i8_16i16(<16 x i8> %z) {
; X32-LABEL: zext_16i8_16i16:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; X32-NEXT: retl
;
; X64-LABEL: zext_16i8_16i16:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; X64-NEXT: retq
%t = zext <16 x i8> %z to <16 x i16>
@@ -130,12 +130,12 @@ define <16 x i16> @zext_16i8_16i16(<16 x i8> %z) {
define <16 x i16> @sext_16i8_16i16(<16 x i8> %z) {
; X32-LABEL: sext_16i8_16i16:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpmovsxbw %xmm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: sext_16i8_16i16:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpmovsxbw %xmm0, %ymm0
; X64-NEXT: retq
%t = sext <16 x i8> %z to <16 x i16>
@@ -144,7 +144,7 @@ define <16 x i16> @sext_16i8_16i16(<16 x i8> %z) {
define <16 x i8> @trunc_16i16_16i8(<16 x i16> %z) {
; X32-LABEL: trunc_16i16_16i8:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vextracti128 $1, %ymm0, %xmm1
; X32-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; X32-NEXT: vpshufb %xmm2, %xmm1, %xmm1
@@ -154,7 +154,7 @@ define <16 x i8> @trunc_16i16_16i8(<16 x i16> %z) {
; X32-NEXT: retl
;
; X64-LABEL: trunc_16i16_16i8:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; X64-NEXT: vpshufb %xmm2, %xmm1, %xmm1
@@ -168,13 +168,13 @@ define <16 x i8> @trunc_16i16_16i8(<16 x i16> %z) {
define <4 x i64> @load_sext_test1(<4 x i32> *%ptr) {
; X32-LABEL: load_sext_test1:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpmovsxdq (%eax), %ymm0
; X32-NEXT: retl
;
; X64-LABEL: load_sext_test1:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpmovsxdq (%rdi), %ymm0
; X64-NEXT: retq
%X = load <4 x i32>, <4 x i32>* %ptr
@@ -184,13 +184,13 @@ define <4 x i64> @load_sext_test1(<4 x i32> *%ptr) {
define <4 x i64> @load_sext_test2(<4 x i8> *%ptr) {
; X32-LABEL: load_sext_test2:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpmovsxbq (%eax), %ymm0
; X32-NEXT: retl
;
; X64-LABEL: load_sext_test2:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpmovsxbq (%rdi), %ymm0
; X64-NEXT: retq
%X = load <4 x i8>, <4 x i8>* %ptr
@@ -200,13 +200,13 @@ define <4 x i64> @load_sext_test2(<4 x i8> *%ptr) {
define <4 x i64> @load_sext_test3(<4 x i16> *%ptr) {
; X32-LABEL: load_sext_test3:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpmovsxwq (%eax), %ymm0
; X32-NEXT: retl
;
; X64-LABEL: load_sext_test3:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpmovsxwq (%rdi), %ymm0
; X64-NEXT: retq
%X = load <4 x i16>, <4 x i16>* %ptr
@@ -216,13 +216,13 @@ define <4 x i64> @load_sext_test3(<4 x i16> *%ptr) {
define <8 x i32> @load_sext_test4(<8 x i16> *%ptr) {
; X32-LABEL: load_sext_test4:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpmovsxwd (%eax), %ymm0
; X32-NEXT: retl
;
; X64-LABEL: load_sext_test4:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpmovsxwd (%rdi), %ymm0
; X64-NEXT: retq
%X = load <8 x i16>, <8 x i16>* %ptr
@@ -232,13 +232,13 @@ define <8 x i32> @load_sext_test4(<8 x i16> *%ptr) {
define <8 x i32> @load_sext_test5(<8 x i8> *%ptr) {
; X32-LABEL: load_sext_test5:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpmovsxbd (%eax), %ymm0
; X32-NEXT: retl
;
; X64-LABEL: load_sext_test5:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpmovsxbd (%rdi), %ymm0
; X64-NEXT: retq
%X = load <8 x i8>, <8 x i8>* %ptr
diff --git a/test/CodeGen/X86/avx2-fma-fneg-combine.ll b/test/CodeGen/X86/avx2-fma-fneg-combine.ll
index 345943bd7303..019593cc0f80 100644
--- a/test/CodeGen/X86/avx2-fma-fneg-combine.ll
+++ b/test/CodeGen/X86/avx2-fma-fneg-combine.ll
@@ -1,17 +1,17 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2,+fma | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fma | FileCheck %s --check-prefix=X64
; This test checks combinations of FNEG and FMA intrinsics
define <8 x float> @test1(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; X32-LABEL: test1:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
; X32-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test1:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
; X64-NEXT: retq
entry:
@@ -24,12 +24,12 @@ declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x f
define <4 x float> @test2(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; X32-LABEL: test2:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
; X32-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test2:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
; X64-NEXT: retq
entry:
@@ -42,14 +42,14 @@ declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x
define <4 x float> @test3(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; X32-LABEL: test3:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
; X32-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0
-; X32-NEXT: vbroadcastss LCPI2_0, %xmm1
+; X32-NEXT: vbroadcastss {{\.LCPI.*}}, %xmm1
; X32-NEXT: vxorps %xmm1, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test3:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0
; X64-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
; X64-NEXT: vxorps %xmm1, %xmm0, %xmm0
@@ -64,12 +64,12 @@ declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %b, <4
define <8 x float> @test4(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; X32-LABEL: test4:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
; X32-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test4:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
; X64-NEXT: retq
entry:
@@ -80,14 +80,14 @@ entry:
define <8 x float> @test5(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; X32-LABEL: test5:
-; X32: ## BB#0: ## %entry
-; X32-NEXT: vbroadcastss LCPI4_0, %ymm3
+; X32: # BB#0: # %entry
+; X32-NEXT: vbroadcastss {{\.LCPI.*}}, %ymm3
; X32-NEXT: vxorps %ymm3, %ymm2, %ymm2
; X32-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test5:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: vbroadcastss {{.*}}(%rip), %ymm3
; X64-NEXT: vxorps %ymm3, %ymm2, %ymm2
; X64-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
@@ -103,12 +103,12 @@ declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x f
define <2 x double> @test6(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; X32-LABEL: test6:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
; X32-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test6:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0
; X64-NEXT: retq
entry:
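One detail in the avx2-fma-fneg-combine.ll RUN lines above deserves a note: unlike the sibling files, these add +fma explicitly. Dropping -mcpu=core-avx2 also drops the FMA feature it implied, and without it the llvm.x86.fma.* intrinsics used in this file would have no instruction to select to. A minimal sketch of the requirement (hypothetical test; the CHECK assumes the usual 213 form is chosen):
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fma | FileCheck %s
declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>)
define <4 x float> @fma_sketch(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
  %r = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %c)
  ret <4 x float> %r
}
; CHECK: vfmadd213ps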
diff --git a/test/CodeGen/X86/avx2-gather.ll b/test/CodeGen/X86/avx2-gather.ll
index d162b4755ee1..64dd6fa00616 100644
--- a/test/CodeGen/X86/avx2-gather.ll
+++ b/test/CodeGen/X86/avx2-gather.ll
@@ -1,13 +1,13 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64
declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*,
<4 x i32>, <4 x float>, i8) nounwind readonly
define <4 x float> @test_x86_avx2_gather_d_ps(i8* %a1, <4 x i32> %idx, <4 x float> %mask) {
; X32-LABEL: test_x86_avx2_gather_d_ps:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vxorps %xmm2, %xmm2, %xmm2
; X32-NEXT: vgatherdps %xmm1, (%eax,%xmm0,2), %xmm2
@@ -15,7 +15,7 @@ define <4 x float> @test_x86_avx2_gather_d_ps(i8* %a1, <4 x i32> %idx, <4 x floa
; X32-NEXT: retl
;
; X64-LABEL: test_x86_avx2_gather_d_ps:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vxorps %xmm2, %xmm2, %xmm2
; X64-NEXT: vgatherdps %xmm1, (%rdi,%xmm0,2), %xmm2
; X64-NEXT: vmovaps %xmm2, %xmm0
@@ -30,7 +30,7 @@ declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*,
define <2 x double> @test_x86_avx2_gather_d_pd(i8* %a1, <4 x i32> %idx, <2 x double> %mask) {
; X32-LABEL: test_x86_avx2_gather_d_pd:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; X32-NEXT: vgatherdpd %xmm1, (%eax,%xmm0,2), %xmm2
@@ -38,7 +38,7 @@ define <2 x double> @test_x86_avx2_gather_d_pd(i8* %a1, <4 x i32> %idx, <2 x dou
; X32-NEXT: retl
;
; X64-LABEL: test_x86_avx2_gather_d_pd:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; X64-NEXT: vgatherdpd %xmm1, (%rdi,%xmm0,2), %xmm2
; X64-NEXT: vmovapd %xmm2, %xmm0
@@ -53,7 +53,7 @@ declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*,
define <8 x float> @test_x86_avx2_gather_d_ps_256(i8* %a1, <8 x i32> %idx, <8 x float> %mask) {
; X32-LABEL: test_x86_avx2_gather_d_ps_256:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vxorps %ymm2, %ymm2, %ymm2
; X32-NEXT: vgatherdps %ymm1, (%eax,%ymm0,4), %ymm2
@@ -61,7 +61,7 @@ define <8 x float> @test_x86_avx2_gather_d_ps_256(i8* %a1, <8 x i32> %idx, <8 x
; X32-NEXT: retl
;
; X64-LABEL: test_x86_avx2_gather_d_ps_256:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vxorps %ymm2, %ymm2, %ymm2
; X64-NEXT: vgatherdps %ymm1, (%rdi,%ymm0,4), %ymm2
; X64-NEXT: vmovaps %ymm2, %ymm0
@@ -76,7 +76,7 @@ declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*,
define <4 x double> @test_x86_avx2_gather_d_pd_256(i8* %a1, <4 x i32> %idx, <4 x double> %mask) {
; X32-LABEL: test_x86_avx2_gather_d_pd_256:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vxorpd %ymm2, %ymm2, %ymm2
; X32-NEXT: vgatherdpd %ymm1, (%eax,%xmm0,8), %ymm2
@@ -84,7 +84,7 @@ define <4 x double> @test_x86_avx2_gather_d_pd_256(i8* %a1, <4 x i32> %idx, <4 x
; X32-NEXT: retl
;
; X64-LABEL: test_x86_avx2_gather_d_pd_256:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vxorpd %ymm2, %ymm2, %ymm2
; X64-NEXT: vgatherdpd %ymm1, (%rdi,%xmm0,8), %ymm2
; X64-NEXT: vmovapd %ymm2, %ymm0
@@ -96,7 +96,7 @@ define <4 x double> @test_x86_avx2_gather_d_pd_256(i8* %a1, <4 x i32> %idx, <4 x
define <2 x i64> @test_mm_i32gather_epi32(i32 *%a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_i32gather_epi32:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
@@ -105,7 +105,7 @@ define <2 x i64> @test_mm_i32gather_epi32(i32 *%a0, <2 x i64> %a1) {
; X32-NEXT: retl
;
; X64-LABEL: test_mm_i32gather_epi32:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpgatherdd %xmm2, (%rdi,%xmm0,2), %xmm1
@@ -122,7 +122,7 @@ declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*, <4 x i32>, <4 x i32>
define <2 x double> @test_mm_i32gather_pd(double *%a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_i32gather_pd:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; X32-NEXT: vxorpd %xmm1, %xmm1, %xmm1
@@ -131,7 +131,7 @@ define <2 x double> @test_mm_i32gather_pd(double *%a0, <2 x i64> %a1) {
; X32-NEXT: retl
;
; X64-LABEL: test_mm_i32gather_pd:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; X64-NEXT: vgatherdpd %xmm2, (%rdi,%xmm0,2), %xmm1
diff --git a/test/CodeGen/X86/avx2-logic.ll b/test/CodeGen/X86/avx2-logic.ll
index 9208d959a755..68d486699cbc 100644
--- a/test/CodeGen/X86/avx2-logic.ll
+++ b/test/CodeGen/X86/avx2-logic.ll
@@ -1,17 +1,17 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64
define <4 x i64> @vpandn(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
; X32-LABEL: vpandn:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
; X32-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; X32-NEXT: vpsubq %ymm1, %ymm0, %ymm1
; X32-NEXT: vpandn %ymm0, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: vpandn:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; X64-NEXT: vpsubq %ymm1, %ymm0, %ymm1
; X64-NEXT: vpandn %ymm0, %ymm1, %ymm0
@@ -26,14 +26,14 @@ entry:
define <4 x i64> @vpand(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
; X32-LABEL: vpand:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
; X32-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; X32-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; X32-NEXT: vpand %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: vpand:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; X64-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; X64-NEXT: vpand %ymm1, %ymm0, %ymm0
@@ -47,14 +47,14 @@ entry:
define <4 x i64> @vpor(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
; X32-LABEL: vpor:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
; X32-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; X32-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; X32-NEXT: vpor %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: vpor:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; X64-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; X64-NEXT: vpor %ymm1, %ymm0, %ymm0
@@ -68,14 +68,14 @@ entry:
define <4 x i64> @vpxor(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
; X32-LABEL: vpxor:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
; X32-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; X32-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; X32-NEXT: vpxor %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: vpxor:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; X64-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; X64-NEXT: vpxor %ymm1, %ymm0, %ymm0
@@ -89,14 +89,14 @@ entry:
define <32 x i8> @vpblendvb(<32 x i1> %cond, <32 x i8> %x, <32 x i8> %y) {
; X32-LABEL: vpblendvb:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpsllw $7, %ymm0, %ymm0
-; X32-NEXT: vpand LCPI4_0, %ymm0, %ymm0
+; X32-NEXT: vpand {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: vpblendvb:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsllw $7, %ymm0, %ymm0
; X64-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
@@ -107,12 +107,12 @@ define <32 x i8> @vpblendvb(<32 x i1> %cond, <32 x i8> %x, <32 x i8> %y) {
define <8 x i32> @allOnes() nounwind {
; X32-LABEL: allOnes:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: allOnes:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; X64-NEXT: retq
ret <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
@@ -120,12 +120,12 @@ define <8 x i32> @allOnes() nounwind {
define <16 x i16> @allOnes2() nounwind {
; X32-LABEL: allOnes2:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: allOnes2:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; X64-NEXT: retq
ret <16 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
diff --git a/test/CodeGen/X86/avx2-phaddsub.ll b/test/CodeGen/X86/avx2-phaddsub.ll
index 9eafac902b86..232a3326fa13 100644
--- a/test/CodeGen/X86/avx2-phaddsub.ll
+++ b/test/CodeGen/X86/avx2-phaddsub.ll
@@ -1,15 +1,15 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64
define <16 x i16> @phaddw1(<16 x i16> %x, <16 x i16> %y) {
; X32-LABEL: phaddw1:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vphaddw %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: phaddw1:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vphaddw %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%a = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
@@ -20,12 +20,12 @@ define <16 x i16> @phaddw1(<16 x i16> %x, <16 x i16> %y) {
define <16 x i16> @phaddw2(<16 x i16> %x, <16 x i16> %y) {
; X32-LABEL: phaddw2:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vphaddw %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: phaddw2:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vphaddw %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%a = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
@@ -36,12 +36,12 @@ define <16 x i16> @phaddw2(<16 x i16> %x, <16 x i16> %y) {
define <8 x i32> @phaddd1(<8 x i32> %x, <8 x i32> %y) {
; X32-LABEL: phaddd1:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vphaddd %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: phaddd1:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vphaddd %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
@@ -52,12 +52,12 @@ define <8 x i32> @phaddd1(<8 x i32> %x, <8 x i32> %y) {
define <8 x i32> @phaddd2(<8 x i32> %x, <8 x i32> %y) {
; X32-LABEL: phaddd2:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vphaddd %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: phaddd2:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vphaddd %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 1, i32 2, i32 9, i32 10, i32 5, i32 6, i32 13, i32 14>
@@ -68,12 +68,12 @@ define <8 x i32> @phaddd2(<8 x i32> %x, <8 x i32> %y) {
define <8 x i32> @phaddd3(<8 x i32> %x) {
; X32-LABEL: phaddd3:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vphaddd %ymm0, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: phaddd3:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vphaddd %ymm0, %ymm0, %ymm0
; X64-NEXT: retq
%a = shufflevector <8 x i32> %x, <8 x i32> undef, <8 x i32> <i32 undef, i32 2, i32 8, i32 10, i32 4, i32 6, i32 undef, i32 14>
@@ -84,12 +84,12 @@ define <8 x i32> @phaddd3(<8 x i32> %x) {
define <16 x i16> @phsubw1(<16 x i16> %x, <16 x i16> %y) {
; X32-LABEL: phsubw1:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vphsubw %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: phsubw1:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vphsubw %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%a = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
@@ -100,12 +100,12 @@ define <16 x i16> @phsubw1(<16 x i16> %x, <16 x i16> %y) {
define <8 x i32> @phsubd1(<8 x i32> %x, <8 x i32> %y) {
; X32-LABEL: phsubd1:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vphsubd %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: phsubd1:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vphsubd %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
@@ -116,12 +116,12 @@ define <8 x i32> @phsubd1(<8 x i32> %x, <8 x i32> %y) {
define <8 x i32> @phsubd2(<8 x i32> %x, <8 x i32> %y) {
; X32-LABEL: phsubd2:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vphsubd %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: phsubd2:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vphsubd %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 undef, i32 8, i32 undef, i32 4, i32 6, i32 12, i32 14>
diff --git a/test/CodeGen/X86/avx2-shift.ll b/test/CodeGen/X86/avx2-shift.ll
index 4345bd6f7926..47bbba2c7e08 100644
--- a/test/CodeGen/X86/avx2-shift.ll
+++ b/test/CodeGen/X86/avx2-shift.ll
@@ -1,15 +1,15 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64
define <4 x i32> @variable_shl0(<4 x i32> %x, <4 x i32> %y) {
; X32-LABEL: variable_shl0:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: variable_shl0:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
; X64-NEXT: retq
%k = shl <4 x i32> %x, %y
@@ -18,12 +18,12 @@ define <4 x i32> @variable_shl0(<4 x i32> %x, <4 x i32> %y) {
define <8 x i32> @variable_shl1(<8 x i32> %x, <8 x i32> %y) {
; X32-LABEL: variable_shl1:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: variable_shl1:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%k = shl <8 x i32> %x, %y
@@ -32,12 +32,12 @@ define <8 x i32> @variable_shl1(<8 x i32> %x, <8 x i32> %y) {
define <2 x i64> @variable_shl2(<2 x i64> %x, <2 x i64> %y) {
; X32-LABEL: variable_shl2:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: variable_shl2:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
; X64-NEXT: retq
%k = shl <2 x i64> %x, %y
@@ -46,12 +46,12 @@ define <2 x i64> @variable_shl2(<2 x i64> %x, <2 x i64> %y) {
define <4 x i64> @variable_shl3(<4 x i64> %x, <4 x i64> %y) {
; X32-LABEL: variable_shl3:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: variable_shl3:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%k = shl <4 x i64> %x, %y
@@ -60,12 +60,12 @@ define <4 x i64> @variable_shl3(<4 x i64> %x, <4 x i64> %y) {
define <4 x i32> @variable_srl0(<4 x i32> %x, <4 x i32> %y) {
; X32-LABEL: variable_srl0:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: variable_srl0:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; X64-NEXT: retq
%k = lshr <4 x i32> %x, %y
@@ -74,12 +74,12 @@ define <4 x i32> @variable_srl0(<4 x i32> %x, <4 x i32> %y) {
define <8 x i32> @variable_srl1(<8 x i32> %x, <8 x i32> %y) {
; X32-LABEL: variable_srl1:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: variable_srl1:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%k = lshr <8 x i32> %x, %y
@@ -88,12 +88,12 @@ define <8 x i32> @variable_srl1(<8 x i32> %x, <8 x i32> %y) {
define <2 x i64> @variable_srl2(<2 x i64> %x, <2 x i64> %y) {
; X32-LABEL: variable_srl2:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: variable_srl2:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
; X64-NEXT: retq
%k = lshr <2 x i64> %x, %y
@@ -102,12 +102,12 @@ define <2 x i64> @variable_srl2(<2 x i64> %x, <2 x i64> %y) {
define <4 x i64> @variable_srl3(<4 x i64> %x, <4 x i64> %y) {
; X32-LABEL: variable_srl3:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: variable_srl3:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%k = lshr <4 x i64> %x, %y
@@ -116,12 +116,12 @@ define <4 x i64> @variable_srl3(<4 x i64> %x, <4 x i64> %y) {
define <4 x i32> @variable_sra0(<4 x i32> %x, <4 x i32> %y) {
; X32-LABEL: variable_sra0:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpsravd %xmm1, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: variable_sra0:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsravd %xmm1, %xmm0, %xmm0
; X64-NEXT: retq
%k = ashr <4 x i32> %x, %y
@@ -130,12 +130,12 @@ define <4 x i32> @variable_sra0(<4 x i32> %x, <4 x i32> %y) {
define <8 x i32> @variable_sra1(<8 x i32> %x, <8 x i32> %y) {
; X32-LABEL: variable_sra1:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: variable_sra1:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%k = ashr <8 x i32> %x, %y
@@ -146,12 +146,12 @@ define <8 x i32> @variable_sra1(<8 x i32> %x, <8 x i32> %y) {
define <8 x i32> @vshift00(<8 x i32> %a) nounwind readnone {
; X32-LABEL: vshift00:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpslld $2, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: vshift00:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpslld $2, %ymm0, %ymm0
; X64-NEXT: retq
%s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
@@ -160,12 +160,12 @@ define <8 x i32> @vshift00(<8 x i32> %a) nounwind readnone {
define <16 x i16> @vshift01(<16 x i16> %a) nounwind readnone {
; X32-LABEL: vshift01:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpsllw $2, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: vshift01:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsllw $2, %ymm0, %ymm0
; X64-NEXT: retq
%s = shl <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
@@ -174,12 +174,12 @@ define <16 x i16> @vshift01(<16 x i16> %a) nounwind readnone {
define <4 x i64> @vshift02(<4 x i64> %a) nounwind readnone {
; X32-LABEL: vshift02:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpsllq $2, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: vshift02:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsllq $2, %ymm0, %ymm0
; X64-NEXT: retq
%s = shl <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
@@ -190,12 +190,12 @@ define <4 x i64> @vshift02(<4 x i64> %a) nounwind readnone {
define <8 x i32> @vshift03(<8 x i32> %a) nounwind readnone {
; X32-LABEL: vshift03:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpsrld $2, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: vshift03:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsrld $2, %ymm0, %ymm0
; X64-NEXT: retq
%s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
@@ -204,12 +204,12 @@ define <8 x i32> @vshift03(<8 x i32> %a) nounwind readnone {
define <16 x i16> @vshift04(<16 x i16> %a) nounwind readnone {
; X32-LABEL: vshift04:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpsrlw $2, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: vshift04:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsrlw $2, %ymm0, %ymm0
; X64-NEXT: retq
%s = lshr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
@@ -218,12 +218,12 @@ define <16 x i16> @vshift04(<16 x i16> %a) nounwind readnone {
define <4 x i64> @vshift05(<4 x i64> %a) nounwind readnone {
; X32-LABEL: vshift05:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpsrlq $2, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: vshift05:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsrlq $2, %ymm0, %ymm0
; X64-NEXT: retq
%s = lshr <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
@@ -234,12 +234,12 @@ define <4 x i64> @vshift05(<4 x i64> %a) nounwind readnone {
define <8 x i32> @vshift06(<8 x i32> %a) nounwind readnone {
; X32-LABEL: vshift06:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpsrad $2, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: vshift06:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsrad $2, %ymm0, %ymm0
; X64-NEXT: retq
%s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
@@ -248,12 +248,12 @@ define <8 x i32> @vshift06(<8 x i32> %a) nounwind readnone {
define <16 x i16> @vshift07(<16 x i16> %a) nounwind readnone {
; X32-LABEL: vshift07:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpsraw $2, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: vshift07:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsraw $2, %ymm0, %ymm0
; X64-NEXT: retq
%s = ashr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
@@ -262,13 +262,13 @@ define <16 x i16> @vshift07(<16 x i16> %a) nounwind readnone {
define <4 x i32> @variable_sra0_load(<4 x i32> %x, <4 x i32>* %y) {
; X32-LABEL: variable_sra0_load:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpsravd (%eax), %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: variable_sra0_load:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsravd (%rdi), %xmm0, %xmm0
; X64-NEXT: retq
%y1 = load <4 x i32>, <4 x i32>* %y
@@ -278,13 +278,13 @@ define <4 x i32> @variable_sra0_load(<4 x i32> %x, <4 x i32>* %y) {
define <8 x i32> @variable_sra1_load(<8 x i32> %x, <8 x i32>* %y) {
; X32-LABEL: variable_sra1_load:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpsravd (%eax), %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: variable_sra1_load:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsravd (%rdi), %ymm0, %ymm0
; X64-NEXT: retq
%y1 = load <8 x i32>, <8 x i32>* %y
@@ -294,13 +294,13 @@ define <8 x i32> @variable_sra1_load(<8 x i32> %x, <8 x i32>* %y) {
define <4 x i32> @variable_shl0_load(<4 x i32> %x, <4 x i32>* %y) {
; X32-LABEL: variable_shl0_load:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpsllvd (%eax), %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: variable_shl0_load:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsllvd (%rdi), %xmm0, %xmm0
; X64-NEXT: retq
%y1 = load <4 x i32>, <4 x i32>* %y
@@ -310,13 +310,13 @@ define <4 x i32> @variable_shl0_load(<4 x i32> %x, <4 x i32>* %y) {
define <8 x i32> @variable_shl1_load(<8 x i32> %x, <8 x i32>* %y) {
; X32-LABEL: variable_shl1_load:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpsllvd (%eax), %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: variable_shl1_load:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsllvd (%rdi), %ymm0, %ymm0
; X64-NEXT: retq
%y1 = load <8 x i32>, <8 x i32>* %y
@@ -326,13 +326,13 @@ define <8 x i32> @variable_shl1_load(<8 x i32> %x, <8 x i32>* %y) {
define <2 x i64> @variable_shl2_load(<2 x i64> %x, <2 x i64>* %y) {
; X32-LABEL: variable_shl2_load:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpsllvq (%eax), %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: variable_shl2_load:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsllvq (%rdi), %xmm0, %xmm0
; X64-NEXT: retq
%y1 = load <2 x i64>, <2 x i64>* %y
@@ -342,13 +342,13 @@ define <2 x i64> @variable_shl2_load(<2 x i64> %x, <2 x i64>* %y) {
define <4 x i64> @variable_shl3_load(<4 x i64> %x, <4 x i64>* %y) {
; X32-LABEL: variable_shl3_load:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpsllvq (%eax), %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: variable_shl3_load:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsllvq (%rdi), %ymm0, %ymm0
; X64-NEXT: retq
%y1 = load <4 x i64>, <4 x i64>* %y
@@ -358,13 +358,13 @@ define <4 x i64> @variable_shl3_load(<4 x i64> %x, <4 x i64>* %y) {
define <4 x i32> @variable_srl0_load(<4 x i32> %x, <4 x i32>* %y) {
; X32-LABEL: variable_srl0_load:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpsrlvd (%eax), %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: variable_srl0_load:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0
; X64-NEXT: retq
%y1 = load <4 x i32>, <4 x i32>* %y
@@ -374,13 +374,13 @@ define <4 x i32> @variable_srl0_load(<4 x i32> %x, <4 x i32>* %y) {
define <8 x i32> @variable_srl1_load(<8 x i32> %x, <8 x i32>* %y) {
; X32-LABEL: variable_srl1_load:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpsrlvd (%eax), %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: variable_srl1_load:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0
; X64-NEXT: retq
%y1 = load <8 x i32>, <8 x i32>* %y
@@ -390,13 +390,13 @@ define <8 x i32> @variable_srl1_load(<8 x i32> %x, <8 x i32>* %y) {
define <2 x i64> @variable_srl2_load(<2 x i64> %x, <2 x i64>* %y) {
; X32-LABEL: variable_srl2_load:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpsrlvq (%eax), %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: variable_srl2_load:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0
; X64-NEXT: retq
%y1 = load <2 x i64>, <2 x i64>* %y
@@ -406,13 +406,13 @@ define <2 x i64> @variable_srl2_load(<2 x i64> %x, <2 x i64>* %y) {
define <4 x i64> @variable_srl3_load(<4 x i64> %x, <4 x i64>* %y) {
; X32-LABEL: variable_srl3_load:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpsrlvq (%eax), %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: variable_srl3_load:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0
; X64-NEXT: retq
%y1 = load <4 x i64>, <4 x i64>* %y
@@ -422,13 +422,13 @@ define <4 x i64> @variable_srl3_load(<4 x i64> %x, <4 x i64>* %y) {
define <32 x i8> @shl9(<32 x i8> %A) nounwind {
; X32-LABEL: shl9:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpsllw $3, %ymm0, %ymm0
-; X32-NEXT: vpand LCPI28_0, %ymm0, %ymm0
+; X32-NEXT: vpand {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: shl9:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsllw $3, %ymm0, %ymm0
; X64-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT: retq
@@ -438,13 +438,13 @@ define <32 x i8> @shl9(<32 x i8> %A) nounwind {
define <32 x i8> @shr9(<32 x i8> %A) nounwind {
; X32-LABEL: shr9:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpsrlw $3, %ymm0, %ymm0
-; X32-NEXT: vpand LCPI29_0, %ymm0, %ymm0
+; X32-NEXT: vpand {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: shr9:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsrlw $3, %ymm0, %ymm0
; X64-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT: retq
@@ -454,13 +454,13 @@ define <32 x i8> @shr9(<32 x i8> %A) nounwind {
define <32 x i8> @sra_v32i8_7(<32 x i8> %A) nounwind {
; X32-LABEL: sra_v32i8_7:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpxor %ymm1, %ymm1, %ymm1
; X32-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: sra_v32i8_7:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpxor %ymm1, %ymm1, %ymm1
; X64-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; X64-NEXT: retq
@@ -470,16 +470,16 @@ define <32 x i8> @sra_v32i8_7(<32 x i8> %A) nounwind {
define <32 x i8> @sra_v32i8(<32 x i8> %A) nounwind {
; X32-LABEL: sra_v32i8:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpsrlw $3, %ymm0, %ymm0
-; X32-NEXT: vpand LCPI31_0, %ymm0, %ymm0
+; X32-NEXT: vpand {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT: vmovdqa {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; X32-NEXT: vpxor %ymm1, %ymm0, %ymm0
; X32-NEXT: vpsubb %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: sra_v32i8:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsrlw $3, %ymm0, %ymm0
; X64-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT: vmovdqa {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
@@ -492,13 +492,13 @@ define <32 x i8> @sra_v32i8(<32 x i8> %A) nounwind {
define <16 x i16> @sext_v16i16(<16 x i16> %a) nounwind {
; X32-LABEL: sext_v16i16:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpsllw $8, %ymm0, %ymm0
; X32-NEXT: vpsraw $8, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: sext_v16i16:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsllw $8, %ymm0, %ymm0
; X64-NEXT: vpsraw $8, %ymm0, %ymm0
; X64-NEXT: retq
@@ -509,13 +509,13 @@ define <16 x i16> @sext_v16i16(<16 x i16> %a) nounwind {
define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind {
; X32-LABEL: sext_v8i32:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpslld $16, %ymm0, %ymm0
; X32-NEXT: vpsrad $16, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: sext_v8i32:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpslld $16, %ymm0, %ymm0
; X64-NEXT: vpsrad $16, %ymm0, %ymm0
; X64-NEXT: retq
@@ -526,24 +526,24 @@ define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind {
define <8 x i16> @variable_shl16(<8 x i16> %lhs, <8 x i16> %rhs) {
; X32-LABEL: variable_shl16:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; X32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X32-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; X32-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: variable_shl16:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; X64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; X64-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X64-NEXT: vzeroupper
; X64-NEXT: retq
%res = shl <8 x i16> %lhs, %rhs
@@ -552,24 +552,24 @@ define <8 x i16> @variable_shl16(<8 x i16> %lhs, <8 x i16> %rhs) {
define <8 x i16> @variable_ashr16(<8 x i16> %lhs, <8 x i16> %rhs) {
; X32-LABEL: variable_ashr16:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; X32-NEXT: vpmovsxwd %xmm0, %ymm0
; X32-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; X32-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: variable_ashr16:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; X64-NEXT: vpmovsxwd %xmm0, %ymm0
; X64-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; X64-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X64-NEXT: vzeroupper
; X64-NEXT: retq
%res = ashr <8 x i16> %lhs, %rhs
@@ -578,24 +578,24 @@ define <8 x i16> @variable_ashr16(<8 x i16> %lhs, <8 x i16> %rhs) {
define <8 x i16> @variable_lshr16(<8 x i16> %lhs, <8 x i16> %rhs) {
; X32-LABEL: variable_lshr16:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; X32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X32-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; X32-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: variable_lshr16:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; X64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; X64-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X64-NEXT: vzeroupper
; X64-NEXT: retq
%res = lshr <8 x i16> %lhs, %rhs
diff --git a/test/CodeGen/X86/avx2-vector-shifts.ll b/test/CodeGen/X86/avx2-vector-shifts.ll
index 45a1cd975038..127726ea30da 100644
--- a/test/CodeGen/X86/avx2-vector-shifts.ll
+++ b/test/CodeGen/X86/avx2-vector-shifts.ll
@@ -1,16 +1,16 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64
; AVX2 Logical Shift Left
define <16 x i16> @test_sllw_1(<16 x i16> %InVec) {
; X32-LABEL: test_sllw_1:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
; X32-NEXT: retl
;
; X64-LABEL: test_sllw_1:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: retq
entry:
%shl = shl <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
@@ -19,12 +19,12 @@ entry:
define <16 x i16> @test_sllw_2(<16 x i16> %InVec) {
; X32-LABEL: test_sllw_2:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
; X32-NEXT: vpaddw %ymm0, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_sllw_2:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: vpaddw %ymm0, %ymm0, %ymm0
; X64-NEXT: retq
entry:
@@ -34,12 +34,12 @@ entry:
define <16 x i16> @test_sllw_3(<16 x i16> %InVec) {
; X32-LABEL: test_sllw_3:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
; X32-NEXT: vpsllw $15, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_sllw_3:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: vpsllw $15, %ymm0, %ymm0
; X64-NEXT: retq
entry:
@@ -49,11 +49,11 @@ entry:
define <8 x i32> @test_slld_1(<8 x i32> %InVec) {
; X32-LABEL: test_slld_1:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
; X32-NEXT: retl
;
; X64-LABEL: test_slld_1:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: retq
entry:
%shl = shl <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -62,12 +62,12 @@ entry:
define <8 x i32> @test_slld_2(<8 x i32> %InVec) {
; X32-LABEL: test_slld_2:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
; X32-NEXT: vpaddd %ymm0, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_slld_2:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: vpaddd %ymm0, %ymm0, %ymm0
; X64-NEXT: retq
entry:
@@ -77,14 +77,14 @@ entry:
define <8 x i32> @test_vpslld_var(i32 %shift) {
; X32-LABEL: test_vpslld_var:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: vmovdqa {{.*#+}} ymm1 = [192,193,194,195,196,197,198,199]
; X32-NEXT: vpslld %xmm0, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_vpslld_var:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vmovd %edi, %xmm0
; X64-NEXT: vmovdqa {{.*#+}} ymm1 = [192,193,194,195,196,197,198,199]
; X64-NEXT: vpslld %xmm0, %ymm1, %ymm0
@@ -96,12 +96,12 @@ define <8 x i32> @test_vpslld_var(i32 %shift) {
define <8 x i32> @test_slld_3(<8 x i32> %InVec) {
; X32-LABEL: test_slld_3:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
; X32-NEXT: vpslld $31, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_slld_3:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: vpslld $31, %ymm0, %ymm0
; X64-NEXT: retq
entry:
@@ -111,11 +111,11 @@ entry:
define <4 x i64> @test_sllq_1(<4 x i64> %InVec) {
; X32-LABEL: test_sllq_1:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
; X32-NEXT: retl
;
; X64-LABEL: test_sllq_1:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: retq
entry:
%shl = shl <4 x i64> %InVec, <i64 0, i64 0, i64 0, i64 0>
@@ -124,12 +124,12 @@ entry:
define <4 x i64> @test_sllq_2(<4 x i64> %InVec) {
; X32-LABEL: test_sllq_2:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
; X32-NEXT: vpaddq %ymm0, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_sllq_2:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: vpaddq %ymm0, %ymm0, %ymm0
; X64-NEXT: retq
entry:
@@ -139,12 +139,12 @@ entry:
define <4 x i64> @test_sllq_3(<4 x i64> %InVec) {
; X32-LABEL: test_sllq_3:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
; X32-NEXT: vpsllq $63, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_sllq_3:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: vpsllq $63, %ymm0, %ymm0
; X64-NEXT: retq
entry:
@@ -156,11 +156,11 @@ entry:
define <16 x i16> @test_sraw_1(<16 x i16> %InVec) {
; X32-LABEL: test_sraw_1:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
; X32-NEXT: retl
;
; X64-LABEL: test_sraw_1:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: retq
entry:
%shl = ashr <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
@@ -169,12 +169,12 @@ entry:
define <16 x i16> @test_sraw_2(<16 x i16> %InVec) {
; X32-LABEL: test_sraw_2:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
; X32-NEXT: vpsraw $1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_sraw_2:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: vpsraw $1, %ymm0, %ymm0
; X64-NEXT: retq
entry:
@@ -184,12 +184,12 @@ entry:
define <16 x i16> @test_sraw_3(<16 x i16> %InVec) {
; X32-LABEL: test_sraw_3:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
; X32-NEXT: vpsraw $15, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_sraw_3:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: vpsraw $15, %ymm0, %ymm0
; X64-NEXT: retq
entry:
@@ -199,11 +199,11 @@ entry:
define <8 x i32> @test_srad_1(<8 x i32> %InVec) {
; X32-LABEL: test_srad_1:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
; X32-NEXT: retl
;
; X64-LABEL: test_srad_1:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: retq
entry:
%shl = ashr <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -212,12 +212,12 @@ entry:
define <8 x i32> @test_srad_2(<8 x i32> %InVec) {
; X32-LABEL: test_srad_2:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
; X32-NEXT: vpsrad $1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_srad_2:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: vpsrad $1, %ymm0, %ymm0
; X64-NEXT: retq
entry:
@@ -227,12 +227,12 @@ entry:
define <8 x i32> @test_srad_3(<8 x i32> %InVec) {
; X32-LABEL: test_srad_3:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
; X32-NEXT: vpsrad $31, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_srad_3:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: vpsrad $31, %ymm0, %ymm0
; X64-NEXT: retq
entry:
@@ -244,11 +244,11 @@ entry:
define <16 x i16> @test_srlw_1(<16 x i16> %InVec) {
; X32-LABEL: test_srlw_1:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
; X32-NEXT: retl
;
; X64-LABEL: test_srlw_1:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: retq
entry:
%shl = lshr <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
@@ -257,12 +257,12 @@ entry:
define <16 x i16> @test_srlw_2(<16 x i16> %InVec) {
; X32-LABEL: test_srlw_2:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
; X32-NEXT: vpsrlw $1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_srlw_2:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: vpsrlw $1, %ymm0, %ymm0
; X64-NEXT: retq
entry:
@@ -272,12 +272,12 @@ entry:
define <16 x i16> @test_srlw_3(<16 x i16> %InVec) {
; X32-LABEL: test_srlw_3:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
; X32-NEXT: vpsrlw $15, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_srlw_3:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: vpsrlw $15, %ymm0, %ymm0
; X64-NEXT: retq
entry:
@@ -287,11 +287,11 @@ entry:
define <8 x i32> @test_srld_1(<8 x i32> %InVec) {
; X32-LABEL: test_srld_1:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
; X32-NEXT: retl
;
; X64-LABEL: test_srld_1:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: retq
entry:
%shl = lshr <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -300,12 +300,12 @@ entry:
define <8 x i32> @test_srld_2(<8 x i32> %InVec) {
; X32-LABEL: test_srld_2:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
; X32-NEXT: vpsrld $1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_srld_2:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: vpsrld $1, %ymm0, %ymm0
; X64-NEXT: retq
entry:
@@ -315,12 +315,12 @@ entry:
define <8 x i32> @test_srld_3(<8 x i32> %InVec) {
; X32-LABEL: test_srld_3:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
; X32-NEXT: vpsrld $31, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_srld_3:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: vpsrld $31, %ymm0, %ymm0
; X64-NEXT: retq
entry:
@@ -330,11 +330,11 @@ entry:
define <4 x i64> @test_srlq_1(<4 x i64> %InVec) {
; X32-LABEL: test_srlq_1:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
; X32-NEXT: retl
;
; X64-LABEL: test_srlq_1:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: retq
entry:
%shl = lshr <4 x i64> %InVec, <i64 0, i64 0, i64 0, i64 0>
@@ -343,12 +343,12 @@ entry:
define <4 x i64> @test_srlq_2(<4 x i64> %InVec) {
; X32-LABEL: test_srlq_2:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
; X32-NEXT: vpsrlq $1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_srlq_2:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: vpsrlq $1, %ymm0, %ymm0
; X64-NEXT: retq
entry:
@@ -358,12 +358,12 @@ entry:
define <4 x i64> @test_srlq_3(<4 x i64> %InVec) {
; X32-LABEL: test_srlq_3:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
; X32-NEXT: vpsrlq $63, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_srlq_3:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: vpsrlq $63, %ymm0, %ymm0
; X64-NEXT: retq
entry:
@@ -373,17 +373,17 @@ entry:
define <4 x i32> @srl_trunc_and_v4i64(<4 x i32> %x, <4 x i64> %y) nounwind {
; X32-LABEL: srl_trunc_and_v4i64:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; X32-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
-; X32-NEXT: vpbroadcastd LCPI25_0, %xmm2
+; X32-NEXT: vpbroadcastd {{\.LCPI.*}}, %xmm2
; X32-NEXT: vpand %xmm2, %xmm1, %xmm1
; X32-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: srl_trunc_and_v4i64:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; X64-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; X64-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
@@ -403,24 +403,24 @@ define <4 x i32> @srl_trunc_and_v4i64(<4 x i32> %x, <4 x i64> %y) nounwind {
define <8 x i16> @shl_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
; X32-LABEL: shl_8i16:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; X32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X32-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; X32-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: shl_8i16:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; X64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; X64-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X64-NEXT: vzeroupper
; X64-NEXT: retq
%shl = shl <8 x i16> %r, %a
@@ -429,7 +429,7 @@ define <8 x i16> @shl_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
define <16 x i16> @shl_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
; X32-LABEL: shl_16i16:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpxor %ymm2, %ymm2, %ymm2
; X32-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; X32-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
@@ -443,7 +443,7 @@ define <16 x i16> @shl_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
; X32-NEXT: retl
;
; X64-LABEL: shl_16i16:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpxor %ymm2, %ymm2, %ymm2
; X64-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; X64-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
@@ -461,13 +461,13 @@ define <16 x i16> @shl_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
define <32 x i8> @shl_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
; X32-LABEL: shl_32i8:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpsllw $5, %ymm1, %ymm1
; X32-NEXT: vpsllw $4, %ymm0, %ymm2
-; X32-NEXT: vpand LCPI28_0, %ymm2, %ymm2
+; X32-NEXT: vpand {{\.LCPI.*}}, %ymm2, %ymm2
; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; X32-NEXT: vpsllw $2, %ymm0, %ymm2
-; X32-NEXT: vpand LCPI28_1, %ymm2, %ymm2
+; X32-NEXT: vpand {{\.LCPI.*}}, %ymm2, %ymm2
; X32-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; X32-NEXT: vpaddb %ymm0, %ymm0, %ymm2
@@ -476,7 +476,7 @@ define <32 x i8> @shl_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
; X32-NEXT: retl
;
; X64-LABEL: shl_32i8:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsllw $5, %ymm1, %ymm1
; X64-NEXT: vpsllw $4, %ymm0, %ymm2
; X64-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
@@ -495,24 +495,24 @@ define <32 x i8> @shl_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
define <8 x i16> @ashr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
; X32-LABEL: ashr_8i16:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; X32-NEXT: vpmovsxwd %xmm0, %ymm0
; X32-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; X32-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: ashr_8i16:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; X64-NEXT: vpmovsxwd %xmm0, %ymm0
; X64-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; X64-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X64-NEXT: vzeroupper
; X64-NEXT: retq
%ashr = ashr <8 x i16> %r, %a
@@ -521,7 +521,7 @@ define <8 x i16> @ashr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
define <16 x i16> @ashr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
; X32-LABEL: ashr_16i16:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpxor %ymm2, %ymm2, %ymm2
; X32-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; X32-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
@@ -535,7 +535,7 @@ define <16 x i16> @ashr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
; X32-NEXT: retl
;
; X64-LABEL: ashr_16i16:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpxor %ymm2, %ymm2, %ymm2
; X64-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; X64-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
@@ -553,7 +553,7 @@ define <16 x i16> @ashr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
define <32 x i8> @ashr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
; X32-LABEL: ashr_32i8:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpsllw $5, %ymm1, %ymm1
; X32-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; X32-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
@@ -581,7 +581,7 @@ define <32 x i8> @ashr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
; X32-NEXT: retl
;
; X64-LABEL: ashr_32i8:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsllw $5, %ymm1, %ymm1
; X64-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; X64-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
@@ -613,24 +613,24 @@ define <32 x i8> @ashr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
define <8 x i16> @lshr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
; X32-LABEL: lshr_8i16:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; X32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X32-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; X32-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: lshr_8i16:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; X64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; X64-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X64-NEXT: vzeroupper
; X64-NEXT: retq
%lshr = lshr <8 x i16> %r, %a
@@ -639,7 +639,7 @@ define <8 x i16> @lshr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
define <16 x i16> @lshr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
; X32-LABEL: lshr_16i16:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpxor %ymm2, %ymm2, %ymm2
; X32-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; X32-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
@@ -653,7 +653,7 @@ define <16 x i16> @lshr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
; X32-NEXT: retl
;
; X64-LABEL: lshr_16i16:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpxor %ymm2, %ymm2, %ymm2
; X64-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; X64-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
@@ -671,23 +671,23 @@ define <16 x i16> @lshr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
define <32 x i8> @lshr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
; X32-LABEL: lshr_32i8:
-; X32: ## BB#0:
+; X32: # BB#0:
; X32-NEXT: vpsllw $5, %ymm1, %ymm1
; X32-NEXT: vpsrlw $4, %ymm0, %ymm2
-; X32-NEXT: vpand LCPI34_0, %ymm2, %ymm2
+; X32-NEXT: vpand {{\.LCPI.*}}, %ymm2, %ymm2
; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; X32-NEXT: vpsrlw $2, %ymm0, %ymm2
-; X32-NEXT: vpand LCPI34_1, %ymm2, %ymm2
+; X32-NEXT: vpand {{\.LCPI.*}}, %ymm2, %ymm2
; X32-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; X32-NEXT: vpsrlw $1, %ymm0, %ymm2
-; X32-NEXT: vpand LCPI34_2, %ymm2, %ymm2
+; X32-NEXT: vpand {{\.LCPI.*}}, %ymm2, %ymm2
; X32-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: lshr_32i8:
-; X64: ## BB#0:
+; X64: # BB#0:
; X64-NEXT: vpsllw $5, %ymm1, %ymm1
; X64-NEXT: vpsrlw $4, %ymm0, %ymm2
; X64-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
diff --git a/test/CodeGen/X86/avx2-vperm.ll b/test/CodeGen/X86/avx2-vperm.ll
index d0e18550f6a8..d57daafab243 100755
--- a/test/CodeGen/X86/avx2-vperm.ll
+++ b/test/CodeGen/X86/avx2-vperm.ll
@@ -1,16 +1,16 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64
define <8 x i32> @perm_cl_int_8x32(<8 x i32> %A) nounwind readnone {
; X32-LABEL: perm_cl_int_8x32:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
; X32-NEXT: vmovdqa {{.*#+}} ymm1 = [0,7,2,1,2,7,6,0]
; X32-NEXT: vpermd %ymm0, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: perm_cl_int_8x32:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: vmovdqa {{.*#+}} ymm1 = [0,7,2,1,2,7,6,0]
; X64-NEXT: vpermd %ymm0, %ymm1, %ymm0
; X64-NEXT: retq
@@ -22,13 +22,13 @@ entry:
define <8 x float> @perm_cl_fp_8x32(<8 x float> %A) nounwind readnone {
; X32-LABEL: perm_cl_fp_8x32:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
; X32-NEXT: vmovaps {{.*#+}} ymm1 = <u,7,2,u,4,u,1,6>
; X32-NEXT: vpermps %ymm0, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: perm_cl_fp_8x32:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: vmovaps {{.*#+}} ymm1 = <u,7,2,u,4,u,1,6>
; X64-NEXT: vpermps %ymm0, %ymm1, %ymm0
; X64-NEXT: retq
@@ -39,12 +39,12 @@ entry:
define <4 x i64> @perm_cl_int_4x64(<4 x i64> %A) nounwind readnone {
; X32-LABEL: perm_cl_int_4x64:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,1]
; X32-NEXT: retl
;
; X64-LABEL: perm_cl_int_4x64:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,1]
; X64-NEXT: retq
entry:
@@ -54,12 +54,12 @@ entry:
define <4 x double> @perm_cl_fp_4x64(<4 x double> %A) nounwind readnone {
; X32-LABEL: perm_cl_fp_4x64:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
; X32-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,2,1]
; X32-NEXT: retl
;
; X64-LABEL: perm_cl_fp_4x64:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
; X64-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,2,1]
; X64-NEXT: retq
entry:
diff --git a/test/CodeGen/X86/avx512-arith.ll b/test/CodeGen/X86/avx512-arith.ll
index d96b5882556d..7c0f145bb717 100644
--- a/test/CodeGen/X86/avx512-arith.ll
+++ b/test/CodeGen/X86/avx512-arith.ll
@@ -1,13 +1,13 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck --check-prefix=CHECK --check-prefix=AVX512F %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl | FileCheck --check-prefix=CHECK --check-prefix=AVX512VL %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512bw | FileCheck --check-prefix=CHECK --check-prefix=AVX512BW %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512dq | FileCheck --check-prefix=CHECK --check-prefix=AVX512DQ %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512dq -mattr=+avx512bw -mattr=+avx512vl| FileCheck --check-prefix=CHECK --check-prefix=SKX %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512VL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512BW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512DQ
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512bw,+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: addpd512:
-; CHECK: ## BB#0: ## %entry
+; CHECK: # BB#0: # %entry
; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
@@ -17,7 +17,7 @@ entry:
define <8 x double> @addpd512fold(<8 x double> %y) {
; CHECK-LABEL: addpd512fold:
-; CHECK: ## BB#0: ## %entry
+; CHECK: # BB#0: # %entry
; CHECK-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
@@ -27,7 +27,7 @@ entry:
define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: addps512:
-; CHECK: ## BB#0: ## %entry
+; CHECK: # BB#0: # %entry
; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
@@ -37,7 +37,7 @@ entry:
define <16 x float> @addps512fold(<16 x float> %y) {
; CHECK-LABEL: addps512fold:
-; CHECK: ## BB#0: ## %entry
+; CHECK: # BB#0: # %entry
; CHECK-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
@@ -47,7 +47,7 @@ entry:
define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: subpd512:
-; CHECK: ## BB#0: ## %entry
+; CHECK: # BB#0: # %entry
; CHECK-NEXT: vsubpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
@@ -57,7 +57,7 @@ entry:
define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
; CHECK-LABEL: subpd512fold:
-; CHECK: ## BB#0: ## %entry
+; CHECK: # BB#0: # %entry
; CHECK-NEXT: vsubpd (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
@@ -68,7 +68,7 @@ entry:
define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: subps512:
-; CHECK: ## BB#0: ## %entry
+; CHECK: # BB#0: # %entry
; CHECK-NEXT: vsubps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
@@ -78,7 +78,7 @@ entry:
define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
; CHECK-LABEL: subps512fold:
-; CHECK: ## BB#0: ## %entry
+; CHECK: # BB#0: # %entry
; CHECK-NEXT: vsubps (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
@@ -89,7 +89,7 @@ entry:
define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
; AVX512F-LABEL: imulq512:
-; AVX512F: ## BB#0:
+; AVX512F: # BB#0:
; AVX512F-NEXT: vpsrlq $32, %zmm1, %zmm2
; AVX512F-NEXT: vpmuludq %zmm0, %zmm2, %zmm2
; AVX512F-NEXT: vpsrlq $32, %zmm0, %zmm3
@@ -101,7 +101,7 @@ define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: imulq512:
-; AVX512VL: ## BB#0:
+; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpsrlq $32, %zmm1, %zmm2
; AVX512VL-NEXT: vpmuludq %zmm0, %zmm2, %zmm2
; AVX512VL-NEXT: vpsrlq $32, %zmm0, %zmm3
@@ -113,7 +113,7 @@ define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: imulq512:
-; AVX512BW: ## BB#0:
+; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm2
; AVX512BW-NEXT: vpmuludq %zmm0, %zmm2, %zmm2
; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm3
@@ -125,12 +125,12 @@ define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: imulq512:
-; AVX512DQ: ## BB#0:
+; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vpmullq %zmm0, %zmm1, %zmm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: imulq512:
-; SKX: ## BB#0:
+; SKX: # BB#0:
; SKX-NEXT: vpmullq %zmm0, %zmm1, %zmm0
; SKX-NEXT: retq
%z = mul <8 x i64>%x, %y
@@ -139,7 +139,7 @@ define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
define <4 x i64> @imulq256(<4 x i64> %y, <4 x i64> %x) {
; AVX512F-LABEL: imulq256:
-; AVX512F: ## BB#0:
+; AVX512F: # BB#0:
; AVX512F-NEXT: vpsrlq $32, %ymm1, %ymm2
; AVX512F-NEXT: vpmuludq %ymm0, %ymm2, %ymm2
; AVX512F-NEXT: vpsrlq $32, %ymm0, %ymm3
@@ -151,7 +151,7 @@ define <4 x i64> @imulq256(<4 x i64> %y, <4 x i64> %x) {
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: imulq256:
-; AVX512VL: ## BB#0:
+; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpsrlq $32, %ymm1, %ymm2
; AVX512VL-NEXT: vpmuludq %ymm0, %ymm2, %ymm2
; AVX512VL-NEXT: vpsrlq $32, %ymm0, %ymm3
@@ -163,7 +163,7 @@ define <4 x i64> @imulq256(<4 x i64> %y, <4 x i64> %x) {
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: imulq256:
-; AVX512BW: ## BB#0:
+; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpsrlq $32, %ymm1, %ymm2
; AVX512BW-NEXT: vpmuludq %ymm0, %ymm2, %ymm2
; AVX512BW-NEXT: vpsrlq $32, %ymm0, %ymm3
@@ -175,15 +175,15 @@ define <4 x i64> @imulq256(<4 x i64> %y, <4 x i64> %x) {
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: imulq256:
-; AVX512DQ: ## BB#0:
-; AVX512DQ-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
-; AVX512DQ-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; AVX512DQ: # BB#0:
+; AVX512DQ-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512DQ-NEXT: vpmullq %zmm0, %zmm1, %zmm0
-; AVX512DQ-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
+; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: imulq256:
-; SKX: ## BB#0:
+; SKX: # BB#0:
; SKX-NEXT: vpmullq %ymm0, %ymm1, %ymm0
; SKX-NEXT: retq
%z = mul <4 x i64>%x, %y
@@ -192,7 +192,7 @@ define <4 x i64> @imulq256(<4 x i64> %y, <4 x i64> %x) {
define <2 x i64> @imulq128(<2 x i64> %y, <2 x i64> %x) {
; AVX512F-LABEL: imulq128:
-; AVX512F: ## BB#0:
+; AVX512F: # BB#0:
; AVX512F-NEXT: vpsrlq $32, %xmm1, %xmm2
; AVX512F-NEXT: vpmuludq %xmm0, %xmm2, %xmm2
; AVX512F-NEXT: vpsrlq $32, %xmm0, %xmm3
@@ -204,7 +204,7 @@ define <2 x i64> @imulq128(<2 x i64> %y, <2 x i64> %x) {
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: imulq128:
-; AVX512VL: ## BB#0:
+; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpsrlq $32, %xmm1, %xmm2
; AVX512VL-NEXT: vpmuludq %xmm0, %xmm2, %xmm2
; AVX512VL-NEXT: vpsrlq $32, %xmm0, %xmm3
@@ -216,7 +216,7 @@ define <2 x i64> @imulq128(<2 x i64> %y, <2 x i64> %x) {
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: imulq128:
-; AVX512BW: ## BB#0:
+; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpsrlq $32, %xmm1, %xmm2
; AVX512BW-NEXT: vpmuludq %xmm0, %xmm2, %xmm2
; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm3
@@ -228,16 +228,16 @@ define <2 x i64> @imulq128(<2 x i64> %y, <2 x i64> %x) {
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: imulq128:
-; AVX512DQ: ## BB#0:
-; AVX512DQ-NEXT: ## kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
-; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; AVX512DQ: # BB#0:
+; AVX512DQ-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
+; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; AVX512DQ-NEXT: vpmullq %zmm0, %zmm1, %zmm0
-; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: imulq128:
-; SKX: ## BB#0:
+; SKX: # BB#0:
; SKX-NEXT: vpmullq %xmm0, %xmm1, %xmm0
; SKX-NEXT: retq
%z = mul <2 x i64>%x, %y
@@ -246,7 +246,7 @@ define <2 x i64> @imulq128(<2 x i64> %y, <2 x i64> %x) {
define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: mulpd512:
-; CHECK: ## BB#0: ## %entry
+; CHECK: # BB#0: # %entry
; CHECK-NEXT: vmulpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
@@ -256,7 +256,7 @@ entry:
define <8 x double> @mulpd512fold(<8 x double> %y) {
; CHECK-LABEL: mulpd512fold:
-; CHECK: ## BB#0: ## %entry
+; CHECK: # BB#0: # %entry
; CHECK-NEXT: vmulpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
@@ -266,7 +266,7 @@ entry:
define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: mulps512:
-; CHECK: ## BB#0: ## %entry
+; CHECK: # BB#0: # %entry
; CHECK-NEXT: vmulps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
@@ -276,7 +276,7 @@ entry:
define <16 x float> @mulps512fold(<16 x float> %y) {
; CHECK-LABEL: mulps512fold:
-; CHECK: ## BB#0: ## %entry
+; CHECK: # BB#0: # %entry
; CHECK-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
@@ -286,7 +286,7 @@ entry:
define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: divpd512:
-; CHECK: ## BB#0: ## %entry
+; CHECK: # BB#0: # %entry
; CHECK-NEXT: vdivpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
@@ -296,7 +296,7 @@ entry:
define <8 x double> @divpd512fold(<8 x double> %y) {
; CHECK-LABEL: divpd512fold:
-; CHECK: ## BB#0: ## %entry
+; CHECK: # BB#0: # %entry
; CHECK-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
@@ -306,7 +306,7 @@ entry:
define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: divps512:
-; CHECK: ## BB#0: ## %entry
+; CHECK: # BB#0: # %entry
; CHECK-NEXT: vdivps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
@@ -316,7 +316,7 @@ entry:
define <16 x float> @divps512fold(<16 x float> %y) {
; CHECK-LABEL: divps512fold:
-; CHECK: ## BB#0: ## %entry
+; CHECK: # BB#0: # %entry
; CHECK-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
@@ -326,7 +326,7 @@ entry:
define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpaddq_test:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%x = add <8 x i64> %i, %j
@@ -335,7 +335,7 @@ define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
; CHECK-LABEL: vpaddq_fold_test:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
%tmp = load <8 x i64>, <8 x i64>* %j, align 4
@@ -345,7 +345,7 @@ define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
; CHECK-LABEL: vpaddq_broadcast_test:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
%x = add <8 x i64> %i, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
@@ -354,7 +354,7 @@ define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
; CHECK-LABEL: vpaddq_broadcast2_test:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
%tmp = load i64, i64* %j
@@ -372,7 +372,7 @@ define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpaddd_test:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%x = add <16 x i32> %i, %j
@@ -381,7 +381,7 @@ define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
; CHECK-LABEL: vpaddd_fold_test:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
%tmp = load <16 x i32>, <16 x i32>* %j, align 4
@@ -391,7 +391,7 @@ define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
; CHECK-LABEL: vpaddd_broadcast_test:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT: retq
%x = add <16 x i32> %i, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
@@ -400,7 +400,7 @@ define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_test:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1}
@@ -413,7 +413,7 @@ define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %ma
define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_test:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z}
@@ -426,7 +426,7 @@ define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %m
define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_fold_test:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1}
@@ -440,7 +440,7 @@ define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16
define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_broadcast_test:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1}
@@ -453,7 +453,7 @@ define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1)
define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_fold_test:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z}
@@ -467,7 +467,7 @@ define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16
define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_broadcast_test:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z}
@@ -480,7 +480,7 @@ define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1)
define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpsubq_test:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%x = sub <8 x i64> %i, %j
@@ -489,7 +489,7 @@ define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpsubd_test:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%x = sub <16 x i32> %i, %j
@@ -498,7 +498,7 @@ define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
; CHECK-LABEL: vpmulld_test:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%x = mul <16 x i32> %i, %j
@@ -508,7 +508,7 @@ define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
declare float @sqrtf(float) readnone
define float @sqrtA(float %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: sqrtA:
-; CHECK: ## BB#0: ## %entry
+; CHECK: # BB#0: # %entry
; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
entry:
@@ -519,7 +519,7 @@ entry:
declare double @sqrt(double) readnone
define double @sqrtB(double %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: sqrtB:
-; CHECK: ## BB#0: ## %entry
+; CHECK: # BB#0: # %entry
; CHECK-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
entry:
@@ -530,7 +530,7 @@ entry:
declare float @llvm.sqrt.f32(float)
define float @sqrtC(float %a) nounwind {
; CHECK-LABEL: sqrtC:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
%b = call float @llvm.sqrt.f32(float %a)
@@ -540,7 +540,7 @@ define float @sqrtC(float %a) nounwind {
declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
define <16 x float> @sqrtD(<16 x float> %a) nounwind {
; CHECK-LABEL: sqrtD:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vsqrtps %zmm0, %zmm0
; CHECK-NEXT: retq
%b = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a)
@@ -550,7 +550,7 @@ define <16 x float> @sqrtD(<16 x float> %a) nounwind {
declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
define <8 x double> @sqrtE(<8 x double> %a) nounwind {
; CHECK-LABEL: sqrtE:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vsqrtpd %zmm0, %zmm0
; CHECK-NEXT: retq
%b = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a)
@@ -559,7 +559,7 @@ define <8 x double> @sqrtE(<8 x double> %a) nounwind {
define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
; CHECK-LABEL: fadd_broadcast:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT: retq
%b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
@@ -568,7 +568,7 @@ define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
; CHECK-LABEL: addq_broadcast:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
%b = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
@@ -577,27 +577,27 @@ define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
; AVX512F-LABEL: orq_broadcast:
-; AVX512F: ## BB#0:
+; AVX512F: # BB#0:
; AVX512F-NEXT: vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: orq_broadcast:
-; AVX512VL: ## BB#0:
+; AVX512VL: # BB#0:
; AVX512VL-NEXT: vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: orq_broadcast:
-; AVX512BW: ## BB#0:
+; AVX512BW: # BB#0:
; AVX512BW-NEXT: vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: orq_broadcast:
-; AVX512DQ: ## BB#0:
+; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: orq_broadcast:
-; SKX: ## BB#0:
+; SKX: # BB#0:
; SKX-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; SKX-NEXT: retq
%b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
@@ -606,27 +606,27 @@ define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
; AVX512F-LABEL: andd512fold:
-; AVX512F: ## BB#0: ## %entry
+; AVX512F: # BB#0: # %entry
; AVX512F-NEXT: vpandd (%rdi), %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: andd512fold:
-; AVX512VL: ## BB#0: ## %entry
+; AVX512VL: # BB#0: # %entry
; AVX512VL-NEXT: vpandd (%rdi), %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: andd512fold:
-; AVX512BW: ## BB#0: ## %entry
+; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: vpandd (%rdi), %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: andd512fold:
-; AVX512DQ: ## BB#0: ## %entry
+; AVX512DQ: # BB#0: # %entry
; AVX512DQ-NEXT: vandps (%rdi), %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: andd512fold:
-; SKX: ## BB#0: ## %entry
+; SKX: # BB#0: # %entry
; SKX-NEXT: vandps (%rdi), %zmm0, %zmm0
; SKX-NEXT: retq
entry:
@@ -637,27 +637,27 @@ entry:
define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
; AVX512F-LABEL: andqbrst:
-; AVX512F: ## BB#0: ## %entry
+; AVX512F: # BB#0: # %entry
; AVX512F-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: andqbrst:
-; AVX512VL: ## BB#0: ## %entry
+; AVX512VL: # BB#0: # %entry
; AVX512VL-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: andqbrst:
-; AVX512BW: ## BB#0: ## %entry
+; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: andqbrst:
-; AVX512DQ: ## BB#0: ## %entry
+; AVX512DQ: # BB#0: # %entry
; AVX512DQ-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: andqbrst:
-; SKX: ## BB#0: ## %entry
+; SKX: # BB#0: # %entry
; SKX-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0
; SKX-NEXT: retq
entry:
@@ -670,7 +670,7 @@ entry:
define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vaddps:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1}
@@ -685,7 +685,7 @@ define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i,
define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vmulps:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1}
@@ -700,7 +700,7 @@ define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i,
define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vminps:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1}
@@ -716,38 +716,38 @@ define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i,
define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i,
; AVX512F-LABEL: test_mask_vminpd:
-; AVX512F: ## BB#0:
-; AVX512F-NEXT: ## kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
+; AVX512F: # BB#0:
+; AVX512F-NEXT: # kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
; AVX512F-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX512F-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; AVX512F-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: test_mask_vminpd:
-; AVX512VL: ## BB#0:
+; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX512VL-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
; AVX512VL-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: test_mask_vminpd:
-; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: ## kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: # kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
; AVX512BW-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX512BW-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; AVX512BW-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_mask_vminpd:
-; AVX512DQ: ## BB#0:
-; AVX512DQ-NEXT: ## kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
+; AVX512DQ: # BB#0:
+; AVX512DQ-NEXT: # kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
; AVX512DQ-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX512DQ-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; AVX512DQ-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: test_mask_vminpd:
-; SKX: ## BB#0:
+; SKX: # BB#0:
; SKX-NEXT: vpxor %ymm4, %ymm4, %ymm4
; SKX-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
; SKX-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
@@ -763,7 +763,7 @@ define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i,
define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vmaxps:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1}
@@ -779,38 +779,38 @@ define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i,
define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i,
; AVX512F-LABEL: test_mask_vmaxpd:
-; AVX512F: ## BB#0:
-; AVX512F-NEXT: ## kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
+; AVX512F: # BB#0:
+; AVX512F-NEXT: # kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
; AVX512F-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX512F-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; AVX512F-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: test_mask_vmaxpd:
-; AVX512VL: ## BB#0:
+; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX512VL-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
; AVX512VL-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: test_mask_vmaxpd:
-; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: ## kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: # kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
; AVX512BW-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX512BW-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; AVX512BW-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_mask_vmaxpd:
-; AVX512DQ: ## BB#0:
-; AVX512DQ-NEXT: ## kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
+; AVX512DQ: # BB#0:
+; AVX512DQ-NEXT: # kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
; AVX512DQ-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX512DQ-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; AVX512DQ-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: test_mask_vmaxpd:
-; SKX: ## BB#0:
+; SKX: # BB#0:
; SKX-NEXT: vpxor %ymm4, %ymm4, %ymm4
; SKX-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
; SKX-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
@@ -826,7 +826,7 @@ define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i,
define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vsubps:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1}
@@ -841,7 +841,7 @@ define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i,
define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vdivps:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT: vdivps %zmm2, %zmm1, %zmm0 {%k1}
@@ -856,7 +856,7 @@ define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i,
define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i,
; CHECK-LABEL: test_mask_vaddpd:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT: vpcmpneqq %zmm4, %zmm3, %k1
; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1}
@@ -871,7 +871,7 @@ define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i,
define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j,
; CHECK-LABEL: test_maskz_vaddpd:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT: vpcmpneqq %zmm3, %zmm2, %k1
; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z}
@@ -885,7 +885,7 @@ define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j,
define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i,
; CHECK-LABEL: test_mask_fold_vaddpd:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT: vpcmpneqq %zmm3, %zmm2, %k1
; CHECK-NEXT: vaddpd (%rdi), %zmm1, %zmm0 {%k1}
@@ -901,7 +901,7 @@ define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i,
define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j,
; CHECK-LABEL: test_maskz_fold_vaddpd:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1
; CHECK-NEXT: vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z}
@@ -916,7 +916,7 @@ define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j,
define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind {
; CHECK-LABEL: test_broadcast_vaddpd:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
%tmp = load double, double* %j
@@ -929,7 +929,7 @@ define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind
define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i,
; CHECK-LABEL: test_mask_broadcast_vaddpd:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vpxord %zmm0, %zmm0, %zmm0
; CHECK-NEXT: vpcmpneqq %zmm0, %zmm2, %k1
; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1}
@@ -948,7 +948,7 @@ define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double>
define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
; CHECK-LABEL: test_maskz_broadcast_vaddpd:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1
; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
@@ -966,27 +966,27 @@ define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
define <16 x float> @test_fxor(<16 x float> %a) {
; AVX512F-LABEL: test_fxor:
-; AVX512F: ## BB#0:
+; AVX512F: # BB#0:
; AVX512F-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: test_fxor:
-; AVX512VL: ## BB#0:
+; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: test_fxor:
-; AVX512BW: ## BB#0:
+; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_fxor:
-; AVX512DQ: ## BB#0:
+; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: test_fxor:
-; SKX: ## BB#0:
+; SKX: # BB#0:
; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; SKX-NEXT: retq
@@ -996,30 +996,30 @@ define <16 x float> @test_fxor(<16 x float> %a) {
define <8 x float> @test_fxor_8f32(<8 x float> %a) {
; AVX512F-LABEL: test_fxor_8f32:
-; AVX512F: ## BB#0:
+; AVX512F: # BB#0:
; AVX512F-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
; AVX512F-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: test_fxor_8f32:
-; AVX512VL: ## BB#0:
+; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpxord {{.*}}(%rip){1to8}, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: test_fxor_8f32:
-; AVX512BW: ## BB#0:
+; AVX512BW: # BB#0:
; AVX512BW-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
; AVX512BW-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_fxor_8f32:
-; AVX512DQ: ## BB#0:
+; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
; AVX512DQ-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: test_fxor_8f32:
-; SKX: ## BB#0:
+; SKX: # BB#0:
; SKX-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0
; SKX-NEXT: retq
%res = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
@@ -1028,27 +1028,27 @@ define <8 x float> @test_fxor_8f32(<8 x float> %a) {
define <8 x double> @fabs_v8f64(<8 x double> %p)
; AVX512F-LABEL: fabs_v8f64:
-; AVX512F: ## BB#0:
+; AVX512F: # BB#0:
; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fabs_v8f64:
-; AVX512VL: ## BB#0:
+; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: fabs_v8f64:
-; AVX512BW: ## BB#0:
+; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: fabs_v8f64:
-; AVX512DQ: ## BB#0:
+; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: fabs_v8f64:
-; SKX: ## BB#0:
+; SKX: # BB#0:
; SKX-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; SKX-NEXT: retq
{
@@ -1059,27 +1059,27 @@ declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
define <16 x float> @fabs_v16f32(<16 x float> %p)
; AVX512F-LABEL: fabs_v16f32:
-; AVX512F: ## BB#0:
+; AVX512F: # BB#0:
; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fabs_v16f32:
-; AVX512VL: ## BB#0:
+; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: fabs_v16f32:
-; AVX512BW: ## BB#0:
+; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: fabs_v16f32:
-; AVX512DQ: ## BB#0:
+; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: fabs_v16f32:
-; SKX: ## BB#0:
+; SKX: # BB#0:
; SKX-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; SKX-NEXT: retq
{
diff --git a/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll b/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
index 300cb51f871c..edcc3933bc39 100644
--- a/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
+++ b/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
@@ -1004,8 +1004,6 @@ define i8 @test_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_pcmpeq_q_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc1]
-; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -1018,8 +1016,6 @@ define i8 @test_mask_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1]
-; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -1058,8 +1054,6 @@ define i8 @test_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_pcmpgt_q_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x37,0xc1]
-; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -1072,8 +1066,6 @@ define i8 @test_mask_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x37,0xc1]
-; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -1087,8 +1079,6 @@ define i8 @test_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_pcmpeq_d_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1]
-; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -1101,8 +1091,6 @@ define i8 @test_mask_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1]
-; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -1116,10 +1104,6 @@ define i8 @test_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_pcmpeq_q_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x29,0xc1]
-; CHECK-NEXT: kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e]
-; CHECK-NEXT: kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
-; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -1132,10 +1116,6 @@ define i8 @test_mask_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1]
-; CHECK-NEXT: kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e]
-; CHECK-NEXT: kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
-; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -1149,8 +1129,6 @@ define i8 @test_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_pcmpgt_d_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x66,0xc1]
-; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -1163,8 +1141,6 @@ define i8 @test_mask_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x66,0xc1]
-; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -1178,10 +1154,6 @@ define i8 @test_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_pcmpgt_q_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x37,0xc1]
-; CHECK-NEXT: kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e]
-; CHECK-NEXT: kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
-; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -1194,10 +1166,6 @@ define i8 @test_mask_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x37,0xc1]
-; CHECK-NEXT: kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e]
-; CHECK-NEXT: kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
-; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -5164,23 +5132,11 @@ define <8 x i8> @test_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_cmp_q_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc1]
-; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: vpcmpgtq %ymm0, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x37,0xc8]
-; CHECK-NEXT: kshiftlw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
; CHECK-NEXT: vpcmpleq %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd1,0x02]
-; CHECK-NEXT: kshiftlw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd9,0x04]
-; CHECK-NEXT: kshiftlw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x32,0xdb,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x30,0xdb,0x0c]
; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k4 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xe0,0x02]
-; CHECK-NEXT: kshiftlw $12, %k4, %k4 ## encoding: [0xc4,0xe3,0xf9,0x32,0xe4,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k4, %k4 ## encoding: [0xc4,0xe3,0xf9,0x30,0xe4,0x0c]
; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf2,0xfd,0x28,0x37,0xe9]
-; CHECK-NEXT: kshiftlw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
@@ -5219,43 +5175,31 @@ define <8 x i8> @test_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
define <8 x i8> @test_mask_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_mask_cmp_q_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1]
-; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
-; CHECK-NEXT: vpcmpgtq %ymm0, %ymm1, %k2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x37,0xd0]
-; CHECK-NEXT: kshiftlw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
-; CHECK-NEXT: vpcmpleq %ymm1, %ymm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xd9,0x02]
-; CHECK-NEXT: kshiftlw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x32,0xdb,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x30,0xdb,0x0c]
-; CHECK-NEXT: kxorw %k0, %k0, %k4 ## encoding: [0xc5,0xfc,0x47,0xe0]
-; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xe9,0x04]
-; CHECK-NEXT: kshiftlw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0c]
-; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k6 {%k1} ## encoding: [0x62,0xf3,0xf5,0x29,0x1f,0xf0,0x02]
+; CHECK-NEXT: kmovw %edi, %k6 ## encoding: [0xc5,0xf8,0x92,0xf7]
+; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k6} ## encoding: [0x62,0xf2,0xfd,0x2e,0x29,0xc1]
+; CHECK-NEXT: vpcmpgtq %ymm0, %ymm1, %k7 {%k6} ## encoding: [0x62,0xf2,0xf5,0x2e,0x37,0xf8]
+; CHECK-NEXT: vpcmpleq %ymm1, %ymm0, %k1 {%k6} ## encoding: [0x62,0xf3,0xfd,0x2e,0x1f,0xc9,0x02]
+; CHECK-NEXT: kxorw %k0, %k0, %k2 ## encoding: [0xc5,0xfc,0x47,0xd0]
+; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k3 {%k6} ## encoding: [0x62,0xf3,0xfd,0x2e,0x1f,0xd9,0x04]
+; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k4 {%k6} ## encoding: [0x62,0xf3,0xf5,0x2e,0x1f,0xe0,0x02]
+; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k5 {%k6} ## encoding: [0x62,0xf2,0xfd,0x2e,0x37,0xe9]
; CHECK-NEXT: kshiftlw $12, %k6, %k6 ## encoding: [0xc4,0xe3,0xf9,0x32,0xf6,0x0c]
; CHECK-NEXT: kshiftrw $12, %k6, %k6 ## encoding: [0xc4,0xe3,0xf9,0x30,0xf6,0x0c]
-; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k7 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x37,0xf9]
-; CHECK-NEXT: kshiftlw $12, %k7, %k7 ## encoding: [0xc4,0xe3,0xf9,0x32,0xff,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k7, %k7 ## encoding: [0xc4,0xe3,0xf9,0x30,0xff,0x0c]
-; CHECK-NEXT: kshiftlw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
-; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
-; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
+; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
-; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
+; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
+; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask)
@@ -5283,23 +5227,11 @@ define <8 x i8> @test_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_ucmp_q_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc1]
-; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xc9,0x01]
-; CHECK-NEXT: kshiftlw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
; CHECK-NEXT: vpcmpleuq %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xd1,0x02]
-; CHECK-NEXT: kshiftlw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd9,0x04]
-; CHECK-NEXT: kshiftlw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x32,0xdb,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x30,0xdb,0x0c]
; CHECK-NEXT: vpcmpnltuq %ymm1, %ymm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xe1,0x05]
-; CHECK-NEXT: kshiftlw $12, %k4, %k4 ## encoding: [0xc4,0xe3,0xf9,0x32,0xe4,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k4, %k4 ## encoding: [0xc4,0xe3,0xf9,0x30,0xe4,0x0c]
; CHECK-NEXT: vpcmpnleuq %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xe9,0x06]
-; CHECK-NEXT: kshiftlw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
@@ -5338,43 +5270,31 @@ define <8 x i8> @test_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
define <8 x i8> @test_mask_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_mask_ucmp_q_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1]
-; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
-; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xd1,0x01]
-; CHECK-NEXT: kshiftlw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
-; CHECK-NEXT: vpcmpleuq %ymm1, %ymm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xd9,0x02]
-; CHECK-NEXT: kshiftlw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x32,0xdb,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x30,0xdb,0x0c]
-; CHECK-NEXT: kxorw %k0, %k0, %k4 ## encoding: [0xc5,0xfc,0x47,0xe0]
-; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xe9,0x04]
-; CHECK-NEXT: kshiftlw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0c]
-; CHECK-NEXT: vpcmpnltuq %ymm1, %ymm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xf1,0x05]
+; CHECK-NEXT: kmovw %edi, %k6 ## encoding: [0xc5,0xf8,0x92,0xf7]
+; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k6} ## encoding: [0x62,0xf2,0xfd,0x2e,0x29,0xc1]
+; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k7 {%k6} ## encoding: [0x62,0xf3,0xfd,0x2e,0x1e,0xf9,0x01]
+; CHECK-NEXT: vpcmpleuq %ymm1, %ymm0, %k1 {%k6} ## encoding: [0x62,0xf3,0xfd,0x2e,0x1e,0xc9,0x02]
+; CHECK-NEXT: kxorw %k0, %k0, %k2 ## encoding: [0xc5,0xfc,0x47,0xd0]
+; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k3 {%k6} ## encoding: [0x62,0xf3,0xfd,0x2e,0x1f,0xd9,0x04]
+; CHECK-NEXT: vpcmpnltuq %ymm1, %ymm0, %k4 {%k6} ## encoding: [0x62,0xf3,0xfd,0x2e,0x1e,0xe1,0x05]
+; CHECK-NEXT: vpcmpnleuq %ymm1, %ymm0, %k5 {%k6} ## encoding: [0x62,0xf3,0xfd,0x2e,0x1e,0xe9,0x06]
; CHECK-NEXT: kshiftlw $12, %k6, %k6 ## encoding: [0xc4,0xe3,0xf9,0x32,0xf6,0x0c]
; CHECK-NEXT: kshiftrw $12, %k6, %k6 ## encoding: [0xc4,0xe3,0xf9,0x30,0xf6,0x0c]
-; CHECK-NEXT: vpcmpnleuq %ymm1, %ymm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xf9,0x06]
-; CHECK-NEXT: kshiftlw $12, %k7, %k7 ## encoding: [0xc4,0xe3,0xf9,0x32,0xff,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k7, %k7 ## encoding: [0xc4,0xe3,0xf9,0x30,0xff,0x0c]
-; CHECK-NEXT: kshiftlw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
-; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
-; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
+; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
-; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
+; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
+; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask)
@@ -5402,23 +5322,11 @@ define <8 x i8> @test_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_cmp_d_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1]
-; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: vpcmpgtd %xmm0, %xmm1, %k1 ## encoding: [0x62,0xf1,0x75,0x08,0x66,0xc8]
-; CHECK-NEXT: kshiftlw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
; CHECK-NEXT: vpcmpled %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd1,0x02]
-; CHECK-NEXT: kshiftlw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd9,0x04]
-; CHECK-NEXT: kshiftlw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x32,0xdb,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x30,0xdb,0x0c]
; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k4 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xe0,0x02]
-; CHECK-NEXT: kshiftlw $12, %k4, %k4 ## encoding: [0xc4,0xe3,0xf9,0x32,0xe4,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k4, %k4 ## encoding: [0xc4,0xe3,0xf9,0x30,0xe4,0x0c]
; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf1,0x7d,0x08,0x66,0xe9]
-; CHECK-NEXT: kshiftlw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
@@ -5457,43 +5365,31 @@ define <8 x i8> @test_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
define <8 x i8> @test_mask_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
; CHECK-LABEL: test_mask_cmp_d_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1]
-; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
-; CHECK-NEXT: vpcmpgtd %xmm0, %xmm1, %k2 {%k1} ## encoding: [0x62,0xf1,0x75,0x09,0x66,0xd0]
-; CHECK-NEXT: kshiftlw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
-; CHECK-NEXT: vpcmpled %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xd9,0x02]
-; CHECK-NEXT: kshiftlw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x32,0xdb,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x30,0xdb,0x0c]
-; CHECK-NEXT: kxorw %k0, %k0, %k4 ## encoding: [0xc5,0xfc,0x47,0xe0]
-; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xe9,0x04]
-; CHECK-NEXT: kshiftlw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0c]
-; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k6 {%k1} ## encoding: [0x62,0xf3,0x75,0x09,0x1f,0xf0,0x02]
+; CHECK-NEXT: kmovw %edi, %k6 ## encoding: [0xc5,0xf8,0x92,0xf7]
+; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k6} ## encoding: [0x62,0xf1,0x7d,0x0e,0x76,0xc1]
+; CHECK-NEXT: vpcmpgtd %xmm0, %xmm1, %k7 {%k6} ## encoding: [0x62,0xf1,0x75,0x0e,0x66,0xf8]
+; CHECK-NEXT: vpcmpled %xmm1, %xmm0, %k1 {%k6} ## encoding: [0x62,0xf3,0x7d,0x0e,0x1f,0xc9,0x02]
+; CHECK-NEXT: kxorw %k0, %k0, %k2 ## encoding: [0xc5,0xfc,0x47,0xd0]
+; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k3 {%k6} ## encoding: [0x62,0xf3,0x7d,0x0e,0x1f,0xd9,0x04]
+; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k4 {%k6} ## encoding: [0x62,0xf3,0x75,0x0e,0x1f,0xe0,0x02]
+; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k5 {%k6} ## encoding: [0x62,0xf1,0x7d,0x0e,0x66,0xe9]
; CHECK-NEXT: kshiftlw $12, %k6, %k6 ## encoding: [0xc4,0xe3,0xf9,0x32,0xf6,0x0c]
; CHECK-NEXT: kshiftrw $12, %k6, %k6 ## encoding: [0xc4,0xe3,0xf9,0x30,0xf6,0x0c]
-; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x66,0xf9]
-; CHECK-NEXT: kshiftlw $12, %k7, %k7 ## encoding: [0xc4,0xe3,0xf9,0x32,0xff,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k7, %k7 ## encoding: [0xc4,0xe3,0xf9,0x30,0xff,0x0c]
-; CHECK-NEXT: kshiftlw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
-; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
-; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
+; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
-; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
+; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
+; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask)
@@ -5521,23 +5417,11 @@ define <8 x i8> @test_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_ucmp_d_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1]
-; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xc9,0x01]
-; CHECK-NEXT: kshiftlw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
; CHECK-NEXT: vpcmpleud %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xd1,0x02]
-; CHECK-NEXT: kshiftlw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd9,0x04]
-; CHECK-NEXT: kshiftlw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x32,0xdb,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x30,0xdb,0x0c]
; CHECK-NEXT: vpcmpnltud %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xe1,0x05]
-; CHECK-NEXT: kshiftlw $12, %k4, %k4 ## encoding: [0xc4,0xe3,0xf9,0x32,0xe4,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k4, %k4 ## encoding: [0xc4,0xe3,0xf9,0x30,0xe4,0x0c]
; CHECK-NEXT: vpcmpnleud %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xe9,0x06]
-; CHECK-NEXT: kshiftlw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
@@ -5576,43 +5460,31 @@ define <8 x i8> @test_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
define <8 x i8> @test_mask_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
; CHECK-LABEL: test_mask_ucmp_d_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1]
-; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
-; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xd1,0x01]
-; CHECK-NEXT: kshiftlw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
-; CHECK-NEXT: vpcmpleud %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xd9,0x02]
-; CHECK-NEXT: kshiftlw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x32,0xdb,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x30,0xdb,0x0c]
-; CHECK-NEXT: kxorw %k0, %k0, %k4 ## encoding: [0xc5,0xfc,0x47,0xe0]
-; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xe9,0x04]
-; CHECK-NEXT: kshiftlw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0c]
-; CHECK-NEXT: vpcmpnltud %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xf1,0x05]
+; CHECK-NEXT: kmovw %edi, %k6 ## encoding: [0xc5,0xf8,0x92,0xf7]
+; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k6} ## encoding: [0x62,0xf1,0x7d,0x0e,0x76,0xc1]
+; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k7 {%k6} ## encoding: [0x62,0xf3,0x7d,0x0e,0x1e,0xf9,0x01]
+; CHECK-NEXT: vpcmpleud %xmm1, %xmm0, %k1 {%k6} ## encoding: [0x62,0xf3,0x7d,0x0e,0x1e,0xc9,0x02]
+; CHECK-NEXT: kxorw %k0, %k0, %k2 ## encoding: [0xc5,0xfc,0x47,0xd0]
+; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k3 {%k6} ## encoding: [0x62,0xf3,0x7d,0x0e,0x1f,0xd9,0x04]
+; CHECK-NEXT: vpcmpnltud %xmm1, %xmm0, %k4 {%k6} ## encoding: [0x62,0xf3,0x7d,0x0e,0x1e,0xe1,0x05]
+; CHECK-NEXT: vpcmpnleud %xmm1, %xmm0, %k5 {%k6} ## encoding: [0x62,0xf3,0x7d,0x0e,0x1e,0xe9,0x06]
; CHECK-NEXT: kshiftlw $12, %k6, %k6 ## encoding: [0xc4,0xe3,0xf9,0x32,0xf6,0x0c]
; CHECK-NEXT: kshiftrw $12, %k6, %k6 ## encoding: [0xc4,0xe3,0xf9,0x30,0xf6,0x0c]
-; CHECK-NEXT: vpcmpnleud %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xf9,0x06]
-; CHECK-NEXT: kshiftlw $12, %k7, %k7 ## encoding: [0xc4,0xe3,0xf9,0x32,0xff,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k7, %k7 ## encoding: [0xc4,0xe3,0xf9,0x30,0xff,0x0c]
-; CHECK-NEXT: kshiftlw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
-; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
-; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
+; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
-; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
+; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
+; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask)
@@ -5640,35 +5512,11 @@ define <8 x i8> @test_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_cmp_q_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x29,0xc1]
-; CHECK-NEXT: kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e]
-; CHECK-NEXT: kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
-; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: vpcmpgtq %xmm0, %xmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x08,0x37,0xc8]
-; CHECK-NEXT: kshiftlw $14, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0e]
-; CHECK-NEXT: kshiftrw $14, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0e]
-; CHECK-NEXT: kshiftlw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
; CHECK-NEXT: vpcmpleq %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd1,0x02]
-; CHECK-NEXT: kshiftlw $14, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0e]
-; CHECK-NEXT: kshiftrw $14, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]
-; CHECK-NEXT: kshiftlw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd9,0x04]
-; CHECK-NEXT: kshiftlw $14, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x32,0xdb,0x0e]
-; CHECK-NEXT: kshiftrw $14, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x30,0xdb,0x0e]
-; CHECK-NEXT: kshiftlw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x32,0xdb,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x30,0xdb,0x0c]
; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k4 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xe0,0x02]
-; CHECK-NEXT: kshiftlw $14, %k4, %k4 ## encoding: [0xc4,0xe3,0xf9,0x32,0xe4,0x0e]
-; CHECK-NEXT: kshiftrw $14, %k4, %k4 ## encoding: [0xc4,0xe3,0xf9,0x30,0xe4,0x0e]
-; CHECK-NEXT: kshiftlw $12, %k4, %k4 ## encoding: [0xc4,0xe3,0xf9,0x32,0xe4,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k4, %k4 ## encoding: [0xc4,0xe3,0xf9,0x30,0xe4,0x0c]
; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf2,0xfd,0x08,0x37,0xe9]
-; CHECK-NEXT: kshiftlw $14, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0e]
-; CHECK-NEXT: kshiftrw $14, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0e]
-; CHECK-NEXT: kshiftlw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
@@ -5707,57 +5555,33 @@ define <8 x i8> @test_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1) {
define <8 x i8> @test_mask_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_mask_cmp_q_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1]
-; CHECK-NEXT: kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e]
-; CHECK-NEXT: kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
-; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
-; CHECK-NEXT: vpcmpgtq %xmm0, %xmm1, %k2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x37,0xd0]
-; CHECK-NEXT: kshiftlw $14, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0e]
-; CHECK-NEXT: kshiftrw $14, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]
-; CHECK-NEXT: kshiftlw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
-; CHECK-NEXT: vpcmpleq %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xd9,0x02]
-; CHECK-NEXT: kshiftlw $14, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x32,0xdb,0x0e]
-; CHECK-NEXT: kshiftrw $14, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x30,0xdb,0x0e]
-; CHECK-NEXT: kshiftlw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x32,0xdb,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x30,0xdb,0x0c]
-; CHECK-NEXT: kxorw %k0, %k0, %k4 ## encoding: [0xc5,0xfc,0x47,0xe0]
-; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xe9,0x04]
-; CHECK-NEXT: kshiftlw $14, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0e]
-; CHECK-NEXT: kshiftrw $14, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0e]
-; CHECK-NEXT: kshiftlw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0c]
-; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k6 {%k1} ## encoding: [0x62,0xf3,0xf5,0x09,0x1f,0xf0,0x02]
-; CHECK-NEXT: kshiftlw $14, %k6, %k6 ## encoding: [0xc4,0xe3,0xf9,0x32,0xf6,0x0e]
-; CHECK-NEXT: kshiftrw $14, %k6, %k6 ## encoding: [0xc4,0xe3,0xf9,0x30,0xf6,0x0e]
-; CHECK-NEXT: kshiftlw $12, %k6, %k6 ## encoding: [0xc4,0xe3,0xf9,0x32,0xf6,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k6, %k6 ## encoding: [0xc4,0xe3,0xf9,0x30,0xf6,0x0c]
-; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x37,0xf9]
+; CHECK-NEXT: kmovw %edi, %k7 ## encoding: [0xc5,0xf8,0x92,0xff]
+; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k7} ## encoding: [0x62,0xf2,0xfd,0x0f,0x29,0xc1]
+; CHECK-NEXT: vpcmpgtq %xmm0, %xmm1, %k6 {%k7} ## encoding: [0x62,0xf2,0xf5,0x0f,0x37,0xf0]
+; CHECK-NEXT: vpcmpleq %xmm1, %xmm0, %k1 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1f,0xc9,0x02]
+; CHECK-NEXT: kxorw %k0, %k0, %k2 ## encoding: [0xc5,0xfc,0x47,0xd0]
+; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k3 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1f,0xd9,0x04]
+; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k4 {%k7} ## encoding: [0x62,0xf3,0xf5,0x0f,0x1f,0xe0,0x02]
+; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k5 {%k7} ## encoding: [0x62,0xf2,0xfd,0x0f,0x37,0xe9]
; CHECK-NEXT: kshiftlw $14, %k7, %k7 ## encoding: [0xc4,0xe3,0xf9,0x32,0xff,0x0e]
; CHECK-NEXT: kshiftrw $14, %k7, %k7 ## encoding: [0xc4,0xe3,0xf9,0x30,0xff,0x0e]
; CHECK-NEXT: kshiftlw $12, %k7, %k7 ## encoding: [0xc4,0xe3,0xf9,0x32,0xff,0x0c]
; CHECK-NEXT: kshiftrw $12, %k7, %k7 ## encoding: [0xc4,0xe3,0xf9,0x30,0xff,0x0c]
-; CHECK-NEXT: kshiftlw $14, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0e]
-; CHECK-NEXT: kshiftrw $14, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0e]
-; CHECK-NEXT: kshiftlw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
-; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
-; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
+; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
-; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
+; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
+; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask)
@@ -5785,35 +5609,11 @@ define <8 x i8> @test_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_ucmp_q_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x29,0xc1]
-; CHECK-NEXT: kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e]
-; CHECK-NEXT: kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
-; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xc9,0x01]
-; CHECK-NEXT: kshiftlw $14, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0e]
-; CHECK-NEXT: kshiftrw $14, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0e]
-; CHECK-NEXT: kshiftlw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
; CHECK-NEXT: vpcmpleuq %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xd1,0x02]
-; CHECK-NEXT: kshiftlw $14, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0e]
-; CHECK-NEXT: kshiftrw $14, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]
-; CHECK-NEXT: kshiftlw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd9,0x04]
-; CHECK-NEXT: kshiftlw $14, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x32,0xdb,0x0e]
-; CHECK-NEXT: kshiftrw $14, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x30,0xdb,0x0e]
-; CHECK-NEXT: kshiftlw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x32,0xdb,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x30,0xdb,0x0c]
; CHECK-NEXT: vpcmpnltuq %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xe1,0x05]
-; CHECK-NEXT: kshiftlw $14, %k4, %k4 ## encoding: [0xc4,0xe3,0xf9,0x32,0xe4,0x0e]
-; CHECK-NEXT: kshiftrw $14, %k4, %k4 ## encoding: [0xc4,0xe3,0xf9,0x30,0xe4,0x0e]
-; CHECK-NEXT: kshiftlw $12, %k4, %k4 ## encoding: [0xc4,0xe3,0xf9,0x32,0xe4,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k4, %k4 ## encoding: [0xc4,0xe3,0xf9,0x30,0xe4,0x0c]
; CHECK-NEXT: vpcmpnleuq %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xe9,0x06]
-; CHECK-NEXT: kshiftlw $14, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0e]
-; CHECK-NEXT: kshiftrw $14, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0e]
-; CHECK-NEXT: kshiftlw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
@@ -5852,57 +5652,33 @@ define <8 x i8> @test_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1) {
define <8 x i8> @test_mask_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_mask_ucmp_q_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1]
-; CHECK-NEXT: kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e]
-; CHECK-NEXT: kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
-; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
-; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xd1,0x01]
-; CHECK-NEXT: kshiftlw $14, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0e]
-; CHECK-NEXT: kshiftrw $14, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]
-; CHECK-NEXT: kshiftlw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
-; CHECK-NEXT: vpcmpleuq %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xd9,0x02]
-; CHECK-NEXT: kshiftlw $14, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x32,0xdb,0x0e]
-; CHECK-NEXT: kshiftrw $14, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x30,0xdb,0x0e]
-; CHECK-NEXT: kshiftlw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x32,0xdb,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k3, %k3 ## encoding: [0xc4,0xe3,0xf9,0x30,0xdb,0x0c]
-; CHECK-NEXT: kxorw %k0, %k0, %k4 ## encoding: [0xc5,0xfc,0x47,0xe0]
-; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xe9,0x04]
-; CHECK-NEXT: kshiftlw $14, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0e]
-; CHECK-NEXT: kshiftrw $14, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0e]
-; CHECK-NEXT: kshiftlw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0c]
-; CHECK-NEXT: vpcmpnltuq %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xf1,0x05]
-; CHECK-NEXT: kshiftlw $14, %k6, %k6 ## encoding: [0xc4,0xe3,0xf9,0x32,0xf6,0x0e]
-; CHECK-NEXT: kshiftrw $14, %k6, %k6 ## encoding: [0xc4,0xe3,0xf9,0x30,0xf6,0x0e]
-; CHECK-NEXT: kshiftlw $12, %k6, %k6 ## encoding: [0xc4,0xe3,0xf9,0x32,0xf6,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k6, %k6 ## encoding: [0xc4,0xe3,0xf9,0x30,0xf6,0x0c]
-; CHECK-NEXT: vpcmpnleuq %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xf9,0x06]
+; CHECK-NEXT: kmovw %edi, %k7 ## encoding: [0xc5,0xf8,0x92,0xff]
+; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k7} ## encoding: [0x62,0xf2,0xfd,0x0f,0x29,0xc1]
+; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k6 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1e,0xf1,0x01]
+; CHECK-NEXT: vpcmpleuq %xmm1, %xmm0, %k1 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1e,0xc9,0x02]
+; CHECK-NEXT: kxorw %k0, %k0, %k2 ## encoding: [0xc5,0xfc,0x47,0xd0]
+; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k3 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1f,0xd9,0x04]
+; CHECK-NEXT: vpcmpnltuq %xmm1, %xmm0, %k4 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1e,0xe1,0x05]
+; CHECK-NEXT: vpcmpnleuq %xmm1, %xmm0, %k5 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1e,0xe9,0x06]
; CHECK-NEXT: kshiftlw $14, %k7, %k7 ## encoding: [0xc4,0xe3,0xf9,0x32,0xff,0x0e]
; CHECK-NEXT: kshiftrw $14, %k7, %k7 ## encoding: [0xc4,0xe3,0xf9,0x30,0xff,0x0e]
; CHECK-NEXT: kshiftlw $12, %k7, %k7 ## encoding: [0xc4,0xe3,0xf9,0x32,0xff,0x0c]
; CHECK-NEXT: kshiftrw $12, %k7, %k7 ## encoding: [0xc4,0xe3,0xf9,0x30,0xff,0x0c]
-; CHECK-NEXT: kshiftlw $14, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0e]
-; CHECK-NEXT: kshiftrw $14, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0e]
-; CHECK-NEXT: kshiftlw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
-; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
-; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
+; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
-; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
+; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
+; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask)
diff --git a/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll b/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
new file mode 100644
index 000000000000..f297fc3db95f
--- /dev/null
+++ b/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
@@ -0,0 +1,13485 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s -check-prefix=NoVLX
+
+define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqb_v16i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %1 = bitcast <2 x i64> %__b to <16 x i8>
+ %2 = icmp eq <16 x i8> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqb_v16i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqb (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <16 x i8>
+ %2 = icmp eq <16 x i8> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpeqb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %1 = bitcast <2 x i64> %__b to <16 x i8>
+ %2 = icmp eq <16 x i8> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+define zeroext i32 @test_masked_vpcmpeqb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqb (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <16 x i8>
+ %2 = icmp eq <16 x i8> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i64 @test_vpcmpeqb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqb_v16i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %1 = bitcast <2 x i64> %__b to <16 x i8>
+ %2 = icmp eq <16 x i8> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpeqb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqb_v16i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqb (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <16 x i8>
+ %2 = icmp eq <16 x i8> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpeqb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %1 = bitcast <2 x i64> %__b to <16 x i8>
+ %2 = icmp eq <16 x i8> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpeqb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqb (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <16 x i8>
+ %2 = icmp eq <16 x i8> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
+define zeroext i64 @test_vpcmpeqb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqb_v32i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <32 x i8>
+ %1 = bitcast <4 x i64> %__b to <32 x i8>
+ %2 = icmp eq <32 x i8> %0, %1
+ %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpeqb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqb_v32i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqb (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <32 x i8>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <32 x i8>
+ %2 = icmp eq <32 x i8> %0, %1
+ %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpeqb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <32 x i8>
+ %1 = bitcast <4 x i64> %__b to <32 x i8>
+ %2 = icmp eq <32 x i8> %0, %1
+ %3 = bitcast i32 %__u to <32 x i1>
+ %4 = and <32 x i1> %2, %3
+ %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpeqb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqb (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <32 x i8>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <32 x i8>
+ %2 = icmp eq <32 x i8> %0, %1
+ %3 = bitcast i32 %__u to <32 x i1>
+ %4 = and <32 x i1> %2, %3
+ %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
+define zeroext i16 @test_vpcmpeqw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqw_v8i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %1 = bitcast <2 x i64> %__b to <8 x i16>
+ %2 = icmp eq <8 x i16> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_vpcmpeqw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqw_v8i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqw (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <8 x i16>
+ %2 = icmp eq <8 x i16> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpeqw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %1 = bitcast <2 x i64> %__b to <8 x i16>
+ %2 = icmp eq <8 x i16> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+define zeroext i16 @test_masked_vpcmpeqw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <8 x i16>
+ %2 = icmp eq <8 x i16> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
+define zeroext i32 @test_vpcmpeqw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqw_v8i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %1 = bitcast <2 x i64> %__b to <8 x i16>
+ %2 = icmp eq <8 x i16> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpeqw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqw_v8i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqw (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <8 x i16>
+ %2 = icmp eq <8 x i16> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpeqw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %1 = bitcast <2 x i64> %__b to <8 x i16>
+ %2 = icmp eq <8 x i16> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+define zeroext i32 @test_masked_vpcmpeqw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <8 x i16>
+ %2 = icmp eq <8 x i16> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i64 @test_vpcmpeqw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqw_v8i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %1 = bitcast <2 x i64> %__b to <8 x i16>
+ %2 = icmp eq <8 x i16> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpeqw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqw_v8i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqw (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <8 x i16>
+ %2 = icmp eq <8 x i16> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpeqw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %1 = bitcast <2 x i64> %__b to <8 x i16>
+ %2 = icmp eq <8 x i16> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpeqw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <8 x i16>
+ %2 = icmp eq <8 x i16> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
+define zeroext i32 @test_vpcmpeqw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqw_v16i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <16 x i16>
+ %1 = bitcast <4 x i64> %__b to <16 x i16>
+ %2 = icmp eq <16 x i16> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpeqw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqw_v16i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqw (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <16 x i16>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <16 x i16>
+ %2 = icmp eq <16 x i16> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpeqw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <16 x i16>
+ %1 = bitcast <4 x i64> %__b to <16 x i16>
+ %2 = icmp eq <16 x i16> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+define zeroext i32 @test_masked_vpcmpeqw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqw (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <16 x i16>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <16 x i16>
+ %2 = icmp eq <16 x i16> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqw_v16i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <16 x i16>
+ %1 = bitcast <4 x i64> %__b to <16 x i16>
+ %2 = icmp eq <16 x i16> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqw_v16i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqw (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <16 x i16>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <16 x i16>
+ %2 = icmp eq <16 x i16> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpeqw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <16 x i16>
+ %1 = bitcast <4 x i64> %__b to <16 x i16>
+ %2 = icmp eq <16 x i16> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpeqw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqw (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <16 x i16>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <16 x i16>
+ %2 = icmp eq <16 x i16> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
+define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqw_v32i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <32 x i16>
+ %1 = bitcast <8 x i64> %__b to <32 x i16>
+ %2 = icmp eq <32 x i16> %0, %1
+ %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqw (%rdi), %zmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <32 x i16>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <32 x i16>
+ %2 = icmp eq <32 x i16> %0, %1
+ %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <32 x i16>
+ %1 = bitcast <8 x i64> %__b to <32 x i16>
+ %2 = icmp eq <32 x i16> %0, %1
+ %3 = bitcast i32 %__u to <32 x i1>
+ %4 = and <32 x i1> %2, %3
+ %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqw (%rsi), %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <32 x i16>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <32 x i16>
+ %2 = icmp eq <32 x i16> %0, %1
+ %3 = bitcast i32 %__u to <32 x i1>
+ %4 = and <32 x i1> %2, %3
+ %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
+define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqd_v4i1_v8i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %1 = bitcast <2 x i64> %__b to <4 x i32>
+ %2 = icmp eq <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <4 x i32>
+ %2 = icmp eq <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %1 = bitcast <2 x i64> %__b to <4 x i32>
+ %2 = icmp eq <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <4 x i32>
+ %2 = icmp eq <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+
+define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <4 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp eq <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <4 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp eq <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %extract.i, %2
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+
+define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqd_v4i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %1 = bitcast <2 x i64> %__b to <4 x i32>
+ %2 = icmp eq <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <4 x i32>
+ %2 = icmp eq <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %1 = bitcast <2 x i64> %__b to <4 x i32>
+ %2 = icmp eq <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <4 x i32>
+ %2 = icmp eq <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
+define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <4 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp eq <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <4 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp eq <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %extract.i, %2
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
+define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqd_v4i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %1 = bitcast <2 x i64> %__b to <4 x i32>
+ %2 = icmp eq <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <4 x i32>
+ %2 = icmp eq <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %1 = bitcast <2 x i64> %__b to <4 x i32>
+ %2 = icmp eq <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <4 x i32>
+ %2 = icmp eq <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <4 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp eq <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <4 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp eq <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %extract.i, %2
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqd_v4i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %1 = bitcast <2 x i64> %__b to <4 x i32>
+ %2 = icmp eq <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <4 x i32>
+ %2 = icmp eq <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %1 = bitcast <2 x i64> %__b to <4 x i32>
+ %2 = icmp eq <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <4 x i32>
+ %2 = icmp eq <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
+define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <4 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp eq <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <4 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp eq <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %extract.i, %2
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
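+; Tests with both CHECK and NoVLX blocks also exercise the fallback when VLX
+; is unavailable: the 256-bit operands are widened to zmm registers and the
+; stray upper mask bits are cleared with a kshiftlw/kshiftrw pair.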
+define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqd_v8i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask:
+; NoVLX: ## BB#0: ## %entry
+; NoVLX-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %1 = bitcast <4 x i64> %__b to <8 x i32>
+ %2 = icmp eq <8 x i32> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem:
+; NoVLX: ## BB#0: ## %entry
+; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <8 x i32>
+ %2 = icmp eq <8 x i32> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask:
+; NoVLX: ## BB#0: ## %entry
+; NoVLX-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %1 = bitcast <4 x i64> %__b to <8 x i32>
+ %2 = icmp eq <8 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem:
+; NoVLX: ## BB#0: ## %entry
+; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <8 x i32>
+ %2 = icmp eq <8 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
+define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem_b:
+; NoVLX: ## BB#0: ## %entry
+; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <8 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp eq <8 x i32> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b:
+; NoVLX: ## BB#0: ## %entry
+; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <8 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp eq <8 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %3, %2
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
+define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqd_v8i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %1 = bitcast <4 x i64> %__b to <8 x i32>
+ %2 = icmp eq <8 x i32> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <8 x i32>
+ %2 = icmp eq <8 x i32> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %1 = bitcast <4 x i64> %__b to <8 x i32>
+ %2 = icmp eq <8 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <8 x i32>
+ %2 = icmp eq <8 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <8 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp eq <8 x i32> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <8 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp eq <8 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %3, %2
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqd_v8i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %1 = bitcast <4 x i64> %__b to <8 x i32>
+ %2 = icmp eq <8 x i32> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <8 x i32>
+ %2 = icmp eq <8 x i32> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %1 = bitcast <4 x i64> %__b to <8 x i32>
+ %2 = icmp eq <8 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <8 x i32>
+ %2 = icmp eq <8 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
+define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <8 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp eq <8 x i32> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <8 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp eq <8 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %3, %2
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
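+; The v16i1 tests below use 512-bit vpcmpeqd compares; the <16 x i1> result is
+; padded with zero lanes before being bitcast to the wider integer mask.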
+define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqd_v16i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %1 = bitcast <8 x i64> %__b to <16 x i32>
+ %2 = icmp eq <16 x i32> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <16 x i32>
+ %2 = icmp eq <16 x i32> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %1 = bitcast <8 x i64> %__b to <16 x i32>
+ %2 = icmp eq <16 x i32> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <16 x i32>
+ %2 = icmp eq <16 x i32> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <16 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp eq <16 x i32> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <16 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp eq <16 x i32> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %3, %2
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqd_v16i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %1 = bitcast <8 x i64> %__b to <16 x i32>
+ %2 = icmp eq <16 x i32> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <16 x i32>
+ %2 = icmp eq <16 x i32> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %1 = bitcast <8 x i64> %__b to <16 x i32>
+ %2 = icmp eq <16 x i32> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <16 x i32>
+ %2 = icmp eq <16 x i32> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
+define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <16 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp eq <16 x i32> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <16 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp eq <16 x i32> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %3, %2
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
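+; The vpcmpeqq tests compare <2 x i64> vectors; for the i4 return the mask
+; byte is spilled to the stack with kmovb and reloaded with movzbl rather than
+; moved directly to a register.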
+define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqq_v2i1_v4i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp eq <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = bitcast <4 x i1> %3 to i4
+ ret i4 %4
+}
+
+define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp eq <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = bitcast <4 x i1> %3 to i4
+ ret i4 %4
+}
+
+define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp eq <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %6 = bitcast <4 x i1> %5 to i4
+ ret i4 %6
+}
+
+define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp eq <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %6 = bitcast <4 x i1> %5 to i4
+ ret i4 %6
+}
+
+
+define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp eq <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = bitcast <4 x i1> %3 to i4
+ ret i4 %4
+}
+
+define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp eq <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %extract.i, %2
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %6 = bitcast <4 x i1> %5 to i4
+ ret i4 %6
+}
+
+
+define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqq_v2i1_v8i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp eq <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp eq <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp eq <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp eq <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+
+define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp eq <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp eq <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %extract.i, %2
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+
+define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqq_v2i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp eq <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp eq <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp eq <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp eq <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
+define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp eq <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp eq <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %extract.i, %2
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
+define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqq_v2i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp eq <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp eq <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp eq <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp eq <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp eq <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp eq <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %extract.i, %2
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqq_v2i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp eq <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp eq <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp eq <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp eq <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
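+; Broadcast-from-memory ({1to2}) variants of the i64-mask tests above.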
+define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp eq <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp eq <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %extract.i, %2
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
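+; 256-bit operands: vpcmpeqq on ymm registers yields a <4 x i1> mask, extended here to i8; vzeroupper is expected before returning.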
+define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqq_v4i1_v8i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %1 = bitcast <4 x i64> %__b to <4 x i64>
+ %2 = icmp eq <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <4 x i64>
+ %2 = icmp eq <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %1 = bitcast <4 x i64> %__b to <4 x i64>
+ %2 = icmp eq <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <4 x i64>
+ %2 = icmp eq <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+
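+; Broadcast ({1to4}) variants of the 256-bit i8-mask tests.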
+define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <4 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp eq <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <4 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp eq <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %extract.i, %2
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+
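+; As above, but the <4 x i1> result is widened to an i16 mask (kmovw).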
+define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqq_v4i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %1 = bitcast <4 x i64> %__b to <4 x i64>
+ %2 = icmp eq <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <4 x i64>
+ %2 = icmp eq <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %1 = bitcast <4 x i64> %__b to <4 x i64>
+ %2 = icmp eq <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <4 x i64>
+ %2 = icmp eq <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
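+; Broadcast ({1to4}) variants of the 256-bit i16-mask tests.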
+define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <4 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp eq <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <4 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp eq <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %extract.i, %2
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
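+; <4 x i1> result widened to an i32 mask (kmovd).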
+define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqq_v4i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %1 = bitcast <4 x i64> %__b to <4 x i64>
+ %2 = icmp eq <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <4 x i64>
+ %2 = icmp eq <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %1 = bitcast <4 x i64> %__b to <4 x i64>
+ %2 = icmp eq <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <4 x i64>
+ %2 = icmp eq <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
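+; Broadcast ({1to4}) variants of the 256-bit i32-mask tests.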
+define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <4 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp eq <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <4 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp eq <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %extract.i, %2
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
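+; <4 x i1> result widened to an i64 mask (kmovq).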
+define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqq_v4i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %1 = bitcast <4 x i64> %__b to <4 x i64>
+ %2 = icmp eq <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <4 x i64>
+ %2 = icmp eq <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %1 = bitcast <4 x i64> %__b to <4 x i64>
+ %2 = icmp eq <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <4 x i64>
+ %2 = icmp eq <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
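+; Broadcast ({1to4}) variants of the 256-bit i64-mask tests.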
+define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <4 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp eq <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <4 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp eq <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %extract.i, %2
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
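+; 512-bit operands: vpcmpeqq on zmm registers produces a full <8 x i1> mask, so the i8 mask argument %__u is used directly, with no subvector extract.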
+define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqq_v8i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %1 = bitcast <8 x i64> %__b to <8 x i64>
+ %2 = icmp eq <8 x i64> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <8 x i64>
+ %2 = icmp eq <8 x i64> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %1 = bitcast <8 x i64> %__b to <8 x i64>
+ %2 = icmp eq <8 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <8 x i64>
+ %2 = icmp eq <8 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
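+; Broadcast ({1to8}) variants of the 512-bit i16-mask tests.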
+define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <8 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp eq <8 x i64> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <8 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp eq <8 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %3, %2
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
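+; 512-bit compares widened to an i32 mask (kmovd).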
+define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqq_v8i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %1 = bitcast <8 x i64> %__b to <8 x i64>
+ %2 = icmp eq <8 x i64> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <8 x i64>
+ %2 = icmp eq <8 x i64> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %1 = bitcast <8 x i64> %__b to <8 x i64>
+ %2 = icmp eq <8 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <8 x i64>
+ %2 = icmp eq <8 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
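+; Broadcast ({1to8}) variants of the 512-bit i32-mask tests.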
+define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <8 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp eq <8 x i64> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <8 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp eq <8 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %3, %2
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
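+; 512-bit compares widened to an i64 mask (kmovq).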
+define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqq_v8i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %1 = bitcast <8 x i64> %__b to <8 x i64>
+ %2 = icmp eq <8 x i64> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <8 x i64>
+ %2 = icmp eq <8 x i64> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %1 = bitcast <8 x i64> %__b to <8 x i64>
+ %2 = icmp eq <8 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <8 x i64>
+ %2 = icmp eq <8 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
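+; Broadcast ({1to8}) variants of the 512-bit i64-mask tests.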
+define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <8 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp eq <8 x i64> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <8 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp eq <8 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %3, %2
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
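+; Signed greater-than byte compares: icmp sgt <16 x i8> should select vpcmpgtb. Byte compares have no embedded-broadcast form, so there are no _mem_b variants in this group.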
+define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %1 = bitcast <2 x i64> %__b to <16 x i8>
+ %2 = icmp sgt <16 x i8> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtb (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <16 x i8>
+ %2 = icmp sgt <16 x i8> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpsgtb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %1 = bitcast <2 x i64> %__b to <16 x i8>
+ %2 = icmp sgt <16 x i8> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+define zeroext i32 @test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtb (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <16 x i8>
+ %2 = icmp sgt <16 x i8> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
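+; The <16 x i1> byte-compare result widened to an i64 mask (kmovq).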
+define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %1 = bitcast <2 x i64> %__b to <16 x i8>
+ %2 = icmp sgt <16 x i8> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtb (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <16 x i8>
+ %2 = icmp sgt <16 x i8> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpsgtb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %1 = bitcast <2 x i64> %__b to <16 x i8>
+ %2 = icmp sgt <16 x i8> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtb (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <16 x i8>
+ %2 = icmp sgt <16 x i8> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
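+; 256-bit byte compares: the <32 x i1> result is widened to i64, with the masked variants bitcasting the i32 argument %__u to <32 x i1>.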
+define zeroext i64 @test_vpcmpsgtb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <32 x i8>
+ %1 = bitcast <4 x i64> %__b to <32 x i8>
+ %2 = icmp sgt <32 x i8> %0, %1
+ %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpsgtb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtb (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <32 x i8>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <32 x i8>
+ %2 = icmp sgt <32 x i8> %0, %1
+ %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpsgtb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <32 x i8>
+ %1 = bitcast <4 x i64> %__b to <32 x i8>
+ %2 = icmp sgt <32 x i8> %0, %1
+ %3 = bitcast i32 %__u to <32 x i1>
+ %4 = and <32 x i1> %2, %3
+ %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtb (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <32 x i8>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <32 x i8>
+ %2 = icmp sgt <32 x i8> %0, %1
+ %3 = bitcast i32 %__u to <32 x i1>
+ %4 = and <32 x i1> %2, %3
+ %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
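+; vpcmpgtw tests start here: 128-bit <8 x i16> compares whose v8i1 result is
+; widened to v16i1 and returned as i16 (kmovw). Masked variants AND the
+; compare with an i8 writemask bitcast to <8 x i1> before widening.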
+define zeroext i16 @test_vpcmpsgtw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %1 = bitcast <2 x i64> %__b to <8 x i16>
+ %2 = icmp sgt <8 x i16> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_vpcmpsgtw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtw (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <8 x i16>
+ %2 = icmp sgt <8 x i16> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpsgtw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %1 = bitcast <2 x i64> %__b to <8 x i16>
+ %2 = icmp sgt <8 x i16> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+define zeroext i16 @test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <8 x i16>
+ %2 = icmp sgt <8 x i16> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
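+; Same <8 x i16> compares, now widened v8i1 -> v32i1 and returned as i32
+; (kmovd); all padding lanes select from the zeroinitializer operand.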
+define zeroext i32 @test_vpcmpsgtw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %1 = bitcast <2 x i64> %__b to <8 x i16>
+ %2 = icmp sgt <8 x i16> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpsgtw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtw (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <8 x i16>
+ %2 = icmp sgt <8 x i16> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpsgtw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %1 = bitcast <2 x i64> %__b to <8 x i16>
+ %2 = icmp sgt <8 x i16> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+define zeroext i32 @test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <8 x i16>
+ %2 = icmp sgt <8 x i16> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
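+; <8 x i16> compares widened v8i1 -> v64i1 and returned as i64 (kmovq).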
+define zeroext i64 @test_vpcmpsgtw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %1 = bitcast <2 x i64> %__b to <8 x i16>
+ %2 = icmp sgt <8 x i16> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpsgtw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtw (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <8 x i16>
+ %2 = icmp sgt <8 x i16> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpsgtw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %1 = bitcast <2 x i64> %__b to <8 x i16>
+ %2 = icmp sgt <8 x i16> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <8 x i16>
+ %2 = icmp sgt <8 x i16> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
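+; 256-bit <16 x i16> compares: v16i1 -> v32i1, returned as i32. The masked
+; variants bitcast an i16 writemask to <16 x i1> before the AND.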
+define zeroext i32 @test_vpcmpsgtw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <16 x i16>
+ %1 = bitcast <4 x i64> %__b to <16 x i16>
+ %2 = icmp sgt <16 x i16> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpsgtw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtw (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <16 x i16>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <16 x i16>
+ %2 = icmp sgt <16 x i16> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpsgtw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <16 x i16>
+ %1 = bitcast <4 x i64> %__b to <16 x i16>
+ %2 = icmp sgt <16 x i16> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+define zeroext i32 @test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtw (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <16 x i16>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <16 x i16>
+ %2 = icmp sgt <16 x i16> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
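+; 256-bit <16 x i16> compares widened v16i1 -> v64i1, returned as i64.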
+define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <16 x i16>
+ %1 = bitcast <4 x i64> %__b to <16 x i16>
+ %2 = icmp sgt <16 x i16> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtw (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <16 x i16>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <16 x i16>
+ %2 = icmp sgt <16 x i16> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpsgtw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <16 x i16>
+ %1 = bitcast <4 x i64> %__b to <16 x i16>
+ %2 = icmp sgt <16 x i16> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtw (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <16 x i16>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <16 x i16>
+ %2 = icmp sgt <16 x i16> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
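+; 512-bit <32 x i16> compares (zmm operands): v32i1 -> v64i1, returned as
+; i64, with an i32 writemask in the masked variants.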
+define zeroext i64 @test_vpcmpsgtw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtw %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <32 x i16>
+ %1 = bitcast <8 x i64> %__b to <32 x i16>
+ %2 = icmp sgt <32 x i16> %0, %1
+ %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpsgtw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtw (%rdi), %zmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <32 x i16>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <32 x i16>
+ %2 = icmp sgt <32 x i16> %0, %1
+ %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <32 x i16>
+ %1 = bitcast <8 x i64> %__b to <32 x i16>
+ %2 = icmp sgt <32 x i16> %0, %1
+ %3 = bitcast i32 %__u to <32 x i1>
+ %4 = and <32 x i1> %2, %3
+ %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtw (%rsi), %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <32 x i16>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <32 x i16>
+ %2 = icmp sgt <32 x i16> %0, %1
+ %3 = bitcast i32 %__u to <32 x i1>
+ %4 = and <32 x i1> %2, %3
+ %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
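+; vpcmpgtd tests start here: 128-bit <4 x i32> compares, v4i1 -> v8i1,
+; returned as i8 (kmovb). Because the writemask argument is an i8, the
+; masked variants bitcast it to <8 x i1> and shuffle out the low four
+; elements (%extract.i) before ANDing with the compare result.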
+define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %1 = bitcast <2 x i64> %__b to <4 x i32>
+ %2 = icmp sgt <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <4 x i32>
+ %2 = icmp sgt <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %1 = bitcast <2 x i64> %__b to <4 x i32>
+ %2 = icmp sgt <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <4 x i32>
+ %2 = icmp sgt <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+
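+; The _b variants load a single i32 and splat it (insertelement followed by
+; shufflevector), which selects the {1to4} embedded-broadcast form of
+; vpcmpgtd.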
+define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <4 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sgt <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <4 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sgt <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %extract.i, %2
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+
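+; <4 x i32> compares widened v4i1 -> v16i1, returned as i16.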
+define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %1 = bitcast <2 x i64> %__b to <4 x i32>
+ %2 = icmp sgt <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <4 x i32>
+ %2 = icmp sgt <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %1 = bitcast <2 x i64> %__b to <4 x i32>
+ %2 = icmp sgt <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <4 x i32>
+ %2 = icmp sgt <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
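+; Broadcast-operand ({1to4}) versions of the v4i1 -> v16i1 tests.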
+define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <4 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sgt <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <4 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sgt <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %extract.i, %2
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
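+; <4 x i32> compares widened v4i1 -> v32i1, returned as i32.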
+define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %1 = bitcast <2 x i64> %__b to <4 x i32>
+ %2 = icmp sgt <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <4 x i32>
+ %2 = icmp sgt <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %1 = bitcast <2 x i64> %__b to <4 x i32>
+ %2 = icmp sgt <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <4 x i32>
+ %2 = icmp sgt <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
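+; Broadcast-operand ({1to4}) versions of the v4i1 -> v32i1 tests.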
+define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <4 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sgt <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <4 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sgt <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %extract.i, %2
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
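+; <4 x i32> compares widened v4i1 -> v64i1, returned as i64.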
+define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %1 = bitcast <2 x i64> %__b to <4 x i32>
+ %2 = icmp sgt <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <4 x i32>
+ %2 = icmp sgt <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %1 = bitcast <2 x i64> %__b to <4 x i32>
+ %2 = icmp sgt <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <4 x i32>
+ %2 = icmp sgt <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
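+; Broadcast-operand ({1to4}) versions of the v4i1 -> v64i1 tests.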
+define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <4 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sgt <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <4 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sgt <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %extract.i, %2
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
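+; 256-bit <8 x i32> compares: v8i1 -> v16i1, returned as i16. These also
+; carry NoVLX check lines: without AVX512VL the compare is widened to zmm
+; and the stray upper mask bits are cleared with kshiftlw/kshiftrw by 8.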
+define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask:
+; NoVLX: ## BB#0: ## %entry
+; NoVLX-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %1 = bitcast <4 x i64> %__b to <8 x i32>
+ %2 = icmp sgt <8 x i32> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtd (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem:
+; NoVLX: ## BB#0: ## %entry
+; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <8 x i32>
+ %2 = icmp sgt <8 x i32> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask:
+; NoVLX: ## BB#0: ## %entry
+; NoVLX-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %1 = bitcast <4 x i64> %__b to <8 x i32>
+ %2 = icmp sgt <8 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem:
+; NoVLX: ## BB#0: ## %entry
+; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <8 x i32>
+ %2 = icmp sgt <8 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
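+; Broadcast-operand ({1to8}) versions; the NoVLX path materializes the
+; splat with vpbroadcastd before the zmm compare.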
+define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
+; NoVLX: ## BB#0: ## %entry
+; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <8 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sgt <8 x i32> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
+; NoVLX: ## BB#0: ## %entry
+; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <8 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sgt <8 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %3, %2
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
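+; 256-bit <8 x i32> compares widened v8i1 -> v32i1, returned as i32.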
+define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %1 = bitcast <4 x i64> %__b to <8 x i32>
+ %2 = icmp sgt <8 x i32> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtd (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <8 x i32>
+ %2 = icmp sgt <8 x i32> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %1 = bitcast <4 x i64> %__b to <8 x i32>
+ %2 = icmp sgt <8 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <8 x i32>
+ %2 = icmp sgt <8 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <8 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sgt <8 x i32> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <8 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sgt <8 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %3, %2
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
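+; Same zero-padding widened to 64 bits; the i64 result is read back from the
+; mask register with kmovq.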
+define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %1 = bitcast <4 x i64> %__b to <8 x i32>
+ %2 = icmp sgt <8 x i32> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtd (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <8 x i32>
+ %2 = icmp sgt <8 x i32> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %1 = bitcast <4 x i64> %__b to <8 x i32>
+ %2 = icmp sgt <8 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <8 x i32>
+ %2 = icmp sgt <8 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
+define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <8 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sgt <8 x i32> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <8 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sgt <8 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %3, %2
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
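+; From here the sources are 512-bit: the <8 x i64> arguments are reinterpreted
+; as <16 x i32> and compared with vpcmpgtd on %zmm registers before the
+; <16 x i1> mask is zero-extended to i32.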
+define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %1 = bitcast <8 x i64> %__b to <16 x i32>
+ %2 = icmp sgt <16 x i32> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <16 x i32>
+ %2 = icmp sgt <16 x i32> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %1 = bitcast <8 x i64> %__b to <16 x i32>
+ %2 = icmp sgt <16 x i32> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <16 x i32>
+ %2 = icmp sgt <16 x i32> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <16 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sgt <16 x i32> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <16 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sgt <16 x i32> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %3, %2
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
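+; As above, but the 16-bit mask is padded to 64 bits (indices 16-31 of the
+; <64 x i32> shuffle mask select the zeroinitializer operand) and returned via
+; kmovq.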
+define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %1 = bitcast <8 x i64> %__b to <16 x i32>
+ %2 = icmp sgt <16 x i32> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <16 x i32>
+ %2 = icmp sgt <16 x i32> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %1 = bitcast <8 x i64> %__b to <16 x i32>
+ %2 = icmp sgt <16 x i32> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <16 x i32>
+ %2 = icmp sgt <16 x i32> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
+define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <16 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sgt <16 x i32> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <16 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sgt <16 x i32> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %3, %2
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
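+; The vpcmpsgtq tests switch to signed-greater-than compares of packed qwords;
+; the <2 x i64> -> <2 x i64> bitcasts are no-ops, apparently kept from the
+; intrinsic expansion these tests were generated from. For the i4 results
+; there is seemingly no direct mask-to-GPR move of that width, so the CHECK
+; lines expect a kmovb spill to the stack followed by a movzbl reload.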
+define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp sgt <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = bitcast <4 x i1> %3 to i4
+ ret i4 %4
+}
+
+define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp sgt <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = bitcast <4 x i1> %3 to i4
+ ret i4 %4
+}
+
+define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp sgt <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %6 = bitcast <4 x i1> %5 to i4
+ ret i4 %6
+}
+
+define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp sgt <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %6 = bitcast <4 x i1> %5 to i4
+ ret i4 %6
+}
+
+
+define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp sgt <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = bitcast <4 x i1> %3 to i4
+ ret i4 %4
+}
+
+define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp sgt <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %extract.i, %2
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %6 = bitcast <4 x i1> %5 to i4
+ ret i4 %6
+}
+
+
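+; v2i1 -> v8i1: the padding shuffle reuses indices 2 and 3, which still select
+; zeros from the zeroinitializer operand, so the two compare bits are
+; zero-extended to eight and read back with kmovb.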
+define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp sgt <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp sgt <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp sgt <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp sgt <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+
+define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp sgt <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp sgt <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %extract.i, %2
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+
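+; v2i1 -> v16i1: same zero-padding, read back with kmovw.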
+define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp sgt <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp sgt <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp sgt <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp sgt <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
+define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp sgt <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp sgt <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %extract.i, %2
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
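+; v2i1 -> v32i1: same zero-padding, read back with kmovd.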
+define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp sgt <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp sgt <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp sgt <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp sgt <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp sgt <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp sgt <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %extract.i, %2
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
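+; v2i1 -> v64i1: same zero-padding, read back with kmovq.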
+define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp sgt <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp sgt <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp sgt <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp sgt <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
+define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp sgt <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp sgt <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %extract.i, %2
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
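+; The v4i1 variants compare <4 x i64> on %ymm registers; the masked forms
+; extract the low four bits of the i8 mask argument before and'ing it with the
+; compare result.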
+define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %1 = bitcast <4 x i64> %__b to <4 x i64>
+ %2 = icmp sgt <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <4 x i64>
+ %2 = icmp sgt <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %1 = bitcast <4 x i64> %__b to <4 x i64>
+ %2 = icmp sgt <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <4 x i64>
+ %2 = icmp sgt <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+
+define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <4 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sgt <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <4 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sgt <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %extract.i, %2
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+
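+; v4i1 -> v16i1: padding indices 4-7 select zeros from the second shuffle
+; operand, extending the four compare bits to sixteen.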
+define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %1 = bitcast <4 x i64> %__b to <4 x i64>
+ %2 = icmp sgt <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <4 x i64>
+ %2 = icmp sgt <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %1 = bitcast <4 x i64> %__b to <4 x i64>
+ %2 = icmp sgt <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <4 x i64>
+ %2 = icmp sgt <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
+define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <4 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sgt <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <4 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sgt <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %extract.i, %2
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
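+; Same widening pattern as above, but into a 32-bit mask read back with kmovd.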
+define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %1 = bitcast <4 x i64> %__b to <4 x i64>
+ %2 = icmp sgt <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <4 x i64>
+ %2 = icmp sgt <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %1 = bitcast <4 x i64> %__b to <4 x i64>
+ %2 = icmp sgt <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <4 x i64>
+ %2 = icmp sgt <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <4 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sgt <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <4 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sgt <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %extract.i, %2
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
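+; Same widening pattern again, into a 64-bit mask read back with kmovq.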
+define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %1 = bitcast <4 x i64> %__b to <4 x i64>
+ %2 = icmp sgt <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <4 x i64>
+ %2 = icmp sgt <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %1 = bitcast <4 x i64> %__b to <4 x i64>
+ %2 = icmp sgt <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <4 x i64>
+ %2 = icmp sgt <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
+define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <4 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sgt <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <4 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sgt <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %extract.i, %2
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
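+; The v8i1 tests operate on 512-bit zmm operands; here the i8 mask argument
+; is bitcast to <8 x i1> and and'ed with the compare result directly, with
+; no extract shuffle needed.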
+define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %1 = bitcast <8 x i64> %__b to <8 x i64>
+ %2 = icmp sgt <8 x i64> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <8 x i64>
+ %2 = icmp sgt <8 x i64> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %1 = bitcast <8 x i64> %__b to <8 x i64>
+ %2 = icmp sgt <8 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <8 x i64>
+ %2 = icmp sgt <8 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
+define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <8 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sgt <8 x i64> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <8 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sgt <8 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %3, %2
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
+define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %1 = bitcast <8 x i64> %__b to <8 x i64>
+ %2 = icmp sgt <8 x i64> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <8 x i64>
+ %2 = icmp sgt <8 x i64> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %1 = bitcast <8 x i64> %__b to <8 x i64>
+ %2 = icmp sgt <8 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <8 x i64>
+ %2 = icmp sgt <8 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <8 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sgt <8 x i64> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <8 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sgt <8 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %3, %2
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %1 = bitcast <8 x i64> %__b to <8 x i64>
+ %2 = icmp sgt <8 x i64> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <8 x i64>
+ %2 = icmp sgt <8 x i64> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %1 = bitcast <8 x i64> %__b to <8 x i64>
+ %2 = icmp sgt <8 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <8 x i64>
+ %2 = icmp sgt <8 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
+define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <8 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sgt <8 x i64> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <8 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sgt <8 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %3, %2
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
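+; For signed greater-or-equal, the register-register form is commuted to
+; vpcmpleb, while the memory form uses the not-less-than predicate
+; (vpcmpnltb) so the load stays folded into the compare.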
+define zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpleb %xmm0, %xmm1, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %1 = bitcast <2 x i64> %__b to <16 x i8>
+ %2 = icmp sge <16 x i8> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpnltb (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <16 x i8>
+ %2 = icmp sge <16 x i8> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpsgeb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpleb %xmm0, %xmm1, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %1 = bitcast <2 x i64> %__b to <16 x i8>
+ %2 = icmp sge <16 x i8> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+define zeroext i32 @test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpnltb (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <16 x i8>
+ %2 = icmp sge <16 x i8> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpleb %xmm0, %xmm1, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %1 = bitcast <2 x i64> %__b to <16 x i8>
+ %2 = icmp sge <16 x i8> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpnltb (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <16 x i8>
+ %2 = icmp sge <16 x i8> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpsgeb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpleb %xmm0, %xmm1, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %1 = bitcast <2 x i64> %__b to <16 x i8>
+ %2 = icmp sge <16 x i8> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpnltb (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <16 x i8>
+ %2 = icmp sge <16 x i8> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
+define zeroext i64 @test_vpcmpsgeb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpleb %ymm0, %ymm1, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <32 x i8>
+ %1 = bitcast <4 x i64> %__b to <32 x i8>
+ %2 = icmp sge <32 x i8> %0, %1
+ %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpsgeb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpnltb (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <32 x i8>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <32 x i8>
+ %2 = icmp sge <32 x i8> %0, %1
+ %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpsgeb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpleb %ymm0, %ymm1, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <32 x i8>
+ %1 = bitcast <4 x i64> %__b to <32 x i8>
+ %2 = icmp sge <32 x i8> %0, %1
+ %3 = bitcast i32 %__u to <32 x i1>
+ %4 = and <32 x i1> %2, %3
+ %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpnltb (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <32 x i8>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <32 x i8>
+ %2 = icmp sge <32 x i8> %0, %1
+ %3 = bitcast i32 %__u to <32 x i1>
+ %4 = and <32 x i1> %2, %3
+ %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
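+; The same sge lowering applies to words: vpcmplew for the commuted
+; register form and vpcmpnltw for the folded-load form.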
+define zeroext i16 @test_vpcmpsgew_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgew_v8i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmplew %xmm0, %xmm1, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %1 = bitcast <2 x i64> %__b to <8 x i16>
+ %2 = icmp sge <8 x i16> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_vpcmpsgew_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgew_v8i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpnltw (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <8 x i16>
+ %2 = icmp sge <8 x i16> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpsgew_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmplew %xmm0, %xmm1, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %1 = bitcast <2 x i64> %__b to <8 x i16>
+ %2 = icmp sge <8 x i16> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+define zeroext i16 @test_masked_vpcmpsgew_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <8 x i16>
+ %2 = icmp sge <8 x i16> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
+define zeroext i32 @test_vpcmpsgew_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgew_v8i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmplew %xmm0, %xmm1, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %1 = bitcast <2 x i64> %__b to <8 x i16>
+ %2 = icmp sge <8 x i16> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpsgew_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgew_v8i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpnltw (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <8 x i16>
+ %2 = icmp sge <8 x i16> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpsgew_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmplew %xmm0, %xmm1, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %1 = bitcast <2 x i64> %__b to <8 x i16>
+ %2 = icmp sge <8 x i16> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+define zeroext i32 @test_masked_vpcmpsgew_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <8 x i16>
+ %2 = icmp sge <8 x i16> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i64 @test_vpcmpsgew_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgew_v8i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmplew %xmm0, %xmm1, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %1 = bitcast <2 x i64> %__b to <8 x i16>
+ %2 = icmp sge <8 x i16> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpsgew_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgew_v8i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpnltw (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <8 x i16>
+ %2 = icmp sge <8 x i16> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpsgew_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmplew %xmm0, %xmm1, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %1 = bitcast <2 x i64> %__b to <8 x i16>
+ %2 = icmp sge <8 x i16> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpsgew_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <8 x i16>
+ %2 = icmp sge <8 x i16> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
+define zeroext i32 @test_vpcmpsgew_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgew_v16i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmplew %ymm0, %ymm1, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <16 x i16>
+ %1 = bitcast <4 x i64> %__b to <16 x i16>
+ %2 = icmp sge <16 x i16> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpsgew_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgew_v16i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpnltw (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <16 x i16>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <16 x i16>
+ %2 = icmp sge <16 x i16> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpsgew_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmplew %ymm0, %ymm1, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <16 x i16>
+ %1 = bitcast <4 x i64> %__b to <16 x i16>
+ %2 = icmp sge <16 x i16> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+define zeroext i32 @test_masked_vpcmpsgew_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpnltw (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <16 x i16>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <16 x i16>
+ %2 = icmp sge <16 x i16> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgew_v16i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmplew %ymm0, %ymm1, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <16 x i16>
+ %1 = bitcast <4 x i64> %__b to <16 x i16>
+ %2 = icmp sge <16 x i16> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgew_v16i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpnltw (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <16 x i16>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <16 x i16>
+ %2 = icmp sge <16 x i16> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpsgew_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmplew %ymm0, %ymm1, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <16 x i16>
+ %1 = bitcast <4 x i64> %__b to <16 x i16>
+ %2 = icmp sge <16 x i16> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpsgew_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpnltw (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <16 x i16>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <16 x i16>
+ %2 = icmp sge <16 x i16> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
+define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgew_v32i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmplew %zmm0, %zmm1, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <32 x i16>
+ %1 = bitcast <8 x i64> %__b to <32 x i16>
+ %2 = icmp sge <32 x i16> %0, %1
+ %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpnltw (%rdi), %zmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <32 x i16>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <32 x i16>
+ %2 = icmp sge <32 x i16> %0, %1
+ %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmplew %zmm0, %zmm1, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <32 x i16>
+ %1 = bitcast <8 x i64> %__b to <32 x i16>
+ %2 = icmp sge <32 x i16> %0, %1
+ %3 = bitcast i32 %__u to <32 x i1>
+ %4 = and <32 x i1> %2, %3
+ %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpnltw (%rsi), %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <32 x i16>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <32 x i16>
+ %2 = icmp sge <32 x i16> %0, %1
+ %3 = bitcast i32 %__u to <32 x i1>
+ %4 = and <32 x i1> %2, %3
+ %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
+define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsged_v4i1_v8i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %1 = bitcast <2 x i64> %__b to <4 x i32>
+ %2 = icmp sge <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <4 x i32>
+ %2 = icmp sge <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %1 = bitcast <2 x i64> %__b to <4 x i32>
+ %2 = icmp sge <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <4 x i32>
+ %2 = icmp sge <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+
+define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastd (%rdi), %xmm1
+; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <4 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sge <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastd (%rsi), %xmm1
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <4 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sge <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %extract.i, %2
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+
+define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsged_v4i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %1 = bitcast <2 x i64> %__b to <4 x i32>
+ %2 = icmp sge <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <4 x i32>
+ %2 = icmp sge <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %1 = bitcast <2 x i64> %__b to <4 x i32>
+ %2 = icmp sge <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <4 x i32>
+ %2 = icmp sge <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
+define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastd (%rdi), %xmm1
+; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <4 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sge <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastd (%rsi), %xmm1
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <4 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sge <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %extract.i, %2
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
+define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsged_v4i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %1 = bitcast <2 x i64> %__b to <4 x i32>
+ %2 = icmp sge <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <4 x i32>
+ %2 = icmp sge <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %1 = bitcast <2 x i64> %__b to <4 x i32>
+ %2 = icmp sge <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <4 x i32>
+ %2 = icmp sge <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastd (%rdi), %xmm1
+; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <4 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sge <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastd (%rsi), %xmm1
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <4 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sge <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %extract.i, %2
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsged_v4i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %1 = bitcast <2 x i64> %__b to <4 x i32>
+ %2 = icmp sge <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <4 x i32>
+ %2 = icmp sge <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %1 = bitcast <2 x i64> %__b to <4 x i32>
+ %2 = icmp sge <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <4 x i32>
+ %2 = icmp sge <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
+define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastd (%rdi), %xmm1
+; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <4 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sge <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastd (%rsi), %xmm1
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <4 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sge <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %extract.i, %2
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
+define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsged_v8i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask:
+; NoVLX: ## BB#0: ## %entry
+; NoVLX-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %1 = bitcast <4 x i64> %__b to <8 x i32>
+ %2 = icmp sge <8 x i32> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpnltd (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem:
+; NoVLX: ## BB#0: ## %entry
+; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <8 x i32>
+ %2 = icmp sge <8 x i32> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask:
+; NoVLX: ## BB#0: ## %entry
+; NoVLX-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %1 = bitcast <4 x i64> %__b to <8 x i32>
+ %2 = icmp sge <8 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem:
+; NoVLX: ## BB#0: ## %entry
+; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <8 x i32>
+ %2 = icmp sge <8 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
+define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastd (%rdi), %ymm1
+; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem_b:
+; NoVLX: ## BB#0: ## %entry
+; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <8 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sge <8 x i32> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastd (%rsi), %ymm1
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b:
+; NoVLX: ## BB#0: ## %entry
+; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <8 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sge <8 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %3, %2
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
+define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsged_v8i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %1 = bitcast <4 x i64> %__b to <8 x i32>
+ %2 = icmp sge <8 x i32> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpnltd (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <8 x i32>
+ %2 = icmp sge <8 x i32> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %1 = bitcast <4 x i64> %__b to <8 x i32>
+ %2 = icmp sge <8 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <8 x i32>
+ %2 = icmp sge <8 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastd (%rdi), %ymm1
+; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <8 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sge <8 x i32> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastd (%rsi), %ymm1
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <8 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sge <8 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %3, %2
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsged_v8i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %1 = bitcast <4 x i64> %__b to <8 x i32>
+ %2 = icmp sge <8 x i32> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpnltd (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <8 x i32>
+ %2 = icmp sge <8 x i32> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %1 = bitcast <4 x i64> %__b to <8 x i32>
+ %2 = icmp sge <8 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <8 x i32>
+ %2 = icmp sge <8 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
+define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastd (%rdi), %ymm1
+; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <8 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sge <8 x i32> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastd (%rsi), %ymm1
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <8 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sge <8 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %3, %2
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
+define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsged_v16i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %1 = bitcast <8 x i64> %__b to <16 x i32>
+ %2 = icmp sge <16 x i32> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <16 x i32>
+ %2 = icmp sge <16 x i32> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %1 = bitcast <8 x i64> %__b to <16 x i32>
+ %2 = icmp sge <16 x i32> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <16 x i32>
+ %2 = icmp sge <16 x i32> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastd (%rdi), %zmm1
+; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <16 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sge <16 x i32> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastd (%rsi), %zmm1
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <16 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sge <16 x i32> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %3, %2
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsged_v16i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %1 = bitcast <8 x i64> %__b to <16 x i32>
+ %2 = icmp sge <16 x i32> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <16 x i32>
+ %2 = icmp sge <16 x i32> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %1 = bitcast <8 x i64> %__b to <16 x i32>
+ %2 = icmp sge <16 x i32> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <16 x i32>
+ %2 = icmp sge <16 x i32> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
+define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastd (%rdi), %zmm1
+; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <16 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sge <16 x i32> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastd (%rsi), %zmm1
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <16 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sge <16 x i32> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %3, %2
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
+define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0
+; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp sge <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = bitcast <4 x i1> %3 to i4
+ ret i4 %4
+}
+
+define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp sge <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = bitcast <4 x i1> %3 to i4
+ ret i4 %4
+}
+
+define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp sge <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %6 = bitcast <4 x i1> %5 to i4
+ ret i4 %6
+}
+
+define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp sge <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %6 = bitcast <4 x i1> %5 to i4
+ ret i4 %6
+}
+
+
+define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastq (%rdi), %xmm1
+; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0
+; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp sge <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = bitcast <4 x i1> %3 to i4
+ ret i4 %4
+}
+
+define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastq (%rsi), %xmm1
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp sge <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %extract.i, %2
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %6 = bitcast <4 x i1> %5 to i4
+ ret i4 %6
+}
+
+
+define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp sge <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp sge <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp sge <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp sge <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+
+define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastq (%rdi), %xmm1
+; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp sge <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastq (%rsi), %xmm1
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp sge <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %extract.i, %2
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+
+define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp sge <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp sge <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp sge <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp sge <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
+define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastq (%rdi), %xmm1
+; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp sge <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastq (%rsi), %xmm1
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp sge <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %extract.i, %2
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
+define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp sge <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp sge <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp sge <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp sge <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastq (%rdi), %xmm1
+; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp sge <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastq (%rsi), %xmm1
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp sge <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %extract.i, %2
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp sge <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp sge <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp sge <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp sge <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
+define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastq (%rdi), %xmm1
+; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp sge <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastq (%rsi), %xmm1
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp sge <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %extract.i, %2
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
+define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %1 = bitcast <4 x i64> %__b to <4 x i64>
+ %2 = icmp sge <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <4 x i64>
+ %2 = icmp sge <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %1 = bitcast <4 x i64> %__b to <4 x i64>
+ %2 = icmp sge <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <4 x i64>
+ %2 = icmp sge <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+
+define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastq (%rdi), %ymm1
+; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <4 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sge <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastq (%rsi), %ymm1
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <4 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sge <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %extract.i, %2
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+
+define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %1 = bitcast <4 x i64> %__b to <4 x i64>
+ %2 = icmp sge <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <4 x i64>
+ %2 = icmp sge <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %1 = bitcast <4 x i64> %__b to <4 x i64>
+ %2 = icmp sge <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <4 x i64>
+ %2 = icmp sge <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
+define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastq (%rdi), %ymm1
+; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <4 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sge <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastq (%rsi), %ymm1
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <4 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sge <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %extract.i, %2
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
+define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %1 = bitcast <4 x i64> %__b to <4 x i64>
+ %2 = icmp sge <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <4 x i64>
+ %2 = icmp sge <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %1 = bitcast <4 x i64> %__b to <4 x i64>
+ %2 = icmp sge <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <4 x i64>
+ %2 = icmp sge <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastq (%rdi), %ymm1
+; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <4 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sge <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastq (%rsi), %ymm1
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <4 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sge <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %extract.i, %2
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %1 = bitcast <4 x i64> %__b to <4 x i64>
+ %2 = icmp sge <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <4 x i64>
+ %2 = icmp sge <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %1 = bitcast <4 x i64> %__b to <4 x i64>
+ %2 = icmp sge <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <4 x i64>
+ %2 = icmp sge <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
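+; The _mem_b variants below compare against a single i64 splat loaded from
+; memory (vpbroadcastq) rather than a full vector load.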
+define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastq (%rdi), %ymm1
+; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <4 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sge <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastq (%rsi), %ymm1
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <4 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sge <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %extract.i, %2
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
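+; The v8i1 groups that follow repeat the same pattern on 512-bit (zmm)
+; operands, widening the 8-bit compare mask to i16, i32, and i64 results; the
+; masked variants AND the compare result with the %__u predicate (loaded via
+; kmovd %edi, %k1) before widening.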
+define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %1 = bitcast <8 x i64> %__b to <8 x i64>
+ %2 = icmp sge <8 x i64> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <8 x i64>
+ %2 = icmp sge <8 x i64> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %1 = bitcast <8 x i64> %__b to <8 x i64>
+ %2 = icmp sge <8 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <8 x i64>
+ %2 = icmp sge <8 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
+define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastq (%rdi), %zmm1
+; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <8 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sge <8 x i64> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastq (%rsi), %zmm1
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <8 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sge <8 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %3, %2
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
+define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %1 = bitcast <8 x i64> %__b to <8 x i64>
+ %2 = icmp sge <8 x i64> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <8 x i64>
+ %2 = icmp sge <8 x i64> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %1 = bitcast <8 x i64> %__b to <8 x i64>
+ %2 = icmp sge <8 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <8 x i64>
+ %2 = icmp sge <8 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastq (%rdi), %zmm1
+; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <8 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sge <8 x i64> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastq (%rsi), %zmm1
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <8 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sge <8 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %3, %2
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %1 = bitcast <8 x i64> %__b to <8 x i64>
+ %2 = icmp sge <8 x i64> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <8 x i64>
+ %2 = icmp sge <8 x i64> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %1 = bitcast <8 x i64> %__b to <8 x i64>
+ %2 = icmp sge <8 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <8 x i64>
+ %2 = icmp sge <8 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
+define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastq (%rdi), %zmm1
+; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <8 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sge <8 x i64> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastq (%rsi), %zmm1
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <8 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sge <8 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %3, %2
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
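+; Unsigned byte compares: icmp ult on <16 x i8> and <32 x i8> operands lowers
+; to vpcmpltub, with the usual register, memory, and mask-predicated forms.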
+define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultb_v16i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltub %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %1 = bitcast <2 x i64> %__b to <16 x i8>
+ %2 = icmp ult <16 x i8> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultb_v16i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltub (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <16 x i8>
+ %2 = icmp ult <16 x i8> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpultb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltub %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %1 = bitcast <2 x i64> %__b to <16 x i8>
+ %2 = icmp ult <16 x i8> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+define zeroext i32 @test_masked_vpcmpultb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltub (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <16 x i8>
+ %2 = icmp ult <16 x i8> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultb_v16i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltub %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %1 = bitcast <2 x i64> %__b to <16 x i8>
+ %2 = icmp ult <16 x i8> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltub (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <16 x i8>
+ %2 = icmp ult <16 x i8> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpultb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltub %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %1 = bitcast <2 x i64> %__b to <16 x i8>
+ %2 = icmp ult <16 x i8> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpultb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltub (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <16 x i8>
+ %2 = icmp ult <16 x i8> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
+define zeroext i64 @test_vpcmpultb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultb_v32i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltub %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <32 x i8>
+ %1 = bitcast <4 x i64> %__b to <32 x i8>
+ %2 = icmp ult <32 x i8> %0, %1
+ %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpultb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultb_v32i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltub (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <32 x i8>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <32 x i8>
+ %2 = icmp ult <32 x i8> %0, %1
+ %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpultb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltub %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <32 x i8>
+ %1 = bitcast <4 x i64> %__b to <32 x i8>
+ %2 = icmp ult <32 x i8> %0, %1
+ %3 = bitcast i32 %__u to <32 x i1>
+ %4 = and <32 x i1> %2, %3
+ %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpultb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltub (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <32 x i8>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <32 x i8>
+ %2 = icmp ult <32 x i8> %0, %1
+ %3 = bitcast i32 %__u to <32 x i1>
+ %4 = and <32 x i1> %2, %3
+ %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
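+; Unsigned word compares: icmp ult on <8 x i16>, <16 x i16>, and <32 x i16>
+; operands lowers to vpcmpltuw across the same register, memory, and masked
+; forms.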
+define zeroext i16 @test_vpcmpultw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultw_v8i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuw %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %1 = bitcast <2 x i64> %__b to <8 x i16>
+ %2 = icmp ult <8 x i16> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_vpcmpultw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultw_v8i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuw (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <8 x i16>
+ %2 = icmp ult <8 x i16> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpultw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %1 = bitcast <2 x i64> %__b to <8 x i16>
+ %2 = icmp ult <8 x i16> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+define zeroext i16 @test_masked_vpcmpultw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <8 x i16>
+ %2 = icmp ult <8 x i16> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
+define zeroext i32 @test_vpcmpultw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultw_v8i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuw %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %1 = bitcast <2 x i64> %__b to <8 x i16>
+ %2 = icmp ult <8 x i16> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpultw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultw_v8i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuw (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <8 x i16>
+ %2 = icmp ult <8 x i16> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpultw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %1 = bitcast <2 x i64> %__b to <8 x i16>
+ %2 = icmp ult <8 x i16> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+define zeroext i32 @test_masked_vpcmpultw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <8 x i16>
+ %2 = icmp ult <8 x i16> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i64 @test_vpcmpultw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultw_v8i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuw %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %1 = bitcast <2 x i64> %__b to <8 x i16>
+ %2 = icmp ult <8 x i16> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpultw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultw_v8i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuw (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <8 x i16>
+ %2 = icmp ult <8 x i16> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpultw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %1 = bitcast <2 x i64> %__b to <8 x i16>
+ %2 = icmp ult <8 x i16> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpultw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <8 x i16>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <8 x i16>
+ %2 = icmp ult <8 x i16> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
+define zeroext i32 @test_vpcmpultw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultw_v16i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuw %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <16 x i16>
+ %1 = bitcast <4 x i64> %__b to <16 x i16>
+ %2 = icmp ult <16 x i16> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpultw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultw_v16i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuw (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <16 x i16>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <16 x i16>
+ %2 = icmp ult <16 x i16> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpultw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <16 x i16>
+ %1 = bitcast <4 x i64> %__b to <16 x i16>
+ %2 = icmp ult <16 x i16> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+define zeroext i32 @test_masked_vpcmpultw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuw (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <16 x i16>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <16 x i16>
+ %2 = icmp ult <16 x i16> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultw_v16i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuw %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <16 x i16>
+ %1 = bitcast <4 x i64> %__b to <16 x i16>
+ %2 = icmp ult <16 x i16> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultw_v16i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuw (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <16 x i16>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <16 x i16>
+ %2 = icmp ult <16 x i16> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpultw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <16 x i16>
+ %1 = bitcast <4 x i64> %__b to <16 x i16>
+ %2 = icmp ult <16 x i16> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpultw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuw (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <16 x i16>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <16 x i16>
+ %2 = icmp ult <16 x i16> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
+define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultw_v32i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuw %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <32 x i16>
+ %1 = bitcast <8 x i64> %__b to <32 x i16>
+ %2 = icmp ult <32 x i16> %0, %1
+ %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuw (%rdi), %zmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <32 x i16>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <32 x i16>
+ %2 = icmp ult <32 x i16> %0, %1
+ %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <32 x i16>
+ %1 = bitcast <8 x i64> %__b to <32 x i16>
+ %2 = icmp ult <32 x i16> %0, %1
+ %3 = bitcast i32 %__u to <32 x i1>
+ %4 = and <32 x i1> %2, %3
+ %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuw (%rsi), %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <32 x i16>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <32 x i16>
+ %2 = icmp ult <32 x i16> %0, %1
+ %3 = bitcast i32 %__u to <32 x i1>
+ %4 = and <32 x i1> %2, %3
+ %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
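+; The dword variants on 128-bit vectors follow: <2 x i64> inputs are bitcast
+; to <4 x i32> and compared with icmp ult. The masked forms bitcast the i8
+; write mask %__u to <8 x i1> and extract its low four lanes (%extract.i)
+; with a shufflevector before ANDing with the compare result, which should
+; match the {%k1} write-mask operand on vpcmpltud in the CHECK lines.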
+define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultd_v4i1_v8i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %1 = bitcast <2 x i64> %__b to <4 x i32>
+ %2 = icmp ult <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltud (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <4 x i32>
+ %2 = icmp ult <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %1 = bitcast <2 x i64> %__b to <4 x i32>
+ %2 = icmp ult <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <4 x i32>
+ %2 = icmp ult <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+
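+; The _mem_b forms compare against a scalar loaded from memory and splatted
+; through insertelement plus an all-zeros shufflevector mask; the expected
+; codegen folds the splat into the embedded-broadcast addressing form
+; (%rdi){1to4} of vpcmpltud, so no separate broadcast instruction appears.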
+define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <4 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp ult <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <4 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp ult <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %extract.i, %2
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+
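+; Same v4i1 compare pattern, now widened to <16 x i1>/i16: every shuffle
+; index of 4 or above selects from the zeroinitializer operand, so the
+; padding lanes are all false, and the result is read back with kmovw
+; instead of kmovb.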
+define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultd_v4i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %1 = bitcast <2 x i64> %__b to <4 x i32>
+ %2 = icmp ult <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltud (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <4 x i32>
+ %2 = icmp ult <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %1 = bitcast <2 x i64> %__b to <4 x i32>
+ %2 = icmp ult <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <4 x i32>
+ %2 = icmp ult <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
+define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <4 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp ult <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <4 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp ult <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %extract.i, %2
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
+define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultd_v4i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %1 = bitcast <2 x i64> %__b to <4 x i32>
+ %2 = icmp ult <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltud (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <4 x i32>
+ %2 = icmp ult <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %1 = bitcast <2 x i64> %__b to <4 x i32>
+ %2 = icmp ult <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <4 x i32>
+ %2 = icmp ult <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <4 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp ult <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <4 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp ult <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %extract.i, %2
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultd_v4i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %1 = bitcast <2 x i64> %__b to <4 x i32>
+ %2 = icmp ult <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltud (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <4 x i32>
+ %2 = icmp ult <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %1 = bitcast <2 x i64> %__b to <4 x i32>
+ %2 = icmp ult <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <4 x i32>
+ %2 = icmp ult <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
+define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <4 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp ult <4 x i32> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <4 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp ult <4 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %extract.i, %2
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
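+; From here the dword tests also carry NoVLX check prefixes. Without
+; AVX512VL the 256-bit operands are implicitly widened to ZMM registers
+; (the "kill" comments), compared with the 512-bit vpcmpltud, and the stale
+; upper eight bits of the 16-bit mask are cleared with a kshiftlw $8 /
+; kshiftrw $8 pair before the kmovw readback.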
+define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultd_v8i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltud %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask:
+; NoVLX: ## BB#0: ## %entry
+; NoVLX-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %1 = bitcast <4 x i64> %__b to <8 x i32>
+ %2 = icmp ult <8 x i32> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltud (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem:
+; NoVLX: ## BB#0: ## %entry
+; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <8 x i32>
+ %2 = icmp ult <8 x i32> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask:
+; NoVLX: ## BB#0: ## %entry
+; NoVLX-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %1 = bitcast <4 x i64> %__b to <8 x i32>
+ %2 = icmp ult <8 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem:
+; NoVLX: ## BB#0: ## %entry
+; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <8 x i32>
+ %2 = icmp ult <8 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
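+; In the NoVLX broadcast cases below, the scalar operand is splatted with
+; vpbroadcastd into a YMM register and the compare is then performed at ZMM
+; width, with the same kshift pair clearing the upper mask bits.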
+define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem_b:
+; NoVLX: ## BB#0: ## %entry
+; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <8 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp ult <8 x i32> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b:
+; NoVLX: ## BB#0: ## %entry
+; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <8 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp ult <8 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %3, %2
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
+define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultd_v8i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltud %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %1 = bitcast <4 x i64> %__b to <8 x i32>
+ %2 = icmp ult <8 x i32> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltud (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <8 x i32>
+ %2 = icmp ult <8 x i32> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %1 = bitcast <4 x i64> %__b to <8 x i32>
+ %2 = icmp ult <8 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <8 x i32>
+ %2 = icmp ult <8 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <8 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp ult <8 x i32> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <8 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp ult <8 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %3, %2
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultd_v8i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltud %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %1 = bitcast <4 x i64> %__b to <8 x i32>
+ %2 = icmp ult <8 x i32> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltud (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <8 x i32>
+ %2 = icmp ult <8 x i32> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %1 = bitcast <4 x i64> %__b to <8 x i32>
+ %2 = icmp ult <8 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <8 x i32>
+ %2 = icmp ult <8 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
+define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <8 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp ult <8 x i32> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <8 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp ult <8 x i32> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %3, %2
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
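+; 512-bit dword compares: the <8 x i64> inputs become <16 x i32>, the write
+; mask argument is an i16 rather than an i8, and vzeroupper is expected
+; before each return because the functions use ZMM registers.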
+define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultd_v16i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %1 = bitcast <8 x i64> %__b to <16 x i32>
+ %2 = icmp ult <16 x i32> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltud (%rdi), %zmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <16 x i32>
+ %2 = icmp ult <16 x i32> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %1 = bitcast <8 x i64> %__b to <16 x i32>
+ %2 = icmp ult <16 x i32> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <16 x i32>
+ %2 = icmp ult <16 x i32> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <16 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp ult <16 x i32> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <16 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp ult <16 x i32> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %3, %2
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultd_v16i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %1 = bitcast <8 x i64> %__b to <16 x i32>
+ %2 = icmp ult <16 x i32> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltud (%rdi), %zmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <16 x i32>
+ %2 = icmp ult <16 x i32> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %1 = bitcast <8 x i64> %__b to <16 x i32>
+ %2 = icmp ult <16 x i32> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <16 x i32>
+ %2 = icmp ult <16 x i32> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
+define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <16 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp ult <16 x i32> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <16 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp ult <16 x i32> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %3, %2
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
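+; Qword compares returning i4: the <2 x i1> result is widened to <4 x i1>
+; and bitcast to i4. Since no 4-bit kmov exists, the expected code spills
+; the mask with kmovb to a stack slot and reloads it with movzbl. The
+; leading bitcasts of <2 x i64> to <2 x i64> are identity no-ops and are
+; kept as-is.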
+define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultq_v2i1_v4i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp ult <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = bitcast <4 x i1> %3 to i4
+ ret i4 %4
+}
+
+define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp ult <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = bitcast <4 x i1> %3 to i4
+ ret i4 %4
+}
+
+define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp ult <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %6 = bitcast <4 x i1> %5 to i4
+ ret i4 %6
+}
+
+define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp ult <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %6 = bitcast <4 x i1> %5 to i4
+ ret i4 %6
+}
+
+
+define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp ult <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = bitcast <4 x i1> %3 to i4
+ ret i4 %4
+}
+
+define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp ult <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %extract.i, %2
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %6 = bitcast <4 x i1> %5 to i4
+ ret i4 %6
+}
+
+
+define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultq_v2i1_v8i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp ult <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp ult <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp ult <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp ult <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+
+define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp ult <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp ult <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %extract.i, %2
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+
+define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultq_v2i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp ult <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp ult <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp ult <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp ult <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
+define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp ult <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp ult <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %extract.i, %2
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
+define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultq_v2i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp ult <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp ult <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp ult <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp ult <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp ult <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp ult <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %extract.i, %2
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultq_v2i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp ult <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp ult <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %1 = bitcast <2 x i64> %__b to <2 x i64>
+ %2 = icmp ult <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x i64>
+ %2 = icmp ult <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %2, %extract.i
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
+define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp ult <2 x i64> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <2 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = icmp ult <2 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %4 = and <2 x i1> %extract.i, %2
+ %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
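+; The same widening pattern for <4 x i64> (256-bit) vectors. These use the
+; ymm form of vpcmpltuq and extract the low four lanes of the i8 mask in the
+; masked variants; vzeroupper is expected before each return because ymm
+; registers were touched.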
+define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultq_v4i1_v8i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %1 = bitcast <4 x i64> %__b to <4 x i64>
+ %2 = icmp ult <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <4 x i64>
+ %2 = icmp ult <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %1 = bitcast <4 x i64> %__b to <4 x i64>
+ %2 = icmp ult <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <4 x i64>
+ %2 = icmp ult <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+
+define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <4 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp ult <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <4 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp ult <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %extract.i, %2
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <8 x i1> %5 to i8
+ ret i8 %6
+}
+
+
+define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultq_v4i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %1 = bitcast <4 x i64> %__b to <4 x i64>
+ %2 = icmp ult <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <4 x i64>
+ %2 = icmp ult <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %1 = bitcast <4 x i64> %__b to <4 x i64>
+ %2 = icmp ult <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <4 x i64>
+ %2 = icmp ult <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
+define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <4 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp ult <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <4 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp ult <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %extract.i, %2
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
+define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultq_v4i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %1 = bitcast <4 x i64> %__b to <4 x i64>
+ %2 = icmp ult <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <4 x i64>
+ %2 = icmp ult <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %1 = bitcast <4 x i64> %__b to <4 x i64>
+ %2 = icmp ult <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <4 x i64>
+ %2 = icmp ult <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <4 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp ult <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <4 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp ult <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %extract.i, %2
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultq_v4i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %1 = bitcast <4 x i64> %__b to <4 x i64>
+ %2 = icmp ult <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <4 x i64>
+ %2 = icmp ult <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %1 = bitcast <4 x i64> %__b to <4 x i64>
+ %2 = icmp ult <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <4 x i64>
+ %2 = icmp ult <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %2, %extract.i
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
+define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <4 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp ult <4 x i64> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <4 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp ult <4 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = and <4 x i1> %extract.i, %2
+ %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
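+; The same widening pattern for <8 x i64> (512-bit) vectors. Here the i8 mask
+; covers the whole <8 x i1> compare result, so the masked variants AND with
+; the full mask rather than an extracted subvector.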
+define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultq_v8i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %1 = bitcast <8 x i64> %__b to <8 x i64>
+ %2 = icmp ult <8 x i64> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <8 x i64>
+ %2 = icmp ult <8 x i64> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %1 = bitcast <8 x i64> %__b to <8 x i64>
+ %2 = icmp ult <8 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <8 x i64>
+ %2 = icmp ult <8 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
+define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <8 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp ult <8 x i64> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <8 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp ult <8 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %3, %2
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <16 x i1> %5 to i16
+ ret i16 %6
+}
+
+
+define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultq_v8i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %1 = bitcast <8 x i64> %__b to <8 x i64>
+ %2 = icmp ult <8 x i64> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <8 x i64>
+ %2 = icmp ult <8 x i64> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %1 = bitcast <8 x i64> %__b to <8 x i64>
+ %2 = icmp ult <8 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <8 x i64>
+ %2 = icmp ult <8 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <8 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp ult <8 x i64> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <8 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp ult <8 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %3, %2
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultq_v8i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %1 = bitcast <8 x i64> %__b to <8 x i64>
+ %2 = icmp ult <8 x i64> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <8 x i64>
+ %2 = icmp ult <8 x i64> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %1 = bitcast <8 x i64> %__b to <8 x i64>
+ %2 = icmp ult <8 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <8 x i64>
+ %2 = icmp ult <8 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %2, %3
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
+define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <8 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp ult <8 x i64> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x i64>
+ %load = load i64, i64* %__b
+ %vec = insertelement <8 x i64> undef, i64 %load, i32 0
+ %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp ult <8 x i64> %0, %1
+ %3 = bitcast i8 %__u to <8 x i1>
+ %4 = and <8 x i1> %3, %2
+ %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
+}
+
+
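+; Floating-point compare tests: a vcmpps result in a mask register is widened
+; from <4 x i1>, <8 x i1>, or <16 x i1> to a larger i1 vector via shufflevector
+; and then bitcast to an integer, exercising the kmovb/kmovw/kmovd/kmovq moves.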
+declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, i16, i32)
+define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqps_v4i1_v8i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x float>
+ %1 = bitcast <2 x i64> %__b to <4 x float>
+ %2 = fcmp oeq <4 x float> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqps (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x float>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <4 x float>
+ %2 = fcmp oeq <4 x float> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x float>
+ %load = load float, float* %__b
+ %vec = insertelement <4 x float> undef, float %load, i32 0
+ %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = fcmp oeq <4 x float> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+
+define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqps_v4i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x float>
+ %1 = bitcast <2 x i64> %__b to <4 x float>
+ %2 = fcmp oeq <4 x float> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqps (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x float>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <4 x float>
+ %2 = fcmp oeq <4 x float> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x float>
+ %load = load float, float* %__b
+ %vec = insertelement <4 x float> undef, float %load, i32 0
+ %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = fcmp oeq <4 x float> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+
+define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqps_v4i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x float>
+ %1 = bitcast <2 x i64> %__b to <4 x float>
+ %2 = fcmp oeq <4 x float> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqps (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x float>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <4 x float>
+ %2 = fcmp oeq <4 x float> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x float>
+ %load = load float, float* %__b
+ %vec = insertelement <4 x float> undef, float %load, i32 0
+ %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = fcmp oeq <4 x float> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+
+define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqps_v4i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x float>
+ %1 = bitcast <2 x i64> %__b to <4 x float>
+ %2 = fcmp oeq <4 x float> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqps (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x float>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <4 x float>
+ %2 = fcmp oeq <4 x float> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <4 x float>
+ %load = load float, float* %__b
+ %vec = insertelement <4 x float> undef, float %load, i32 0
+ %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = fcmp oeq <4 x float> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+
+define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqps_v8i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask:
+; NoVLX: ## BB#0: ## %entry
+; NoVLX-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x float>
+ %1 = bitcast <4 x i64> %__b to <8 x float>
+ %2 = fcmp oeq <8 x float> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqps (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem:
+; NoVLX: ## BB#0: ## %entry
+; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vmovaps (%rdi), %ymm1
+; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x float>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <8 x float>
+ %2 = fcmp oeq <8 x float> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem_b:
+; NoVLX: ## BB#0: ## %entry
+; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1
+; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x float>
+ %load = load float, float* %__b
+ %vec = insertelement <8 x float> undef, float %load, i32 0
+ %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = fcmp oeq <8 x float> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+
+define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqps_v8i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x float>
+ %1 = bitcast <4 x i64> %__b to <8 x float>
+ %2 = fcmp oeq <8 x float> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqps (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x float>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <8 x float>
+ %2 = fcmp oeq <8 x float> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x float>
+ %load = load float, float* %__b
+ %vec = insertelement <8 x float> undef, float %load, i32 0
+ %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = fcmp oeq <8 x float> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+
+define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqps_v8i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x float>
+ %1 = bitcast <4 x i64> %__b to <8 x float>
+ %2 = fcmp oeq <8 x float> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqps (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x float>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <8 x float>
+ %2 = fcmp oeq <8 x float> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <8 x float>
+ %load = load float, float* %__b
+ %vec = insertelement <8 x float> undef, float %load, i32 0
+ %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = fcmp oeq <8 x float> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+
+define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqps_v16i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x float>
+ %1 = bitcast <8 x i64> %__b to <16 x float>
+ %2 = fcmp oeq <16 x float> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqps (%rdi), %zmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x float>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <16 x float>
+ %2 = fcmp oeq <16 x float> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, float* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x float>
+ %load = load float, float* %__b
+ %vec = insertelement <16 x float> undef, float %load, i32 0
+ %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = fcmp oeq <16 x float> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+
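+; The "_sae" variants call the llvm.x86.avx512.mask.cmp.ps.512 intrinsic with a
+; rounding argument of 8, which should select the {sae} form of the compare.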
+define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqps_v16i1_v32i1_sae_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x float>
+ %1 = bitcast <8 x i64> %__b to <16 x float>
+ %2 = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i16 -1, i32 8)
+ %3 = zext i16 %2 to i32
+ ret i32 %3
+}
+
+
+define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqps_v16i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x float>
+ %1 = bitcast <8 x i64> %__b to <16 x float>
+ %2 = fcmp oeq <16 x float> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqps (%rdi), %zmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x float>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <16 x float>
+ %2 = fcmp oeq <16 x float> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, float* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x float>
+ %load = load float, float* %__b
+ %vec = insertelement <16 x float> undef, float %load, i32 0
+ %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = fcmp oeq <16 x float> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+
+define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqps_v16i1_v64i1_sae_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: movzwl %ax, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <16 x float>
+ %1 = bitcast <8 x i64> %__b to <16 x float>
+ %2 = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i16 -1, i32 8)
+ %3 = zext i16 %2 to i64
+ ret i64 %3
+}
+
+
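+; Double-precision variants of the same widening pattern, using vcmppd; the
+; "_sae" cases go through the llvm.x86.avx512.mask.cmp.pd.512 intrinsic
+; declared below.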
+declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double>, <8 x double>, i32, i8, i32)
+define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x double>
+ %1 = bitcast <2 x i64> %__b to <2 x double>
+ %2 = fcmp oeq <2 x double> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = bitcast <4 x i1> %3 to i4
+ ret i4 %4
+}
+
+define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x double>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x double>
+ %2 = fcmp oeq <2 x double> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = bitcast <4 x i1> %3 to i4
+ ret i4 %4
+}
+
+define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x double>
+ %load = load double, double* %__b
+ %vec = insertelement <2 x double> undef, double %load, i32 0
+ %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
+ %2 = fcmp oeq <2 x double> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %4 = bitcast <4 x i1> %3 to i4
+ ret i4 %4
+}
+
+
+define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x double>
+ %1 = bitcast <2 x i64> %__b to <2 x double>
+ %2 = fcmp oeq <2 x double> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x double>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x double>
+ %2 = fcmp oeq <2 x double> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x double>
+ %load = load double, double* %__b
+ %vec = insertelement <2 x double> undef, double %load, i32 0
+ %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
+ %2 = fcmp oeq <2 x double> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+
+define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x double>
+ %1 = bitcast <2 x i64> %__b to <2 x double>
+ %2 = fcmp oeq <2 x double> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x double>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x double>
+ %2 = fcmp oeq <2 x double> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x double>
+ %load = load double, double* %__b
+ %vec = insertelement <2 x double> undef, double %load, i32 0
+ %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
+ %2 = fcmp oeq <2 x double> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+
+define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x double>
+ %1 = bitcast <2 x i64> %__b to <2 x double>
+ %2 = fcmp oeq <2 x double> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x double>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x double>
+ %2 = fcmp oeq <2 x double> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x double>
+ %load = load double, double* %__b
+ %vec = insertelement <2 x double> undef, double %load, i32 0
+ %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
+ %2 = fcmp oeq <2 x double> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+
+define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x double>
+ %1 = bitcast <2 x i64> %__b to <2 x double>
+ %2 = fcmp oeq <2 x double> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x double>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <2 x double>
+ %2 = fcmp oeq <2 x double> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <2 x double>
+ %load = load double, double* %__b
+ %vec = insertelement <2 x double> undef, double %load, i32 0
+ %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
+ %2 = fcmp oeq <2 x double> %0, %1
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+
+define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x double>
+ %1 = bitcast <4 x i64> %__b to <4 x double>
+ %2 = fcmp oeq <4 x double> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x double>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <4 x double>
+ %2 = fcmp oeq <4 x double> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x double>
+ %load = load double, double* %__b
+ %vec = insertelement <4 x double> undef, double %load, i32 0
+ %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = fcmp oeq <4 x double> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+
+define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x double>
+ %1 = bitcast <4 x i64> %__b to <4 x double>
+ %2 = fcmp oeq <4 x double> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x double>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <4 x double>
+ %2 = fcmp oeq <4 x double> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x double>
+ %load = load double, double* %__b
+ %vec = insertelement <4 x double> undef, double %load, i32 0
+ %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = fcmp oeq <4 x double> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+
+define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x double>
+ %1 = bitcast <4 x i64> %__b to <4 x double>
+ %2 = fcmp oeq <4 x double> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x double>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <4 x double>
+ %2 = fcmp oeq <4 x double> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x double>
+ %load = load double, double* %__b
+ %vec = insertelement <4 x double> undef, double %load, i32 0
+ %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = fcmp oeq <4 x double> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+
+define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x double>
+ %1 = bitcast <4 x i64> %__b to <4 x double>
+ %2 = fcmp oeq <4 x double> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x double>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <4 x double>
+ %2 = fcmp oeq <4 x double> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <4 x double>
+ %load = load double, double* %__b
+ %vec = insertelement <4 x double> undef, double %load, i32 0
+ %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ %2 = fcmp oeq <4 x double> %0, %1
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+
+define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x double>
+ %1 = bitcast <8 x i64> %__b to <8 x double>
+ %2 = fcmp oeq <8 x double> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x double>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <8 x double>
+ %2 = fcmp oeq <8 x double> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x double>
+ %load = load double, double* %__b
+ %vec = insertelement <8 x double> undef, double %load, i32 0
+ %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = fcmp oeq <8 x double> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+
+define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v8i1_v16i1_sae_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x double>
+ %1 = bitcast <8 x i64> %__b to <8 x double>
+ %2 = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i8 -1, i32 8)
+ %3 = zext i8 %2 to i16
+ ret i16 %3
+}
+
+
+define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x double>
+ %1 = bitcast <8 x i64> %__b to <8 x double>
+ %2 = fcmp oeq <8 x double> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x double>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <8 x double>
+ %2 = fcmp oeq <8 x double> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x double>
+ %load = load double, double* %__b
+ %vec = insertelement <8 x double> undef, double %load, i32 0
+ %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = fcmp oeq <8 x double> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+
+define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v8i1_v32i1_sae_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x double>
+ %1 = bitcast <8 x i64> %__b to <8 x double>
+ %2 = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i8 -1, i32 8)
+ %3 = zext i8 %2 to i32
+ ret i32 %3
+}
+
+
+define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x double>
+ %1 = bitcast <8 x i64> %__b to <8 x double>
+ %2 = fcmp oeq <8 x double> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x double>
+ %load = load <8 x i64>, <8 x i64>* %__b
+ %1 = bitcast <8 x i64> %load to <8 x double>
+ %2 = fcmp oeq <8 x double> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x double>
+ %load = load double, double* %__b
+ %vec = insertelement <8 x double> undef, double %load, i32 0
+ %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = fcmp oeq <8 x double> %0, %1
+ %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+
+define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; CHECK-LABEL: test_vcmpoeqpd_v8i1_v64i1_sae_mask:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__a to <8 x double>
+ %1 = bitcast <8 x i64> %__b to <8 x double>
+ %2 = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i8 -1, i32 8)
+ %3 = zext i8 %2 to i64
+ ret i64 %3
+}
+
+
+
diff --git a/test/CodeGen/X86/bswap-vector.ll b/test/CodeGen/X86/bswap-vector.ll
index fd5983df8325..7463f5f6d086 100644
--- a/test/CodeGen/X86/bswap-vector.ll
+++ b/test/CodeGen/X86/bswap-vector.ll
@@ -1,11 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK-ALL --check-prefix=CHECK-SSE --check-prefix=CHECK-NOSSSE3
-; RUN: llc < %s -mcpu=core2 | FileCheck %s --check-prefix=CHECK-ALL --check-prefix=CHECK-SSE --check-prefix=CHECK-SSSE3
-; RUN: llc < %s -mcpu=core-avx2 | FileCheck %s --check-prefix=CHECK-ALL --check-prefix=CHECK-AVX --check-prefix=CHECK-AVX2
-; RUN: llc < %s -mcpu=core-avx2 -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=CHECK-ALL --check-prefix=CHECK-WIDE-AVX --check-prefix=CHECK-WIDE-AVX2
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --check-prefix=CHECK-ALL --check-prefix=CHECK-SSE --check-prefix=CHECK-NOSSSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK-ALL --check-prefix=CHECK-SSE --check-prefix=CHECK-SSSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s --check-prefix=CHECK-ALL --check-prefix=CHECK-AVX --check-prefix=CHECK-AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=CHECK-ALL --check-prefix=CHECK-WIDE-AVX --check-prefix=CHECK-WIDE-AVX2
declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
diff --git a/test/CodeGen/X86/bswap-wide-int.ll b/test/CodeGen/X86/bswap-wide-int.ll
new file mode 100644
index 000000000000..db48eb80de4b
--- /dev/null
+++ b/test/CodeGen/X86/bswap-wide-int.ll
@@ -0,0 +1,173 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+movbe | FileCheck %s --check-prefix=X86-MOVBE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+movbe | FileCheck %s --check-prefix=X64-MOVBE
+
+declare i64 @llvm.bswap.i64(i64)
+declare i128 @llvm.bswap.i128(i128)
+declare i256 @llvm.bswap.i256(i256)
+
+define i64 @bswap_i64(i64 %a0) nounwind {
+; X86-LABEL: bswap_i64:
+; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: bswapl %eax
+; X86-NEXT: bswapl %edx
+; X86-NEXT: retl
+;
+; X86-MOVBE-LABEL: bswap_i64:
+; X86-MOVBE: # BB#0:
+; X86-MOVBE-NEXT: movbel {{[0-9]+}}(%esp), %eax
+; X86-MOVBE-NEXT: movbel {{[0-9]+}}(%esp), %edx
+; X86-MOVBE-NEXT: retl
+;
+; X64-LABEL: bswap_i64:
+; X64: # BB#0:
+; X64-NEXT: bswapq %rdi
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: retq
+;
+; X64-MOVBE-LABEL: bswap_i64:
+; X64-MOVBE: # BB#0:
+; X64-MOVBE-NEXT: bswapq %rdi
+; X64-MOVBE-NEXT: movq %rdi, %rax
+; X64-MOVBE-NEXT: retq
+ %1 = call i64 @llvm.bswap.i64(i64 %a0)
+ ret i64 %1
+}
+
+define i128 @bswap_i128(i128 %a0) nounwind {
+; X86-LABEL: bswap_i128:
+; X86: # BB#0:
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: bswapl %edi
+; X86-NEXT: bswapl %esi
+; X86-NEXT: bswapl %edx
+; X86-NEXT: bswapl %ecx
+; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: movl %esi, 4(%eax)
+; X86-NEXT: movl %edi, (%eax)
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: retl $4
+;
+; X86-MOVBE-LABEL: bswap_i128:
+; X86-MOVBE: # BB#0:
+; X86-MOVBE-NEXT: pushl %edi
+; X86-MOVBE-NEXT: pushl %esi
+; X86-MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-MOVBE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-MOVBE-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-MOVBE-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-MOVBE-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-MOVBE-NEXT: movbel %esi, 12(%eax)
+; X86-MOVBE-NEXT: movbel %edi, 8(%eax)
+; X86-MOVBE-NEXT: movbel %ecx, 4(%eax)
+; X86-MOVBE-NEXT: movbel %edx, (%eax)
+; X86-MOVBE-NEXT: popl %esi
+; X86-MOVBE-NEXT: popl %edi
+; X86-MOVBE-NEXT: retl $4
+;
+; X64-LABEL: bswap_i128:
+; X64: # BB#0:
+; X64-NEXT: bswapq %rsi
+; X64-NEXT: bswapq %rdi
+; X64-NEXT: movq %rsi, %rax
+; X64-NEXT: movq %rdi, %rdx
+; X64-NEXT: retq
+;
+; X64-MOVBE-LABEL: bswap_i128:
+; X64-MOVBE: # BB#0:
+; X64-MOVBE-NEXT: bswapq %rsi
+; X64-MOVBE-NEXT: bswapq %rdi
+; X64-MOVBE-NEXT: movq %rsi, %rax
+; X64-MOVBE-NEXT: movq %rdi, %rdx
+; X64-MOVBE-NEXT: retq
+ %1 = call i128 @llvm.bswap.i128(i128 %a0)
+ ret i128 %1
+}
+
+define i256 @bswap_i256(i256 %a0) nounwind {
+; X86-LABEL: bswap_i256:
+; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: bswapl %ecx
+; X86-NEXT: movl %ecx, 28(%eax)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: bswapl %ecx
+; X86-NEXT: movl %ecx, 24(%eax)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: bswapl %ecx
+; X86-NEXT: movl %ecx, 20(%eax)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: bswapl %ecx
+; X86-NEXT: movl %ecx, 16(%eax)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: bswapl %ecx
+; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: bswapl %ecx
+; X86-NEXT: movl %ecx, 8(%eax)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: bswapl %ecx
+; X86-NEXT: movl %ecx, 4(%eax)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: bswapl %ecx
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: retl $4
+;
+; X86-MOVBE-LABEL: bswap_i256:
+; X86-MOVBE: # BB#0:
+; X86-MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-MOVBE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-MOVBE-NEXT: movbel %ecx, 28(%eax)
+; X86-MOVBE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-MOVBE-NEXT: movbel %ecx, 24(%eax)
+; X86-MOVBE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-MOVBE-NEXT: movbel %ecx, 20(%eax)
+; X86-MOVBE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-MOVBE-NEXT: movbel %ecx, 16(%eax)
+; X86-MOVBE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-MOVBE-NEXT: movbel %ecx, 12(%eax)
+; X86-MOVBE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-MOVBE-NEXT: movbel %ecx, 8(%eax)
+; X86-MOVBE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-MOVBE-NEXT: movbel %ecx, 4(%eax)
+; X86-MOVBE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-MOVBE-NEXT: movbel %ecx, (%eax)
+; X86-MOVBE-NEXT: retl $4
+;
+; X64-LABEL: bswap_i256:
+; X64: # BB#0:
+; X64-NEXT: bswapq %r8
+; X64-NEXT: bswapq %rcx
+; X64-NEXT: bswapq %rdx
+; X64-NEXT: bswapq %rsi
+; X64-NEXT: movq %rsi, 24(%rdi)
+; X64-NEXT: movq %rdx, 16(%rdi)
+; X64-NEXT: movq %rcx, 8(%rdi)
+; X64-NEXT: movq %r8, (%rdi)
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: retq
+;
+; X64-MOVBE-LABEL: bswap_i256:
+; X64-MOVBE: # BB#0:
+; X64-MOVBE-NEXT: movbeq %rsi, 24(%rdi)
+; X64-MOVBE-NEXT: movbeq %rdx, 16(%rdi)
+; X64-MOVBE-NEXT: movbeq %rcx, 8(%rdi)
+; X64-MOVBE-NEXT: movbeq %r8, (%rdi)
+; X64-MOVBE-NEXT: movq %rdi, %rax
+; X64-MOVBE-NEXT: retq
+ %1 = call i256 @llvm.bswap.i256(i256 %a0)
+ ret i256 %1
+}
diff --git a/test/CodeGen/X86/compress_expand.ll b/test/CodeGen/X86/compress_expand.ll
index e09fcf2a336e..f62e18869a98 100644
--- a/test/CodeGen/X86/compress_expand.ll
+++ b/test/CodeGen/X86/compress_expand.ll
@@ -265,9 +265,7 @@ define <2 x float> @test13(float* %base, <2 x float> %src0, <2 x i32> %trigger)
; SKX: # BB#0:
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
-; SKX-NEXT: vpcmpeqq %xmm2, %xmm1, %k0
-; SKX-NEXT: kshiftlb $6, %k0, %k0
-; SKX-NEXT: kshiftrb $6, %k0, %k1
+; SKX-NEXT: vpcmpeqq %xmm2, %xmm1, %k1
; SKX-NEXT: vexpandps (%rdi), %xmm0 {%k1}
; SKX-NEXT: retq
;
@@ -295,9 +293,7 @@ define void @test14(float* %base, <2 x float> %V, <2 x i32> %trigger) {
; SKX: # BB#0:
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
-; SKX-NEXT: vpcmpeqq %xmm2, %xmm1, %k0
-; SKX-NEXT: kshiftlb $6, %k0, %k0
-; SKX-NEXT: kshiftrb $6, %k0, %k1
+; SKX-NEXT: vpcmpeqq %xmm2, %xmm1, %k1
; SKX-NEXT: vcompressps %xmm0, (%rdi) {%k1}
; SKX-NEXT: retq
;
diff --git a/test/CodeGen/X86/cpus.ll b/test/CodeGen/X86/cpus.ll
index 20ce932a184b..7901858cb5dc 100644
--- a/test/CodeGen/X86/cpus.ll
+++ b/test/CodeGen/X86/cpus.ll
@@ -18,6 +18,7 @@
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=broadwell 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=bonnell 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=silvermont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=goldmont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=k8 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=opteron 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=athlon64 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
@@ -34,3 +35,4 @@
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=btver1 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=btver2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver1 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+
diff --git a/test/CodeGen/X86/fp128-cast.ll b/test/CodeGen/X86/fp128-cast.ll
index 6568f73029e0..6543292c08b4 100644
--- a/test/CodeGen/X86/fp128-cast.ll
+++ b/test/CodeGen/X86/fp128-cast.ll
@@ -61,10 +61,10 @@ entry:
; X32: retl
;
; X64-LABEL: TestFPToSIF128_I32:
-; X64: movaps vf128(%rip), %xmm0
-; X64-NEXT: callq __fixtfsi
-; X64-NEXT: movl %eax, vi32(%rip)
-; X64: retq
+; X64: movaps vf128(%rip), %xmm0
+; X64-NEXT: callq __fixtfsi
+; X64-NEXT: movl %eax, vi32(%rip)
+; X64: retq
}
define void @TestFPToUIF128_U32() {
@@ -78,10 +78,10 @@ entry:
; X32: retl
;
; X64-LABEL: TestFPToUIF128_U32:
-; X64: movaps vf128(%rip), %xmm0
-; X64-NEXT: callq __fixunstfsi
-; X64-NEXT: movl %eax, vu32(%rip)
-; X64: retq
+; X64: movaps vf128(%rip), %xmm0
+; X64-NEXT: callq __fixunstfsi
+; X64-NEXT: movl %eax, vu32(%rip)
+; X64: retq
}
define void @TestFPToSIF128_I64() {
diff --git a/test/CodeGen/X86/insertelement-zero.ll b/test/CodeGen/X86/insertelement-zero.ll
index e30772b528bc..ea7418f4707e 100644
--- a/test/CodeGen/X86/insertelement-zero.ll
+++ b/test/CodeGen/X86/insertelement-zero.ll
@@ -1,13 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-unknown"
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
define <2 x double> @insert_v2f64_z1(<2 x double> %a) {
; SSE2-LABEL: insert_v2f64_z1:
diff --git a/test/CodeGen/X86/lower-vec-shift.ll b/test/CodeGen/X86/lower-vec-shift.ll
index 783cda0a8dd7..8d64baf5f2a4 100644
--- a/test/CodeGen/X86/lower-vec-shift.ll
+++ b/test/CodeGen/X86/lower-vec-shift.ll
@@ -1,8 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2
-
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2
; Verify that the following shifts are lowered into a sequence of two shifts plus
; a blend. On pre-avx2 targets, instead of scalarizing logical and arithmetic
diff --git a/test/CodeGen/X86/lower-vec-shuffle-bug.ll b/test/CodeGen/X86/lower-vec-shuffle-bug.ll
index 5918e8045f62..7a081b556867 100644
--- a/test/CodeGen/X86/lower-vec-shuffle-bug.ll
+++ b/test/CodeGen/X86/lower-vec-shuffle-bug.ll
@@ -1,8 +1,9 @@
-; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
define <4 x double> @test1(<4 x double> %A, <4 x double> %B) {
; CHECK-LABEL: test1:
-; CHECK: # BB#0:
+; CHECK: # BB#0: # %entry
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
@@ -12,7 +13,7 @@ entry:
define <4 x double> @test2(<4 x double> %A, <4 x double> %B) {
; CHECK-LABEL: test2:
-; CHECK: # BB#0:
+; CHECK: # BB#0: # %entry
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
@@ -22,7 +23,7 @@ entry:
define <4 x double> @test3(<4 x double> %A, <4 x double> %B) {
; CHECK-LABEL: test3:
-; CHECK: # BB#0:
+; CHECK: # BB#0: # %entry
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
@@ -32,7 +33,7 @@ entry:
define <4 x double> @test4(<4 x double> %A, <4 x double> %B) {
; CHECK-LABEL: test4:
-; CHECK: # BB#0:
+; CHECK: # BB#0: # %entry
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
diff --git a/test/CodeGen/X86/masked_memop.ll b/test/CodeGen/X86/masked_memop.ll
index 3c616e8a9f43..7a2e41e10a37 100644
--- a/test/CodeGen/X86/masked_memop.ll
+++ b/test/CodeGen/X86/masked_memop.ll
@@ -462,9 +462,7 @@ define void @test14(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) {
; SKX: ## BB#0:
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
-; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k0
-; SKX-NEXT: kshiftlw $14, %k0, %k0
-; SKX-NEXT: kshiftrw $14, %k0, %k1
+; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k1
; SKX-NEXT: vmovups %xmm1, (%rdi) {%k1}
; SKX-NEXT: retq
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
@@ -550,9 +548,7 @@ define <2 x float> @test16(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %
; SKX: ## BB#0:
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
-; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k0
-; SKX-NEXT: kshiftlw $14, %k0, %k0
-; SKX-NEXT: kshiftrw $14, %k0, %k1
+; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k1
; SKX-NEXT: vblendmps (%rdi), %xmm1, %xmm0 {%k1}
; SKX-NEXT: retq
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
@@ -601,9 +597,7 @@ define <2 x i32> @test17(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {
; SKX: ## BB#0:
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
-; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k0
-; SKX-NEXT: kshiftlw $14, %k0, %k0
-; SKX-NEXT: kshiftrw $14, %k0, %k1
+; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k1
; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; SKX-NEXT: vmovdqu32 (%rdi), %xmm0 {%k1}
; SKX-NEXT: vpmovsxdq %xmm0, %xmm0
@@ -645,9 +639,7 @@ define <2 x float> @test18(<2 x i32> %trigger, <2 x float>* %addr) {
; SKX: ## BB#0:
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
-; SKX-NEXT: kshiftlw $14, %k0, %k0
-; SKX-NEXT: kshiftrw $14, %k0, %k1
+; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %k1
; SKX-NEXT: vmovups (%rdi), %xmm0 {%k1} {z}
; SKX-NEXT: retq
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
diff --git a/test/CodeGen/X86/memcmp.ll b/test/CodeGen/X86/memcmp.ll
index 9d26aee2e8b8..0e09abf73c8c 100644
--- a/test/CodeGen/X86/memcmp.ll
+++ b/test/CodeGen/X86/memcmp.ll
@@ -1,7 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64 --check-prefix=SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=AVX2 | FileCheck %s --check-prefix=X64 --check-prefix=AVX2
+; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86 --check-prefix=X86-NOSSE
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=AVX2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX2
; This tests codegen time inlining/optimization of memcmp
; rdar://6480398
@@ -11,60 +12,70 @@
declare i32 @memcmp(i8*, i8*, i64)
define i32 @length2(i8* %X, i8* %Y) nounwind {
-; X32-LABEL: length2:
-; X32: # BB#0: # %loadbb
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movzwl (%ecx), %ecx
-; X32-NEXT: movzwl (%eax), %eax
-; X32-NEXT: rolw $8, %cx
-; X32-NEXT: rolw $8, %ax
-; X32-NEXT: movzwl %cx, %ecx
-; X32-NEXT: movzwl %ax, %eax
-; X32-NEXT: cmpl %eax, %ecx
-; X32-NEXT: je .LBB0_1
-; X32-NEXT: # BB#2: # %res_block
-; X32-NEXT: movl $-1, %eax
-; X32-NEXT: jb .LBB0_4
-; X32-NEXT: # BB#3: # %res_block
-; X32-NEXT: movl $1, %eax
-; X32-NEXT: .LBB0_4: # %endblock
-; X32-NEXT: retl
-; X32-NEXT: .LBB0_1:
-; X32-NEXT: xorl %eax, %eax
-; X32-NEXT: retl
+; X86-NOSSE-LABEL: length2:
+; X86-NOSSE: # BB#0:
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOSSE-NEXT: movzwl (%ecx), %ecx
+; X86-NOSSE-NEXT: movzwl (%eax), %eax
+; X86-NOSSE-NEXT: rolw $8, %cx
+; X86-NOSSE-NEXT: rolw $8, %ax
+; X86-NOSSE-NEXT: cmpw %ax, %cx
+; X86-NOSSE-NEXT: movl $-1, %eax
+; X86-NOSSE-NEXT: jae .LBB0_1
+; X86-NOSSE-NEXT: # BB#2:
+; X86-NOSSE-NEXT: je .LBB0_3
+; X86-NOSSE-NEXT: .LBB0_4:
+; X86-NOSSE-NEXT: retl
+; X86-NOSSE-NEXT: .LBB0_1:
+; X86-NOSSE-NEXT: movl $1, %eax
+; X86-NOSSE-NEXT: jne .LBB0_4
+; X86-NOSSE-NEXT: .LBB0_3:
+; X86-NOSSE-NEXT: xorl %eax, %eax
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE2-LABEL: length2:
+; X86-SSE2: # BB#0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT: movzwl (%ecx), %ecx
+; X86-SSE2-NEXT: movzwl (%eax), %eax
+; X86-SSE2-NEXT: rolw $8, %cx
+; X86-SSE2-NEXT: rolw $8, %ax
+; X86-SSE2-NEXT: xorl %edx, %edx
+; X86-SSE2-NEXT: cmpw %ax, %cx
+; X86-SSE2-NEXT: movl $-1, %ecx
+; X86-SSE2-NEXT: movl $1, %eax
+; X86-SSE2-NEXT: cmovbl %ecx, %eax
+; X86-SSE2-NEXT: cmovel %edx, %eax
+; X86-SSE2-NEXT: retl
;
; X64-LABEL: length2:
-; X64: # BB#0: # %loadbb
+; X64: # BB#0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
-; X64-NEXT: movzwl %ax, %eax
-; X64-NEXT: movzwl %cx, %ecx
-; X64-NEXT: cmpq %rcx, %rax
-; X64-NEXT: je .LBB0_1
-; X64-NEXT: # BB#2: # %res_block
+; X64-NEXT: xorl %edx, %edx
+; X64-NEXT: cmpw %cx, %ax
; X64-NEXT: movl $-1, %ecx
; X64-NEXT: movl $1, %eax
; X64-NEXT: cmovbl %ecx, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB0_1:
-; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmovel %edx, %eax
; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
ret i32 %m
}
define i1 @length2_eq(i8* %X, i8* %Y) nounwind {
-; X32-LABEL: length2_eq:
-; X32: # BB#0:
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movzwl (%ecx), %ecx
-; X32-NEXT: cmpw (%eax), %cx
-; X32-NEXT: sete %al
-; X32-NEXT: retl
+; X86-LABEL: length2_eq:
+; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movzwl (%ecx), %ecx
+; X86-NEXT: cmpw (%eax), %cx
+; X86-NEXT: sete %al
+; X86-NEXT: retl
;
; X64-LABEL: length2_eq:
; X64: # BB#0:
@@ -78,13 +89,13 @@ define i1 @length2_eq(i8* %X, i8* %Y) nounwind {
}
define i1 @length2_eq_const(i8* %X) nounwind {
-; X32-LABEL: length2_eq_const:
-; X32: # BB#0:
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movzwl (%eax), %eax
-; X32-NEXT: cmpl $12849, %eax # imm = 0x3231
-; X32-NEXT: setne %al
-; X32-NEXT: retl
+; X86-LABEL: length2_eq_const:
+; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzwl (%eax), %eax
+; X86-NEXT: cmpl $12849, %eax # imm = 0x3231
+; X86-NEXT: setne %al
+; X86-NEXT: retl
;
; X64-LABEL: length2_eq_const:
; X64: # BB#0:
@@ -98,17 +109,17 @@ define i1 @length2_eq_const(i8* %X) nounwind {
}
define i1 @length2_eq_nobuiltin_attr(i8* %X, i8* %Y) nounwind {
-; X32-LABEL: length2_eq_nobuiltin_attr:
-; X32: # BB#0:
-; X32-NEXT: pushl $0
-; X32-NEXT: pushl $2
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: calll memcmp
-; X32-NEXT: addl $16, %esp
-; X32-NEXT: testl %eax, %eax
-; X32-NEXT: sete %al
-; X32-NEXT: retl
+; X86-LABEL: length2_eq_nobuiltin_attr:
+; X86: # BB#0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $2
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: sete %al
+; X86-NEXT: retl
;
; X64-LABEL: length2_eq_nobuiltin_attr:
; X64: # BB#0:
@@ -125,15 +136,15 @@ define i1 @length2_eq_nobuiltin_attr(i8* %X, i8* %Y) nounwind {
}
define i32 @length3(i8* %X, i8* %Y) nounwind {
-; X32-LABEL: length3:
-; X32: # BB#0:
-; X32-NEXT: pushl $0
-; X32-NEXT: pushl $3
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: calll memcmp
-; X32-NEXT: addl $16, %esp
-; X32-NEXT: retl
+; X86-LABEL: length3:
+; X86: # BB#0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $3
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: retl
;
; X64-LABEL: length3:
; X64: # BB#0:
@@ -144,17 +155,17 @@ define i32 @length3(i8* %X, i8* %Y) nounwind {
}
define i1 @length3_eq(i8* %X, i8* %Y) nounwind {
-; X32-LABEL: length3_eq:
-; X32: # BB#0:
-; X32-NEXT: pushl $0
-; X32-NEXT: pushl $3
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: calll memcmp
-; X32-NEXT: addl $16, %esp
-; X32-NEXT: testl %eax, %eax
-; X32-NEXT: setne %al
-; X32-NEXT: retl
+; X86-LABEL: length3_eq:
+; X86: # BB#0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $3
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: setne %al
+; X86-NEXT: retl
;
; X64-LABEL: length3_eq:
; X64: # BB#0:
@@ -171,56 +182,70 @@ define i1 @length3_eq(i8* %X, i8* %Y) nounwind {
}
define i32 @length4(i8* %X, i8* %Y) nounwind {
-; X32-LABEL: length4:
-; X32: # BB#0: # %loadbb
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl (%ecx), %ecx
-; X32-NEXT: movl (%eax), %eax
-; X32-NEXT: bswapl %ecx
-; X32-NEXT: bswapl %eax
-; X32-NEXT: cmpl %eax, %ecx
-; X32-NEXT: je .LBB6_1
-; X32-NEXT: # BB#2: # %res_block
-; X32-NEXT: movl $-1, %eax
-; X32-NEXT: jb .LBB6_4
-; X32-NEXT: # BB#3: # %res_block
-; X32-NEXT: movl $1, %eax
-; X32-NEXT: .LBB6_4: # %endblock
-; X32-NEXT: retl
-; X32-NEXT: .LBB6_1:
-; X32-NEXT: xorl %eax, %eax
-; X32-NEXT: retl
+; X86-NOSSE-LABEL: length4:
+; X86-NOSSE: # BB#0:
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOSSE-NEXT: movl (%ecx), %ecx
+; X86-NOSSE-NEXT: movl (%eax), %eax
+; X86-NOSSE-NEXT: bswapl %ecx
+; X86-NOSSE-NEXT: bswapl %eax
+; X86-NOSSE-NEXT: cmpl %eax, %ecx
+; X86-NOSSE-NEXT: movl $-1, %eax
+; X86-NOSSE-NEXT: jae .LBB6_1
+; X86-NOSSE-NEXT: # BB#2:
+; X86-NOSSE-NEXT: je .LBB6_3
+; X86-NOSSE-NEXT: .LBB6_4:
+; X86-NOSSE-NEXT: retl
+; X86-NOSSE-NEXT: .LBB6_1:
+; X86-NOSSE-NEXT: movl $1, %eax
+; X86-NOSSE-NEXT: jne .LBB6_4
+; X86-NOSSE-NEXT: .LBB6_3:
+; X86-NOSSE-NEXT: xorl %eax, %eax
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE2-LABEL: length4:
+; X86-SSE2: # BB#0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT: movl (%ecx), %ecx
+; X86-SSE2-NEXT: movl (%eax), %eax
+; X86-SSE2-NEXT: bswapl %ecx
+; X86-SSE2-NEXT: bswapl %eax
+; X86-SSE2-NEXT: xorl %edx, %edx
+; X86-SSE2-NEXT: cmpl %eax, %ecx
+; X86-SSE2-NEXT: movl $-1, %ecx
+; X86-SSE2-NEXT: movl $1, %eax
+; X86-SSE2-NEXT: cmovbl %ecx, %eax
+; X86-SSE2-NEXT: cmovel %edx, %eax
+; X86-SSE2-NEXT: retl
;
; X64-LABEL: length4:
-; X64: # BB#0: # %loadbb
+; X64: # BB#0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %eax
; X64-NEXT: bswapl %ecx
-; X64-NEXT: cmpq %rcx, %rax
-; X64-NEXT: je .LBB6_1
-; X64-NEXT: # BB#2: # %res_block
+; X64-NEXT: xorl %edx, %edx
+; X64-NEXT: cmpl %ecx, %eax
; X64-NEXT: movl $-1, %ecx
; X64-NEXT: movl $1, %eax
; X64-NEXT: cmovbl %ecx, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB6_1:
-; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmovel %edx, %eax
; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
ret i32 %m
}
define i1 @length4_eq(i8* %X, i8* %Y) nounwind {
-; X32-LABEL: length4_eq:
-; X32: # BB#0:
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl (%ecx), %ecx
-; X32-NEXT: cmpl (%eax), %ecx
-; X32-NEXT: setne %al
-; X32-NEXT: retl
+; X86-LABEL: length4_eq:
+; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl (%ecx), %ecx
+; X86-NEXT: cmpl (%eax), %ecx
+; X86-NEXT: setne %al
+; X86-NEXT: retl
;
; X64-LABEL: length4_eq:
; X64: # BB#0:
@@ -234,12 +259,12 @@ define i1 @length4_eq(i8* %X, i8* %Y) nounwind {
}
define i1 @length4_eq_const(i8* %X) nounwind {
-; X32-LABEL: length4_eq_const:
-; X32: # BB#0:
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: cmpl $875770417, (%eax) # imm = 0x34333231
-; X32-NEXT: sete %al
-; X32-NEXT: retl
+; X86-LABEL: length4_eq_const:
+; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpl $875770417, (%eax) # imm = 0x34333231
+; X86-NEXT: sete %al
+; X86-NEXT: retl
;
; X64-LABEL: length4_eq_const:
; X64: # BB#0:
@@ -252,15 +277,15 @@ define i1 @length4_eq_const(i8* %X) nounwind {
}
define i32 @length5(i8* %X, i8* %Y) nounwind {
-; X32-LABEL: length5:
-; X32: # BB#0:
-; X32-NEXT: pushl $0
-; X32-NEXT: pushl $5
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: calll memcmp
-; X32-NEXT: addl $16, %esp
-; X32-NEXT: retl
+; X86-LABEL: length5:
+; X86: # BB#0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $5
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: retl
;
; X64-LABEL: length5:
; X64: # BB#0:
@@ -271,17 +296,17 @@ define i32 @length5(i8* %X, i8* %Y) nounwind {
}
define i1 @length5_eq(i8* %X, i8* %Y) nounwind {
-; X32-LABEL: length5_eq:
-; X32: # BB#0:
-; X32-NEXT: pushl $0
-; X32-NEXT: pushl $5
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: calll memcmp
-; X32-NEXT: addl $16, %esp
-; X32-NEXT: testl %eax, %eax
-; X32-NEXT: setne %al
-; X32-NEXT: retl
+; X86-LABEL: length5_eq:
+; X86: # BB#0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $5
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: setne %al
+; X86-NEXT: retl
;
; X64-LABEL: length5_eq:
; X64: # BB#0:
@@ -298,48 +323,45 @@ define i1 @length5_eq(i8* %X, i8* %Y) nounwind {
}
define i32 @length8(i8* %X, i8* %Y) nounwind {
-; X32-LABEL: length8:
-; X32: # BB#0:
-; X32-NEXT: pushl $0
-; X32-NEXT: pushl $8
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: calll memcmp
-; X32-NEXT: addl $16, %esp
-; X32-NEXT: retl
+; X86-LABEL: length8:
+; X86: # BB#0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $8
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: retl
;
; X64-LABEL: length8:
-; X64: # BB#0: # %loadbb
+; X64: # BB#0:
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: movq (%rsi), %rcx
; X64-NEXT: bswapq %rax
; X64-NEXT: bswapq %rcx
+; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpq %rcx, %rax
-; X64-NEXT: je .LBB11_1
-; X64-NEXT: # BB#2: # %res_block
; X64-NEXT: movl $-1, %ecx
; X64-NEXT: movl $1, %eax
; X64-NEXT: cmovbl %ecx, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB11_1:
-; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmovel %edx, %eax
; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind
ret i32 %m
}
define i1 @length8_eq(i8* %X, i8* %Y) nounwind {
-; X32-LABEL: length8_eq:
-; X32: # BB#0:
-; X32-NEXT: pushl $0
-; X32-NEXT: pushl $8
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: calll memcmp
-; X32-NEXT: addl $16, %esp
-; X32-NEXT: testl %eax, %eax
-; X32-NEXT: sete %al
-; X32-NEXT: retl
+; X86-LABEL: length8_eq:
+; X86: # BB#0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $8
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: sete %al
+; X86-NEXT: retl
;
; X64-LABEL: length8_eq:
; X64: # BB#0:
@@ -353,17 +375,17 @@ define i1 @length8_eq(i8* %X, i8* %Y) nounwind {
}
define i1 @length8_eq_const(i8* %X) nounwind {
-; X32-LABEL: length8_eq_const:
-; X32: # BB#0:
-; X32-NEXT: pushl $0
-; X32-NEXT: pushl $8
-; X32-NEXT: pushl $.L.str
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: calll memcmp
-; X32-NEXT: addl $16, %esp
-; X32-NEXT: testl %eax, %eax
-; X32-NEXT: setne %al
-; X32-NEXT: retl
+; X86-LABEL: length8_eq_const:
+; X86: # BB#0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $8
+; X86-NEXT: pushl $.L.str
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: setne %al
+; X86-NEXT: retl
;
; X64-LABEL: length8_eq_const:
; X64: # BB#0:
@@ -377,17 +399,17 @@ define i1 @length8_eq_const(i8* %X) nounwind {
}
define i1 @length12_eq(i8* %X, i8* %Y) nounwind {
-; X32-LABEL: length12_eq:
-; X32: # BB#0:
-; X32-NEXT: pushl $0
-; X32-NEXT: pushl $12
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: calll memcmp
-; X32-NEXT: addl $16, %esp
-; X32-NEXT: testl %eax, %eax
-; X32-NEXT: setne %al
-; X32-NEXT: retl
+; X86-LABEL: length12_eq:
+; X86: # BB#0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $12
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: setne %al
+; X86-NEXT: retl
;
; X64-LABEL: length12_eq:
; X64: # BB#0:
@@ -404,15 +426,15 @@ define i1 @length12_eq(i8* %X, i8* %Y) nounwind {
}
define i32 @length12(i8* %X, i8* %Y) nounwind {
-; X32-LABEL: length12:
-; X32: # BB#0:
-; X32-NEXT: pushl $0
-; X32-NEXT: pushl $12
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: calll memcmp
-; X32-NEXT: addl $16, %esp
-; X32-NEXT: retl
+; X86-LABEL: length12:
+; X86: # BB#0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $12
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: retl
;
; X64-LABEL: length12:
; X64: # BB#0:
@@ -425,15 +447,15 @@ define i32 @length12(i8* %X, i8* %Y) nounwind {
; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329
define i32 @length16(i8* %X, i8* %Y) nounwind {
-; X32-LABEL: length16:
-; X32: # BB#0:
-; X32-NEXT: pushl $0
-; X32-NEXT: pushl $16
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: calll memcmp
-; X32-NEXT: addl $16, %esp
-; X32-NEXT: retl
+; X86-LABEL: length16:
+; X86: # BB#0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $16
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: retl
;
; X64-LABEL: length16:
; X64: # BB#0:
@@ -444,86 +466,108 @@ define i32 @length16(i8* %X, i8* %Y) nounwind {
}
define i1 @length16_eq(i8* %x, i8* %y) nounwind {
-; X32-LABEL: length16_eq:
-; X32: # BB#0:
-; X32-NEXT: pushl $0
-; X32-NEXT: pushl $16
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: calll memcmp
-; X32-NEXT: addl $16, %esp
-; X32-NEXT: testl %eax, %eax
-; X32-NEXT: setne %al
-; X32-NEXT: retl
-;
-; SSE2-LABEL: length16_eq:
-; SSE2: # BB#0:
-; SSE2-NEXT: movdqu (%rsi), %xmm0
-; SSE2-NEXT: movdqu (%rdi), %xmm1
-; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
-; SSE2-NEXT: pmovmskb %xmm1, %eax
-; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; SSE2-NEXT: setne %al
-; SSE2-NEXT: retq
-;
-; AVX2-LABEL: length16_eq:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqu (%rdi), %xmm0
-; AVX2-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
-; AVX2-NEXT: vpmovmskb %xmm0, %eax
-; AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; AVX2-NEXT: setne %al
-; AVX2-NEXT: retq
+; X86-NOSSE-LABEL: length16_eq:
+; X86-NOSSE: # BB#0:
+; X86-NOSSE-NEXT: pushl $0
+; X86-NOSSE-NEXT: pushl $16
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $16, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: setne %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE2-LABEL: length16_eq:
+; X86-SSE2: # BB#0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
+; X86-SSE2-NEXT: movdqu (%eax), %xmm1
+; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
+; X86-SSE2-NEXT: pmovmskb %xmm1, %eax
+; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; X86-SSE2-NEXT: setne %al
+; X86-SSE2-NEXT: retl
+;
+; X64-SSE2-LABEL: length16_eq:
+; X64-SSE2: # BB#0:
+; X64-SSE2-NEXT: movdqu (%rsi), %xmm0
+; X64-SSE2-NEXT: movdqu (%rdi), %xmm1
+; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
+; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
+; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; X64-SSE2-NEXT: setne %al
+; X64-SSE2-NEXT: retq
+;
+; X64-AVX2-LABEL: length16_eq:
+; X64-AVX2: # BB#0:
+; X64-AVX2-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX2-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
+; X64-AVX2-NEXT: vpmovmskb %xmm0, %eax
+; X64-AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; X64-AVX2-NEXT: setne %al
+; X64-AVX2-NEXT: retq
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) nounwind
%cmp = icmp ne i32 %call, 0
ret i1 %cmp
}
define i1 @length16_eq_const(i8* %X) nounwind {
-; X32-LABEL: length16_eq_const:
-; X32: # BB#0:
-; X32-NEXT: pushl $0
-; X32-NEXT: pushl $16
-; X32-NEXT: pushl $.L.str
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: calll memcmp
-; X32-NEXT: addl $16, %esp
-; X32-NEXT: testl %eax, %eax
-; X32-NEXT: sete %al
-; X32-NEXT: retl
-;
-; SSE2-LABEL: length16_eq_const:
-; SSE2: # BB#0:
-; SSE2-NEXT: movdqu (%rdi), %xmm0
-; SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
-; SSE2-NEXT: pmovmskb %xmm0, %eax
-; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; SSE2-NEXT: sete %al
-; SSE2-NEXT: retq
-;
-; AVX2-LABEL: length16_eq_const:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqu (%rdi), %xmm0
-; AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT: vpmovmskb %xmm0, %eax
-; AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; AVX2-NEXT: sete %al
-; AVX2-NEXT: retq
+; X86-NOSSE-LABEL: length16_eq_const:
+; X86-NOSSE: # BB#0:
+; X86-NOSSE-NEXT: pushl $0
+; X86-NOSSE-NEXT: pushl $16
+; X86-NOSSE-NEXT: pushl $.L.str
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $16, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: sete %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE2-LABEL: length16_eq_const:
+; X86-SSE2: # BB#0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: movdqu (%eax), %xmm0
+; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm0
+; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
+; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; X86-SSE2-NEXT: sete %al
+; X86-SSE2-NEXT: retl
+;
+; X64-SSE2-LABEL: length16_eq_const:
+; X64-SSE2: # BB#0:
+; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
+; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
+; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
+; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; X64-SSE2-NEXT: sete %al
+; X64-SSE2-NEXT: retq
+;
+; X64-AVX2-LABEL: length16_eq_const:
+; X64-AVX2: # BB#0:
+; X64-AVX2-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX2-NEXT: vpmovmskb %xmm0, %eax
+; X64-AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; X64-AVX2-NEXT: sete %al
+; X64-AVX2-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 16) nounwind
%c = icmp eq i32 %m, 0
ret i1 %c
}
define i32 @length32(i8* %X, i8* %Y) nounwind {
-; X32-LABEL: length32:
-; X32: # BB#0:
-; X32-NEXT: pushl $0
-; X32-NEXT: pushl $32
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: calll memcmp
-; X32-NEXT: addl $16, %esp
-; X32-NEXT: retl
+; X86-LABEL: length32:
+; X86: # BB#0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $32
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: retl
;
; X64-LABEL: length32:
; X64: # BB#0:
@@ -536,90 +580,90 @@ define i32 @length32(i8* %X, i8* %Y) nounwind {
; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325
define i1 @length32_eq(i8* %x, i8* %y) nounwind {
-; X32-LABEL: length32_eq:
-; X32: # BB#0:
-; X32-NEXT: pushl $0
-; X32-NEXT: pushl $32
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: calll memcmp
-; X32-NEXT: addl $16, %esp
-; X32-NEXT: testl %eax, %eax
-; X32-NEXT: sete %al
-; X32-NEXT: retl
-;
-; SSE2-LABEL: length32_eq:
-; SSE2: # BB#0:
-; SSE2-NEXT: pushq %rax
-; SSE2-NEXT: movl $32, %edx
-; SSE2-NEXT: callq memcmp
-; SSE2-NEXT: testl %eax, %eax
-; SSE2-NEXT: sete %al
-; SSE2-NEXT: popq %rcx
-; SSE2-NEXT: retq
-;
-; AVX2-LABEL: length32_eq:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; AVX2-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0
-; AVX2-NEXT: vpmovmskb %ymm0, %eax
-; AVX2-NEXT: cmpl $-1, %eax
-; AVX2-NEXT: sete %al
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
+; X86-LABEL: length32_eq:
+; X86: # BB#0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $32
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+;
+; X64-SSE2-LABEL: length32_eq:
+; X64-SSE2: # BB#0:
+; X64-SSE2-NEXT: pushq %rax
+; X64-SSE2-NEXT: movl $32, %edx
+; X64-SSE2-NEXT: callq memcmp
+; X64-SSE2-NEXT: testl %eax, %eax
+; X64-SSE2-NEXT: sete %al
+; X64-SSE2-NEXT: popq %rcx
+; X64-SSE2-NEXT: retq
+;
+; X64-AVX2-LABEL: length32_eq:
+; X64-AVX2: # BB#0:
+; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
+; X64-AVX2-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0
+; X64-AVX2-NEXT: vpmovmskb %ymm0, %eax
+; X64-AVX2-NEXT: cmpl $-1, %eax
+; X64-AVX2-NEXT: sete %al
+; X64-AVX2-NEXT: vzeroupper
+; X64-AVX2-NEXT: retq
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind
%cmp = icmp eq i32 %call, 0
ret i1 %cmp
}
define i1 @length32_eq_const(i8* %X) nounwind {
-; X32-LABEL: length32_eq_const:
-; X32: # BB#0:
-; X32-NEXT: pushl $0
-; X32-NEXT: pushl $32
-; X32-NEXT: pushl $.L.str
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: calll memcmp
-; X32-NEXT: addl $16, %esp
-; X32-NEXT: testl %eax, %eax
-; X32-NEXT: setne %al
-; X32-NEXT: retl
-;
-; SSE2-LABEL: length32_eq_const:
-; SSE2: # BB#0:
-; SSE2-NEXT: pushq %rax
-; SSE2-NEXT: movl $.L.str, %esi
-; SSE2-NEXT: movl $32, %edx
-; SSE2-NEXT: callq memcmp
-; SSE2-NEXT: testl %eax, %eax
-; SSE2-NEXT: setne %al
-; SSE2-NEXT: popq %rcx
-; SSE2-NEXT: retq
-;
-; AVX2-LABEL: length32_eq_const:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT: vpmovmskb %ymm0, %eax
-; AVX2-NEXT: cmpl $-1, %eax
-; AVX2-NEXT: setne %al
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
+; X86-LABEL: length32_eq_const:
+; X86: # BB#0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $32
+; X86-NEXT: pushl $.L.str
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: setne %al
+; X86-NEXT: retl
+;
+; X64-SSE2-LABEL: length32_eq_const:
+; X64-SSE2: # BB#0:
+; X64-SSE2-NEXT: pushq %rax
+; X64-SSE2-NEXT: movl $.L.str, %esi
+; X64-SSE2-NEXT: movl $32, %edx
+; X64-SSE2-NEXT: callq memcmp
+; X64-SSE2-NEXT: testl %eax, %eax
+; X64-SSE2-NEXT: setne %al
+; X64-SSE2-NEXT: popq %rcx
+; X64-SSE2-NEXT: retq
+;
+; X64-AVX2-LABEL: length32_eq_const:
+; X64-AVX2: # BB#0:
+; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
+; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %ymm0, %ymm0
+; X64-AVX2-NEXT: vpmovmskb %ymm0, %eax
+; X64-AVX2-NEXT: cmpl $-1, %eax
+; X64-AVX2-NEXT: setne %al
+; X64-AVX2-NEXT: vzeroupper
+; X64-AVX2-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 32) nounwind
%c = icmp ne i32 %m, 0
ret i1 %c
}
define i32 @length64(i8* %X, i8* %Y) nounwind {
-; X32-LABEL: length64:
-; X32: # BB#0:
-; X32-NEXT: pushl $0
-; X32-NEXT: pushl $64
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: calll memcmp
-; X32-NEXT: addl $16, %esp
-; X32-NEXT: retl
+; X86-LABEL: length64:
+; X86: # BB#0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $64
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: retl
;
; X64-LABEL: length64:
; X64: # BB#0:
@@ -630,17 +674,17 @@ define i32 @length64(i8* %X, i8* %Y) nounwind {
}
define i1 @length64_eq(i8* %x, i8* %y) nounwind {
-; X32-LABEL: length64_eq:
-; X32: # BB#0:
-; X32-NEXT: pushl $0
-; X32-NEXT: pushl $64
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: calll memcmp
-; X32-NEXT: addl $16, %esp
-; X32-NEXT: testl %eax, %eax
-; X32-NEXT: setne %al
-; X32-NEXT: retl
+; X86-LABEL: length64_eq:
+; X86: # BB#0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $64
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: setne %al
+; X86-NEXT: retl
;
; X64-LABEL: length64_eq:
; X64: # BB#0:
@@ -657,17 +701,17 @@ define i1 @length64_eq(i8* %x, i8* %y) nounwind {
}
define i1 @length64_eq_const(i8* %X) nounwind {
-; X32-LABEL: length64_eq_const:
-; X32: # BB#0:
-; X32-NEXT: pushl $0
-; X32-NEXT: pushl $64
-; X32-NEXT: pushl $.L.str
-; X32-NEXT: pushl {{[0-9]+}}(%esp)
-; X32-NEXT: calll memcmp
-; X32-NEXT: addl $16, %esp
-; X32-NEXT: testl %eax, %eax
-; X32-NEXT: sete %al
-; X32-NEXT: retl
+; X86-LABEL: length64_eq_const:
+; X86: # BB#0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $64
+; X86-NEXT: pushl $.L.str
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: sete %al
+; X86-NEXT: retl
;
; X64-LABEL: length64_eq_const:
; X64: # BB#0:
diff --git a/test/CodeGen/X86/palignr.ll b/test/CodeGen/X86/palignr.ll
index 11e97aadb45d..700c9cf5f3af 100644
--- a/test/CodeGen/X86/palignr.ll
+++ b/test/CodeGen/X86/palignr.ll
@@ -1,132 +1,162 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -march=x86 -mcpu=core2 -mattr=+ssse3 | FileCheck %s
-; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck --check-prefix=CHECK-YONAH %s
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SSE --check-prefix=CHECK-SSE2
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SSE --check-prefix=CHECK-SSSE3
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX
define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
-; CHECK-LABEL: test1:
-; CHECK: # BB#0:
-; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,2,3,0]
-; CHECK-NEXT: retl
-;
-; CHECK-YONAH-LABEL: test1:
-; CHECK-YONAH: # BB#0:
-; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,2,3,0]
-; CHECK-YONAH-NEXT: retl
+; CHECK-SSE-LABEL: test1:
+; CHECK-SSE: # BB#0:
+; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,2,3,0]
+; CHECK-SSE-NEXT: retl
+;
+; CHECK-AVX-LABEL: test1:
+; CHECK-AVX: # BB#0:
+; CHECK-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,0]
+; CHECK-AVX-NEXT: retl
%C = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> < i32 1, i32 2, i32 3, i32 0 >
- ret <4 x i32> %C
+ ret <4 x i32> %C
}
define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) nounwind {
-; CHECK-LABEL: test2:
-; CHECK: # BB#0:
-; CHECK-NEXT: palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
-; CHECK-NEXT: movdqa %xmm1, %xmm0
-; CHECK-NEXT: retl
-;
-; CHECK-YONAH-LABEL: test2:
-; CHECK-YONAH: # BB#0:
-; CHECK-YONAH-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
-; CHECK-YONAH-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0]
-; CHECK-YONAH-NEXT: retl
+; CHECK-SSE2-LABEL: test2:
+; CHECK-SSE2: # BB#0:
+; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
+; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0]
+; CHECK-SSE2-NEXT: retl
+;
+; CHECK-SSSE3-LABEL: test2:
+; CHECK-SSSE3: # BB#0:
+; CHECK-SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
+; CHECK-SSSE3-NEXT: movdqa %xmm1, %xmm0
+; CHECK-SSSE3-NEXT: retl
+;
+; CHECK-AVX-LABEL: test2:
+; CHECK-AVX: # BB#0:
+; CHECK-AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
+; CHECK-AVX-NEXT: retl
%C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 1, i32 2, i32 3, i32 4 >
- ret <4 x i32> %C
+ ret <4 x i32> %C
}
define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) nounwind {
-; CHECK-LABEL: test3:
-; CHECK: # BB#0:
-; CHECK-NEXT: palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
-; CHECK-NEXT: movdqa %xmm1, %xmm0
-; CHECK-NEXT: retl
-;
-; CHECK-YONAH-LABEL: test3:
-; CHECK-YONAH: # BB#0:
-; CHECK-YONAH-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0]
-; CHECK-YONAH-NEXT: retl
+; CHECK-SSE2-LABEL: test3:
+; CHECK-SSE2: # BB#0:
+; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0]
+; CHECK-SSE2-NEXT: retl
+;
+; CHECK-SSSE3-LABEL: test3:
+; CHECK-SSSE3: # BB#0:
+; CHECK-SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
+; CHECK-SSSE3-NEXT: movdqa %xmm1, %xmm0
+; CHECK-SSSE3-NEXT: retl
+;
+; CHECK-AVX-LABEL: test3:
+; CHECK-AVX: # BB#0:
+; CHECK-AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
+; CHECK-AVX-NEXT: retl
%C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 1, i32 2, i32 undef, i32 4 >
- ret <4 x i32> %C
+ ret <4 x i32> %C
}
define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind {
-; CHECK-LABEL: test4:
-; CHECK: # BB#0:
-; CHECK-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
-; CHECK-NEXT: retl
-;
-; CHECK-YONAH-LABEL: test4:
-; CHECK-YONAH: # BB#0:
-; CHECK-YONAH-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
-; CHECK-YONAH-NEXT: movapd %xmm1, %xmm0
-; CHECK-YONAH-NEXT: retl
+; CHECK-SSE2-LABEL: test4:
+; CHECK-SSE2: # BB#0:
+; CHECK-SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
+; CHECK-SSE2-NEXT: movapd %xmm1, %xmm0
+; CHECK-SSE2-NEXT: retl
+;
+; CHECK-SSSE3-LABEL: test4:
+; CHECK-SSSE3: # BB#0:
+; CHECK-SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
+; CHECK-SSSE3-NEXT: retl
+;
+; CHECK-AVX-LABEL: test4:
+; CHECK-AVX: # BB#0:
+; CHECK-AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
+; CHECK-AVX-NEXT: retl
%C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 6, i32 7, i32 undef, i32 1 >
- ret <4 x i32> %C
+ ret <4 x i32> %C
}
define <4 x float> @test5(<4 x float> %A, <4 x float> %B) nounwind {
-; CHECK-LABEL: test5:
-; CHECK: # BB#0:
-; CHECK-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
-; CHECK-NEXT: movapd %xmm1, %xmm0
-; CHECK-NEXT: retl
-;
-; CHECK-YONAH-LABEL: test5:
-; CHECK-YONAH: # BB#0:
-; CHECK-YONAH-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
-; CHECK-YONAH-NEXT: movapd %xmm1, %xmm0
-; CHECK-YONAH-NEXT: retl
+; CHECK-SSE-LABEL: test5:
+; CHECK-SSE: # BB#0:
+; CHECK-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
+; CHECK-SSE-NEXT: movapd %xmm1, %xmm0
+; CHECK-SSE-NEXT: retl
+;
+; CHECK-AVX-LABEL: test5:
+; CHECK-AVX: # BB#0:
+; CHECK-AVX-NEXT: vshufpd {{.*#+}} xmm0 = xmm1[1],xmm0[0]
+; CHECK-AVX-NEXT: retl
%C = shufflevector <4 x float> %A, <4 x float> %B, <4 x i32> < i32 6, i32 7, i32 undef, i32 1 >
- ret <4 x float> %C
+ ret <4 x float> %C
}
define <8 x i16> @test6(<8 x i16> %A, <8 x i16> %B) nounwind {
-; CHECK-LABEL: test6:
-; CHECK: # BB#0:
-; CHECK-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
-; CHECK-NEXT: movdqa %xmm1, %xmm0
-; CHECK-NEXT: retl
-;
-; CHECK-YONAH-LABEL: test6:
-; CHECK-YONAH: # BB#0:
-; CHECK-YONAH-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
-; CHECK-YONAH-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
-; CHECK-YONAH-NEXT: por %xmm1, %xmm0
-; CHECK-YONAH-NEXT: retl
+; CHECK-SSE2-LABEL: test6:
+; CHECK-SSE2: # BB#0:
+; CHECK-SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
+; CHECK-SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
+; CHECK-SSE2-NEXT: por %xmm1, %xmm0
+; CHECK-SSE2-NEXT: retl
+;
+; CHECK-SSSE3-LABEL: test6:
+; CHECK-SSSE3: # BB#0:
+; CHECK-SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
+; CHECK-SSSE3-NEXT: movdqa %xmm1, %xmm0
+; CHECK-SSSE3-NEXT: retl
+;
+; CHECK-AVX-LABEL: test6:
+; CHECK-AVX: # BB#0:
+; CHECK-AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
+; CHECK-AVX-NEXT: retl
%C = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 3, i32 4, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10 >
- ret <8 x i16> %C
+ ret <8 x i16> %C
}
define <8 x i16> @test7(<8 x i16> %A, <8 x i16> %B) nounwind {
-; CHECK-LABEL: test7:
-; CHECK: # BB#0:
-; CHECK-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
-; CHECK-NEXT: movdqa %xmm1, %xmm0
-; CHECK-NEXT: retl
-;
-; CHECK-YONAH-LABEL: test7:
-; CHECK-YONAH: # BB#0:
-; CHECK-YONAH-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; CHECK-YONAH-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
-; CHECK-YONAH-NEXT: por %xmm1, %xmm0
-; CHECK-YONAH-NEXT: retl
+; CHECK-SSE2-LABEL: test7:
+; CHECK-SSE2: # BB#0:
+; CHECK-SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; CHECK-SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
+; CHECK-SSE2-NEXT: por %xmm1, %xmm0
+; CHECK-SSE2-NEXT: retl
+;
+; CHECK-SSSE3-LABEL: test7:
+; CHECK-SSSE3: # BB#0:
+; CHECK-SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
+; CHECK-SSSE3-NEXT: movdqa %xmm1, %xmm0
+; CHECK-SSSE3-NEXT: retl
+;
+; CHECK-AVX-LABEL: test7:
+; CHECK-AVX: # BB#0:
+; CHECK-AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
+; CHECK-AVX-NEXT: retl
%C = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 undef, i32 6, i32 undef, i32 8, i32 9, i32 10, i32 11, i32 12 >
- ret <8 x i16> %C
+ ret <8 x i16> %C
}
define <16 x i8> @test8(<16 x i8> %A, <16 x i8> %B) nounwind {
-; CHECK-LABEL: test8:
-; CHECK: # BB#0:
-; CHECK-NEXT: palignr {{.*#+}} xmm1 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
-; CHECK-NEXT: movdqa %xmm1, %xmm0
-; CHECK-NEXT: retl
-;
-; CHECK-YONAH-LABEL: test8:
-; CHECK-YONAH: # BB#0:
-; CHECK-YONAH-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
-; CHECK-YONAH-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4]
-; CHECK-YONAH-NEXT: por %xmm1, %xmm0
-; CHECK-YONAH-NEXT: retl
+; CHECK-SSE2-LABEL: test8:
+; CHECK-SSE2: # BB#0:
+; CHECK-SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
+; CHECK-SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4]
+; CHECK-SSE2-NEXT: por %xmm1, %xmm0
+; CHECK-SSE2-NEXT: retl
+;
+; CHECK-SSSE3-LABEL: test8:
+; CHECK-SSSE3: # BB#0:
+; CHECK-SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
+; CHECK-SSSE3-NEXT: movdqa %xmm1, %xmm0
+; CHECK-SSSE3-NEXT: retl
+;
+; CHECK-AVX-LABEL: test8:
+; CHECK-AVX: # BB#0:
+; CHECK-AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
+; CHECK-AVX-NEXT: retl
%C = shufflevector <16 x i8> %A, <16 x i8> %B, <16 x i32> < i32 5, i32 6, i32 7, i32 undef, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20 >
- ret <16 x i8> %C
+ ret <16 x i8> %C
}
; Check that we don't do unary (circular on single operand) palignr incorrectly.
@@ -134,21 +164,26 @@ define <16 x i8> @test8(<16 x i8> %A, <16 x i8> %B) nounwind {
; incorrectly. In particular, one of the operands of the palignr node
; was an UNDEF.)
define <8 x i16> @test9(<8 x i16> %A, <8 x i16> %B) nounwind {
-; CHECK-LABEL: test9:
-; CHECK: # BB#0:
-; CHECK-NEXT: palignr {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
-; CHECK-NEXT: movdqa %xmm1, %xmm0
-; CHECK-NEXT: retl
-;
-; CHECK-YONAH-LABEL: test9:
-; CHECK-YONAH: # BB#0:
-; CHECK-YONAH-NEXT: movdqa %xmm1, %xmm0
-; CHECK-YONAH-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
-; CHECK-YONAH-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
-; CHECK-YONAH-NEXT: por %xmm0, %xmm1
-; CHECK-YONAH-NEXT: movdqa %xmm1, %xmm0
-; CHECK-YONAH-NEXT: retl
+; CHECK-SSE2-LABEL: test9:
+; CHECK-SSE2: # BB#0:
+; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm0
+; CHECK-SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
+; CHECK-SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
+; CHECK-SSE2-NEXT: por %xmm0, %xmm1
+; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm0
+; CHECK-SSE2-NEXT: retl
+;
+; CHECK-SSSE3-LABEL: test9:
+; CHECK-SSSE3: # BB#0:
+; CHECK-SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
+; CHECK-SSSE3-NEXT: movdqa %xmm1, %xmm0
+; CHECK-SSSE3-NEXT: retl
+;
+; CHECK-AVX-LABEL: test9:
+; CHECK-AVX: # BB#0:
+; CHECK-AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
+; CHECK-AVX-NEXT: retl
%C = shufflevector <8 x i16> %B, <8 x i16> %A, <8 x i32> < i32 undef, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0 >
- ret <8 x i16> %C
+ ret <8 x i16> %C
}
diff --git a/test/CodeGen/X86/peephole-recurrence.mir b/test/CodeGen/X86/peephole-recurrence.mir
new file mode 100644
index 000000000000..af57a4fd526f
--- /dev/null
+++ b/test/CodeGen/X86/peephole-recurrence.mir
@@ -0,0 +1,232 @@
+# RUN: llc -mtriple=x86_64-- -run-pass=peephole-opt -o - %s | FileCheck %s
+
+--- |
+ define i32 @foo(i32 %a) {
+ bb0:
+ br label %bb1
+
+ bb1: ; preds = %bb7, %bb0
+ %vreg0 = phi i32 [ 0, %bb0 ], [ %vreg3, %bb7 ]
+ %cond0 = icmp eq i32 %a, 0
+ br i1 %cond0, label %bb4, label %bb3
+
+ bb3: ; preds = %bb1
+ br label %bb4
+
+ bb4: ; preds = %bb1, %bb3
+ %vreg5 = phi i32 [ 2, %bb3 ], [ 1, %bb1 ]
+ %cond1 = icmp eq i32 %vreg5, 0
+ br i1 %cond1, label %bb7, label %bb6
+
+ bb6: ; preds = %bb4
+ br label %bb7
+
+ bb7: ; preds = %bb4, %bb6
+ %vreg1 = phi i32 [ 2, %bb6 ], [ 1, %bb4 ]
+ %vreg2 = add i32 %vreg5, %vreg0
+ %vreg3 = add i32 %vreg1, %vreg2
+ %cond2 = icmp slt i32 %vreg3, 10
+ br i1 %cond2, label %bb1, label %bb8
+
+ bb8: ; preds = %bb7
+ ret i32 0
+ }
+
+ define i32 @bar(i32 %a, i32* %p) {
+ bb0:
+ br label %bb1
+
+ bb1: ; preds = %bb7, %bb0
+ %vreg0 = phi i32 [ 0, %bb0 ], [ %vreg3, %bb7 ]
+ %cond0 = icmp eq i32 %a, 0
+ br i1 %cond0, label %bb4, label %bb3
+
+ bb3: ; preds = %bb1
+ br label %bb4
+
+ bb4: ; preds = %bb1, %bb3
+ %vreg5 = phi i32 [ 2, %bb3 ], [ 1, %bb1 ]
+ %cond1 = icmp eq i32 %vreg5, 0
+ br i1 %cond1, label %bb7, label %bb6
+
+ bb6: ; preds = %bb4
+ br label %bb7
+
+ bb7: ; preds = %bb4, %bb6
+ %vreg1 = phi i32 [ 2, %bb6 ], [ 1, %bb4 ]
+ %vreg2 = add i32 %vreg5, %vreg0
+ store i32 %vreg0, i32* %p
+ %vreg3 = add i32 %vreg1, %vreg2
+ %cond2 = icmp slt i32 %vreg3, 10
+ br i1 %cond2, label %bb1, label %bb8
+
+ bb8: ; preds = %bb7
+ ret i32 0
+ }
+
+...
+---
+# There is a recurrence formulated around %0, %10, and %3. Check that the
+# operands of the ADD instructions in bb.5.bb7 are commuted so that the
+# values involved in the recurrence are tied. This removes a redundant copy
+# instruction.
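+# For illustration (a sketch of the expected rewrite, mirroring the CHECKs
+# below): before the pass, %10 = ADD32rr %1, %0 leaves the recurrence value
+# %0 in the second, untied operand; after commuting, %10 = ADD32rr %0, %1
+# lets the allocator tie %0 to %10 (and likewise %10 to %3), avoiding a copy
+# on the loop back edge.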
+name: foo
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gr32, preferred-register: '' }
+ - { id: 1, class: gr32, preferred-register: '' }
+ - { id: 2, class: gr32, preferred-register: '' }
+ - { id: 3, class: gr32, preferred-register: '' }
+ - { id: 4, class: gr32, preferred-register: '' }
+ - { id: 5, class: gr32, preferred-register: '' }
+ - { id: 6, class: gr32, preferred-register: '' }
+ - { id: 7, class: gr32, preferred-register: '' }
+ - { id: 8, class: gr32, preferred-register: '' }
+ - { id: 9, class: gr32, preferred-register: '' }
+ - { id: 10, class: gr32, preferred-register: '' }
+ - { id: 11, class: gr32, preferred-register: '' }
+ - { id: 12, class: gr32, preferred-register: '' }
+liveins:
+ - { reg: '%edi', virtual-reg: '%4' }
+body: |
+ bb.0.bb0:
+ successors: %bb.1.bb1(0x80000000)
+ liveins: %edi
+
+ %4 = COPY %edi
+ %5 = MOV32r0 implicit-def dead %eflags
+
+ bb.1.bb1:
+ successors: %bb.3.bb4(0x30000000), %bb.2.bb3(0x50000000)
+
+ ; CHECK: %0 = PHI %5, %bb.0.bb0, %3, %bb.5.bb7
+ %0 = PHI %5, %bb.0.bb0, %3, %bb.5.bb7
+ %6 = MOV32ri 1
+ TEST32rr %4, %4, implicit-def %eflags
+ JE_1 %bb.3.bb4, implicit %eflags
+ JMP_1 %bb.2.bb3
+
+ bb.2.bb3:
+ successors: %bb.3.bb4(0x80000000)
+
+ %7 = MOV32ri 2
+
+ bb.3.bb4:
+ successors: %bb.5.bb7(0x30000000), %bb.4.bb6(0x50000000)
+
+ %1 = PHI %6, %bb.1.bb1, %7, %bb.2.bb3
+ TEST32rr %1, %1, implicit-def %eflags
+ JE_1 %bb.5.bb7, implicit %eflags
+ JMP_1 %bb.4.bb6
+
+ bb.4.bb6:
+ successors: %bb.5.bb7(0x80000000)
+
+ %9 = MOV32ri 2
+
+ bb.5.bb7:
+ successors: %bb.1.bb1(0x7c000000), %bb.6.bb8(0x04000000)
+
+ %2 = PHI %6, %bb.3.bb4, %9, %bb.4.bb6
+ %10 = ADD32rr %1, %0, implicit-def dead %eflags
+ ; CHECK: %10 = ADD32rr
+ ; CHECK-SAME: %0,
+ ; CHECK-SAME: %1,
+ %3 = ADD32rr %2, killed %10, implicit-def dead %eflags
+ ; CHECK: %3 = ADD32rr
+ ; CHECK-SAME: %10,
+ ; CHECK-SAME: %2,
+ %11 = SUB32ri8 %3, 10, implicit-def %eflags
+ JL_1 %bb.1.bb1, implicit %eflags
+ JMP_1 %bb.6.bb8
+
+ bb.6.bb8:
+ %12 = MOV32r0 implicit-def dead %eflags
+ %eax = COPY %12
+ RET 0, %eax
+
+...
+---
+# Here a recurrence is formulated around %0, %11, and %3, but the operands
+# should not be commuted because %0 has a use outside of the recurrence.
+# Commuting would tie together values with overlapping live ranges, so it
+# must be avoided here.
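+# For illustration: %0 is stored through %5 (the MOV32mr below) after %11 is
+# defined, so tying %0 to %11 would force the two values to share a register
+# while both are live. The CHECKs below therefore expect the original
+# operand order to be preserved.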
+name: bar
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gr32, preferred-register: '' }
+ - { id: 1, class: gr32, preferred-register: '' }
+ - { id: 2, class: gr32, preferred-register: '' }
+ - { id: 3, class: gr32, preferred-register: '' }
+ - { id: 4, class: gr32, preferred-register: '' }
+ - { id: 5, class: gr64, preferred-register: '' }
+ - { id: 6, class: gr32, preferred-register: '' }
+ - { id: 7, class: gr32, preferred-register: '' }
+ - { id: 8, class: gr32, preferred-register: '' }
+ - { id: 9, class: gr32, preferred-register: '' }
+ - { id: 10, class: gr32, preferred-register: '' }
+ - { id: 11, class: gr32, preferred-register: '' }
+ - { id: 12, class: gr32, preferred-register: '' }
+ - { id: 13, class: gr32, preferred-register: '' }
+liveins:
+ - { reg: '%edi', virtual-reg: '%4' }
+ - { reg: '%rsi', virtual-reg: '%5' }
+body: |
+ bb.0.bb0:
+ successors: %bb.1.bb1(0x80000000)
+ liveins: %edi, %rsi
+
+ %5 = COPY %rsi
+ %4 = COPY %edi
+ %6 = MOV32r0 implicit-def dead %eflags
+
+ bb.1.bb1:
+ successors: %bb.3.bb4(0x30000000), %bb.2.bb3(0x50000000)
+
+ %0 = PHI %6, %bb.0.bb0, %3, %bb.5.bb7
+ ; CHECK: %0 = PHI %6, %bb.0.bb0, %3, %bb.5.bb7
+ %7 = MOV32ri 1
+ TEST32rr %4, %4, implicit-def %eflags
+ JE_1 %bb.3.bb4, implicit %eflags
+ JMP_1 %bb.2.bb3
+
+ bb.2.bb3:
+ successors: %bb.3.bb4(0x80000000)
+
+ %8 = MOV32ri 2
+
+ bb.3.bb4:
+ successors: %bb.5.bb7(0x30000000), %bb.4.bb6(0x50000000)
+
+ %1 = PHI %7, %bb.1.bb1, %8, %bb.2.bb3
+ TEST32rr %1, %1, implicit-def %eflags
+ JE_1 %bb.5.bb7, implicit %eflags
+ JMP_1 %bb.4.bb6
+
+ bb.4.bb6:
+ successors: %bb.5.bb7(0x80000000)
+
+ %10 = MOV32ri 2
+
+ bb.5.bb7:
+ successors: %bb.1.bb1(0x7c000000), %bb.6.bb8(0x04000000)
+
+ %2 = PHI %7, %bb.3.bb4, %10, %bb.4.bb6
+ %11 = ADD32rr %1, %0, implicit-def dead %eflags
+ ; CHECK: %11 = ADD32rr
+ ; CHECK-SAME: %1,
+ ; CHECK-SAME: %0,
+ MOV32mr %5, 1, _, 0, _, %0 :: (store 4 into %ir.p)
+ %3 = ADD32rr %2, killed %11, implicit-def dead %eflags
+ ; CHECK: %3 = ADD32rr
+ ; CHECK-SAME: %2,
+ ; CHECK-SAME: %11,
+ %12 = SUB32ri8 %3, 10, implicit-def %eflags
+ JL_1 %bb.1.bb1, implicit %eflags
+ JMP_1 %bb.6.bb8
+
+ bb.6.bb8:
+ %13 = MOV32r0 implicit-def dead %eflags
+ %eax = COPY %13
+ RET 0, %eax
+
+...
diff --git a/test/CodeGen/X86/sbb.ll b/test/CodeGen/X86/sbb.ll
index bc00fc7c66ad..414780b2d4e6 100644
--- a/test/CodeGen/X86/sbb.ll
+++ b/test/CodeGen/X86/sbb.ll
@@ -111,6 +111,86 @@ define i8 @i8_select_neg1_or_0_commuted_as_math(i8 %x) {
ret i8 %add
}
+; (X <u Y) ? -1 : 0 --> cmp, sbb
+
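+; (Why cmp+sbb works: after cmpl %esi, %edi, CF = (X <u Y); then
+; sbbl %eax, %eax computes %eax - %eax - CF = -CF, i.e. -1 when X <u Y
+; and 0 otherwise.)
+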
+define i32 @ult_select_neg1_or_0(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: ult_select_neg1_or_0:
+; CHECK: # BB#0:
+; CHECK-NEXT: cmpl %esi, %edi
+; CHECK-NEXT: sbbl %eax, %eax
+; CHECK-NEXT: retq
+ %cmp = icmp ult i32 %x, %y
+ %ext = sext i1 %cmp to i32
+ ret i32 %ext
+}
+
+; Swap the predicate and compare operands:
+; (Y >u X) ? -1 : 0 --> cmp, sbb
+
+define i32 @ugt_select_neg1_or_0(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: ugt_select_neg1_or_0:
+; CHECK: # BB#0:
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: cmpl %edi, %esi
+; CHECK-NEXT: movl $-1, %eax
+; CHECK-NEXT: cmovbel %ecx, %eax
+; CHECK-NEXT: retq
+ %cmp = icmp ugt i32 %y, %x
+ %ext = sext i1 %cmp to i32
+ ret i32 %ext
+}
+
+; Invert the predicate and effectively swap the select operands:
+; (X >=u Y) ? 0 : -1 --> (X <u Y) ? -1 : 0 --> cmp, sbb
+
+define i32 @uge_select_0_or_neg1(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: uge_select_0_or_neg1:
+; CHECK: # BB#0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: cmpl %esi, %edi
+; CHECK-NEXT: setae %al
+; CHECK-NEXT: decl %eax
+; CHECK-NEXT: retq
+ %cmp = icmp uge i32 %x, %y
+ %ext = zext i1 %cmp to i32
+ %add = add i32 %ext, -1
+ ret i32 %add
+}
+
+; Swap the predicate and compare operands:
+; (Y <=u X) ? 0 : -1 --> (X <u Y) ? -1 : 0 --> cmp, sbb
+
+define i32 @ule_select_0_or_neg1(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: ule_select_0_or_neg1:
+; CHECK: # BB#0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: cmpl %edi, %esi
+; CHECK-NEXT: setbe %al
+; CHECK-NEXT: decl %eax
+; CHECK-NEXT: retq
+ %cmp = icmp ule i32 %y, %x
+ %ext = zext i1 %cmp to i32
+ %add = add i32 %ext, -1
+ ret i32 %add
+}
+
+; Verify that subtracting a constant is handled the same way.
+; (X >=u Y) ? 0 : -1 --> (X <u Y) ? -1 : 0 --> cmp, sbb
+
+define i32 @uge_select_0_or_neg1_sub(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: uge_select_0_or_neg1_sub:
+; CHECK: # BB#0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: cmpl %esi, %edi
+; CHECK-NEXT: setae %al
+; CHECK-NEXT: decl %eax
+; CHECK-NEXT: retq
+ %cmp = icmp uge i32 %x, %y
+ %ext = zext i1 %cmp to i32
+ %sub = sub i32 %ext, 1
+ ret i32 %sub
+}
+
; Make sure we're creating nodes with the right value types. This would crash.
; https://bugs.llvm.org/show_bug.cgi?id=33560
diff --git a/test/CodeGen/X86/vector-shuffle-512-v16.ll b/test/CodeGen/X86/vector-shuffle-512-v16.ll
index 2e65bd8c75c7..174a487160c7 100644
--- a/test/CodeGen/X86/vector-shuffle-512-v16.ll
+++ b/test/CodeGen/X86/vector-shuffle-512-v16.ll
@@ -279,6 +279,35 @@ define <16 x i32> @shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u(<16 x i32> %a
ret <16 x i32> %c
}
+;FIXME: can do better with vpcompress
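+; (For illustration only, a hypothetical vpcompressd sequence that could
+; serve this and the later FIXMEs, not what llc currently emits:
+;   movw $0xAAAA, %ax                  # select the odd dword elements
+;   kmovw %eax, %k1
+;   vpcompressd %zmm0, %zmm0 {%k1} {z} # low %ymm0 = elements 1,3,...,15
+; )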
+define <8 x i32> @test_v16i32_1_3_5_7_9_11_13_15(<16 x i32> %v) {
+; ALL-LABEL: test_v16i32_1_3_5_7_9_11_13_15:
+; ALL: # BB#0:
+; ALL-NEXT: vextracti32x8 $1, %zmm0, %ymm1
+; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
+; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; ALL-NEXT: retq
+ %res = shufflevector <16 x i32> %v, <16 x i32> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ ret <8 x i32> %res
+}
+
+;FIXME: can do better with vpcompress
+define <4 x i32> @test_v16i32_0_1_2_12 (<16 x i32> %v) {
+; ALL-LABEL: test_v16i32_0_1_2_12:
+; ALL: # BB#0:
+; ALL-NEXT: vpextrd $1, %xmm0, %eax
+; ALL-NEXT: vpinsrd $1, %eax, %xmm0, %xmm1
+; ALL-NEXT: vpextrd $2, %xmm0, %eax
+; ALL-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
+; ALL-NEXT: vextracti32x4 $3, %zmm0, %xmm0
+; ALL-NEXT: vmovd %xmm0, %eax
+; ALL-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
+; ALL-NEXT: vzeroupper
+; ALL-NEXT: retq
+ %res = shufflevector <16 x i32> %v, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 12>
+ ret <4 x i32> %res
+}
+
define <8 x float> @shuffle_v16f32_extract_256(float* %RET, float* %a) {
; ALL-LABEL: shuffle_v16f32_extract_256:
; ALL: # BB#0:
@@ -290,6 +319,34 @@ define <8 x float> @shuffle_v16f32_extract_256(float* %RET, float* %a) {
ret <8 x float> %v2
}
+;FIXME: can do better with vcompressp
+define <8 x float> @test_v16f32_0_1_2_3_4_6_7_10 (<16 x float> %v) {
+; ALL-LABEL: test_v16f32_0_1_2_3_4_6_7_10:
+; ALL: # BB#0:
+; ALL-NEXT: vextractf32x8 $1, %zmm0, %ymm1
+; ALL-NEXT: vmovsldup {{.*#+}} xmm1 = xmm1[0,0,2,2]
+; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,6,7,u]
+; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7]
+; ALL-NEXT: retq
+ %res = shufflevector <16 x float> %v, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 6, i32 7, i32 10>
+ ret <8 x float> %res
+}
+
+;FIXME: can do better with vcompressp
+define <4 x float> @test_v16f32_0_1_3_6 (<16 x float> %v) {
+; ALL-LABEL: test_v16f32_0_1_3_6:
+; ALL: # BB#0:
+; ALL-NEXT: vextractf32x4 $1, %zmm0, %xmm1
+; ALL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
+; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,3,3]
+; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
+; ALL-NEXT: vzeroupper
+; ALL-NEXT: retq
+ %res = shufflevector <16 x float> %v, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 6>
+ ret <4 x float> %res
+}
+
define <16 x i32> @shuffle_v16i16_1_0_0_0_5_4_4_4_9_8_8_8_13_12_12_12(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i16_1_0_0_0_5_4_4_4_9_8_8_8_13_12_12_12:
; ALL: # BB#0:
diff --git a/test/CodeGen/X86/vector-shuffle-512-v8.ll b/test/CodeGen/X86/vector-shuffle-512-v8.ll
index 30c8d1b2373e..d0b7e4eb205c 100644
--- a/test/CodeGen/X86/vector-shuffle-512-v8.ll
+++ b/test/CodeGen/X86/vector-shuffle-512-v8.ll
@@ -2659,3 +2659,91 @@ define <8 x double> @shuffle_v2f64_v8f64_01010101(<2 x double> %a) {
%shuffle = shufflevector <2 x double> %a, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
ret <8 x double> %shuffle
}
+
+;FIXME: compressp
+define <4 x double> @test_v8f64_2346 (<8 x double> %v) {
+; AVX512F-LABEL: test_v8f64_2346:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; AVX512F-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,2]
+; AVX512F-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: test_v8f64_2346:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX512F-32-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; AVX512F-32-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,2]
+; AVX512F-32-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
+; AVX512F-32-NEXT: retl
+ %res = shufflevector <8 x double> %v, <8 x double> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 6>
+ ret <4 x double> %res
+}
+
+;FIXME: compressp
+define <2 x double> @test_v8f64_34 (<8 x double> %v) {
+; AVX512F-LABEL: test_v8f64_34:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vextractf32x4 $2, %zmm0, %xmm1
+; AVX512F-NEXT: vextractf32x4 $1, %zmm0, %xmm0
+; AVX512F-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: test_v8f64_34:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vextractf32x4 $2, %zmm0, %xmm1
+; AVX512F-32-NEXT: vextractf32x4 $1, %zmm0, %xmm0
+; AVX512F-32-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
+; AVX512F-32-NEXT: vzeroupper
+; AVX512F-32-NEXT: retl
+ %res = shufflevector <8 x double> %v, <8 x double> undef, <2 x i32> <i32 3, i32 4>
+ ret <2 x double> %res
+}
+
+; FIXME: vpcompress
+define <4 x i64> @test_v8i64_1257 (<8 x i64> %v) {
+; AVX512F-LABEL: test_v8i64_1257:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512F-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,1,3]
+; AVX512F-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,2,3]
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: test_v8i64_1257:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512F-32-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,1,3]
+; AVX512F-32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,2,3]
+; AVX512F-32-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX512F-32-NEXT: retl
+ %res = shufflevector <8 x i64> %v, <8 x i64> undef, <4 x i32> <i32 1, i32 2, i32 5, i32 7>
+ ret <4 x i64> %res
+}
+
+define <2 x i64> @test_v8i64_2_5 (<8 x i64> %v) {
+; AVX512F-LABEL: test_v8i64_2_5:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vextracti32x4 $2, %zmm0, %xmm1
+; AVX512F-NEXT: vextracti32x4 $1, %zmm0, %xmm0
+; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: test_v8i64_2_5:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vextracti32x4 $1, %zmm0, %xmm1
+; AVX512F-32-NEXT: vpextrd $1, %xmm1, %eax
+; AVX512F-32-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
+; AVX512F-32-NEXT: vextracti32x4 $2, %zmm0, %xmm0
+; AVX512F-32-NEXT: vpextrd $2, %xmm0, %eax
+; AVX512F-32-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
+; AVX512F-32-NEXT: vpextrd $3, %xmm0, %eax
+; AVX512F-32-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
+; AVX512F-32-NEXT: vzeroupper
+; AVX512F-32-NEXT: retl
+ %res = shufflevector <8 x i64> %v, <8 x i64> undef, <2 x i32> <i32 2, i32 5>
+ ret <2 x i64> %res
+}
diff --git a/test/CodeGen/X86/vector-truncate-combine.ll b/test/CodeGen/X86/vector-truncate-combine.ll
new file mode 100644
index 000000000000..1a6dac8fa6e4
--- /dev/null
+++ b/test/CodeGen/X86/vector-truncate-combine.ll
@@ -0,0 +1,35 @@
+; RUN: llc -mtriple=x86_64-- -O2 -start-after=stack-protector -stop-before=loops %s -o - | FileCheck %s
+
+; This test verifies the fix for PR33368.
+;
+; The expected outcome of the operation is to store bytes 0 and 2 of the incoming
+; parameter into c2 (a 2 x i8 vector). DAGCombine converts shuffles into a
+; sequence of extend and subsequent truncate operations. The bug was that an extension
+; by 4 followed by a truncation by 8 was completely eliminated.
+
+; The test checks for the correct sequence of operations that results from the
+; preservation of the extend/truncate operations mentioned above (2 extend and
+; 3 truncate instructions).
+;
+; NOTE: This operation could be collapsed into a single truncate. Once that is
+; done, this test will have to be adjusted.
+
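+; For reference, the byte flow through the two shuffles below is:
+;   %2[0] = %1[3] = %0[0]  (byte 0 of the input)
+;   %2[1] = %1[0] = %0[2]  (byte 2 of the input)
+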
+; CHECK: PUNPCKLBWrr
+; CHECK: PUNPCKLWDrr
+; CHECK: PACKUSWBrr
+; CHECK: PACKUSWBrr
+; CHECK: PACKUSWBrr
+
+define void @test(double %vec.coerce) local_unnamed_addr {
+entry:
+ %c2 = alloca <2 x i8>, align 2
+ %0 = bitcast double %vec.coerce to <8 x i8>
+ %1 = shufflevector <8 x i8> %0, <8 x i8> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 0>
+ %2 = shufflevector <4 x i8> %1, <4 x i8> undef, <2 x i32> <i32 3, i32 0>
+ store volatile <2 x i8> %2, <2 x i8>* %c2, align 2
+ br label %if.end
+
+if.end:
+ %3 = bitcast <2 x i8> %2 to i16
+ ret void
+}
diff --git a/test/CodeGen/X86/x86-interleaved-access.ll b/test/CodeGen/X86/x86-interleaved-access.ll
index 6047279bc6ed..1263605a6dc0 100644
--- a/test/CodeGen/X86/x86-interleaved-access.ll
+++ b/test/CodeGen/X86/x86-interleaved-access.ll
@@ -194,6 +194,64 @@ define void @store_factori64_4(<16 x i64>* %ptr, <4 x i64> %v0, <4 x i64> %v1, <
define void @interleaved_store_vf32_i8_stride4(<32 x i8> %x1, <32 x i8> %x2, <32 x i8> %x3, <32 x i8> %x4, <128 x i8>* %p) {
+; AVX1-LABEL: interleaved_store_vf32_i8_stride4:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm4 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm5 = xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
+; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm4, %ymm5
+; AVX1-NEXT: vmovaps {{.*#+}} ymm4 = [65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0]
+; AVX1-NEXT: vandnps %ymm5, %ymm4, %ymm5
+; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm6 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm7 = xmm6[4],xmm0[4],xmm6[5],xmm0[5],xmm6[6],xmm0[6],xmm6[7],xmm0[7]
+; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm6 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero
+; AVX1-NEXT: vinsertf128 $1, %xmm7, %ymm6, %ymm6
+; AVX1-NEXT: vandps %ymm4, %ymm6, %ymm6
+; AVX1-NEXT: vorps %ymm5, %ymm6, %ymm8
+; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm6 = xmm2[8],xmm3[8],xmm2[9],xmm3[9],xmm2[10],xmm3[10],xmm2[11],xmm3[11],xmm2[12],xmm3[12],xmm2[13],xmm3[13],xmm2[14],xmm3[14],xmm2[15],xmm3[15]
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm7 = xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3]
+; AVX1-NEXT: vinsertf128 $1, %xmm7, %ymm6, %ymm6
+; AVX1-NEXT: vandnps %ymm6, %ymm4, %ymm6
+; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm7 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm5 = xmm7[4],xmm0[4],xmm7[5],xmm0[5],xmm7[6],xmm0[6],xmm7[7],xmm0[7]
+; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm7 = xmm7[0],zero,xmm7[1],zero,xmm7[2],zero,xmm7[3],zero
+; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm7, %ymm5
+; AVX1-NEXT: vandps %ymm4, %ymm5, %ymm5
+; AVX1-NEXT: vorps %ymm6, %ymm5, %ymm9
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
+; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm5 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm7 = xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3]
+; AVX1-NEXT: vinsertf128 $1, %xmm7, %ymm5, %ymm5
+; AVX1-NEXT: vandnps %ymm5, %ymm4, %ymm5
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm7 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm6 = xmm7[4],xmm0[4],xmm7[5],xmm0[5],xmm7[6],xmm0[6],xmm7[7],xmm0[7]
+; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm7 = xmm7[0],zero,xmm7[1],zero,xmm7[2],zero,xmm7[3],zero
+; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm7, %ymm6
+; AVX1-NEXT: vandps %ymm4, %ymm6, %ymm6
+; AVX1-NEXT: vorps %ymm5, %ymm6, %ymm5
+; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm2[8],xmm3[8],xmm2[9],xmm3[9],xmm2[10],xmm3[10],xmm2[11],xmm3[11],xmm2[12],xmm3[12],xmm2[13],xmm3[13],xmm2[14],xmm3[14],xmm2[15],xmm3[15]
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; AVX1-NEXT: vandnps %ymm2, %ymm4, %ymm2
+; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4,4,5,5,6,6,7,7]
+; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vandps %ymm4, %ymm0, %ymm0
+; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT: vmovaps %ymm0, 96(%rdi)
+; AVX1-NEXT: vmovaps %ymm5, 64(%rdi)
+; AVX1-NEXT: vmovaps %ymm9, 32(%rdi)
+; AVX1-NEXT: vmovaps %ymm8, (%rdi)
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: interleaved_store_vf32_i8_stride4:
; AVX2: # BB#0:
; AVX2-NEXT: vpunpcklbw {{.*#+}} xmm4 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
diff --git a/test/DebugInfo/COFF/lines-bb-start.ll b/test/DebugInfo/COFF/lines-bb-start.ll
new file mode 100644
index 000000000000..249b38d34998
--- /dev/null
+++ b/test/DebugInfo/COFF/lines-bb-start.ll
@@ -0,0 +1,97 @@
+; RUN: llc -O0 < %s | FileCheck %s
+
+source_filename = "t.c"
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i386-pc-windows-msvc19.0.24215"
+
+@str_const = internal unnamed_addr constant [4 x i8] c"str\00", align 1
+
+declare i32 @puts(i8*)
+
+; We had a line info quality issue where the LEA for the string constant had no
+; location info, so the .cv_loc directive appeared after it. Now we have logic
+; that tries to emit the first valid location at the top of each MBB.
+
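+; For illustration, the problematic ordering looked roughly like this (a
+; sketch, not checked):
+;   LBB0_2: # %if.end
+;           leal _str_const, %eax   <-- LEA with no location info
+;           .cv_loc ... # t.c:5:3   <-- line marker arrived after it
+; The CHECKs below verify the fixed ordering.
+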
+define void @lea_str_loc(i1 %cond) !dbg !8 {
+entry:
+ br i1 %cond, label %if.then, label %if.end, !dbg !17
+
+if.then: ; preds = %entry
+ br label %return, !dbg !18
+
+if.end: ; preds = %entry
+ %call = call i32 @puts(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @str_const, i32 0, i32 0)), !dbg !19
+ br label %return, !dbg !20
+
+return: ; preds = %if.end, %if.then
+ ret void, !dbg !20
+}
+
+; The t.c:5 line marker should appear immediately after the BB label.
+
+; CHECK-LABEL: _lea_str_loc:
+; CHECK: .cv_loc {{.*}} # t.c:4:5
+; CHECK: jmp LBB{{.*}}
+; CHECK: LBB0_{{.*}}: # %if.end
+; CHECK-NEXT: .cv_loc {{.*}} # t.c:5:3
+; CHECK-NEXT: leal _str_const, %[[reg:[^ ]*]]
+; CHECK-NEXT: movl %[[reg]], (%esp)
+; CHECK-NEXT: calll _puts
+
+define void @instr_no_loc(i1 %cond) !dbg !21 {
+entry:
+ br i1 %cond, label %if.then, label %if.end, !dbg !22
+
+if.then: ; preds = %entry
+ br label %return, !dbg !23
+
+if.end: ; preds = %entry
+ call void asm sideeffect "nop", ""()
+ %call = call i32 @puts(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @str_const, i32 0, i32 0)), !dbg !24
+ br label %return, !dbg !25
+
+return: ; preds = %if.end, %if.then
+ ret void, !dbg !25
+}
+
+; CHECK-LABEL: _instr_no_loc:
+; CHECK: .cv_loc {{.*}} # t.c:4:5
+; CHECK: jmp LBB{{.*}}
+; CHECK: LBB1_{{.*}}: # %if.end
+; CHECK-NEXT: .cv_loc {{.*}} # t.c:5:3
+; CHECK-NEXT: #APP
+; CHECK-NEXT: nop
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: leal _str_const, %[[reg:[^ ]*]]
+; CHECK-NEXT: movl %[[reg]], (%esp)
+; CHECK-NEXT: calll _puts
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5, !6}
+!llvm.ident = !{!7}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 5.0.0 ", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "t.c", directory: "C:\5Csrc\5Cllvm-project\5Cbuild", checksumkind: CSK_MD5, checksum: "b32df088e991f1996b4e4deb3855c14b")
+!2 = !{}
+!3 = !{i32 1, !"NumRegisterParameters", i32 0}
+!4 = !{i32 2, !"CodeView", i32 1}
+!5 = !{i32 2, !"Debug Info Version", i32 3}
+!6 = !{i32 1, !"wchar_size", i32 2}
+!7 = !{!"clang version 5.0.0 "}
+!8 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 2, type: !9, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
+!9 = !DISubroutineType(types: !10)
+!10 = !{null, !11}
+!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!13 = !DIExpression()
+!14 = !DILocation(line: 2, column: 12, scope: !8)
+!15 = !DILocation(line: 3, column: 7, scope: !16)
+!16 = distinct !DILexicalBlock(scope: !8, file: !1, line: 3, column: 7)
+!17 = !DILocation(line: 3, column: 7, scope: !8)
+!18 = !DILocation(line: 4, column: 5, scope: !16)
+!19 = !DILocation(line: 5, column: 3, scope: !8)
+!20 = !DILocation(line: 6, column: 1, scope: !8)
+!21 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 2, type: !9, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
+!22 = !DILocation(line: 3, column: 7, scope: !21)
+!23 = !DILocation(line: 4, column: 5, scope: !21)
+!24 = !DILocation(line: 5, column: 3, scope: !21)
+!25 = !DILocation(line: 6, column: 1, scope: !21)
diff --git a/test/DebugInfo/COFF/local-variables.ll b/test/DebugInfo/COFF/local-variables.ll
index c0bac0d174a9..249b6e1103db 100644
--- a/test/DebugInfo/COFF/local-variables.ll
+++ b/test/DebugInfo/COFF/local-variables.ll
@@ -193,7 +193,7 @@
; OBJ: ChangeLineOffset: 1
; OBJ: ChangeCodeOffset: 0x35
; OBJ: ChangeCodeOffsetAndLineOffset: {CodeOffset: 0xD, LineOffset: 1}
-; OBJ: ChangeCodeLength: 0xF
+; OBJ: ChangeCodeLength: 0xA
; OBJ: ]
; OBJ: }
; OBJ: Local {
diff --git a/test/DebugInfo/PDB/pdbdump-headers.test b/test/DebugInfo/PDB/pdbdump-headers.test
index 9a4544cce383..3b7895e06b77 100644
--- a/test/DebugInfo/PDB/pdbdump-headers.test
+++ b/test/DebugInfo/PDB/pdbdump-headers.test
@@ -114,11 +114,11 @@ ALL-NEXT: referent = 0x1004, mode = pointer, opts = const, kind = ptr
ALL-NEXT: 0x1006 | LF_ARGLIST [size = 12, hash = 194342]
ALL-NEXT: 0x1003: `__vc_attributes::threadingAttribute::threading_e`
ALL-NEXT: 0x1007 | LF_MFUNCTION [size = 28, hash = 254156]
-ALL-NEXT: return type = 1, # args = 0x1006, param list = 0x0003 (void)
+ALL-NEXT: return type = 0x0003 (void), # args = 1, param list = 0x1006
ALL-NEXT: class type = 0x1004, this type = 0x1005, this adjust = 0
ALL-NEXT: calling conv = thiscall, options = constructor
ALL-NEXT: 0x1008 | LF_MFUNCTION [size = 28, hash = 194536]
-ALL-NEXT: return type = 0, # args = 0x1000, param list = 0x0003 (void)
+ALL-NEXT: return type = 0x0003 (void), # args = 0, param list = 0x1000
ALL-NEXT: class type = 0x1004, this type = 0x1005, this adjust = 0
ALL-NEXT: calling conv = thiscall, options = constructor
ALL-NEXT: 0x1009 | LF_METHODLIST [size = 20, hash = 167492]
@@ -153,17 +153,17 @@ ALL-NEXT: 0x1010 | LF_ARGLIST [size = 16, hash = 134580]
ALL-NEXT: 0x100D: `__vc_attributes::event_receiverAttribute::type_e`
ALL-NEXT: 0x0030 (bool): `bool`
ALL-NEXT: 0x1011 | LF_MFUNCTION [size = 28, hash = 148190]
-ALL-NEXT: return type = 2, # args = 0x1010, param list = 0x0003 (void)
+ALL-NEXT: return type = 0x0003 (void), # args = 2, param list = 0x1010
ALL-NEXT: class type = 0x100E, this type = 0x100F, this adjust = 0
ALL-NEXT: calling conv = thiscall, options = constructor
ALL-NEXT: 0x1012 | LF_ARGLIST [size = 12, hash = 113636]
ALL-NEXT: 0x100D: `__vc_attributes::event_receiverAttribute::type_e`
ALL-NEXT: 0x1013 | LF_MFUNCTION [size = 28, hash = 53336]
-ALL-NEXT: return type = 1, # args = 0x1012, param list = 0x0003 (void)
+ALL-NEXT: return type = 0x0003 (void), # args = 1, param list = 0x1012
ALL-NEXT: class type = 0x100E, this type = 0x100F, this adjust = 0
ALL-NEXT: calling conv = thiscall, options = constructor
ALL-NEXT: 0x1014 | LF_MFUNCTION [size = 28, hash = 55779]
-ALL-NEXT: return type = 0, # args = 0x1000, param list = 0x0003 (void)
+ALL-NEXT: return type = 0x0003 (void), # args = 0, param list = 0x1000
ALL-NEXT: class type = 0x100E, this type = 0x100F, this adjust = 0
ALL-NEXT: calling conv = thiscall, options = constructor
ALL-NEXT: 0x1015 | LF_METHODLIST [size = 28, hash = 220695]
@@ -199,11 +199,11 @@ ALL-NEXT: referent = 0x101A, mode = pointer, opts = const, kind = ptr
ALL-NEXT: 0x101C | LF_ARGLIST [size = 12, hash = 159978]
ALL-NEXT: 0x1019: `__vc_attributes::aggregatableAttribute::type_e`
ALL-NEXT: 0x101D | LF_MFUNCTION [size = 28, hash = 249504]
-ALL-NEXT: return type = 1, # args = 0x101C, param list = 0x0003 (void)
+ALL-NEXT: return type = 0x0003 (void), # args = 1, param list = 0x101C
ALL-NEXT: class type = 0x101A, this type = 0x101B, this adjust = 0
ALL-NEXT: calling conv = thiscall, options = constructor
ALL-NEXT: 0x101E | LF_MFUNCTION [size = 28, hash = 141941]
-ALL-NEXT: return type = 0, # args = 0x1000, param list = 0x0003 (void)
+ALL-NEXT: return type = 0x0003 (void), # args = 0, param list = 0x1000
ALL-NEXT: class type = 0x101A, this type = 0x101B, this adjust = 0
ALL-NEXT: calling conv = thiscall, options = constructor
ALL-NEXT: 0x101F | LF_METHODLIST [size = 20, hash = 238785]
@@ -241,11 +241,11 @@ ALL-NEXT: referent = 0x1025, mode = pointer, opts = const, kind = ptr
ALL-NEXT: 0x1027 | LF_ARGLIST [size = 12, hash = 17744]
ALL-NEXT: 0x1022: `__vc_attributes::event_sourceAttribute::type_e`
ALL-NEXT: 0x1028 | LF_MFUNCTION [size = 28, hash = 239514]
-ALL-NEXT: return type = 1, # args = 0x1027, param list = 0x0003 (void)
+ALL-NEXT: return type = 0x0003 (void), # args = 1, param list = 0x1027
ALL-NEXT: class type = 0x1025, this type = 0x1026, this adjust = 0
ALL-NEXT: calling conv = thiscall, options = constructor
ALL-NEXT: 0x1029 | LF_MFUNCTION [size = 28, hash = 173189]
-ALL-NEXT: return type = 0, # args = 0x1000, param list = 0x0003 (void)
+ALL-NEXT: return type = 0x0003 (void), # args = 0, param list = 0x1000
ALL-NEXT: class type = 0x1025, this type = 0x1026, this adjust = 0
ALL-NEXT: calling conv = thiscall, options = constructor
ALL-NEXT: 0x102A | LF_METHODLIST [size = 20, hash = 130544]
@@ -303,17 +303,17 @@ ALL-NEXT: 0x0030 (bool): `bool`
ALL-NEXT: 0x1032: `const char*`
ALL-NEXT: 0x1032: `const char*`
ALL-NEXT: 0x1034 | LF_MFUNCTION [size = 28, hash = 48854]
-ALL-NEXT: return type = 15, # args = 0x1033, param list = 0x0003 (void)
+ALL-NEXT: return type = 0x0003 (void), # args = 15, param list = 0x1033
ALL-NEXT: class type = 0x102F, this type = 0x1030, this adjust = 0
ALL-NEXT: calling conv = thiscall, options = constructor
ALL-NEXT: 0x1035 | LF_ARGLIST [size = 12, hash = 170035]
ALL-NEXT: 0x102E: `__vc_attributes::moduleAttribute::type_e`
ALL-NEXT: 0x1036 | LF_MFUNCTION [size = 28, hash = 177041]
-ALL-NEXT: return type = 1, # args = 0x1035, param list = 0x0003 (void)
+ALL-NEXT: return type = 0x0003 (void), # args = 1, param list = 0x1035
ALL-NEXT: class type = 0x102F, this type = 0x1030, this adjust = 0
ALL-NEXT: calling conv = thiscall, options = constructor
ALL-NEXT: 0x1037 | LF_MFUNCTION [size = 28, hash = 102745]
-ALL-NEXT: return type = 0, # args = 0x1000, param list = 0x0003 (void)
+ALL-NEXT: return type = 0x0003 (void), # args = 0, param list = 0x1000
ALL-NEXT: class type = 0x102F, this type = 0x1030, this adjust = 0
ALL-NEXT: calling conv = thiscall, options = constructor
ALL-NEXT: 0x1038 | LF_METHODLIST [size = 28, hash = 16947]
@@ -389,7 +389,7 @@ ALL-NEXT: referent = 0x103D, mode = pointer, opts = const, kind = ptr
ALL-NEXT: 0x103F | LF_ARGLIST [size = 12, hash = 49018]
ALL-NEXT: 0x0075 (unsigned): `unsigned`
ALL-NEXT: 0x1040 | LF_MFUNCTION [size = 28, hash = 43821]
-ALL-NEXT: return type = 1, # args = 0x103F, param list = 0x0003 (void)
+ALL-NEXT: return type = 0x0003 (void), # args = 1, param list = 0x103F
ALL-NEXT: class type = 0x103D, this type = 0x103E, this adjust = 0
ALL-NEXT: calling conv = thiscall, options = constructor
ALL-NEXT: 0x1041 | LF_FIELDLIST [size = 60, hash = 202555]
@@ -422,7 +422,7 @@ ALL-NEXT: referent = 0x1045, mode = pointer, opts = const, kind = ptr
ALL-NEXT: 0x1047 | LF_ARGLIST [size = 12, hash = 103930]
ALL-NEXT: 0x1044: `__vc_attributes::helper_attributes::v1_alttypeAttribute::type_e`
ALL-NEXT: 0x1048 | LF_MFUNCTION [size = 28, hash = 110942]
-ALL-NEXT: return type = 1, # args = 0x1047, param list = 0x0003 (void)
+ALL-NEXT: return type = 0x0003 (void), # args = 1, param list = 0x1047
ALL-NEXT: class type = 0x1045, this type = 0x1046, this adjust = 0
ALL-NEXT: calling conv = thiscall, options = constructor
ALL-NEXT: 0x1049 | LF_FIELDLIST [size = 64, hash = 17991]
@@ -474,64 +474,64 @@ ALL-NEXT: TI: 0x1000, Offset: 0
ALL: Hash Adjusters:
ALL: Public Symbols
ALL-NEXT: ============================================================
-ALL-NEXT: - S_PUB32 [size = 36] `?__purecall@@3PAXA`
+ALL-NEXT: 0 | S_PUB32 [size = 36] `?__purecall@@3PAXA`
ALL-NEXT: flags = none, addr = 0003:0000
-ALL-NEXT: - S_PUB32 [size = 20] `_main`
+ALL-NEXT: 36 | S_PUB32 [size = 20] `_main`
ALL-NEXT: flags = function, addr = 0001:0016
-ALL-NEXT: - S_PROCREF [size = 20] `main`
+ALL-NEXT: 56 | S_PROCREF [size = 20] `main`
ALL-NEXT: module = 1, sum name = 0, offset = 120
-ALL-NEXT: - S_GDATA32 [size = 28] `__purecall`
+ALL-NEXT: 76 | S_GDATA32 [size = 28] `__purecall`
ALL-NEXT: type = 0x0403 (void*), addr = 0003:0000
ALL: Symbols
ALL-NEXT: ============================================================
ALL-NEXT: Mod 0000 | `d:\src\llvm\test\DebugInfo\PDB\Inputs\empty.obj`:
-ALL-NEXT: - S_OBJNAME [size = 56] sig=0, `d:\src\llvm\test\DebugInfo\PDB\Inputs\empty.obj`
-ALL-NEXT: - S_COMPILE3 [size = 60]
-ALL-NEXT: machine = intel pentium 3, Ver = Microsoft (R) Optimizing Compiler, language = c++
-ALL-NEXT: frontend = 18.0.31101.0, backend = 18.0.31101.0
-ALL-NEXT: flags = security checks
-ALL-NEXT: - S_GPROC32 [size = 44] `main`
-ALL-NEXT: parent = 0, addr = 0001:0016, code size = 10, end = 196
-ALL-NEXT: debug start = 3, debug end = 8, flags = has fp
-ALL-NEXT: - S_FRAMEPROC [size = 32]
-ALL-NEXT: size = 0, padding size = 0, offset to padding = 0
-ALL-NEXT: bytes of callee saved registers = 0, exception handler addr = 0000:0000
-ALL-NEXT: flags = has async eh | opt speed
-ALL-NEXT: - S_END [size = 4]
-ALL-NEXT: - S_BUILDINFO [size = 8] BuildId = `4110`
+ALL-NEXT: 4 | S_OBJNAME [size = 56] sig=0, `d:\src\llvm\test\DebugInfo\PDB\Inputs\empty.obj`
+ALL-NEXT: 60 | S_COMPILE3 [size = 60]
+ALL-NEXT: machine = intel pentium 3, Ver = Microsoft (R) Optimizing Compiler, language = c++
+ALL-NEXT: frontend = 18.0.31101.0, backend = 18.0.31101.0
+ALL-NEXT: flags = security checks
+ALL-NEXT: 120 | S_GPROC32 [size = 44] `main`
+ALL-NEXT: parent = 0, end = 196, addr = 0001:0016, code size = 10
+ALL-NEXT: debug start = 3, debug end = 8, flags = has fp
+ALL-NEXT: 164 | S_FRAMEPROC [size = 32]
+ALL-NEXT: size = 0, padding size = 0, offset to padding = 0
+ALL-NEXT: bytes of callee saved registers = 0, exception handler addr = 0000:0000
+ALL-NEXT: flags = has async eh | opt speed
+ALL-NEXT: 196 | S_END [size = 4]
+ALL-NEXT: 200 | S_BUILDINFO [size = 8] BuildId = `4110`
ALL-NEXT: Mod 0001 | `* Linker *`:
-ALL-NEXT: - S_OBJNAME [size = 20] sig=0, `* Linker *`
-ALL-NEXT: - S_COMPILE3 [size = 48]
-ALL-NEXT: machine = intel 80386, Ver = Microsoft (R) LINK, language = link
-ALL-NEXT: frontend = 0.0.0.0, backend = 12.0.31101.0
-ALL-NEXT: flags = none
-ALL-NEXT: - S_ENVBLOCK [size = 172]
-ALL-NEXT: - cwd
-ALL-NEXT: - d:\src\llvm\test\DebugInfo\PDB\Inputs
-ALL-NEXT: - exe
-ALL-NEXT: - C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\BIN\link.exe
-ALL-NEXT: - pdb
-ALL-NEXT: - d:\src\llvm\test\DebugInfo\PDB\Inputs\empty.pdb
-ALL-NEXT: - S_TRAMPOLINE [size = 20]
-ALL-NEXT: type = tramp incremental, size = 5, source = 0001:0005, target = 0001:0005
-ALL-NEXT: - S_SECTION [size = 28] `.text`
-ALL-NEXT: length = 4122, alignment = 12, rva = 4096, section # = 1, characteristics = 1610612768
-ALL-NEXT: - S_COFFGROUP [size = 28] `.text$mn`
-ALL-NEXT: length = 4122, addr = 0001:0000, characteristics = 1610612768
-ALL-NEXT: - S_SECTION [size = 28] `.rdata`
-ALL-NEXT: length = 690, alignment = 12, rva = 12288, section # = 2, characteristics = 1073741888
-ALL-NEXT: - S_COFFGROUP [size = 28] `.rdata`
-ALL-NEXT: length = 323, addr = 0002:0000, characteristics = 1073741888
-ALL-NEXT: - S_COFFGROUP [size = 28] `.edata`
-ALL-NEXT: length = 0, addr = 0002:0323, characteristics = 1073741888
-ALL-NEXT: - S_COFFGROUP [size = 32] `.rdata$debug`
-ALL-NEXT: length = 366, addr = 0002:0324, characteristics = 1073741888
-ALL-NEXT: - S_SECTION [size = 28] `.data`
-ALL-NEXT: length = 4, alignment = 12, rva = 16384, section # = 3, characteristics = 3221225536
-ALL-NEXT: - S_COFFGROUP [size = 24] `.bss`
-ALL-NEXT: length = 4, addr = 0003:0000, characteristics = 3221225600
-ALL-NEXT: - S_SECTION [size = 28] `.reloc`
-ALL-NEXT: length = 8, alignment = 12, rva = 20480, section # = 4, characteristics = 1107296320
+ALL-NEXT: 4 | S_OBJNAME [size = 20] sig=0, `* Linker *`
+ALL-NEXT: 24 | S_COMPILE3 [size = 48]
+ALL-NEXT: machine = intel 80386, Ver = Microsoft (R) LINK, language = link
+ALL-NEXT: frontend = 0.0.0.0, backend = 12.0.31101.0
+ALL-NEXT: flags = none
+ALL-NEXT: 72 | S_ENVBLOCK [size = 172]
+ALL-NEXT: - cwd
+ALL-NEXT: - d:\src\llvm\test\DebugInfo\PDB\Inputs
+ALL-NEXT: - exe
+ALL-NEXT: - C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\BIN\link.exe
+ALL-NEXT: - pdb
+ALL-NEXT: - d:\src\llvm\test\DebugInfo\PDB\Inputs\empty.pdb
+ALL-NEXT: 244 | S_TRAMPOLINE [size = 20]
+ALL-NEXT: type = tramp incremental, size = 5, source = 0001:0005, target = 0001:0005
+ALL-NEXT: 264 | S_SECTION [size = 28] `.text`
+ALL-NEXT: length = 4122, alignment = 12, rva = 4096, section # = 1, characteristics = 1610612768
+ALL-NEXT: 292 | S_COFFGROUP [size = 28] `.text$mn`
+ALL-NEXT: length = 4122, addr = 0001:0000, characteristics = 1610612768
+ALL-NEXT: 320 | S_SECTION [size = 28] `.rdata`
+ALL-NEXT: length = 690, alignment = 12, rva = 12288, section # = 2, characteristics = 1073741888
+ALL-NEXT: 348 | S_COFFGROUP [size = 28] `.rdata`
+ALL-NEXT: length = 323, addr = 0002:0000, characteristics = 1073741888
+ALL-NEXT: 376 | S_COFFGROUP [size = 28] `.edata`
+ALL-NEXT: length = 0, addr = 0002:0323, characteristics = 1073741888
+ALL-NEXT: 404 | S_COFFGROUP [size = 32] `.rdata$debug`
+ALL-NEXT: length = 366, addr = 0002:0324, characteristics = 1073741888
+ALL-NEXT: 436 | S_SECTION [size = 28] `.data`
+ALL-NEXT: length = 4, alignment = 12, rva = 16384, section # = 3, characteristics = 3221225536
+ALL-NEXT: 464 | S_COFFGROUP [size = 24] `.bss`
+ALL-NEXT: length = 4, addr = 0003:0000, characteristics = 3221225600
+ALL-NEXT: 488 | S_SECTION [size = 28] `.reloc`
+ALL-NEXT: length = 8, alignment = 12, rva = 20480, section # = 4, characteristics = 1107296320
ALL: Section Contributions
ALL-NEXT: ============================================================
ALL-NEXT: SC | mod = 1, 0001:0000, size = 10, data crc = 0, reloc crc = 0
diff --git a/test/DebugInfo/PDB/pdbdump-merge-ids-and-types.test b/test/DebugInfo/PDB/pdbdump-merge-ids-and-types.test
index d3b7ae56eaac..3903c07b027f 100644
--- a/test/DebugInfo/PDB/pdbdump-merge-ids-and-types.test
+++ b/test/DebugInfo/PDB/pdbdump-merge-ids-and-types.test
@@ -24,7 +24,7 @@ TPI-TYPES-NEXT: referent = 0x1003, mode = pointer, opts = None, kind
TPI-TYPES-NEXT: 0x1005 | LF_ARGLIST [size = 12]
TPI-TYPES-NEXT: 0x0074 (int): `int`
TPI-TYPES-NEXT: 0x1006 | LF_MFUNCTION [size = 28]
-TPI-TYPES-NEXT: return type = 1, # args = 0x1005, param list = 0x0003 (void)
+TPI-TYPES-NEXT: return type = 0x0003 (void), # args = 1, param list = 0x1005
TPI-TYPES-NEXT: class type = 0x1003, this type = 0x1004, this adjust = 0
TPI-TYPES-NEXT: calling conv = thiscall, options = constructor
TPI-TYPES-NEXT: 0x1007 | LF_PROCEDURE [size = 16]
diff --git a/test/DebugInfo/dwarfdump-accel.test b/test/DebugInfo/dwarfdump-accel.test
index c6a971a2b9aa..a49d024992c2 100644
--- a/test/DebugInfo/dwarfdump-accel.test
+++ b/test/DebugInfo/dwarfdump-accel.test
@@ -1,4 +1,5 @@
RUN: llvm-dwarfdump %p/Inputs/dwarfdump-objc.x86_64.o | FileCheck %s
+RUN: llvm-dwarfdump -verify %p/Inputs/dwarfdump-objc.x86_64.o | FileCheck %s --check-prefix=VERIFY
Gather some DIE indexes to verify the accelerator table contents.
CHECK: .debug_info contents
@@ -63,3 +64,7 @@ CHECK-NOT: Name
CHECK: {Atom[0]: [[READONLY]]}
CHECK: {Atom[0]: [[ASSIGN]]}
CHECK: {Atom[0]: [[SETASSIGN]]}
+
+Verify the debug info in the apple_names accelerator table.
+VERIFY: Verifying .apple_names
+VERIFY-NEXT: No errors.
diff --git a/test/Feature/optnone-opt.ll b/test/Feature/optnone-opt.ll
index efd35e566030..6410afb6be99 100644
--- a/test/Feature/optnone-opt.ll
+++ b/test/Feature/optnone-opt.ll
@@ -2,7 +2,7 @@
; RUN: opt -O1 -S -debug %s 2>&1 | FileCheck %s --check-prefix=OPT-O1
; RUN: opt -O2 -S -debug %s 2>&1 | FileCheck %s --check-prefix=OPT-O1 --check-prefix=OPT-O2O3
; RUN: opt -O3 -S -debug %s 2>&1 | FileCheck %s --check-prefix=OPT-O1 --check-prefix=OPT-O2O3
-; RUN: opt -bb-vectorize -dce -die -gvn-hoist -loweratomic -S -debug %s 2>&1 | FileCheck %s --check-prefix=OPT-MORE
+; RUN: opt -dce -die -gvn-hoist -loweratomic -S -debug %s 2>&1 | FileCheck %s --check-prefix=OPT-MORE
; RUN: opt -indvars -licm -loop-deletion -loop-extract -loop-idiom -loop-instsimplify -loop-reduce -loop-reroll -loop-rotate -loop-unroll -loop-unswitch -S -debug %s 2>&1 | FileCheck %s --check-prefix=OPT-LOOP
; REQUIRES: asserts
@@ -55,7 +55,6 @@ attributes #0 = { optnone noinline }
; OPT-O2O3-DAG: Skipping pass 'SLP Vectorizer'
; Additional IR passes that opt doesn't turn on by default.
-; OPT-MORE-DAG: Skipping pass 'Basic-Block Vectorization'
; OPT-MORE-DAG: Skipping pass 'Dead Code Elimination'
; OPT-MORE-DAG: Skipping pass 'Dead Instruction Elimination'
; OPT-MORE-DAG: Skipping pass 'Lower atomic intrinsics
diff --git a/test/Instrumentation/MemorySanitizer/msan_basic.ll b/test/Instrumentation/MemorySanitizer/msan_basic.ll
index 334e00dabf40..ffb239a15256 100644
--- a/test/Instrumentation/MemorySanitizer/msan_basic.ll
+++ b/test/Instrumentation/MemorySanitizer/msan_basic.ll
@@ -535,8 +535,8 @@ entry:
; CHECK: ret i1
-; Check that loads of shadow have the same aligment as the original loads.
-; Check that loads of origin have the aligment of max(4, original alignment).
+; Check that loads of shadow have the same alignment as the original loads.
+; Check that loads of origin have the alignment of max(4, original alignment).
define i32 @ShadowLoadAlignmentLarge() nounwind uwtable sanitize_memory {
%y = alloca i32, align 64
diff --git a/test/LTO/Resolution/X86/Inputs/comdat-mixed-lto.ll b/test/LTO/Resolution/X86/Inputs/comdat-mixed-lto.ll
new file mode 100644
index 000000000000..0112b89f98db
--- /dev/null
+++ b/test/LTO/Resolution/X86/Inputs/comdat-mixed-lto.ll
@@ -0,0 +1,23 @@
+; ModuleID = 'comdat-mixed-lto1.o'
+source_filename = "comdat-mixed-lto1.cpp"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%"class.Test::ptr" = type { i32 }
+
+$C = comdat any
+
+@C = linkonce_odr global %"class.Test::ptr" zeroinitializer, comdat, align 4
+@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @__cxx_global_var_init, i8* bitcast (%"class.Test::ptr"* @C to i8*) }]
+
+define void @testglobfunc() #1 section ".text.startup" comdat($C) {
+entry:
+ ret void
+}
+
+; Function Attrs: noinline uwtable
+define internal void @__cxx_global_var_init() #1 section ".text.startup" comdat($C) {
+entry:
+ store i32 0, i32* getelementptr inbounds (%"class.Test::ptr", %"class.Test::ptr"* @C, i32 0, i32 0), align 4
+ ret void
+}
diff --git a/test/LTO/Resolution/X86/comdat-mixed-lto.ll b/test/LTO/Resolution/X86/comdat-mixed-lto.ll
new file mode 100644
index 000000000000..f6ee22e4161d
--- /dev/null
+++ b/test/LTO/Resolution/X86/comdat-mixed-lto.ll
@@ -0,0 +1,42 @@
+; Test of comdat handling with mixed ThinLTO and regular LTO compilation.
+
+; This module is compiled with ThinLTO
+; RUN: opt -module-summary -o %t1.o %s
+; Input module compiled for regular LTO
+; RUN: opt -o %t2.o %p/Inputs/comdat-mixed-lto.ll
+
+; The copy of C from this module is prevailing. The copy of C from the
+; regular LTO module is not prevailing, and will be dropped to
+; available_externally.
+; RUN: llvm-lto2 run -r=%t1.o,C,pl -r=%t2.o,C,l -r=%t2.o,testglobfunc,lxp -r=%t1.o,testglobfunc,lx -o %t3 %t1.o %t2.o -save-temps
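+; (For reference: in llvm-lto2's -r flags, we understand p = prevailing,
+; l = final definition in the linkage unit, and x = visible to a regular
+; object file.)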
+
+; The regular LTO module becomes %t3.0. Check that we removed
+; __cxx_global_var_init and testglobfunc from the comdat, and that
+; testglobfunc was dropped to available_externally. Otherwise we would get
+; multiply-defined linker errors, since it is no longer in a comdat and
+; would clash with the copy from this module.
+; RUN: llvm-dis %t3.0.0.preopt.bc -o - | FileCheck %s
+; CHECK: define internal void @__cxx_global_var_init() section ".text.startup" {
+; CHECK: define available_externally void @testglobfunc() section ".text.startup" {
+
+; ModuleID = 'comdat-mixed-lto.o'
+source_filename = "comdat-mixed-lto.cpp"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%"class.Test::ptr" = type { i32 }
+
+$C = comdat any
+
+@C = linkonce_odr global %"class.Test::ptr" zeroinitializer, comdat, align 4
+@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @__cxx_global_var_init, i8* bitcast (%"class.Test::ptr"* @C to i8*) }]
+define void @testglobfunc() #1 section ".text.startup" comdat($C) {
+entry:
+ ret void
+}
+
+; Function Attrs: noinline uwtable
+define internal void @__cxx_global_var_init() #1 section ".text.startup" comdat($C) {
+entry:
+ ret void
+}
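To pull the CHECK lines above together, here is a hypothetical sketch (not part of the patch) of roughly how the regular-LTO module %t3.0 should look after resolution: the comdat is gone and the non-prevailing definitions survive only as compiler hints. Names mirror the test; the exact IR the backend prints may differ.

; Hypothetical post-resolution shape of %t3.0:
%"class.Test::ptr" = type { i32 }

@C = available_externally global %"class.Test::ptr" zeroinitializer, align 4

define internal void @__cxx_global_var_init() section ".text.startup" {
entry:
  ret void
}

define available_externally void @testglobfunc() section ".text.startup" {
entry:
  ret void
}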
diff --git a/test/MC/AArch64/coff-basic.ll b/test/MC/AArch64/coff-basic.ll
new file mode 100644
index 000000000000..23f06b5360db
--- /dev/null
+++ b/test/MC/AArch64/coff-basic.ll
@@ -0,0 +1,8 @@
+; RUN: llc -mtriple aarch64-windows < %s | FileCheck %s
+
+define i32 @foo() {
+entry:
+ ret i32 1
+}
+
+; CHECK: .globl foo
diff --git a/test/MC/AMDGPU/code-object-metadata-kernel-args.s b/test/MC/AMDGPU/code-object-metadata-kernel-args.s
index 90915e61f99a..46cf4f506a5c 100644
--- a/test/MC/AMDGPU/code-object-metadata-kernel-args.s
+++ b/test/MC/AMDGPU/code-object-metadata-kernel-args.s
@@ -4,7 +4,9 @@
// CHECK: .amdgpu_code_object_metadata
// CHECK: Version: [ 1, 0 ]
-// CHECK: Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ]
+// CHECK: Printf:
+// CHECK: - '1:1:4:%d\n'
+// CHECK: - '2:1:8:%g\n'
// CHECK: Kernels:
// CHECK: - Name: test_kernel
// CHECK: Language: OpenCL C
diff --git a/test/MC/AMDGPU/code-object-metadata-kernel-attrs.s b/test/MC/AMDGPU/code-object-metadata-kernel-attrs.s
index 9669fcf53939..7884b6672e7e 100644
--- a/test/MC/AMDGPU/code-object-metadata-kernel-attrs.s
+++ b/test/MC/AMDGPU/code-object-metadata-kernel-attrs.s
@@ -4,7 +4,9 @@
// CHECK: .amdgpu_code_object_metadata
// CHECK: Version: [ 1, 0 ]
-// CHECK: Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ]
+// CHECK: Printf:
+// CHECK: - '1:1:4:%d\n'
+// CHECK: - '2:1:8:%g\n'
// CHECK: Kernels:
// CHECK: - Name: test_kernel
// CHECK: Language: OpenCL C
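For reference, the two spellings in this hunk and the previous one denote the same YAML sequence; the tests are only tracking the emitter's switch from flow style to block style:

Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ]    # flow-style sequence
Printf:                                   # block-style sequence, same values
  - '1:1:4:%d\n'
  - '2:1:8:%g\n'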
diff --git a/test/MC/AVR/out-of-range-fixups/adiw-fail.s b/test/MC/AVR/out-of-range-fixups/adiw-fail.s
deleted file mode 100644
index ab734695c9c5..000000000000
--- a/test/MC/AVR/out-of-range-fixups/adiw-fail.s
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: not llvm-mc -triple avr -mattr=avr6 -filetype=obj < %s 2>&1 | FileCheck %s
-
-; CHECK: error: out of range immediate (expected an integer in the range 0 to 63)
-adiw r24, foo+64
-
diff --git a/test/MC/AVR/out-of-range-fixups/in-fail.s b/test/MC/AVR/out-of-range-fixups/in-fail.s
deleted file mode 100644
index b929ead4c5a5..000000000000
--- a/test/MC/AVR/out-of-range-fixups/in-fail.s
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: not llvm-mc -triple avr -mattr=avr6 -filetype=obj < %s 2>&1 | FileCheck %s
-
-; CHECK: error: out of range port number (expected an integer in the range 0 to 63)
-in r3, foo+64
-
diff --git a/test/MC/AVR/out-of-range-fixups/lds-fail.s b/test/MC/AVR/out-of-range-fixups/lds-fail.s
deleted file mode 100644
index e28ad3e861bc..000000000000
--- a/test/MC/AVR/out-of-range-fixups/lds-fail.s
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: not llvm-mc -triple avr -mattr=avr6 -filetype=obj < %s 2>&1 | FileCheck %s
-
-; CHECK: error: out of range port number (expected an integer in the range 0 to 65535)
-lds r2, foo+65536
-
diff --git a/test/MC/AVR/out-of-range-fixups/sbi-fail.s b/test/MC/AVR/out-of-range-fixups/sbi-fail.s
deleted file mode 100644
index 4f23faacd61f..000000000000
--- a/test/MC/AVR/out-of-range-fixups/sbi-fail.s
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: not llvm-mc -triple avr -mattr=avr6 -filetype=obj < %s 2>&1 | FileCheck %s
-
-; CHECK: error: out of range port number (expected an integer in the range 0 to 31)
-sbi foo+32, 1
-
diff --git a/test/MC/Disassembler/SystemZ/insns.txt b/test/MC/Disassembler/SystemZ/insns.txt
index 75f7f9669b5c..2ca19363c46b 100644
--- a/test/MC/Disassembler/SystemZ/insns.txt
+++ b/test/MC/Disassembler/SystemZ/insns.txt
@@ -406,6 +406,36 @@
# CHECK: ah %r15, 0
0x4a 0xf0 0x00 0x00
+# CHECK: ahhhr %r0, %r0, %r0
+0xb9 0xc8 0x00 0x00
+
+# CHECK: ahhhr %r0, %r0, %r15
+0xb9 0xc8 0xf0 0x00
+
+# CHECK: ahhhr %r0, %r15, %r0
+0xb9 0xc8 0x00 0x0f
+
+# CHECK: ahhhr %r15, %r0, %r0
+0xb9 0xc8 0x00 0xf0
+
+# CHECK: ahhhr %r7, %r8, %r9
+0xb9 0xc8 0x90 0x78
+
+# CHECK: ahhlr %r0, %r0, %r0
+0xb9 0xd8 0x00 0x00
+
+# CHECK: ahhlr %r0, %r0, %r15
+0xb9 0xd8 0xf0 0x00
+
+# CHECK: ahhlr %r0, %r15, %r0
+0xb9 0xd8 0x00 0x0f
+
+# CHECK: ahhlr %r15, %r0, %r0
+0xb9 0xd8 0x00 0xf0
+
+# CHECK: ahhlr %r7, %r8, %r9
+0xb9 0xd8 0x90 0x78
+
# CHECK: ahi %r0, -32768
0xa7 0x0a 0x80 0x00
@@ -754,6 +784,36 @@
# CHECK: algsi 524287(%r15), 42
0xeb 0x2a 0xff 0xff 0x7f 0x7e
+# CHECK: alhhhr %r0, %r0, %r0
+0xb9 0xca 0x00 0x00
+
+# CHECK: alhhhr %r0, %r0, %r15
+0xb9 0xca 0xf0 0x00
+
+# CHECK: alhhhr %r0, %r15, %r0
+0xb9 0xca 0x00 0x0f
+
+# CHECK: alhhhr %r15, %r0, %r0
+0xb9 0xca 0x00 0xf0
+
+# CHECK: alhhhr %r7, %r8, %r9
+0xb9 0xca 0x90 0x78
+
+# CHECK: alhhlr %r0, %r0, %r0
+0xb9 0xda 0x00 0x00
+
+# CHECK: alhhlr %r0, %r0, %r15
+0xb9 0xda 0xf0 0x00
+
+# CHECK: alhhlr %r0, %r15, %r0
+0xb9 0xda 0x00 0x0f
+
+# CHECK: alhhlr %r15, %r0, %r0
+0xb9 0xda 0x00 0xf0
+
+# CHECK: alhhlr %r7, %r8, %r9
+0xb9 0xda 0x90 0x78
+
# CHECK: alhsik %r0, %r1, -32768
0xec 0x01 0x80 0x00 0x00 0xda
@@ -826,6 +886,42 @@
# CHECK: alsi 524287(%r15), 42
0xeb 0x2a 0xff 0xff 0x7f 0x6e
+# CHECK: alsih %r0, -2147483648
+0xcc 0x0a 0x80 0x00 0x00 0x00
+
+# CHECK: alsih %r0, -1
+0xcc 0x0a 0xff 0xff 0xff 0xff
+
+# CHECK: alsih %r0, 0
+0xcc 0x0a 0x00 0x00 0x00 0x00
+
+# CHECK: alsih %r0, 1
+0xcc 0x0a 0x00 0x00 0x00 0x01
+
+# CHECK: alsih %r0, 2147483647
+0xcc 0x0a 0x7f 0xff 0xff 0xff
+
+# CHECK: alsih %r15, 0
+0xcc 0xfa 0x00 0x00 0x00 0x00
+
+# CHECK: alsihn %r0, -2147483648
+0xcc 0x0b 0x80 0x00 0x00 0x00
+
+# CHECK: alsihn %r0, -1
+0xcc 0x0b 0xff 0xff 0xff 0xff
+
+# CHECK: alsihn %r0, 0
+0xcc 0x0b 0x00 0x00 0x00 0x00
+
+# CHECK: alsihn %r0, 1
+0xcc 0x0b 0x00 0x00 0x00 0x01
+
+# CHECK: alsihn %r0, 2147483647
+0xcc 0x0b 0x7f 0xff 0xff 0xff
+
+# CHECK: alsihn %r15, 0
+0xcc 0xfb 0x00 0x00 0x00 0x00
+
# CHECK: aly %r0, -524288
0xe3 0x00 0x00 0x00 0x80 0x5e
@@ -1126,6 +1222,18 @@
# CHECK: b 4095(%r15,%r1)
0x47 0xff 0x1f 0xff
+# CHECK: bakr %r0, %r0
+0xb2 0x40 0x00 0x00
+
+# CHECK: bakr %r0, %r15
+0xb2 0x40 0x00 0x0f
+
+# CHECK: bakr %r15, %r0
+0xb2 0x40 0x00 0xf0
+
+# CHECK: bakr %r7, %r8
+0xb2 0x40 0x00 0x78
+
# CHECK: bal %r0, 0
0x45 0x00 0x00 0x00
@@ -1387,6 +1495,30 @@
# CHECK: bctr %r15, %r9
0x06 0xf9
+# CHECK: bsa %r0, %r0
+0xb2 0x5a 0x00 0x00
+
+# CHECK: bsa %r0, %r15
+0xb2 0x5a 0x00 0x0f
+
+# CHECK: bsa %r15, %r0
+0xb2 0x5a 0x00 0xf0
+
+# CHECK: bsa %r7, %r8
+0xb2 0x5a 0x00 0x78
+
+# CHECK: bsg %r0, %r0
+0xb2 0x58 0x00 0x00
+
+# CHECK: bsg %r0, %r15
+0xb2 0x58 0x00 0x0f
+
+# CHECK: bsg %r15, %r0
+0xb2 0x58 0x00 0xf0
+
+# CHECK: bsg %r7, %r8
+0xb2 0x58 0x00 0x78
+
# CHECK: bsm %r0, %r1
0x0b 0x01
@@ -3058,6 +3190,18 @@
# CHECK: chf %r15, 0
0xe3 0xf0 0x00 0x00 0x00 0xcd
+# CHECK: chhr %r0, %r0
+0xb9 0xcd 0x00 0x00
+
+# CHECK: chhr %r0, %r15
+0xb9 0xcd 0x00 0x0f
+
+# CHECK: chhr %r15, %r0
+0xb9 0xcd 0x00 0xf0
+
+# CHECK: chhr %r7, %r8
+0xb9 0xcd 0x00 0x78
+
# CHECK: chhsi 0, 0
0xe5 0x54 0x00 0x00 0x00 0x00
@@ -3109,6 +3253,18 @@
# CHECK: chi %r15, 0
0xa7 0xfe 0x00 0x00
+# CHECK: chlr %r0, %r0
+0xb9 0xdd 0x00 0x00
+
+# CHECK: chlr %r0, %r15
+0xb9 0xdd 0x00 0x0f
+
+# CHECK: chlr %r15, %r0
+0xb9 0xdd 0x00 0xf0
+
+# CHECK: chlr %r7, %r8
+0xb9 0xdd 0x00 0x78
+
# CHECK: chsi 0, 0
0xe5 0x5c 0x00 0x00 0x00 0x00
@@ -3940,6 +4096,18 @@
# CHECK: clhf %r15, 0
0xe3 0xf0 0x00 0x00 0x00 0xcf
+# CHECK: clhhr %r0, %r0
+0xb9 0xcf 0x00 0x00
+
+# CHECK: clhhr %r0, %r15
+0xb9 0xcf 0x00 0x0f
+
+# CHECK: clhhr %r15, %r0
+0xb9 0xcf 0x00 0xf0
+
+# CHECK: clhhr %r7, %r8
+0xb9 0xcf 0x00 0x78
+
# CHECK: clhhsi 0, 0
0xe5 0x55 0x00 0x00 0x00 0x00
@@ -3961,6 +4129,18 @@
# CHECK: clhhsi 4095(%r15), 42
0xe5 0x55 0xff 0xff 0x00 0x2a
+# CHECK: clhlr %r0, %r0
+0xb9 0xdf 0x00 0x00
+
+# CHECK: clhlr %r0, %r15
+0xb9 0xdf 0x00 0x0f
+
+# CHECK: clhlr %r15, %r0
+0xb9 0xdf 0x00 0xf0
+
+# CHECK: clhlr %r7, %r8
+0xb9 0xdf 0x00 0x78
+
# CHECK: cli 0, 0
0x95 0x00 0x00 0x00
@@ -4582,6 +4762,24 @@
# CHECK: crb %r0, %r0, 15, 0
0xec 0x00 0x00 0x00 0xf0 0xf6
+# CHECK: crdte %r0, %r0, %r0
+0xb9 0x8f 0x00 0x00
+
+# CHECK: crdte %r0, %r0, %r14
+0xb9 0x8f 0x00 0x0e
+
+# CHECK: crdte %r0, %r15, %r0
+0xb9 0x8f 0xf0 0x00
+
+# CHECK: crdte %r14, %r0, %r0
+0xb9 0x8f 0x00 0xe0
+
+# CHECK: crdte %r0, %r0, %r0, 15
+0xb9 0x8f 0x0f 0x00
+
+# CHECK: crdte %r4, %r5, %r6, 7
+0xb9 0x8f 0x57 0x46
+
# CHECK: crth %r0, %r1
0xb9 0x72 0x20 0x01
@@ -4624,6 +4822,9 @@
# CHECK: cs %r15, %r0, 0
0xba 0xf0 0x00 0x00
+# CHECK: csch
+0xb2 0x30 0x00 0x00
+
# CHECK: csdtr %r0, %f0, 0
0xb3 0xe3 0x00 0x00
@@ -4672,6 +4873,30 @@
# CHECK: csg %r15, %r0, 0
0xeb 0xf0 0x00 0x00 0x00 0x30
+# CHECK: csp %r0, %r0
+0xb2 0x50 0x00 0x00
+
+# CHECK: csp %r0, %r15
+0xb2 0x50 0x00 0x0f
+
+# CHECK: csp %r14, %r0
+0xb2 0x50 0x00 0xe0
+
+# CHECK: csp %r6, %r8
+0xb2 0x50 0x00 0x68
+
+# CHECK: cspg %r0, %r0
+0xb9 0x8a 0x00 0x00
+
+# CHECK: cspg %r0, %r15
+0xb9 0x8a 0x00 0x0f
+
+# CHECK: cspg %r14, %r0
+0xb9 0x8a 0x00 0xe0
+
+# CHECK: cspg %r6, %r8
+0xb9 0x8a 0x00 0x68
+
# CHECK: csst 0, 0, %r0
0xc8 0x02 0x00 0x00 0x00 0x00
@@ -5623,6 +5848,36 @@
# CHECK: der %f15, %f0
0x3d 0xf0
+# CHECK: diag %r0, %r0, 0
+0x83 0x00 0x00 0x00
+
+# CHECK: diag %r0, %r15, 0
+0x83 0x0f 0x00 0x00
+
+# CHECK: diag %r14, %r15, 0
+0x83 0xef 0x00 0x00
+
+# CHECK: diag %r15, %r15, 0
+0x83 0xff 0x00 0x00
+
+# CHECK: diag %r0, %r0, 4095
+0x83 0x00 0x0f 0xff
+
+# CHECK: diag %r0, %r0, 1
+0x83 0x00 0x00 0x01
+
+# CHECK: diag %r0, %r0, 0(%r1)
+0x83 0x00 0x10 0x00
+
+# CHECK: diag %r0, %r0, 0(%r15)
+0x83 0x00 0xf0 0x00
+
+# CHECK: diag %r0, %r0, 4095(%r1)
+0x83 0x00 0x1f 0xff
+
+# CHECK: diag %r0, %r0, 4095(%r15)
+0x83 0x00 0xff 0xff
+
# CHECK: didbr %f0, %f0, %f0, 1
0xb3 0x5b 0x01 0x00
@@ -5992,6 +6247,30 @@
# CHECK: ecag %r0, %r0, 524287(%r15)
0xeb 0x00 0xff 0xff 0x7f 0x4c
+# CHECK: ecctr %r0, %r0
+0xb2 0xe4 0x00 0x00
+
+# CHECK: ecctr %r0, %r15
+0xb2 0xe4 0x00 0x0f
+
+# CHECK: ecctr %r15, %r0
+0xb2 0xe4 0x00 0xf0
+
+# CHECK: ecctr %r7, %r8
+0xb2 0xe4 0x00 0x78
+
+# CHECK: ecpga %r0, %r0
+0xb2 0xed 0x00 0x00
+
+# CHECK: ecpga %r0, %r15
+0xb2 0xed 0x00 0x0f
+
+# CHECK: ecpga %r15, %r0
+0xb2 0xed 0x00 0xf0
+
+# CHECK: ecpga %r7, %r8
+0xb2 0xed 0x00 0x78
+
# CHECK: ectg 0, 0, %r0
0xc8 0x01 0x00 0x00 0x00 0x00
@@ -6118,6 +6397,36 @@
# CHECK: efpc %r15
0xb3 0x8c 0x00 0xf0
+# CHECK: epar %r0
+0xb2 0x26 0x00 0x00
+
+# CHECK: epar %r1
+0xb2 0x26 0x00 0x10
+
+# CHECK: epar %r15
+0xb2 0x26 0x00 0xf0
+
+# CHECK: epair %r0
+0xb9 0x9a 0x00 0x00
+
+# CHECK: epair %r1
+0xb9 0x9a 0x00 0x10
+
+# CHECK: epair %r15
+0xb9 0x9a 0x00 0xf0
+
+# CHECK: epctr %r0, %r0
+0xb2 0xe5 0x00 0x00
+
+# CHECK: epctr %r0, %r15
+0xb2 0xe5 0x00 0x0f
+
+# CHECK: epctr %r15, %r0
+0xb2 0xe5 0x00 0xf0
+
+# CHECK: epctr %r7, %r8
+0xb2 0xe5 0x00 0x78
+
# CHECK: epsw %r0, %r0
0xb9 0x8d 0x00 0x00
@@ -6130,6 +6439,48 @@
# CHECK: epsw %r6, %r8
0xb9 0x8d 0x00 0x68
+# CHECK: ereg %r0, %r0
+0xb2 0x49 0x00 0x00
+
+# CHECK: ereg %r0, %r15
+0xb2 0x49 0x00 0x0f
+
+# CHECK: ereg %r15, %r0
+0xb2 0x49 0x00 0xf0
+
+# CHECK: ereg %r7, %r8
+0xb2 0x49 0x00 0x78
+
+# CHECK: eregg %r0, %r0
+0xb9 0x0e 0x00 0x00
+
+# CHECK: eregg %r0, %r15
+0xb9 0x0e 0x00 0x0f
+
+# CHECK: eregg %r15, %r0
+0xb9 0x0e 0x00 0xf0
+
+# CHECK: eregg %r7, %r8
+0xb9 0x0e 0x00 0x78
+
+# CHECK: esar %r0
+0xb2 0x27 0x00 0x00
+
+# CHECK: esar %r1
+0xb2 0x27 0x00 0x10
+
+# CHECK: esar %r15
+0xb2 0x27 0x00 0xf0
+
+# CHECK: esair %r0
+0xb9 0x9b 0x00 0x00
+
+# CHECK: esair %r1
+0xb9 0x9b 0x00 0x10
+
+# CHECK: esair %r15
+0xb9 0x9b 0x00 0xf0
+
# CHECK: esdtr %f0, %f9
0xb3 0xe7 0x00 0x09
@@ -6142,6 +6493,27 @@
# CHECK: esdtr %f15, %f9
0xb3 0xe7 0x00 0xf9
+# CHECK: esea %r0
+0xb9 0x9d 0x00 0x00
+
+# CHECK: esea %r1
+0xb9 0x9d 0x00 0x10
+
+# CHECK: esea %r15
+0xb9 0x9d 0x00 0xf0
+
+# CHECK: esta %r0, %r0
+0xb2 0x4a 0x00 0x00
+
+# CHECK: esta %r0, %r15
+0xb2 0x4a 0x00 0x0f
+
+# CHECK: esta %r14, %r0
+0xb2 0x4a 0x00 0xe0
+
+# CHECK: esta %r6, %r8
+0xb2 0x4a 0x00 0x68
+
# CHECK: esxtr %f0, %f8
0xb3 0xef 0x00 0x08
@@ -6391,6 +6763,18 @@
# CHECK: her %f15, %f0
0x34 0xf0
+# CHECK: hsch
+0xb2 0x31 0x00 0x00
+
+# CHECK: iac %r0
+0xb2 0x24 0x00 0x00
+
+# CHECK: iac %r1
+0xb2 0x24 0x00 0x10
+
+# CHECK: iac %r15
+0xb2 0x24 0x00 0xf0
+
# CHECK: ic %r0, 0
0x43 0x00 0x00 0x00
@@ -6523,6 +6907,24 @@
# CHECK: icy %r15, 0
0xe3 0xf0 0x00 0x00 0x00 0x73
+# CHECK: idte %r0, %r0, %r0
+0xb9 0x8e 0x00 0x00
+
+# CHECK: idte %r0, %r0, %r15
+0xb9 0x8e 0x00 0x0f
+
+# CHECK: idte %r0, %r15, %r0
+0xb9 0x8e 0xf0 0x00
+
+# CHECK: idte %r15, %r0, %r0
+0xb9 0x8e 0x00 0xf0
+
+# CHECK: idte %r0, %r0, %r0, 15
+0xb9 0x8e 0x0f 0x00
+
+# CHECK: idte %r4, %r5, %r6, 7
+0xb9 0x8e 0x57 0x46
+
# CHECK: iedtr %f0, %f0, %f0
0xb3 0xf6 0x00 0x00
@@ -6625,6 +7027,9 @@
# CHECK: iill %r15, 0
0xa5 0xf3 0x00 0x00
+# CHECK: ipk
+0xb2 0x0b 0x00 0x00
+
# CHECK: ipm %r0
0xb2 0x22 0x00 0x00
@@ -6634,6 +7039,48 @@
# CHECK: ipm %r15
0xb2 0x22 0x00 0xf0
+# CHECK: ipte %r0, %r0
+0xb2 0x21 0x00 0x00
+
+# CHECK: ipte %r0, %r15
+0xb2 0x21 0x00 0x0f
+
+# CHECK: ipte %r15, %r0
+0xb2 0x21 0x00 0xf0
+
+# CHECK: ipte %r0, %r0, %r15
+0xb2 0x21 0xf0 0x00
+
+# CHECK: ipte %r0, %r0, %r0, 15
+0xb2 0x21 0x0f 0x00
+
+# CHECK: ipte %r7, %r8, %r9, 10
+0xb2 0x21 0x9a 0x78
+
+# CHECK: iske %r0, %r0
+0xb2 0x29 0x00 0x00
+
+# CHECK: iske %r0, %r15
+0xb2 0x29 0x00 0x0f
+
+# CHECK: iske %r15, %r0
+0xb2 0x29 0x00 0xf0
+
+# CHECK: iske %r7, %r8
+0xb2 0x29 0x00 0x78
+
+# CHECK: ivsk %r0, %r0
+0xb2 0x23 0x00 0x00
+
+# CHECK: ivsk %r0, %r15
+0xb2 0x23 0x00 0x0f
+
+# CHECK: ivsk %r15, %r0
+0xb2 0x23 0x00 0xf0
+
+# CHECK: ivsk %r7, %r8
+0xb2 0x23 0x00 0x78
+
# CHECK: kdb %f0, 0
0xed 0x00 0x00 0x00 0x00 0x18
@@ -7258,6 +7705,36 @@
# CHECK: laog %r15, %r0, 0
0xeb 0xf0 0x00 0x00 0x00 0xe6
+# CHECK: lasp 0, 0
+0xe5 0x00 0x00 0x00 0x00 0x00
+
+# CHECK: lasp 0(%r1), 0(%r2)
+0xe5 0x00 0x10 0x00 0x20 0x00
+
+# CHECK: lasp 160(%r1), 320(%r15)
+0xe5 0x00 0x10 0xa0 0xf1 0x40
+
+# CHECK: lasp 0(%r1), 4095
+0xe5 0x00 0x10 0x00 0x0f 0xff
+
+# CHECK: lasp 0(%r1), 4095(%r2)
+0xe5 0x00 0x10 0x00 0x2f 0xff
+
+# CHECK: lasp 0(%r1), 4095(%r15)
+0xe5 0x00 0x10 0x00 0xff 0xff
+
+# CHECK: lasp 0(%r1), 0
+0xe5 0x00 0x10 0x00 0x00 0x00
+
+# CHECK: lasp 0(%r15), 0
+0xe5 0x00 0xf0 0x00 0x00 0x00
+
+# CHECK: lasp 4095(%r1), 0
+0xe5 0x00 0x1f 0xff 0x00 0x00
+
+# CHECK: lasp 4095(%r15), 0
+0xe5 0x00 0xff 0xff 0x00 0x00
+
# CHECK: lat %r0, -524288
0xe3 0x00 0x00 0x00 0x80 0x9f
@@ -7453,6 +7930,24 @@
# CHECK: lbr %r15, %r0
0xb9 0x26 0x00 0xf0
+# CHECK: lcctl 0
+0xb2 0x84 0x00 0x00
+
+# CHECK: lcctl 0(%r1)
+0xb2 0x84 0x10 0x00
+
+# CHECK: lcctl 0(%r15)
+0xb2 0x84 0xf0 0x00
+
+# CHECK: lcctl 4095
+0xb2 0x84 0x0f 0xff
+
+# CHECK: lcctl 4095(%r1)
+0xb2 0x84 0x1f 0xff
+
+# CHECK: lcctl 4095(%r15)
+0xb2 0x84 0xff 0xff
+
# CHECK: lcdbr %f0, %f9
0xb3 0x13 0x00 0x09
@@ -7536,6 +8031,75 @@
# CHECK: lcr %r7, %r8
0x13 0x78
+# CHECK: lctl %c0, %c0, 0
+0xb7 0x00 0x00 0x00
+
+# CHECK: lctl %c0, %c15, 0
+0xb7 0x0f 0x00 0x00
+
+# CHECK: lctl %c14, %c15, 0
+0xb7 0xef 0x00 0x00
+
+# CHECK: lctl %c15, %c15, 0
+0xb7 0xff 0x00 0x00
+
+# CHECK: lctl %c0, %c0, 4095
+0xb7 0x00 0x0f 0xff
+
+# CHECK: lctl %c0, %c0, 1
+0xb7 0x00 0x00 0x01
+
+# CHECK: lctl %c0, %c0, 0(%r1)
+0xb7 0x00 0x10 0x00
+
+# CHECK: lctl %c0, %c0, 0(%r15)
+0xb7 0x00 0xf0 0x00
+
+# CHECK: lctl %c0, %c0, 4095(%r1)
+0xb7 0x00 0x1f 0xff
+
+# CHECK: lctl %c0, %c0, 4095(%r15)
+0xb7 0x00 0xff 0xff
+
+# CHECK: lctlg %c0, %c0, 0
+0xeb 0x00 0x00 0x00 0x00 0x2f
+
+# CHECK: lctlg %c0, %c15, 0
+0xeb 0x0f 0x00 0x00 0x00 0x2f
+
+# CHECK: lctlg %c14, %c15, 0
+0xeb 0xef 0x00 0x00 0x00 0x2f
+
+# CHECK: lctlg %c15, %c15, 0
+0xeb 0xff 0x00 0x00 0x00 0x2f
+
+# CHECK: lctlg %c0, %c0, -524288
+0xeb 0x00 0x00 0x00 0x80 0x2f
+
+# CHECK: lctlg %c0, %c0, -1
+0xeb 0x00 0x0f 0xff 0xff 0x2f
+
+# CHECK: lctlg %c0, %c0, 0
+0xeb 0x00 0x00 0x00 0x00 0x2f
+
+# CHECK: lctlg %c0, %c0, 1
+0xeb 0x00 0x00 0x01 0x00 0x2f
+
+# CHECK: lctlg %c0, %c0, 524287
+0xeb 0x00 0x0f 0xff 0x7f 0x2f
+
+# CHECK: lctlg %c0, %c0, 0(%r1)
+0xeb 0x00 0x10 0x00 0x00 0x2f
+
+# CHECK: lctlg %c0, %c0, 0(%r15)
+0xeb 0x00 0xf0 0x00 0x00 0x2f
+
+# CHECK: lctlg %c0, %c0, 524287(%r1)
+0xeb 0x00 0x1f 0xff 0x7f 0x2f
+
+# CHECK: lctlg %c0, %c0, 524287(%r15)
+0xeb 0x00 0xff 0xff 0x7f 0x2f
+
# CHECK: lcxbr %f0, %f8
0xb3 0x43 0x00 0x08
@@ -9282,6 +9846,24 @@
# CHECK: locgr %r11, %r3, 15
0xb9 0xe2 0xf0 0xb3
+# CHECK: lpctl 0
+0xb2 0x85 0x00 0x00
+
+# CHECK: lpctl 0(%r1)
+0xb2 0x85 0x10 0x00
+
+# CHECK: lpctl 0(%r15)
+0xb2 0x85 0xf0 0x00
+
+# CHECK: lpctl 4095
+0xb2 0x85 0x0f 0xff
+
+# CHECK: lpctl 4095(%r1)
+0xb2 0x85 0x1f 0xff
+
+# CHECK: lpctl 4095(%r15)
+0xb2 0x85 0xff 0xff
+
# CHECK: lpd %r0, 0, 0
0xc8 0x04 0x00 0x00 0x00 0x00
@@ -9396,6 +9978,24 @@
# CHECK: lpgr %r7, %r8
0xb9 0x00 0x00 0x78
+# CHECK: lpp 0
+0xb2 0x80 0x00 0x00
+
+# CHECK: lpp 0(%r1)
+0xb2 0x80 0x10 0x00
+
+# CHECK: lpp 0(%r15)
+0xb2 0x80 0xf0 0x00
+
+# CHECK: lpp 4095
+0xb2 0x80 0x0f 0xff
+
+# CHECK: lpp 4095(%r1)
+0xb2 0x80 0x1f 0xff
+
+# CHECK: lpp 4095(%r15)
+0xb2 0x80 0xff 0xff
+
# CHECK: lpq %r0, -524288
0xe3 0x00 0x00 0x00 0x80 0x8f
@@ -9438,6 +10038,60 @@
# CHECK: lpr %r7, %r8
0x10 0x78
+# CHECK: lpsw 0
+0x82 0x00 0x00 0x00
+
+# CHECK: lpsw 0(%r1)
+0x82 0x00 0x10 0x00
+
+# CHECK: lpsw 0(%r15)
+0x82 0x00 0xf0 0x00
+
+# CHECK: lpsw 4095
+0x82 0x00 0x0f 0xff
+
+# CHECK: lpsw 4095(%r1)
+0x82 0x00 0x1f 0xff
+
+# CHECK: lpsw 4095(%r15)
+0x82 0x00 0xff 0xff
+
+# CHECK: lpswe 0
+0xb2 0xb2 0x00 0x00
+
+# CHECK: lpswe 0(%r1)
+0xb2 0xb2 0x10 0x00
+
+# CHECK: lpswe 0(%r15)
+0xb2 0xb2 0xf0 0x00
+
+# CHECK: lpswe 4095
+0xb2 0xb2 0x0f 0xff
+
+# CHECK: lpswe 4095(%r1)
+0xb2 0xb2 0x1f 0xff
+
+# CHECK: lpswe 4095(%r15)
+0xb2 0xb2 0xff 0xff
+
+# CHECK: lptea %r0, %r0, %r0, 0
+0xb9 0xaa 0x00 0x00
+
+# CHECK: lptea %r0, %r0, %r0, 15
+0xb9 0xaa 0x0f 0x00
+
+# CHECK: lptea %r0, %r0, %r15, 0
+0xb9 0xaa 0x00 0x0f
+
+# CHECK: lptea %r0, %r15, %r0, 0
+0xb9 0xaa 0xf0 0x00
+
+# CHECK: lptea %r4, %r5, %r6, 7
+0xb9 0xaa 0x57 0x46
+
+# CHECK: lptea %r15, %r0, %r0, 0
+0xb9 0xaa 0x00 0xf0
+
# CHECK: lpxbr %f0, %f8
0xb3 0x40 0x00 0x08
@@ -9474,6 +10128,87 @@
# CHECK: lr %r15, %r9
0x18 0xf9
+# CHECK: lra %r0, 0
+0xb1 0x00 0x00 0x00
+
+# CHECK: lra %r0, 4095
+0xb1 0x00 0x0f 0xff
+
+# CHECK: lra %r0, 0(%r1)
+0xb1 0x00 0x10 0x00
+
+# CHECK: lra %r0, 0(%r15)
+0xb1 0x00 0xf0 0x00
+
+# CHECK: lra %r0, 4095(%r1,%r15)
+0xb1 0x01 0xff 0xff
+
+# CHECK: lra %r0, 4095(%r15,%r1)
+0xb1 0x0f 0x1f 0xff
+
+# CHECK: lra %r15, 0
+0xb1 0xf0 0x00 0x00
+
+# CHECK: lrag %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x03
+
+# CHECK: lrag %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x03
+
+# CHECK: lrag %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x03
+
+# CHECK: lrag %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x03
+
+# CHECK: lrag %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x03
+
+# CHECK: lrag %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x03
+
+# CHECK: lrag %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x03
+
+# CHECK: lrag %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x03
+
+# CHECK: lrag %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x03
+
+# CHECK: lrag %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x03
+
+# CHECK: lray %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x13
+
+# CHECK: lray %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x13
+
+# CHECK: lray %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x13
+
+# CHECK: lray %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x13
+
+# CHECK: lray %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x13
+
+# CHECK: lray %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x13
+
+# CHECK: lray %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x13
+
+# CHECK: lray %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x13
+
+# CHECK: lray %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x13
+
+# CHECK: lray %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x13
+
# CHECK: lrv %r0, -524288
0xe3 0x00 0x00 0x00 0x80 0x1e
@@ -9594,6 +10329,24 @@
# CHECK: lrvr %r15, %r15
0xb9 0x1f 0x00 0xff
+# CHECK: lsctl 0
+0xb2 0x87 0x00 0x00
+
+# CHECK: lsctl 0(%r1)
+0xb2 0x87 0x10 0x00
+
+# CHECK: lsctl 0(%r15)
+0xb2 0x87 0xf0 0x00
+
+# CHECK: lsctl 4095
+0xb2 0x87 0x0f 0xff
+
+# CHECK: lsctl 4095(%r1)
+0xb2 0x87 0x1f 0xff
+
+# CHECK: lsctl 4095(%r15)
+0xb2 0x87 0xff 0xff
+
# CHECK: lt %r0, -524288
0xe3 0x00 0x00 0x00 0x80 0x12
@@ -9816,6 +10569,30 @@
# CHECK: ltxtr %f13, %f9
0xb3 0xde 0x00 0xd9
+# CHECK: lura %r0, %r0
+0xb2 0x4b 0x00 0x00
+
+# CHECK: lura %r0, %r15
+0xb2 0x4b 0x00 0x0f
+
+# CHECK: lura %r15, %r0
+0xb2 0x4b 0x00 0xf0
+
+# CHECK: lura %r7, %r8
+0xb2 0x4b 0x00 0x78
+
+# CHECK: lurag %r0, %r0
+0xb9 0x05 0x00 0x00
+
+# CHECK: lurag %r0, %r15
+0xb9 0x05 0x00 0x0f
+
+# CHECK: lurag %r15, %r0
+0xb9 0x05 0x00 0xf0
+
+# CHECK: lurag %r7, %r8
+0xb9 0x05 0x00 0x78
+
# CHECK: lxd %f0, 4095
0xed 0x00 0x0f 0xff 0x00 0x25
@@ -10887,6 +11664,24 @@
# CHECK: ms %r15, 0
0x71 0xf0 0x00 0x00
+# CHECK: msch 0
+0xb2 0x32 0x00 0x00
+
+# CHECK: msch 0(%r1)
+0xb2 0x32 0x10 0x00
+
+# CHECK: msch 0(%r15)
+0xb2 0x32 0xf0 0x00
+
+# CHECK: msch 4095
+0xb2 0x32 0x0f 0xff
+
+# CHECK: msch 4095(%r1)
+0xb2 0x32 0x1f 0xff
+
+# CHECK: msch 4095(%r15)
+0xb2 0x32 0xff 0xff
+
# CHECK: msd %f0, %f0, 0
0xed 0x00 0x00 0x00 0x00 0x3f
@@ -11199,6 +11994,15 @@
# CHECK: msr %r7, %r8
0xb2 0x52 0x00 0x78
+# CHECK: msta %r0
+0xb2 0x47 0x00 0x00
+
+# CHECK: msta %r2
+0xb2 0x47 0x00 0x20
+
+# CHECK: msta %r14
+0xb2 0x47 0x00 0xe0
+
# CHECK: msy %r0, -524288
0xe3 0x00 0x00 0x00 0x80 0x51
@@ -11265,6 +12069,36 @@
# CHECK: mvc 0(256,%r15), 0
0xd2 0xff 0xf0 0x00 0x00 0x00
+# CHECK: mvcdk 0, 0
+0xe5 0x0f 0x00 0x00 0x00 0x00
+
+# CHECK: mvcdk 0(%r1), 0(%r2)
+0xe5 0x0f 0x10 0x00 0x20 0x00
+
+# CHECK: mvcdk 160(%r1), 320(%r15)
+0xe5 0x0f 0x10 0xa0 0xf1 0x40
+
+# CHECK: mvcdk 0(%r1), 4095
+0xe5 0x0f 0x10 0x00 0x0f 0xff
+
+# CHECK: mvcdk 0(%r1), 4095(%r2)
+0xe5 0x0f 0x10 0x00 0x2f 0xff
+
+# CHECK: mvcdk 0(%r1), 4095(%r15)
+0xe5 0x0f 0x10 0x00 0xff 0xff
+
+# CHECK: mvcdk 0(%r1), 0
+0xe5 0x0f 0x10 0x00 0x00 0x00
+
+# CHECK: mvcdk 0(%r15), 0
+0xe5 0x0f 0xf0 0x00 0x00 0x00
+
+# CHECK: mvcdk 4095(%r1), 0
+0xe5 0x0f 0x1f 0xff 0x00 0x00
+
+# CHECK: mvcdk 4095(%r15), 0
+0xe5 0x0f 0xff 0xff 0x00 0x00
+
# CHECK: mvcin 0(1), 0
0xe8 0x00 0x00 0x00 0x00 0x00
@@ -11385,6 +12219,132 @@
# CHECK: mvclu %r14, %r0, 0
0xeb 0xe0 0x00 0x00 0x00 0x8e
+# CHECK: mvcos 0, 0, %r0
+0xc8 0x00 0x00 0x00 0x00 0x00
+
+# CHECK: mvcos 0(%r1), 0(%r15), %r2
+0xc8 0x20 0x10 0x00 0xf0 0x00
+
+# CHECK: mvcos 1(%r1), 0(%r15), %r2
+0xc8 0x20 0x10 0x01 0xf0 0x00
+
+# CHECK: mvcos 4095(%r1), 0(%r15), %r2
+0xc8 0x20 0x1f 0xff 0xf0 0x00
+
+# CHECK: mvcos 0(%r1), 1(%r15), %r2
+0xc8 0x20 0x10 0x00 0xf0 0x01
+
+# CHECK: mvcos 0(%r1), 4095(%r15), %r2
+0xc8 0x20 0x10 0x00 0xff 0xff
+
+# CHECK: mvcp 0(%r0), 0, %r3
+0xda 0x03 0x00 0x00 0x00 0x00
+
+# CHECK: mvcp 0(%r1), 0, %r3
+0xda 0x13 0x00 0x00 0x00 0x00
+
+# CHECK: mvcp 0(%r1), 0(%r1), %r3
+0xda 0x13 0x00 0x00 0x10 0x00
+
+# CHECK: mvcp 0(%r1), 0(%r15), %r3
+0xda 0x13 0x00 0x00 0xf0 0x00
+
+# CHECK: mvcp 0(%r1), 4095, %r3
+0xda 0x13 0x00 0x00 0x0f 0xff
+
+# CHECK: mvcp 0(%r1), 4095(%r1), %r3
+0xda 0x13 0x00 0x00 0x1f 0xff
+
+# CHECK: mvcp 0(%r1), 4095(%r15), %r3
+0xda 0x13 0x00 0x00 0xff 0xff
+
+# CHECK: mvcp 0(%r2,%r1), 0, %r3
+0xda 0x23 0x10 0x00 0x00 0x00
+
+# CHECK: mvcp 0(%r2,%r15), 0, %r3
+0xda 0x23 0xf0 0x00 0x00 0x00
+
+# CHECK: mvcp 4095(%r2,%r1), 0, %r3
+0xda 0x23 0x1f 0xff 0x00 0x00
+
+# CHECK: mvcp 4095(%r2,%r15), 0, %r3
+0xda 0x23 0xff 0xff 0x00 0x00
+
+# CHECK: mvcp 0(%r2,%r1), 0, %r3
+0xda 0x23 0x10 0x00 0x00 0x00
+
+# CHECK: mvcp 0(%r2,%r15), 0, %r3
+0xda 0x23 0xf0 0x00 0x00 0x00
+
+# CHECK: mvcs 0(%r0), 0, %r3
+0xdb 0x03 0x00 0x00 0x00 0x00
+
+# CHECK: mvcs 0(%r1), 0, %r3
+0xdb 0x13 0x00 0x00 0x00 0x00
+
+# CHECK: mvcs 0(%r1), 0(%r1), %r3
+0xdb 0x13 0x00 0x00 0x10 0x00
+
+# CHECK: mvcs 0(%r1), 0(%r15), %r3
+0xdb 0x13 0x00 0x00 0xf0 0x00
+
+# CHECK: mvcs 0(%r1), 4095, %r3
+0xdb 0x13 0x00 0x00 0x0f 0xff
+
+# CHECK: mvcs 0(%r1), 4095(%r1), %r3
+0xdb 0x13 0x00 0x00 0x1f 0xff
+
+# CHECK: mvcs 0(%r1), 4095(%r15), %r3
+0xdb 0x13 0x00 0x00 0xff 0xff
+
+# CHECK: mvcs 0(%r2,%r1), 0, %r3
+0xdb 0x23 0x10 0x00 0x00 0x00
+
+# CHECK: mvcs 0(%r2,%r15), 0, %r3
+0xdb 0x23 0xf0 0x00 0x00 0x00
+
+# CHECK: mvcs 4095(%r2,%r1), 0, %r3
+0xdb 0x23 0x1f 0xff 0x00 0x00
+
+# CHECK: mvcs 4095(%r2,%r15), 0, %r3
+0xdb 0x23 0xff 0xff 0x00 0x00
+
+# CHECK: mvcs 0(%r2,%r1), 0, %r3
+0xdb 0x23 0x10 0x00 0x00 0x00
+
+# CHECK: mvcs 0(%r2,%r15), 0, %r3
+0xdb 0x23 0xf0 0x00 0x00 0x00
+
+# CHECK: mvcsk 0, 0
+0xe5 0x0e 0x00 0x00 0x00 0x00
+
+# CHECK: mvcsk 0(%r1), 0(%r2)
+0xe5 0x0e 0x10 0x00 0x20 0x00
+
+# CHECK: mvcsk 160(%r1), 320(%r15)
+0xe5 0x0e 0x10 0xa0 0xf1 0x40
+
+# CHECK: mvcsk 0(%r1), 4095
+0xe5 0x0e 0x10 0x00 0x0f 0xff
+
+# CHECK: mvcsk 0(%r1), 4095(%r2)
+0xe5 0x0e 0x10 0x00 0x2f 0xff
+
+# CHECK: mvcsk 0(%r1), 4095(%r15)
+0xe5 0x0e 0x10 0x00 0xff 0xff
+
+# CHECK: mvcsk 0(%r1), 0
+0xe5 0x0e 0x10 0x00 0x00 0x00
+
+# CHECK: mvcsk 0(%r15), 0
+0xe5 0x0e 0xf0 0x00 0x00 0x00
+
+# CHECK: mvcsk 4095(%r1), 0
+0xe5 0x0e 0x1f 0xff 0x00 0x00
+
+# CHECK: mvcsk 4095(%r15), 0
+0xe5 0x0e 0xff 0xff 0x00 0x00
+
# CHECK: mvghi 0, 0
0xe5 0x48 0x00 0x00 0x00 0x00
@@ -11613,6 +12573,18 @@
# CHECK: mvo 0(1), 0(16,%r15)
0xf1 0x0f 0x00 0x00 0xf0 0x00
+# CHECK: mvpg %r0, %r0
+0xb2 0x54 0x00 0x00
+
+# CHECK: mvpg %r0, %r15
+0xb2 0x54 0x00 0x0f
+
+# CHECK: mvpg %r15, %r0
+0xb2 0x54 0x00 0xf0
+
+# CHECK: mvpg %r7, %r8
+0xb2 0x54 0x00 0x78
+
# CHECK: mvst %r0, %r0
0xb2 0x55 0x00 0x00
@@ -12543,9 +13515,33 @@
# CHECK: pack 0(1), 0(16,%r15)
0xf2 0x0f 0x00 0x00 0xf0 0x00
+# CHECK: palb
+0xb2 0x48 0x00 0x00
+
+# CHECK: pc 0
+0xb2 0x18 0x00 0x00
+
+# CHECK: pc 0(%r1)
+0xb2 0x18 0x10 0x00
+
+# CHECK: pc 0(%r15)
+0xb2 0x18 0xf0 0x00
+
+# CHECK: pc 4095
+0xb2 0x18 0x0f 0xff
+
+# CHECK: pc 4095(%r1)
+0xb2 0x18 0x1f 0xff
+
+# CHECK: pc 4095(%r15)
+0xb2 0x18 0xff 0xff
+
# CHECK: pcc
0xb9 0x2c 0x00 0x00
+# CHECK: pckmo
+0xb9 0x28 0x00 0x00
+
# CHECK: pfd 0, -524288
0xe3 0x00 0x00 0x00 0x80 0x36
@@ -12576,9 +13572,54 @@
# CHECK: pfd 15, 0
0xe3 0xf0 0x00 0x00 0x00 0x36
+# CHECK: pfmf %r0, %r0
+0xb9 0xaf 0x00 0x00
+
+# CHECK: pfmf %r0, %r15
+0xb9 0xaf 0x00 0x0f
+
+# CHECK: pfmf %r15, %r0
+0xb9 0xaf 0x00 0xf0
+
+# CHECK: pfmf %r7, %r8
+0xb9 0xaf 0x00 0x78
+
+# CHECK: pfmf %r15, %r15
+0xb9 0xaf 0x00 0xff
+
# CHECK: pfpo
0x01 0x0a
+# CHECK: pgin %r0, %r0
+0xb2 0x2e 0x00 0x00
+
+# CHECK: pgin %r0, %r15
+0xb2 0x2e 0x00 0x0f
+
+# CHECK: pgin %r15, %r0
+0xb2 0x2e 0x00 0xf0
+
+# CHECK: pgin %r7, %r8
+0xb2 0x2e 0x00 0x78
+
+# CHECK: pgin %r15, %r15
+0xb2 0x2e 0x00 0xff
+
+# CHECK: pgout %r0, %r0
+0xb2 0x2f 0x00 0x00
+
+# CHECK: pgout %r0, %r15
+0xb2 0x2f 0x00 0x0f
+
+# CHECK: pgout %r15, %r0
+0xb2 0x2f 0x00 0xf0
+
+# CHECK: pgout %r7, %r8
+0xb2 0x2f 0x00 0x78
+
+# CHECK: pgout %r15, %r15
+0xb2 0x2f 0x00 0xff
+
# CHECK: pka 0, 0(1)
0xe9 0x00 0x00 0x00 0x00 0x00
@@ -12702,6 +13743,45 @@
# CHECK: pr
0x01 0x01
+# CHECK: pt %r0, %r0
+0xb2 0x28 0x00 0x00
+
+# CHECK: pt %r0, %r15
+0xb2 0x28 0x00 0x0f
+
+# CHECK: pt %r15, %r0
+0xb2 0x28 0x00 0xf0
+
+# CHECK: pt %r7, %r8
+0xb2 0x28 0x00 0x78
+
+# CHECK: ptf %r0
+0xb9 0xa2 0x00 0x00
+
+# CHECK: ptf %r1
+0xb9 0xa2 0x00 0x10
+
+# CHECK: ptf %r15
+0xb9 0xa2 0x00 0xf0
+
+# CHECK: ptff
+0x01 0x04
+
+# CHECK: pti %r0, %r0
+0xb9 0x9e 0x00 0x00
+
+# CHECK: pti %r0, %r15
+0xb9 0x9e 0x00 0x0f
+
+# CHECK: pti %r15, %r0
+0xb9 0x9e 0x00 0xf0
+
+# CHECK: pti %r7, %r8
+0xb9 0x9e 0x00 0x78
+
+# CHECK: ptlb
+0xb2 0x0d 0x00 0x00
+
# CHECK: qadtr %f0, %f0, %f0, 0
0xb3 0xf5 0x00 0x00
@@ -12738,6 +13818,45 @@
# CHECK: qaxtr %f13, %f0, %f0, 0
0xb3 0xfd 0x00 0xd0
+# CHECK: qctri 0
+0xb2 0x8e 0x00 0x00
+
+# CHECK: qctri 0(%r1)
+0xb2 0x8e 0x10 0x00
+
+# CHECK: qctri 0(%r15)
+0xb2 0x8e 0xf0 0x00
+
+# CHECK: qctri 4095
+0xb2 0x8e 0x0f 0xff
+
+# CHECK: qctri 4095(%r1)
+0xb2 0x8e 0x1f 0xff
+
+# CHECK: qctri 4095(%r15)
+0xb2 0x8e 0xff 0xff
+
+# CHECK: qsi 0
+0xb2 0x86 0x00 0x00
+
+# CHECK: qsi 0(%r1)
+0xb2 0x86 0x10 0x00
+
+# CHECK: qsi 0(%r15)
+0xb2 0x86 0xf0 0x00
+
+# CHECK: qsi 4095
+0xb2 0x86 0x0f 0xff
+
+# CHECK: qsi 4095(%r1)
+0xb2 0x86 0x1f 0xff
+
+# CHECK: qsi 4095(%r15)
+0xb2 0x86 0xff 0xff
+
+# CHECK: rchp
+0xb2 0x3b 0x00 0x00
+
# CHECK: risbg %r0, %r0, 0, 0, 0
0xec 0x00 0x00 0x00 0x00 0x55
@@ -12936,6 +14055,54 @@
# CHECK: rosbg %r4, %r5, 6, 7, 8
0xec 0x45 0x06 0x07 0x08 0x56
+# CHECK: rp 0
+0xb2 0x77 0x00 0x00
+
+# CHECK: rp 0(%r1)
+0xb2 0x77 0x10 0x00
+
+# CHECK: rp 0(%r15)
+0xb2 0x77 0xf0 0x00
+
+# CHECK: rp 4095
+0xb2 0x77 0x0f 0xff
+
+# CHECK: rp 4095(%r1)
+0xb2 0x77 0x1f 0xff
+
+# CHECK: rp 4095(%r15)
+0xb2 0x77 0xff 0xff
+
+# CHECK: rrbe %r0, %r0
+0xb2 0x2a 0x00 0x00
+
+# CHECK: rrbe %r0, %r15
+0xb2 0x2a 0x00 0x0f
+
+# CHECK: rrbe %r15, %r0
+0xb2 0x2a 0x00 0xf0
+
+# CHECK: rrbe %r7, %r8
+0xb2 0x2a 0x00 0x78
+
+# CHECK: rrbe %r15, %r15
+0xb2 0x2a 0x00 0xff
+
+# CHECK: rrbm %r0, %r0
+0xb9 0xae 0x00 0x00
+
+# CHECK: rrbm %r0, %r15
+0xb9 0xae 0x00 0x0f
+
+# CHECK: rrbm %r15, %r0
+0xb9 0xae 0x00 0xf0
+
+# CHECK: rrbm %r7, %r8
+0xb9 0xae 0x00 0x78
+
+# CHECK: rrbm %r15, %r15
+0xb9 0xae 0x00 0xff
+
# CHECK: rrdtr %f0, %f0, %f0, 0
0xb3 0xf7 0x00 0x00
@@ -12972,6 +14139,9 @@
# CHECK: rrxtr %f13, %f0, %f0, 0
0xb3 0xff 0x00 0xd0
+# CHECK: rsch
+0xb2 0x38 0x00 0x00
+
# CHECK: rxsbg %r0, %r0, 0, 0, 0
0xec 0x00 0x00 0x00 0x00 0x57
@@ -13014,6 +14184,45 @@
# CHECK: s %r15, 0
0x5b 0xf0 0x00 0x00
+# CHECK: sac 0
+0xb2 0x19 0x00 0x00
+
+# CHECK: sac 0(%r1)
+0xb2 0x19 0x10 0x00
+
+# CHECK: sac 0(%r15)
+0xb2 0x19 0xf0 0x00
+
+# CHECK: sac 4095
+0xb2 0x19 0x0f 0xff
+
+# CHECK: sac 4095(%r1)
+0xb2 0x19 0x1f 0xff
+
+# CHECK: sac 4095(%r15)
+0xb2 0x19 0xff 0xff
+
+# CHECK: sacf 0
+0xb2 0x79 0x00 0x00
+
+# CHECK: sacf 0(%r1)
+0xb2 0x79 0x10 0x00
+
+# CHECK: sacf 0(%r15)
+0xb2 0x79 0xf0 0x00
+
+# CHECK: sacf 4095
+0xb2 0x79 0x0f 0xff
+
+# CHECK: sacf 4095(%r1)
+0xb2 0x79 0x1f 0xff
+
+# CHECK: sacf 4095(%r15)
+0xb2 0x79 0xff 0xff
+
+# CHECK: sal
+0xb2 0x37 0x00 0x00
+
# CHECK: sam24
0x01 0x0c
@@ -13038,6 +14247,60 @@
# CHECK: sar %a15, %r15
0xb2 0x4e 0x00 0xff
+# CHECK: scctr %r0, %r0
+0xb2 0xe0 0x00 0x00
+
+# CHECK: scctr %r0, %r15
+0xb2 0xe0 0x00 0x0f
+
+# CHECK: scctr %r15, %r0
+0xb2 0xe0 0x00 0xf0
+
+# CHECK: scctr %r7, %r8
+0xb2 0xe0 0x00 0x78
+
+# CHECK: schm
+0xb2 0x3c 0x00 0x00
+
+# CHECK: sck 0
+0xb2 0x04 0x00 0x00
+
+# CHECK: sck 0(%r1)
+0xb2 0x04 0x10 0x00
+
+# CHECK: sck 0(%r15)
+0xb2 0x04 0xf0 0x00
+
+# CHECK: sck 4095
+0xb2 0x04 0x0f 0xff
+
+# CHECK: sck 4095(%r1)
+0xb2 0x04 0x1f 0xff
+
+# CHECK: sck 4095(%r15)
+0xb2 0x04 0xff 0xff
+
+# CHECK: sckc 0
+0xb2 0x06 0x00 0x00
+
+# CHECK: sckc 0(%r1)
+0xb2 0x06 0x10 0x00
+
+# CHECK: sckc 0(%r15)
+0xb2 0x06 0xf0 0x00
+
+# CHECK: sckc 4095
+0xb2 0x06 0x0f 0xff
+
+# CHECK: sckc 4095(%r1)
+0xb2 0x06 0x1f 0xff
+
+# CHECK: sckc 4095(%r15)
+0xb2 0x06 0xff 0xff
+
+# CHECK: sckpf
+0x01 0x07
+
# CHECK: sd %f0, 0
0x6b 0x00 0x00 0x00
@@ -13332,6 +14595,36 @@
# CHECK: sh %r15, 0
0x4b 0xf0 0x00 0x00
+# CHECK: shhhr %r0, %r0, %r0
+0xb9 0xc9 0x00 0x00
+
+# CHECK: shhhr %r0, %r0, %r15
+0xb9 0xc9 0xf0 0x00
+
+# CHECK: shhhr %r0, %r15, %r0
+0xb9 0xc9 0x00 0x0f
+
+# CHECK: shhhr %r15, %r0, %r0
+0xb9 0xc9 0x00 0xf0
+
+# CHECK: shhhr %r7, %r8, %r9
+0xb9 0xc9 0x90 0x78
+
+# CHECK: shhlr %r0, %r0, %r0
+0xb9 0xd9 0x00 0x00
+
+# CHECK: shhlr %r0, %r0, %r15
+0xb9 0xd9 0xf0 0x00
+
+# CHECK: shhlr %r0, %r15, %r0
+0xb9 0xd9 0x00 0x0f
+
+# CHECK: shhlr %r15, %r0, %r0
+0xb9 0xd9 0x00 0xf0
+
+# CHECK: shhlr %r7, %r8, %r9
+0xb9 0xd9 0x90 0x78
+
# CHECK: shy %r0, -524288
0xe3 0x00 0x00 0x00 0x80 0x7b
@@ -13362,6 +14655,72 @@
# CHECK: shy %r15, 0
0xe3 0xf0 0x00 0x00 0x00 0x7b
+# CHECK: sie 0
+0xb2 0x14 0x00 0x00
+
+# CHECK: sie 0(%r1)
+0xb2 0x14 0x10 0x00
+
+# CHECK: sie 0(%r15)
+0xb2 0x14 0xf0 0x00
+
+# CHECK: sie 4095
+0xb2 0x14 0x0f 0xff
+
+# CHECK: sie 4095(%r1)
+0xb2 0x14 0x1f 0xff
+
+# CHECK: sie 4095(%r15)
+0xb2 0x14 0xff 0xff
+
+# CHECK: siga 0
+0xb2 0x74 0x00 0x00
+
+# CHECK: siga 0(%r1)
+0xb2 0x74 0x10 0x00
+
+# CHECK: siga 0(%r15)
+0xb2 0x74 0xf0 0x00
+
+# CHECK: siga 4095
+0xb2 0x74 0x0f 0xff
+
+# CHECK: siga 4095(%r1)
+0xb2 0x74 0x1f 0xff
+
+# CHECK: siga 4095(%r15)
+0xb2 0x74 0xff 0xff
+
+# CHECK: sigp %r0, %r0, 0
+0xae 0x00 0x00 0x00
+
+# CHECK: sigp %r0, %r15, 0
+0xae 0x0f 0x00 0x00
+
+# CHECK: sigp %r14, %r15, 0
+0xae 0xef 0x00 0x00
+
+# CHECK: sigp %r15, %r15, 0
+0xae 0xff 0x00 0x00
+
+# CHECK: sigp %r0, %r0, 4095
+0xae 0x00 0x0f 0xff
+
+# CHECK: sigp %r0, %r0, 1
+0xae 0x00 0x00 0x01
+
+# CHECK: sigp %r0, %r0, 0(%r1)
+0xae 0x00 0x10 0x00
+
+# CHECK: sigp %r0, %r0, 0(%r15)
+0xae 0x00 0xf0 0x00
+
+# CHECK: sigp %r0, %r0, 4095(%r1)
+0xae 0x00 0x1f 0xff
+
+# CHECK: sigp %r0, %r0, 4095(%r15)
+0xae 0x00 0xff 0xff
+
# CHECK: sl %r0, 0
0x5f 0x00 0x00 0x00
@@ -13746,6 +15105,36 @@
# CHECK: slgrk %r2, %r3, %r4
0xb9 0xeb 0x40 0x23
+# CHECK: slhhhr %r0, %r0, %r0
+0xb9 0xcb 0x00 0x00
+
+# CHECK: slhhhr %r0, %r0, %r15
+0xb9 0xcb 0xf0 0x00
+
+# CHECK: slhhhr %r0, %r15, %r0
+0xb9 0xcb 0x00 0x0f
+
+# CHECK: slhhhr %r15, %r0, %r0
+0xb9 0xcb 0x00 0xf0
+
+# CHECK: slhhhr %r7, %r8, %r9
+0xb9 0xcb 0x90 0x78
+
+# CHECK: slhhlr %r0, %r0, %r0
+0xb9 0xdb 0x00 0x00
+
+# CHECK: slhhlr %r0, %r0, %r15
+0xb9 0xdb 0xf0 0x00
+
+# CHECK: slhhlr %r0, %r15, %r0
+0xb9 0xdb 0x00 0x0f
+
+# CHECK: slhhlr %r15, %r0, %r0
+0xb9 0xdb 0x00 0xf0
+
+# CHECK: slhhlr %r7, %r8, %r9
+0xb9 0xdb 0x90 0x78
+
# CHECK: sll %r0, 0
0x89 0x00 0x00 0x00
@@ -13959,6 +15348,36 @@
# CHECK: sp 0(1), 0(16,%r15)
0xfb 0x0f 0x00 0x00 0xf0 0x00
+# CHECK: spctr %r0, %r0
+0xb2 0xe1 0x00 0x00
+
+# CHECK: spctr %r0, %r15
+0xb2 0xe1 0x00 0x0f
+
+# CHECK: spctr %r15, %r0
+0xb2 0xe1 0x00 0xf0
+
+# CHECK: spctr %r7, %r8
+0xb2 0xe1 0x00 0x78
+
+# CHECK: spka 0
+0xb2 0x0a 0x00 0x00
+
+# CHECK: spka 0(%r1)
+0xb2 0x0a 0x10 0x00
+
+# CHECK: spka 0(%r15)
+0xb2 0x0a 0xf0 0x00
+
+# CHECK: spka 4095
+0xb2 0x0a 0x0f 0xff
+
+# CHECK: spka 4095(%r1)
+0xb2 0x0a 0x1f 0xff
+
+# CHECK: spka 4095(%r15)
+0xb2 0x0a 0xff 0xff
+
# CHECK: spm %r0
0x04 0x00
@@ -13968,6 +15387,42 @@
# CHECK: spm %r15
0x04 0xf0
+# CHECK: spt 0
+0xb2 0x08 0x00 0x00
+
+# CHECK: spt 0(%r1)
+0xb2 0x08 0x10 0x00
+
+# CHECK: spt 0(%r15)
+0xb2 0x08 0xf0 0x00
+
+# CHECK: spt 4095
+0xb2 0x08 0x0f 0xff
+
+# CHECK: spt 4095(%r1)
+0xb2 0x08 0x1f 0xff
+
+# CHECK: spt 4095(%r15)
+0xb2 0x08 0xff 0xff
+
+# CHECK: spx 0
+0xb2 0x10 0x00 0x00
+
+# CHECK: spx 0(%r1)
+0xb2 0x10 0x10 0x00
+
+# CHECK: spx 0(%r15)
+0xb2 0x10 0xf0 0x00
+
+# CHECK: spx 4095
+0xb2 0x10 0x0f 0xff
+
+# CHECK: spx 4095(%r1)
+0xb2 0x10 0x1f 0xff
+
+# CHECK: spx 4095(%r15)
+0xb2 0x10 0xff 0xff
+
# CHECK: sqd %f0, 0
0xed 0x00 0x00 0x00 0x00 0x35
@@ -14553,6 +16008,75 @@
# CHECK: srxt %f13, %f13, 0
0xed 0xd0 0x00 0x00 0xd0 0x49
+# CHECK: ssar %r0
+0xb2 0x25 0x00 0x00
+
+# CHECK: ssar %r1
+0xb2 0x25 0x00 0x10
+
+# CHECK: ssar %r15
+0xb2 0x25 0x00 0xf0
+
+# CHECK: ssair %r0
+0xb9 0x9f 0x00 0x00
+
+# CHECK: ssair %r1
+0xb9 0x9f 0x00 0x10
+
+# CHECK: ssair %r15
+0xb9 0x9f 0x00 0xf0
+
+# CHECK: ssch 0
+0xb2 0x33 0x00 0x00
+
+# CHECK: ssch 0(%r1)
+0xb2 0x33 0x10 0x00
+
+# CHECK: ssch 0(%r15)
+0xb2 0x33 0xf0 0x00
+
+# CHECK: ssch 4095
+0xb2 0x33 0x0f 0xff
+
+# CHECK: ssch 4095(%r1)
+0xb2 0x33 0x1f 0xff
+
+# CHECK: ssch 4095(%r15)
+0xb2 0x33 0xff 0xff
+
+# CHECK: sske %r0, %r0
+0xb2 0x2b 0x00 0x00
+
+# CHECK: sske %r0, %r15
+0xb2 0x2b 0x00 0x0f
+
+# CHECK: sske %r15, %r0
+0xb2 0x2b 0x00 0xf0
+
+# CHECK: sske %r0, %r0, 15
+0xb2 0x2b 0xf0 0x00
+
+# CHECK: sske %r4, %r6, 7
+0xb2 0x2b 0x70 0x46
+
+# CHECK: ssm 0
+0x80 0x00 0x00 0x00
+
+# CHECK: ssm 0(%r1)
+0x80 0x00 0x10 0x00
+
+# CHECK: ssm 0(%r15)
+0x80 0x00 0xf0 0x00
+
+# CHECK: ssm 4095
+0x80 0x00 0x0f 0xff
+
+# CHECK: ssm 4095(%r1)
+0x80 0x00 0x1f 0xff
+
+# CHECK: ssm 4095(%r15)
+0x80 0x00 0xff 0xff
+
# CHECK: st %r0, 0
0x50 0x00 0x00 0x00
@@ -14643,6 +16167,24 @@
# CHECK: stamy %a0, %a0, 524287(%r15)
0xeb 0x00 0xff 0xff 0x7f 0x9b
+# CHECK: stap 0
+0xb2 0x12 0x00 0x00
+
+# CHECK: stap 0(%r1)
+0xb2 0x12 0x10 0x00
+
+# CHECK: stap 0(%r15)
+0xb2 0x12 0xf0 0x00
+
+# CHECK: stap 4095
+0xb2 0x12 0x0f 0xff
+
+# CHECK: stap 4095(%r1)
+0xb2 0x12 0x1f 0xff
+
+# CHECK: stap 4095(%r15)
+0xb2 0x12 0xff 0xff
+
# CHECK: stc %r0, 0
0x42 0x00 0x00 0x00
@@ -14712,6 +16254,24 @@
# CHECK: stck 4095(%r15)
0xb2 0x05 0xff 0xff
+# CHECK: stckc 0
+0xb2 0x07 0x00 0x00
+
+# CHECK: stckc 0(%r1)
+0xb2 0x07 0x10 0x00
+
+# CHECK: stckc 0(%r15)
+0xb2 0x07 0xf0 0x00
+
+# CHECK: stckc 4095
+0xb2 0x07 0x0f 0xff
+
+# CHECK: stckc 4095(%r1)
+0xb2 0x07 0x1f 0xff
+
+# CHECK: stckc 4095(%r15)
+0xb2 0x07 0xff 0xff
+
# CHECK: stcke 0
0xb2 0x78 0x00 0x00
@@ -14829,6 +16389,111 @@
# CHECK: stcmy %r15, 0, 0
0xeb 0xf0 0x00 0x00 0x00 0x2d
+# CHECK: stcps 0
+0xb2 0x3a 0x00 0x00
+
+# CHECK: stcps 0(%r1)
+0xb2 0x3a 0x10 0x00
+
+# CHECK: stcps 0(%r15)
+0xb2 0x3a 0xf0 0x00
+
+# CHECK: stcps 4095
+0xb2 0x3a 0x0f 0xff
+
+# CHECK: stcps 4095(%r1)
+0xb2 0x3a 0x1f 0xff
+
+# CHECK: stcps 4095(%r15)
+0xb2 0x3a 0xff 0xff
+
+# CHECK: stcrw 0
+0xb2 0x39 0x00 0x00
+
+# CHECK: stcrw 0(%r1)
+0xb2 0x39 0x10 0x00
+
+# CHECK: stcrw 0(%r15)
+0xb2 0x39 0xf0 0x00
+
+# CHECK: stcrw 4095
+0xb2 0x39 0x0f 0xff
+
+# CHECK: stcrw 4095(%r1)
+0xb2 0x39 0x1f 0xff
+
+# CHECK: stcrw 4095(%r15)
+0xb2 0x39 0xff 0xff
+
+# CHECK: stctg %c0, %c0, 0
+0xeb 0x00 0x00 0x00 0x00 0x25
+
+# CHECK: stctg %c0, %c15, 0
+0xeb 0x0f 0x00 0x00 0x00 0x25
+
+# CHECK: stctg %c14, %c15, 0
+0xeb 0xef 0x00 0x00 0x00 0x25
+
+# CHECK: stctg %c15, %c15, 0
+0xeb 0xff 0x00 0x00 0x00 0x25
+
+# CHECK: stctg %c0, %c0, -524288
+0xeb 0x00 0x00 0x00 0x80 0x25
+
+# CHECK: stctg %c0, %c0, -1
+0xeb 0x00 0x0f 0xff 0xff 0x25
+
+# CHECK: stctg %c0, %c0, 0
+0xeb 0x00 0x00 0x00 0x00 0x25
+
+# CHECK: stctg %c0, %c0, 1
+0xeb 0x00 0x00 0x01 0x00 0x25
+
+# CHECK: stctg %c0, %c0, 524287
+0xeb 0x00 0x0f 0xff 0x7f 0x25
+
+# CHECK: stctg %c0, %c0, 0(%r1)
+0xeb 0x00 0x10 0x00 0x00 0x25
+
+# CHECK: stctg %c0, %c0, 0(%r15)
+0xeb 0x00 0xf0 0x00 0x00 0x25
+
+# CHECK: stctg %c0, %c0, 524287(%r1)
+0xeb 0x00 0x1f 0xff 0x7f 0x25
+
+# CHECK: stctg %c0, %c0, 524287(%r15)
+0xeb 0x00 0xff 0xff 0x7f 0x25
+
+# CHECK: stctl %c0, %c0, 0
+0xb6 0x00 0x00 0x00
+
+# CHECK: stctl %c0, %c15, 0
+0xb6 0x0f 0x00 0x00
+
+# CHECK: stctl %c14, %c15, 0
+0xb6 0xef 0x00 0x00
+
+# CHECK: stctl %c15, %c15, 0
+0xb6 0xff 0x00 0x00
+
+# CHECK: stctl %c0, %c0, 4095
+0xb6 0x00 0x0f 0xff
+
+# CHECK: stctl %c0, %c0, 1
+0xb6 0x00 0x00 0x01
+
+# CHECK: stctl %c0, %c0, 0(%r1)
+0xb6 0x00 0x10 0x00
+
+# CHECK: stctl %c0, %c0, 0(%r15)
+0xb6 0x00 0xf0 0x00
+
+# CHECK: stctl %c0, %c0, 4095(%r1)
+0xb6 0x00 0x1f 0xff
+
+# CHECK: stctl %c0, %c0, 4095(%r15)
+0xb6 0x00 0xff 0xff
+
# CHECK: stcy %r0, -524288
0xe3 0x00 0x00 0x00 0x80 0x72
@@ -14991,6 +16656,24 @@
# CHECK: stfh %r15, 0
0xe3 0xf0 0x00 0x00 0x00 0xcb
+# CHECK: stfl 0
+0xb2 0xb1 0x00 0x00
+
+# CHECK: stfl 0(%r1)
+0xb2 0xb1 0x10 0x00
+
+# CHECK: stfl 0(%r15)
+0xb2 0xb1 0xf0 0x00
+
+# CHECK: stfl 4095
+0xb2 0xb1 0x0f 0xff
+
+# CHECK: stfl 4095(%r1)
+0xb2 0xb1 0x1f 0xff
+
+# CHECK: stfl 4095(%r15)
+0xb2 0xb1 0xff 0xff
+
# CHECK: stfle 0
0xb2 0xb0 0x00 0x00
@@ -15138,6 +16821,24 @@
# CHECK: sthy %r15, 0
0xe3 0xf0 0x00 0x00 0x00 0x70
+# CHECK: stidp 0
+0xb2 0x02 0x00 0x00
+
+# CHECK: stidp 0(%r1)
+0xb2 0x02 0x10 0x00
+
+# CHECK: stidp 0(%r15)
+0xb2 0x02 0xf0 0x00
+
+# CHECK: stidp 4095
+0xb2 0x02 0x0f 0xff
+
+# CHECK: stidp 4095(%r1)
+0xb2 0x02 0x1f 0xff
+
+# CHECK: stidp 4095(%r15)
+0xb2 0x02 0xff 0xff
+
# CHECK: stm %r0, %r0, 0
0x90 0x00 0x00 0x00
@@ -15285,6 +16986,27 @@
# CHECK: stmy %r0, %r0, 524287(%r15)
0xeb 0x00 0xff 0xff 0x7f 0x90
+# CHECK: stnsm 0, 0
+0xac 0x00 0x00 0x00
+
+# CHECK: stnsm 4095, 0
+0xac 0x00 0x0f 0xff
+
+# CHECK: stnsm 0, 255
+0xac 0xff 0x00 0x00
+
+# CHECK: stnsm 0(%r1), 42
+0xac 0x2a 0x10 0x00
+
+# CHECK: stnsm 0(%r15), 42
+0xac 0x2a 0xf0 0x00
+
+# CHECK: stnsm 4095(%r1), 42
+0xac 0x2a 0x1f 0xff
+
+# CHECK: stnsm 4095(%r15), 42
+0xac 0x2a 0xff 0xff
+
# CHECK: stoc %r1, 2(%r3), 0
0xeb 0x10 0x30 0x02 0x00 0xf3
@@ -15381,6 +17103,27 @@
# CHECK: stocg %r1, 2(%r3), 15
0xeb 0x1f 0x30 0x02 0x00 0xe3
+# CHECK: stosm 0, 0
+0xad 0x00 0x00 0x00
+
+# CHECK: stosm 4095, 0
+0xad 0x00 0x0f 0xff
+
+# CHECK: stosm 0, 255
+0xad 0xff 0x00 0x00
+
+# CHECK: stosm 0(%r1), 42
+0xad 0x2a 0x10 0x00
+
+# CHECK: stosm 0(%r15), 42
+0xad 0x2a 0xf0 0x00
+
+# CHECK: stosm 4095(%r1), 42
+0xad 0x2a 0x1f 0xff
+
+# CHECK: stosm 4095(%r15), 42
+0xad 0x2a 0xff 0xff
+
# CHECK: stpq %r0, -524288
0xe3 0x00 0x00 0x00 0x80 0x8e
@@ -15411,6 +17154,42 @@
# CHECK: stpq %r14, 0
0xe3 0xe0 0x00 0x00 0x00 0x8e
+# CHECK: stpt 0
+0xb2 0x09 0x00 0x00
+
+# CHECK: stpt 0(%r1)
+0xb2 0x09 0x10 0x00
+
+# CHECK: stpt 0(%r15)
+0xb2 0x09 0xf0 0x00
+
+# CHECK: stpt 4095
+0xb2 0x09 0x0f 0xff
+
+# CHECK: stpt 4095(%r1)
+0xb2 0x09 0x1f 0xff
+
+# CHECK: stpt 4095(%r15)
+0xb2 0x09 0xff 0xff
+
+# CHECK: stpx 0
+0xb2 0x11 0x00 0x00
+
+# CHECK: stpx 0(%r1)
+0xb2 0x11 0x10 0x00
+
+# CHECK: stpx 0(%r15)
+0xb2 0x11 0xf0 0x00
+
+# CHECK: stpx 4095
+0xb2 0x11 0x0f 0xff
+
+# CHECK: stpx 4095(%r1)
+0xb2 0x11 0x1f 0xff
+
+# CHECK: stpx 4095(%r15)
+0xb2 0x11 0xff 0xff
+
# CHECK: strag 0, 0
0xe5 0x02 0x00 0x00 0x00 0x00
@@ -15519,6 +17298,66 @@
# CHECK: strvh %r15, 0
0xe3 0xf0 0x00 0x00 0x00 0x3f
+# CHECK: stsch 0
+0xb2 0x34 0x00 0x00
+
+# CHECK: stsch 0(%r1)
+0xb2 0x34 0x10 0x00
+
+# CHECK: stsch 0(%r15)
+0xb2 0x34 0xf0 0x00
+
+# CHECK: stsch 4095
+0xb2 0x34 0x0f 0xff
+
+# CHECK: stsch 4095(%r1)
+0xb2 0x34 0x1f 0xff
+
+# CHECK: stsch 4095(%r15)
+0xb2 0x34 0xff 0xff
+
+# CHECK: stsi 0
+0xb2 0x7d 0x00 0x00
+
+# CHECK: stsi 0(%r1)
+0xb2 0x7d 0x10 0x00
+
+# CHECK: stsi 0(%r15)
+0xb2 0x7d 0xf0 0x00
+
+# CHECK: stsi 4095
+0xb2 0x7d 0x0f 0xff
+
+# CHECK: stsi 4095(%r1)
+0xb2 0x7d 0x1f 0xff
+
+# CHECK: stsi 4095(%r15)
+0xb2 0x7d 0xff 0xff
+
+# CHECK: stura %r0, %r0
+0xb2 0x46 0x00 0x00
+
+# CHECK: stura %r0, %r15
+0xb2 0x46 0x00 0x0f
+
+# CHECK: stura %r15, %r0
+0xb2 0x46 0x00 0xf0
+
+# CHECK: stura %r7, %r8
+0xb2 0x46 0x00 0x78
+
+# CHECK: sturg %r0, %r0
+0xb9 0x25 0x00 0x00
+
+# CHECK: sturg %r0, %r15
+0xb9 0x25 0x00 0x0f
+
+# CHECK: sturg %r15, %r0
+0xb9 0x25 0x00 0xf0
+
+# CHECK: sturg %r7, %r8
+0xb9 0x25 0x00 0x78
+
# CHECK: sty %r0, -524288
0xe3 0x00 0x00 0x00 0x80 0x50
@@ -15735,6 +17574,33 @@
# CHECK: tam
0x01 0x0b
+# CHECK: tar %a0, %r0
+0xb2 0x4c 0x00 0x00
+
+# CHECK: tar %a0, %r15
+0xb2 0x4c 0x00 0x0f
+
+# CHECK: tar %a15, %r0
+0xb2 0x4c 0x00 0xf0
+
+# CHECK: tar %a7, %r8
+0xb2 0x4c 0x00 0x78
+
+# CHECK: tb %r0, %r0
+0xb2 0x2c 0x00 0x00
+
+# CHECK: tb %r0, %r15
+0xb2 0x2c 0x00 0x0f
+
+# CHECK: tb %r15, %r0
+0xb2 0x2c 0x00 0xf0
+
+# CHECK: tb %r7, %r8
+0xb2 0x2c 0x00 0x78
+
+# CHECK: tb %r15, %r15
+0xb2 0x2c 0x00 0xff
+
# CHECK: tbdr %f0, 0, %f0
0xb3 0x51 0x00 0x00
@@ -16167,6 +18033,54 @@
# CHECK: tp 0(16,%r15)
0xeb 0xf0 0xf0 0x00 0x00 0xc0
+# CHECK: tpi 0
+0xb2 0x36 0x00 0x00
+
+# CHECK: tpi 0(%r1)
+0xb2 0x36 0x10 0x00
+
+# CHECK: tpi 0(%r15)
+0xb2 0x36 0xf0 0x00
+
+# CHECK: tpi 4095
+0xb2 0x36 0x0f 0xff
+
+# CHECK: tpi 4095(%r1)
+0xb2 0x36 0x1f 0xff
+
+# CHECK: tpi 4095(%r15)
+0xb2 0x36 0xff 0xff
+
+# CHECK: tprot 0, 0
+0xe5 0x01 0x00 0x00 0x00 0x00
+
+# CHECK: tprot 0(%r1), 0(%r2)
+0xe5 0x01 0x10 0x00 0x20 0x00
+
+# CHECK: tprot 160(%r1), 320(%r15)
+0xe5 0x01 0x10 0xa0 0xf1 0x40
+
+# CHECK: tprot 0(%r1), 4095
+0xe5 0x01 0x10 0x00 0x0f 0xff
+
+# CHECK: tprot 0(%r1), 4095(%r2)
+0xe5 0x01 0x10 0x00 0x2f 0xff
+
+# CHECK: tprot 0(%r1), 4095(%r15)
+0xe5 0x01 0x10 0x00 0xff 0xff
+
+# CHECK: tprot 0(%r1), 0
+0xe5 0x01 0x10 0x00 0x00 0x00
+
+# CHECK: tprot 0(%r15), 0
+0xe5 0x01 0xf0 0x00 0x00 0x00
+
+# CHECK: tprot 4095(%r1), 0
+0xe5 0x01 0x1f 0xff 0x00 0x00
+
+# CHECK: tprot 4095(%r15), 0
+0xe5 0x01 0xff 0xff 0x00 0x00
+
# CHECK: tr 0(1), 0
0xdc 0x00 0x00 0x00 0x00 0x00
@@ -16203,6 +18117,96 @@
# CHECK: tr 0(256,%r15), 0
0xdc 0xff 0xf0 0x00 0x00 0x00
+# CHECK: trace %r0, %r0, 0
+0x99 0x00 0x00 0x00
+
+# CHECK: trace %r0, %r15, 0
+0x99 0x0f 0x00 0x00
+
+# CHECK: trace %r14, %r15, 0
+0x99 0xef 0x00 0x00
+
+# CHECK: trace %r15, %r15, 0
+0x99 0xff 0x00 0x00
+
+# CHECK: trace %r0, %r0, 4095
+0x99 0x00 0x0f 0xff
+
+# CHECK: trace %r0, %r0, 1
+0x99 0x00 0x00 0x01
+
+# CHECK: trace %r0, %r0, 0(%r1)
+0x99 0x00 0x10 0x00
+
+# CHECK: trace %r0, %r0, 0(%r15)
+0x99 0x00 0xf0 0x00
+
+# CHECK: trace %r0, %r0, 4095(%r1)
+0x99 0x00 0x1f 0xff
+
+# CHECK: trace %r0, %r0, 4095(%r15)
+0x99 0x00 0xff 0xff
+
+# CHECK: tracg %r0, %r0, 0
+0xeb 0x00 0x00 0x00 0x00 0x0f
+
+# CHECK: tracg %r0, %r15, 0
+0xeb 0x0f 0x00 0x00 0x00 0x0f
+
+# CHECK: tracg %r14, %r15, 0
+0xeb 0xef 0x00 0x00 0x00 0x0f
+
+# CHECK: tracg %r15, %r15, 0
+0xeb 0xff 0x00 0x00 0x00 0x0f
+
+# CHECK: tracg %r0, %r0, -524288
+0xeb 0x00 0x00 0x00 0x80 0x0f
+
+# CHECK: tracg %r0, %r0, -1
+0xeb 0x00 0x0f 0xff 0xff 0x0f
+
+# CHECK: tracg %r0, %r0, 0
+0xeb 0x00 0x00 0x00 0x00 0x0f
+
+# CHECK: tracg %r0, %r0, 1
+0xeb 0x00 0x00 0x01 0x00 0x0f
+
+# CHECK: tracg %r0, %r0, 524287
+0xeb 0x00 0x0f 0xff 0x7f 0x0f
+
+# CHECK: tracg %r0, %r0, 0(%r1)
+0xeb 0x00 0x10 0x00 0x00 0x0f
+
+# CHECK: tracg %r0, %r0, 0(%r15)
+0xeb 0x00 0xf0 0x00 0x00 0x0f
+
+# CHECK: tracg %r0, %r0, 524287(%r1)
+0xeb 0x00 0x1f 0xff 0x7f 0x0f
+
+# CHECK: tracg %r0, %r0, 524287(%r15)
+0xeb 0x00 0xff 0xff 0x7f 0x0f
+
+# CHECK: trap2
+0x01 0xff
+
+# CHECK: trap4 0
+0xb2 0xff 0x00 0x00
+
+# CHECK: trap4 0(%r1)
+0xb2 0xff 0x10 0x00
+
+# CHECK: trap4 0(%r15)
+0xb2 0xff 0xf0 0x00
+
+# CHECK: trap4 4095
+0xb2 0xff 0x0f 0xff
+
+# CHECK: trap4 4095(%r1)
+0xb2 0xff 0x1f 0xff
+
+# CHECK: trap4 4095(%r15)
+0xb2 0xff 0xff 0xff
+
# CHECK: tre %r0, %r0
0xb2 0xa5 0x00 0x00
@@ -16413,6 +18417,24 @@
# CHECK: ts 4095(%r15)
0x93 0x00 0xff 0xff
+# CHECK: tsch 0
+0xb2 0x35 0x00 0x00
+
+# CHECK: tsch 0(%r1)
+0xb2 0x35 0x10 0x00
+
+# CHECK: tsch 0(%r15)
+0xb2 0x35 0xf0 0x00
+
+# CHECK: tsch 4095
+0xb2 0x35 0x0f 0xff
+
+# CHECK: tsch 4095(%r1)
+0xb2 0x35 0x1f 0xff
+
+# CHECK: tsch 4095(%r15)
+0xb2 0x35 0xff 0xff
+
# CHECK: unpk 0(1), 0(1)
0xf3 0x00 0x00 0x00 0x00 0x00
@@ -16722,6 +18744,9 @@
# CHECK: xrk %r2, %r3, %r4
0xb9 0xf7 0x40 0x23
+# CHECK: xsch
+0xb2 0x76 0x00 0x00
+
# CHECK: xy %r0, -524288
0xe3 0x00 0x00 0x00 0x80 0x57
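As a reading aid for the block above (an inference from the byte patterns in this file, not something the patch states): the new three-register pairs all share one layout, a two-byte opcode, an R3/mask byte, then the R1 and R2 nibbles.

# Worked decode of one new pair, under that assumption:
#   0xb9 0xc8 0x90 0x78   ->   ahhhr %r7, %r8, %r9
#   b9c8 is the opcode; in 0x90 the high nibble 9 is R3; in 0x78, 7 is R1 and 8 is R2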
diff --git a/test/MC/Mips/macro-dla-bad.s b/test/MC/Mips/macro-dla-bad.s
new file mode 100644
index 000000000000..cd377f4557ca
--- /dev/null
+++ b/test/MC/Mips/macro-dla-bad.s
@@ -0,0 +1,21 @@
+# RUN: not llvm-mc %s -arch=mips64 -mcpu=mips3 -target-abi n64 2>&1 | \
+# RUN: FileCheck %s
+
+ .text
+ .option pic2
+ dla $5, symbol+0x8000
+ # CHECK: :[[@LINE-1]]:3: error: macro instruction uses large offset, which is not currently supported
+ dla $5, symbol-0x8001
+ # CHECK: :[[@LINE-1]]:3: error: macro instruction uses large offset, which is not currently supported
+ dla $5, symbol+0x8000($6)
+ # CHECK: :[[@LINE-1]]:3: error: macro instruction uses large offset, which is not currently supported
+ dla $5, symbol-0x8001($6)
+ # CHECK: :[[@LINE-1]]:3: error: macro instruction uses large offset, which is not currently supported
+ dla $25, symbol+0x8000
+ # CHECK: :[[@LINE-1]]:3: error: macro instruction uses large offset, which is not currently supported
+ dla $25, symbol-0x8001
+ # CHECK: :[[@LINE-1]]:3: error: macro instruction uses large offset, which is not currently supported
+ dla $25, symbol+0x8000($6)
+ # CHECK: :[[@LINE-1]]:3: error: macro instruction uses large offset, which is not currently supported
+ dla $25, symbol-0x8001($6)
+ # CHECK: :[[@LINE-1]]:3: error: macro instruction uses large offset, which is not currently supported
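A note on why exactly these offsets fail, assuming the expansion materializes the offset with daddiu as macro-dla-pic.s below shows: each rejected value sits one past the signed 16-bit immediate range, -0x8000 .. 0x7fff.

#   symbol+0x8000   ->    32768 >  32767   (one past the top of the range)
#   symbol-0x8001   ->   -32769 < -32768   (one past the bottom)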
diff --git a/test/MC/Mips/macro-dla-pic.s b/test/MC/Mips/macro-dla-pic.s
new file mode 100644
index 000000000000..ed5aa202618e
--- /dev/null
+++ b/test/MC/Mips/macro-dla-pic.s
@@ -0,0 +1,50 @@
+# RUN: llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips3 | \
+# RUN: FileCheck %s
+
+.option pic2
+dla $5, symbol # CHECK: ld $5, %got_disp(symbol)($gp) # encoding: [0xdf,0x85,A,A]
+ # CHECK: # fixup A - offset: 0, value: %got_disp(symbol), kind: fixup_Mips_GOT_DISP
+dla $5, symbol($6) # CHECK: ld $5, %got_disp(symbol)($gp) # encoding: [0xdf,0x85,A,A]
+ # CHECK: # fixup A - offset: 0, value: %got_disp(symbol), kind: fixup_Mips_GOT_DISP
+ # CHECK: daddu $5, $5, $6 # encoding: [0x00,0xa6,0x28,0x2d]
+dla $6, symbol($6) # CHECK: ld $1, %got_disp(symbol)($gp) # encoding: [0xdf,0x81,A,A]
+ # CHECK: # fixup A - offset: 0, value: %got_disp(symbol), kind: fixup_Mips_GOT_DISP
+ # CHECK: daddu $6, $1, $6 # encoding: [0x00,0x26,0x30,0x2d]
+dla $5, symbol+8 # CHECK: ld $5, %got_disp(symbol)($gp) # encoding: [0xdf,0x85,A,A]
+ # CHECK: # fixup A - offset: 0, value: %got_disp(symbol), kind: fixup_Mips_GOT_DISP
+ # CHECK: daddiu $5, $5, 8 # encoding: [0x64,0xa5,0x00,0x08]
+dla $5, symbol+8($6) # CHECK: ld $5, %got_disp(symbol)($gp) # encoding: [0xdf,0x85,A,A]
+ # CHECK: # fixup A - offset: 0, value: %got_disp(symbol), kind: fixup_Mips_GOT_DISP
+ # CHECK: daddiu $5, $5, 8 # encoding: [0x64,0xa5,0x00,0x08]
+ # CHECK: daddu $5, $5, $6 # encoding: [0x00,0xa6,0x28,0x2d]
+dla $6, symbol+8($6) # CHECK: ld $1, %got_disp(symbol)($gp) # encoding: [0xdf,0x81,A,A]
+ # CHECK: # fixup A - offset: 0, value: %got_disp(symbol), kind: fixup_Mips_GOT_DISP
+ # CHECK: daddiu $1, $1, 8 # encoding: [0x64,0x21,0x00,0x08]
+ # CHECK: daddu $6, $1, $6 # encoding: [0x00,0x26,0x30,0x2d]
+dla $5, 1f # CHECK: ld $5, %got_disp(.Ltmp0)($gp) # encoding: [0xdf,0x85,A,A]
+ # CHECK: # fixup A - offset: 0, value: %got_disp(.Ltmp0), kind: fixup_Mips_GOT_DISP
+1:
+
+# PIC expansions involving $25 are special.
+dla $25, symbol # CHECK: ld $25, %call16(symbol)($gp) # encoding: [0xdf,0x99,A,A]
+ # CHECK: # fixup A - offset: 0, value: %call16(symbol), kind: fixup_Mips_CALL16
+dla $25, symbol($6) # CHECK: ld $25, %got_disp(symbol)($gp) # encoding: [0xdf,0x99,A,A]
+ # CHECK: # fixup A - offset: 0, value: %got_disp(symbol), kind: fixup_Mips_GOT_DISP
+ # CHECK: daddu $25, $25, $6 # encoding: [0x03,0x26,0xc8,0x2d]
+dla $25, symbol($25) # CHECK: ld $1, %got_disp(symbol)($gp) # encoding: [0xdf,0x81,A,A]
+ # CHECK: # fixup A - offset: 0, value: %got_disp(symbol), kind: fixup_Mips_GOT_DISP
+ # CHECK: daddu $25, $1, $25 # encoding: [0x00,0x39,0xc8,0x2d]
+dla $25, symbol+8 # CHECK: ld $25, %got_disp(symbol)($gp) # encoding: [0xdf,0x99,A,A]
+ # CHECK: # fixup A - offset: 0, value: %got_disp(symbol), kind: fixup_Mips_GOT_DISP
+ # CHECK: daddiu $25, $25, 8 # encoding: [0x67,0x39,0x00,0x08]
+dla $25, symbol+8($6) # CHECK: ld $25, %got_disp(symbol)($gp) # encoding: [0xdf,0x99,A,A]
+ # CHECK: # fixup A - offset: 0, value: %got_disp(symbol), kind: fixup_Mips_GOT_DISP
+ # CHECK: daddiu $25, $25, 8 # encoding: [0x67,0x39,0x00,0x08]
+ # CHECK: daddu $25, $25, $6 # encoding: [0x03,0x26,0xc8,0x2d]
+dla $25, symbol+8($25)# CHECK: ld $1, %got_disp(symbol)($gp) # encoding: [0xdf,0x81,A,A]
+ # CHECK: # fixup A - offset: 0, value: %got_disp(symbol), kind: fixup_Mips_GOT_DISP
+ # CHECK: daddiu $1, $1, 8 # encoding: [0x64,0x21,0x00,0x08]
+ # CHECK: daddu $25, $1, $25 # encoding: [0x00,0x39,0xc8,0x2d]
+dla $25, 1f # CHECK: ld $25, %got_disp(.Ltmp1)($gp) # encoding: [0xdf,0x99,A,A]
+ # CHECK: # fixup A - offset: 0, value: %got_disp(.Ltmp1), kind: fixup_Mips_GOT_DISP
+1:
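Collapsing the CHECK lines above, the PIC expansion of dla has one general shape, with $25 special-cased because the n64 PIC calling convention passes the callee address in $25/$t9, which lets a bare "dla $25, symbol" use the %call16 GOT slot instead of %got_disp. A summary of the shape as exercised above, not an exhaustive specification:

#   dla $d, sym           ->  ld $d, %got_disp(sym)($gp)
#   dla $d, sym+off       ->  ld, then daddiu $d, $d, off
#   dla $d, sym+off($rs)  ->  ld, then daddiu, then daddu $d, $d, $rs
#   (when $d is also the index register, $1/$at serves as the temporary)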
diff --git a/test/MC/Mips/micromips64r6/valid.s b/test/MC/Mips/micromips64r6/valid.s
index d757384344d4..3ead62fc6169 100644
--- a/test/MC/Mips/micromips64r6/valid.s
+++ b/test/MC/Mips/micromips64r6/valid.s
@@ -24,6 +24,10 @@ a:
dextm $9, $6, 3, 39 # CHECK: dextm $9, $6, 3, 39 # encoding: [0x59,0x26,0x30,0xe4]
dextu $9, $6, 35, 7 # CHECK: dextu $9, $6, 35, 7 # encoding: [0x59,0x26,0x30,0xd4]
dalign $4, $2, $3, 5 # CHECK: dalign $4, $2, $3, 5 # encoding: [0x58,0x43,0x25,0x1c]
+ dsll $4, $5 # CHECK: dsllv $4, $4, $5 # encoding: [0x58,0x85,0x20,0x10]
+ dsll $4, $4, $5 # CHECK: dsllv $4, $4, $5 # encoding: [0x58,0x85,0x20,0x10]
+ dsrl $4, $5 # CHECK: dsrlv $4, $4, $5 # encoding: [0x58,0x85,0x20,0x50]
+ dsrl $4, $4, $5 # CHECK: dsrlv $4, $4, $5 # encoding: [0x58,0x85,0x20,0x50]
ldpc $2, 16 # CHECK: ldpc $2, 16 # encoding: [0x78,0x58,0x00,0x02]
lw $3, 32($gp) # CHECK: lw $3, 32($gp) # encoding: [0x65,0x88]
lw $3, 24($sp) # CHECK: lw $3, 24($sp) # encoding: [0x48,0x66]
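This hunk and the matching mips*/valid.s hunks below all exercise the same two-operand alias, summarized here from their CHECK lines: the destination doubles as the first source, and a register shift amount selects the variable-shift opcode.

#   dsll $4, $5       ->  dsllv $4, $4, $5        (register amount: variable shift)
#   dsrl $4, $5       ->  dsrlv $4, $4, $5
#   dsll $zero, 18    ->  dsll $zero, $zero, 18   (immediate amount: same opcode)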
diff --git a/test/MC/Mips/mips3/valid.s b/test/MC/Mips/mips3/valid.s
index 1f0e22dbe3f9..694952b3a2ba 100644
--- a/test/MC/Mips/mips3/valid.s
+++ b/test/MC/Mips/mips3/valid.s
@@ -83,6 +83,8 @@ a:
dsll $zero,18 # CHECK: dsll $zero, $zero, 18 # encoding: [0x00,0x00,0x04,0xb8]
dsll $zero,$s4,18 # CHECK: dsll $zero, $20, 18 # encoding: [0x00,0x14,0x04,0xb8]
dsll $zero,$s4,$12 # CHECK: dsllv $zero, $20, $12 # encoding: [0x01,0x94,0x00,0x14]
+ dsll $4, $5 # CHECK: dsllv $4, $4, $5 # encoding: [0x00,0xa4,0x20,0x14]
+ dsll $4, $5, $5 # CHECK: dsllv $4, $5, $5 # encoding: [0x00,0xa5,0x20,0x14]
dsll32 $zero,18 # CHECK: dsll32 $zero, $zero, 18 # encoding: [0x00,0x00,0x04,0xbc]
dsll32 $zero,$zero,18 # CHECK: dsll32 $zero, $zero, 18 # encoding: [0x00,0x00,0x04,0xbc]
dsllv $zero,$s4,$12 # CHECK: dsllv $zero, $20, $12 # encoding: [0x01,0x94,0x00,0x14]
@@ -95,6 +97,8 @@ a:
dsrl $s3,23 # CHECK: dsrl $19, $19, 23 # encoding: [0x00,0x13,0x9d,0xfa]
dsrl $s3,$6,23 # CHECK: dsrl $19, $6, 23 # encoding: [0x00,0x06,0x9d,0xfa]
dsrl $s3,$6,$s4 # CHECK: dsrlv $19, $6, $20 # encoding: [0x02,0x86,0x98,0x16]
+ dsrl $4, $5 # CHECK: dsrlv $4, $4, $5 # encoding: [0x00,0xa4,0x20,0x16]
+ dsrl $4, $4, $5 # CHECK: dsrlv $4, $4, $5 # encoding: [0x00,0xa4,0x20,0x16]
dsrl32 $s3,23 # CHECK: dsrl32 $19, $19, 23 # encoding: [0x00,0x13,0x9d,0xfe]
dsrl32 $s3,$6,23 # CHECK: dsrl32 $19, $6, 23 # encoding: [0x00,0x06,0x9d,0xfe]
dsrlv $s3,$6,$s4 # CHECK: dsrlv $19, $6, $20 # encoding: [0x02,0x86,0x98,0x16]
diff --git a/test/MC/Mips/mips4/valid.s b/test/MC/Mips/mips4/valid.s
index 69cea599e748..68f269c1c8dd 100644
--- a/test/MC/Mips/mips4/valid.s
+++ b/test/MC/Mips/mips4/valid.s
@@ -115,6 +115,8 @@ a:
dsll $zero,18 # CHECK: dsll $zero, $zero, 18 # encoding: [0x00,0x00,0x04,0xb8]
dsll $zero,$s4,18 # CHECK: dsll $zero, $20, 18 # encoding: [0x00,0x14,0x04,0xb8]
dsll $zero,$s4,$12 # CHECK: dsllv $zero, $20, $12 # encoding: [0x01,0x94,0x00,0x14]
+ dsll $4, $5 # CHECK: dsllv $4, $4, $5 # encoding: [0x00,0xa4,0x20,0x14]
+ dsll $4, $5, $5 # CHECK: dsllv $4, $5, $5 # encoding: [0x00,0xa5,0x20,0x14]
dsll32 $zero,18 # CHECK: dsll32 $zero, $zero, 18 # encoding: [0x00,0x00,0x04,0xbc]
dsll32 $zero,$zero,18 # CHECK: dsll32 $zero, $zero, 18 # encoding: [0x00,0x00,0x04,0xbc]
dsllv $zero,$s4,$12 # CHECK: dsllv $zero, $20, $12 # encoding: [0x01,0x94,0x00,0x14]
@@ -127,6 +129,8 @@ a:
dsrl $s3,23 # CHECK: dsrl $19, $19, 23 # encoding: [0x00,0x13,0x9d,0xfa]
dsrl $s3,$6,23 # CHECK: dsrl $19, $6, 23 # encoding: [0x00,0x06,0x9d,0xfa]
dsrl $s3,$6,$s4 # CHECK: dsrlv $19, $6, $20 # encoding: [0x02,0x86,0x98,0x16]
+ dsrl $4, $5 # CHECK: dsrlv $4, $4, $5 # encoding: [0x00,0xa4,0x20,0x16]
+ dsrl $4, $4, $5 # CHECK: dsrlv $4, $4, $5 # encoding: [0x00,0xa4,0x20,0x16]
dsrl32 $s3,23 # CHECK: dsrl32 $19, $19, 23 # encoding: [0x00,0x13,0x9d,0xfe]
dsrl32 $s3,$6,23 # CHECK: dsrl32 $19, $6, 23 # encoding: [0x00,0x06,0x9d,0xfe]
dsrlv $s3,$6,$s4 # CHECK: dsrlv $19, $6, $20 # encoding: [0x02,0x86,0x98,0x16]
diff --git a/test/MC/Mips/mips5/valid.s b/test/MC/Mips/mips5/valid.s
index 85fdfb507aad..ad58a15ca8ce 100644
--- a/test/MC/Mips/mips5/valid.s
+++ b/test/MC/Mips/mips5/valid.s
@@ -115,6 +115,8 @@ a:
dsll $zero,18 # CHECK: dsll $zero, $zero, 18 # encoding: [0x00,0x00,0x04,0xb8]
dsll $zero,$s4,18 # CHECK: dsll $zero, $20, 18 # encoding: [0x00,0x14,0x04,0xb8]
dsll $zero,$s4,$12 # CHECK: dsllv $zero, $20, $12 # encoding: [0x01,0x94,0x00,0x14]
+ dsll $4, $5 # CHECK: dsllv $4, $4, $5 # encoding: [0x00,0xa4,0x20,0x14]
+ dsll $4, $5, $5 # CHECK: dsllv $4, $5, $5 # encoding: [0x00,0xa5,0x20,0x14]
dsll32 $zero,18 # CHECK: dsll32 $zero, $zero, 18 # encoding: [0x00,0x00,0x04,0xbc]
dsll32 $zero,$zero,18 # CHECK: dsll32 $zero, $zero, 18 # encoding: [0x00,0x00,0x04,0xbc]
dsllv $zero,$s4,$12 # CHECK: dsllv $zero, $20, $12 # encoding: [0x01,0x94,0x00,0x14]
@@ -127,6 +129,8 @@ a:
dsrl $s3,23 # CHECK: dsrl $19, $19, 23 # encoding: [0x00,0x13,0x9d,0xfa]
dsrl $s3,$6,23 # CHECK: dsrl $19, $6, 23 # encoding: [0x00,0x06,0x9d,0xfa]
dsrl $s3,$6,$s4 # CHECK: dsrlv $19, $6, $20 # encoding: [0x02,0x86,0x98,0x16]
+ dsrl $4, $5 # CHECK: dsrlv $4, $4, $5 # encoding: [0x00,0xa4,0x20,0x16]
+ dsrl $4, $4, $5 # CHECK: dsrlv $4, $4, $5 # encoding: [0x00,0xa4,0x20,0x16]
dsrl32 $s3,23 # CHECK: dsrl32 $19, $19, 23 # encoding: [0x00,0x13,0x9d,0xfe]
dsrl32 $s3,$6,23 # CHECK: dsrl32 $19, $6, 23 # encoding: [0x00,0x06,0x9d,0xfe]
dsrlv $s3,$6,$s4 # CHECK: dsrlv $19, $6, $20 # encoding: [0x02,0x86,0x98,0x16]
diff --git a/test/MC/Mips/mips64/valid.s b/test/MC/Mips/mips64/valid.s
index 716488df7b50..d40b784a789d 100644
--- a/test/MC/Mips/mips64/valid.s
+++ b/test/MC/Mips/mips64/valid.s
@@ -122,6 +122,8 @@ a:
dsll $zero,18 # CHECK: dsll $zero, $zero, 18 # encoding: [0x00,0x00,0x04,0xb8]
dsll $zero,$s4,18 # CHECK: dsll $zero, $20, 18 # encoding: [0x00,0x14,0x04,0xb8]
dsll $zero,$s4,$12 # CHECK: dsllv $zero, $20, $12 # encoding: [0x01,0x94,0x00,0x14]
+ dsll $4, $5 # CHECK: dsllv $4, $4, $5 # encoding: [0x00,0xa4,0x20,0x14]
+ dsll $4, $5, $5 # CHECK: dsllv $4, $5, $5 # encoding: [0x00,0xa5,0x20,0x14]
dsll32 $zero,18 # CHECK: dsll32 $zero, $zero, 18 # encoding: [0x00,0x00,0x04,0xbc]
dsll32 $zero,$zero,18 # CHECK: dsll32 $zero, $zero, 18 # encoding: [0x00,0x00,0x04,0xbc]
dsllv $zero,$s4,$12 # CHECK: dsllv $zero, $20, $12 # encoding: [0x01,0x94,0x00,0x14]
@@ -134,6 +136,8 @@ a:
dsrl $s3,23 # CHECK: dsrl $19, $19, 23 # encoding: [0x00,0x13,0x9d,0xfa]
dsrl $s3,$6,23 # CHECK: dsrl $19, $6, 23 # encoding: [0x00,0x06,0x9d,0xfa]
dsrl $s3,$6,$s4 # CHECK: dsrlv $19, $6, $20 # encoding: [0x02,0x86,0x98,0x16]
+ dsrl $4, $5 # CHECK: dsrlv $4, $4, $5 # encoding: [0x00,0xa4,0x20,0x16]
+ dsrl $4, $4, $5 # CHECK: dsrlv $4, $4, $5 # encoding: [0x00,0xa4,0x20,0x16]
dsrl32 $s3,23 # CHECK: dsrl32 $19, $19, 23 # encoding: [0x00,0x13,0x9d,0xfe]
dsrl32 $s3,$6,23 # CHECK: dsrl32 $19, $6, 23 # encoding: [0x00,0x06,0x9d,0xfe]
dsrlv $s3,$6,$s4 # CHECK: dsrlv $19, $6, $20 # encoding: [0x02,0x86,0x98,0x16]
diff --git a/test/MC/Mips/mips64r2/valid.s b/test/MC/Mips/mips64r2/valid.s
index 656b76c758eb..0826391fcaaf 100644
--- a/test/MC/Mips/mips64r2/valid.s
+++ b/test/MC/Mips/mips64r2/valid.s
@@ -131,6 +131,8 @@ a:
dsll $zero,18 # CHECK: dsll $zero, $zero, 18 # encoding: [0x00,0x00,0x04,0xb8]
dsll $zero,$s4,18 # CHECK: dsll $zero, $20, 18 # encoding: [0x00,0x14,0x04,0xb8]
dsll $zero,$s4,$12 # CHECK: dsllv $zero, $20, $12 # encoding: [0x01,0x94,0x00,0x14]
+ dsll $4, $5 # CHECK: dsllv $4, $4, $5 # encoding: [0x00,0xa4,0x20,0x14]
+ dsll $4, $5, $5 # CHECK: dsllv $4, $5, $5 # encoding: [0x00,0xa5,0x20,0x14]
dsll32 $zero,18 # CHECK: dsll32 $zero, $zero, 18 # encoding: [0x00,0x00,0x04,0xbc]
dsll32 $zero,$zero,18 # CHECK: dsll32 $zero, $zero, 18 # encoding: [0x00,0x00,0x04,0xbc]
dsllv $zero,$s4,$12 # CHECK: dsllv $zero, $20, $12 # encoding: [0x01,0x94,0x00,0x14]
@@ -143,6 +145,8 @@ a:
dsrl $s3,23 # CHECK: dsrl $19, $19, 23 # encoding: [0x00,0x13,0x9d,0xfa]
dsrl $s3,$6,23 # CHECK: dsrl $19, $6, 23 # encoding: [0x00,0x06,0x9d,0xfa]
dsrl $s3,$6,$s4 # CHECK: dsrlv $19, $6, $20 # encoding: [0x02,0x86,0x98,0x16]
+ dsrl $4, $5 # CHECK: dsrlv $4, $4, $5 # encoding: [0x00,0xa4,0x20,0x16]
+ dsrl $4, $4, $5 # CHECK: dsrlv $4, $4, $5 # encoding: [0x00,0xa4,0x20,0x16]
dsrl32 $s3,23 # CHECK: dsrl32 $19, $19, 23 # encoding: [0x00,0x13,0x9d,0xfe]
dsrl32 $s3,$6,23 # CHECK: dsrl32 $19, $6, 23 # encoding: [0x00,0x06,0x9d,0xfe]
dsrlv $s3,$6,$s4 # CHECK: dsrlv $19, $6, $20 # encoding: [0x02,0x86,0x98,0x16]
diff --git a/test/MC/Mips/mips64r3/valid.s b/test/MC/Mips/mips64r3/valid.s
index 52d44da8f56f..4bdef443d70b 100644
--- a/test/MC/Mips/mips64r3/valid.s
+++ b/test/MC/Mips/mips64r3/valid.s
@@ -131,6 +131,8 @@ a:
dsll $zero,18 # CHECK: dsll $zero, $zero, 18 # encoding: [0x00,0x00,0x04,0xb8]
dsll $zero,$s4,18 # CHECK: dsll $zero, $20, 18 # encoding: [0x00,0x14,0x04,0xb8]
dsll $zero,$s4,$12 # CHECK: dsllv $zero, $20, $12 # encoding: [0x01,0x94,0x00,0x14]
+ dsll $4, $5 # CHECK: dsllv $4, $4, $5 # encoding: [0x00,0xa4,0x20,0x14]
+ dsll $4, $5, $5 # CHECK: dsllv $4, $5, $5 # encoding: [0x00,0xa5,0x20,0x14]
dsll32 $zero,18 # CHECK: dsll32 $zero, $zero, 18 # encoding: [0x00,0x00,0x04,0xbc]
dsll32 $zero,$zero,18 # CHECK: dsll32 $zero, $zero, 18 # encoding: [0x00,0x00,0x04,0xbc]
dsllv $zero,$s4,$12 # CHECK: dsllv $zero, $20, $12 # encoding: [0x01,0x94,0x00,0x14]
@@ -143,6 +145,8 @@ a:
dsrl $s3,23 # CHECK: dsrl $19, $19, 23 # encoding: [0x00,0x13,0x9d,0xfa]
dsrl $s3,$6,23 # CHECK: dsrl $19, $6, 23 # encoding: [0x00,0x06,0x9d,0xfa]
dsrl $s3,$6,$s4 # CHECK: dsrlv $19, $6, $20 # encoding: [0x02,0x86,0x98,0x16]
+ dsrl $4, $5 # CHECK: dsrlv $4, $4, $5 # encoding: [0x00,0xa4,0x20,0x16]
+ dsrl $4, $4, $5 # CHECK: dsrlv $4, $4, $5 # encoding: [0x00,0xa4,0x20,0x16]
dsrl32 $s3,23 # CHECK: dsrl32 $19, $19, 23 # encoding: [0x00,0x13,0x9d,0xfe]
dsrl32 $s3,$6,23 # CHECK: dsrl32 $19, $6, 23 # encoding: [0x00,0x06,0x9d,0xfe]
dsrlv $s3,$6,$s4 # CHECK: dsrlv $19, $6, $20 # encoding: [0x02,0x86,0x98,0x16]
diff --git a/test/MC/Mips/mips64r5/valid.s b/test/MC/Mips/mips64r5/valid.s
index f400436b696f..4215f67daa0c 100644
--- a/test/MC/Mips/mips64r5/valid.s
+++ b/test/MC/Mips/mips64r5/valid.s
@@ -131,6 +131,8 @@ a:
dsll $zero,18 # CHECK: dsll $zero, $zero, 18 # encoding: [0x00,0x00,0x04,0xb8]
dsll $zero,$s4,18 # CHECK: dsll $zero, $20, 18 # encoding: [0x00,0x14,0x04,0xb8]
dsll $zero,$s4,$12 # CHECK: dsllv $zero, $20, $12 # encoding: [0x01,0x94,0x00,0x14]
+ dsll $4, $5 # CHECK: dsllv $4, $4, $5 # encoding: [0x00,0xa4,0x20,0x14]
+ dsll $4, $5, $5 # CHECK: dsllv $4, $5, $5 # encoding: [0x00,0xa5,0x20,0x14]
dsll32 $zero,18 # CHECK: dsll32 $zero, $zero, 18 # encoding: [0x00,0x00,0x04,0xbc]
dsll32 $zero,$zero,18 # CHECK: dsll32 $zero, $zero, 18 # encoding: [0x00,0x00,0x04,0xbc]
dsllv $zero,$s4,$12 # CHECK: dsllv $zero, $20, $12 # encoding: [0x01,0x94,0x00,0x14]
@@ -143,6 +145,8 @@ a:
dsrl $s3,23 # CHECK: dsrl $19, $19, 23 # encoding: [0x00,0x13,0x9d,0xfa]
dsrl $s3,$6,23 # CHECK: dsrl $19, $6, 23 # encoding: [0x00,0x06,0x9d,0xfa]
dsrl $s3,$6,$s4 # CHECK: dsrlv $19, $6, $20 # encoding: [0x02,0x86,0x98,0x16]
+ dsrl $4, $5 # CHECK: dsrlv $4, $4, $5 # encoding: [0x00,0xa4,0x20,0x16]
+ dsrl $4, $4, $5 # CHECK: dsrlv $4, $4, $5 # encoding: [0x00,0xa4,0x20,0x16]
dsrl32 $s3,23 # CHECK: dsrl32 $19, $19, 23 # encoding: [0x00,0x13,0x9d,0xfe]
dsrl32 $s3,$6,23 # CHECK: dsrl32 $19, $6, 23 # encoding: [0x00,0x06,0x9d,0xfe]
dsrlv $s3,$6,$s4 # CHECK: dsrlv $19, $6, $20 # encoding: [0x02,0x86,0x98,0x16]
diff --git a/test/MC/Mips/mips64r6/valid.s b/test/MC/Mips/mips64r6/valid.s
index a86b3c915e85..486738baa84d 100644
--- a/test/MC/Mips/mips64r6/valid.s
+++ b/test/MC/Mips/mips64r6/valid.s
@@ -125,9 +125,13 @@ a:
dmuhu $2,$3,$4 # CHECK: dmuhu $2, $3, $4 # encoding: [0x00,0x64,0x10,0xdd]
dmul $2,$3,$4 # CHECK: dmul $2, $3, $4 # encoding: [0x00,0x64,0x10,0x9c]
dmulu $2,$3,$4 # CHECK: dmulu $2, $3, $4 # encoding: [0x00,0x64,0x10,0x9d]
- dneg $2 # CHECK: dneg $2, $2 # encoding: [0x00,0x02,0x10,0x2e]
- dneg $2,$3 # CHECK: dneg $2, $3 # encoding: [0x00,0x03,0x10,0x2e]
- dnegu $2,$3 # CHECK: dnegu $2, $3 # encoding: [0x00,0x03,0x10,0x2f]
+ dneg $2 # CHECK: dneg $2, $2 # encoding: [0x00,0x02,0x10,0x2e]
+ dneg $2,$3 # CHECK: dneg $2, $3 # encoding: [0x00,0x03,0x10,0x2e]
+ dnegu $2,$3 # CHECK: dnegu $2, $3 # encoding: [0x00,0x03,0x10,0x2f]
+ dsll $4, $5 # CHECK: dsllv $4, $4, $5 # encoding: [0x00,0xa4,0x20,0x14]
+ dsll $4, $5, $5 # CHECK: dsllv $4, $5, $5 # encoding: [0x00,0xa5,0x20,0x14]
+ dsrl $4, $5 # CHECK: dsrlv $4, $4, $5 # encoding: [0x00,0xa4,0x20,0x16]
+ dsrl $4, $4, $5 # CHECK: dsrlv $4, $4, $5 # encoding: [0x00,0xa4,0x20,0x16]
dsubu $14,-4586 # CHECK: daddiu $14, $14, 4586 # encoding: [0x65,0xce,0x11,0xea]
dsubu $15,$11,5025 # CHECK: daddiu $15, $11, -5025 # encoding: [0x65,0x6f,0xec,0x5f]
dvp $4 # CHECK: dvp $4 # encoding: [0x41,0x64,0x00,0x24]
diff --git a/test/MC/SystemZ/insn-bad-z196.s b/test/MC/SystemZ/insn-bad-z196.s
index 78d50bca9746..33059529f74b 100644
--- a/test/MC/SystemZ/insn-bad-z196.s
+++ b/test/MC/SystemZ/insn-bad-z196.s
@@ -43,6 +43,22 @@
aih %r0, (1 << 31)
#CHECK: error: invalid operand
+#CHECK: alsih %r0, (-1 << 31) - 1
+#CHECK: error: invalid operand
+#CHECK: alsih %r0, (1 << 31)
+
+ alsih %r0, (-1 << 31) - 1
+ alsih %r0, (1 << 31)
+
+#CHECK: error: invalid operand
+#CHECK: alsihn %r0, (-1 << 31) - 1
+#CHECK: error: invalid operand
+#CHECK: alsihn %r0, (1 << 31)
+
+ alsihn %r0, (-1 << 31) - 1
+ alsihn %r0, (1 << 31)
+
+#CHECK: error: invalid operand
#CHECK: axtra %f0, %f0, %f0, -1
#CHECK: error: invalid operand
#CHECK: axtra %f0, %f0, %f0, 16
@@ -592,6 +608,11 @@
clih %r0, -1
clih %r0, (1 << 32)
+#CHECK: error: instruction requires: enhanced-dat-2
+#CHECK: crdte %r0, %r0, %r0, 0
+
+ crdte %r0, %r0, %r0, 0
+
#CHECK: error: invalid operand
#CHECK: cxfbra %f0, 0, %r0, -1
#CHECK: error: invalid operand
diff --git a/test/MC/SystemZ/insn-bad-zEC12.s b/test/MC/SystemZ/insn-bad-zEC12.s
index 80197a3c1ef1..552180a7c610 100644
--- a/test/MC/SystemZ/insn-bad-zEC12.s
+++ b/test/MC/SystemZ/insn-bad-zEC12.s
@@ -162,6 +162,20 @@
cpxt %f0, 0(1), 0
+#CHECK: error: invalid register pair
+#CHECK: crdte %r1, %r0, %r0, 0
+#CHECK: error: invalid register pair
+#CHECK: crdte %r0, %r0, %r1, 0
+#CHECK: error: invalid operand
+#CHECK: crdte %r0, %r0, %r0, -1
+#CHECK: error: invalid operand
+#CHECK: crdte %r0, %r0, %r0, 16
+
+ crdte %r1, %r0, %r0, 0
+ crdte %r0, %r0, %r1, 0
+ crdte %r0, %r0, %r0, -1
+ crdte %r0, %r0, %r0, 16
+
#CHECK: error: instruction requires: dfp-packed-conversion
#CHECK: cxpt %f0, 0(1), 0
diff --git a/test/MC/SystemZ/insn-bad.s b/test/MC/SystemZ/insn-bad.s
index 259ad05e5f4a..57c69f60361b 100644
--- a/test/MC/SystemZ/insn-bad.s
+++ b/test/MC/SystemZ/insn-bad.s
@@ -127,6 +127,16 @@
ah %r0, -1
ah %r0, 4096
+#CHECK: error: instruction requires: high-word
+#CHECK: ahhhr %r0, %r0, %r0
+
+ ahhhr %r0, %r0, %r0
+
+#CHECK: error: instruction requires: high-word
+#CHECK: ahhlr %r0, %r0, %r0
+
+ ahhlr %r0, %r0, %r0
+
#CHECK: error: invalid operand
#CHECK: ahi %r0, -32769
#CHECK: error: invalid operand
@@ -222,6 +232,16 @@
algrk %r2,%r3,%r4
+#CHECK: error: instruction requires: high-word
+#CHECK: alhhhr %r0, %r0, %r0
+
+ alhhhr %r0, %r0, %r0
+
+#CHECK: error: instruction requires: high-word
+#CHECK: alhhlr %r0, %r0, %r0
+
+ alhhlr %r0, %r0, %r0
+
#CHECK: error: instruction requires: distinct-ops
#CHECK: alhsik %r1, %r2, 3
@@ -266,6 +286,16 @@
alsi 0, -129
alsi 0, 128
+#CHECK: error: instruction requires: high-word
+#CHECK: alsih %r0, 0
+
+ alsih %r0, 0
+
+#CHECK: error: instruction requires: high-word
+#CHECK: alsihn %r0, 0
+
+ alsihn %r0, 0
+
#CHECK: error: invalid operand
#CHECK: aly %r0, -524289
#CHECK: error: invalid operand
@@ -1176,6 +1206,11 @@
chf %r0, 0
+#CHECK: error: instruction requires: high-word
+#CHECK: chhr %r0, %r0
+
+ chhr %r0, %r0
+
#CHECK: error: invalid operand
#CHECK: chhsi -1, 0
#CHECK: error: invalid operand
@@ -1204,6 +1239,11 @@
chi %r0, 32768
chi %r0, foo
+#CHECK: error: instruction requires: high-word
+#CHECK: chlr %r0, %r0
+
+ chlr %r0, %r0
+
#CHECK: error: offset out of range
#CHECK: chrl %r0, -0x1000000002
#CHECK: error: offset out of range
@@ -1628,6 +1668,11 @@
clhf %r0, 0
+#CHECK: error: instruction requires: high-word
+#CHECK: clhhr %r0, %r0
+
+ clhhr %r0, %r0
+
#CHECK: error: invalid operand
#CHECK: clhhsi -1, 0
#CHECK: error: invalid operand
@@ -1645,6 +1690,11 @@
clhhsi 0, -1
clhhsi 0, 65536
+#CHECK: error: instruction requires: high-word
+#CHECK: clhlr %r0, %r0
+
+ clhlr %r0, %r0
+
#CHECK: error: offset out of range
#CHECK: clhrl %r0, -0x1000000002
#CHECK: error: offset out of range
@@ -1957,6 +2007,16 @@
csg %r0, %r0, 524288
csg %r0, %r0, 0(%r1,%r2)
+#CHECK: error: invalid register pair
+#CHECK: csp %r1, %r0
+
+ csp %r1, %r0
+
+#CHECK: error: invalid register pair
+#CHECK: cspg %r1, %r0
+
+ cspg %r1, %r0
+
#CHECK: error: invalid use of indexed addressing
#CHECK: csst 160(%r1,%r15), 160(%r15), %r2
#CHECK: error: invalid operand
@@ -2325,6 +2385,17 @@
deb %f0, 4096
#CHECK: error: invalid operand
+#CHECK: diag %r0, %r0, -1
+#CHECK: error: invalid operand
+#CHECK: diag %r0, %r0, 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: diag %r0, %r0, 0(%r1,%r2)
+
+ diag %r0, %r0, -1
+ diag %r0, %r0, 4096
+ diag %r0, %r0, 0(%r1,%r2)
+
+#CHECK: error: invalid operand
#CHECK: didbr %f0, %f0, %f0, -1
#CHECK: error: invalid operand
#CHECK: didbr %f0, %f0, %f0, 16
@@ -2619,6 +2690,11 @@
eextr %f2, %f0
#CHECK: error: invalid register pair
+#CHECK: esta %r1, %r0
+
+ esta %r1, %r0
+
+#CHECK: error: invalid register pair
#CHECK: esxtr %f0, %f2
#CHECK: error: invalid register pair
#CHECK: esxtr %f2, %f0
@@ -2784,6 +2860,14 @@
icy %r0, -524289
icy %r0, 524288
+#CHECK: error: invalid operand
+#CHECK: idte %r0, %r0, %r0, -1
+#CHECK: error: invalid operand
+#CHECK: idte %r0, %r0, %r0, 16
+
+ idte %r0, %r0, %r0, -1
+ idte %r0, %r0, %r0, 16
+
#CHECK: error: invalid register pair
#CHECK: iextr %f0, %f0, %f2
#CHECK: error: invalid register pair
@@ -2844,6 +2928,14 @@
iill %r0, 0x10000
#CHECK: error: invalid operand
+#CHECK: ipte %r0, %r0, %r0, -1
+#CHECK: error: invalid operand
+#CHECK: ipte %r0, %r0, %r0, 16
+
+ ipte %r0, %r0, %r0, -1
+ ipte %r0, %r0, %r0, 16
+
+#CHECK: error: invalid operand
#CHECK: kdb %f0, -1
#CHECK: error: invalid operand
#CHECK: kdb %f0, 4096
@@ -3018,6 +3110,23 @@
larl %r0, 1
larl %r0, 0x100000000
+#CHECK: error: invalid use of indexed addressing
+#CHECK: lasp 160(%r1,%r15),160(%r15)
+#CHECK: error: invalid operand
+#CHECK: lasp -1(%r1),160(%r15)
+#CHECK: error: invalid operand
+#CHECK: lasp 4096(%r1),160(%r15)
+#CHECK: error: invalid operand
+#CHECK: lasp 0(%r1),-1(%r15)
+#CHECK: error: invalid operand
+#CHECK: lasp 0(%r1),4096(%r15)
+
+ lasp 160(%r1,%r15),160(%r15)
+ lasp -1(%r1),160(%r15)
+ lasp 4096(%r1),160(%r15)
+ lasp 0(%r1),-1(%r15)
+ lasp 0(%r1),4096(%r15)
+
#CHECK: error: instruction requires: interlocked-access1
#CHECK: lax %r1, %r2, 100(%r3)
lax %r1, %r2, 100(%r3)
@@ -3047,6 +3156,39 @@
lbh %r0, 0
+#CHECK: error: invalid operand
+#CHECK: lcctl -1
+#CHECK: error: invalid operand
+#CHECK: lcctl 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: lcctl 0(%r1,%r2)
+
+ lcctl -1
+ lcctl 4096
+ lcctl 0(%r1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: lctl %c0, %c0, -1
+#CHECK: error: invalid operand
+#CHECK: lctl %c0, %c0, 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: lctl %c0, %c0, 0(%r1,%r2)
+
+ lctl %c0, %c0, -1
+ lctl %c0, %c0, 4096
+ lctl %c0, %c0, 0(%r1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: lctlg %c0, %c0, -524289
+#CHECK: error: invalid operand
+#CHECK: lctlg %c0, %c0, 524288
+#CHECK: error: invalid use of indexed addressing
+#CHECK: lctlg %c0, %c0, 0(%r1,%r2)
+
+ lctlg %c0, %c0, -524289
+ lctlg %c0, %c0, 524288
+ lctlg %c0, %c0, 0(%r1,%r2)
+
#CHECK: error: invalid register pair
#CHECK: lcxbr %f0, %f2
#CHECK: error: invalid register pair
@@ -3574,6 +3716,17 @@
lnxr %f0, %f2
lnxr %f2, %f0
+#CHECK: error: invalid operand
+#CHECK: lpctl -1
+#CHECK: error: invalid operand
+#CHECK: lpctl 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: lpctl 0(%r1,%r2)
+
+ lpctl -1
+ lpctl 4096
+ lpctl 0(%r1,%r2)
+
#CHECK: error: instruction requires: interlocked-access1
#CHECK: lpd %r0, 0, 0
lpd %r0, 0, 0
@@ -3582,6 +3735,17 @@
#CHECK: lpdg %r0, 0, 0
lpdg %r0, 0, 0
+#CHECK: error: invalid operand
+#CHECK: lpp -1
+#CHECK: error: invalid operand
+#CHECK: lpp 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: lpp 0(%r1,%r2)
+
+ lpp -1
+ lpp 4096
+ lpp 0(%r1,%r2)
+
#CHECK: error: invalid register pair
#CHECK: lpq %r1, 0
#CHECK: error: invalid operand
@@ -3593,6 +3757,36 @@
lpq %r0, -524289
lpq %r0, 524288
+#CHECK: error: invalid operand
+#CHECK: lptea %r0, %r0, %r0, -1
+#CHECK: error: invalid operand
+#CHECK: lptea %r0, %r0, %r0, 16
+
+ lptea %r0, %r0, %r0, -1
+ lptea %r0, %r0, %r0, 16
+
+#CHECK: error: invalid operand
+#CHECK: lpsw -1
+#CHECK: error: invalid operand
+#CHECK: lpsw 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: lpsw 0(%r1,%r2)
+
+ lpsw -1
+ lpsw 4096
+ lpsw 0(%r1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: lpswe -1
+#CHECK: error: invalid operand
+#CHECK: lpswe 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: lpswe 0(%r1,%r2)
+
+ lpswe -1
+ lpswe 4096
+ lpswe 0(%r1,%r2)
+
#CHECK: error: invalid register pair
#CHECK: lpxbr %f0, %f2
#CHECK: error: invalid register pair
@@ -3609,6 +3803,30 @@
lpxr %f0, %f2
lpxr %f2, %f0
+#CHECK: error: invalid operand
+#CHECK: lra %r0, -1
+#CHECK: error: invalid operand
+#CHECK: lra %r0, 4096
+
+ lra %r0, -1
+ lra %r0, 4096
+
+#CHECK: error: invalid operand
+#CHECK: lrag %r0, -524289
+#CHECK: error: invalid operand
+#CHECK: lrag %r0, 524288
+
+ lrag %r0, -524289
+ lrag %r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: lray %r0, -524289
+#CHECK: error: invalid operand
+#CHECK: lray %r0, 524288
+
+ lray %r0, -524289
+ lray %r0, 524288
+
#CHECK: error: invalid register pair
#CHECK: lrdr %f0, %f2
@@ -3645,6 +3863,17 @@
lrvg %r0, 524288
#CHECK: error: invalid operand
+#CHECK: lsctl -1
+#CHECK: error: invalid operand
+#CHECK: lsctl 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: lsctl 0(%r1,%r2)
+
+ lsctl -1
+ lsctl 4096
+ lsctl 0(%r1,%r2)
+
+#CHECK: error: invalid operand
#CHECK: lt %r0, -524289
#CHECK: error: invalid operand
#CHECK: lt %r0, 524288
@@ -4089,6 +4318,17 @@
ms %r0, 4096
#CHECK: error: invalid operand
+#CHECK: msch -1
+#CHECK: error: invalid operand
+#CHECK: msch 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: msch 0(%r1,%r2)
+
+ msch -1
+ msch 4096
+ msch 0(%r1,%r2)
+
+#CHECK: error: invalid operand
#CHECK: msd %f0, %f0, -1
#CHECK: error: invalid operand
#CHECK: msd %f0, %f0, 4096
@@ -4152,6 +4392,11 @@
msgfi %r0, (-1 << 31) - 1
msgfi %r0, (1 << 31)
+#CHECK: error: invalid register pair
+#CHECK: msta %r1
+
+ msta %r1
+
#CHECK: error: invalid operand
#CHECK: msy %r0, -524289
#CHECK: error: invalid operand
@@ -4204,6 +4449,23 @@
mvc 0(1,%r2), 0(%r1,%r2)
mvc 0(-), 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: mvcdk 160(%r1,%r15),160(%r15)
+#CHECK: error: invalid operand
+#CHECK: mvcdk -1(%r1),160(%r15)
+#CHECK: error: invalid operand
+#CHECK: mvcdk 4096(%r1),160(%r15)
+#CHECK: error: invalid operand
+#CHECK: mvcdk 0(%r1),-1(%r15)
+#CHECK: error: invalid operand
+#CHECK: mvcdk 0(%r1),4096(%r15)
+
+ mvcdk 160(%r1,%r15),160(%r15)
+ mvcdk -1(%r1),160(%r15)
+ mvcdk 4096(%r1),160(%r15)
+ mvcdk 0(%r1),-1(%r15)
+ mvcdk 0(%r1),4096(%r15)
+
#CHECK: error: missing length in address
#CHECK: mvcin 0, 0
#CHECK: error: missing length in address
@@ -4313,6 +4575,98 @@
mvclu %r0, %r0, -524289
mvclu %r0, %r0, 524288
+#CHECK: error: invalid use of indexed addressing
+#CHECK: mvcos 160(%r1,%r15), 160(%r15), %r2
+#CHECK: error: invalid operand
+#CHECK: mvcos -1(%r1), 160(%r15), %r2
+#CHECK: error: invalid operand
+#CHECK: mvcos 4096(%r1), 160(%r15), %r2
+#CHECK: error: invalid operand
+#CHECK: mvcos 0(%r1), -1(%r15), %r2
+#CHECK: error: invalid operand
+#CHECK: mvcos 0(%r1), 4096(%r15), %r2
+
+ mvcos 160(%r1,%r15), 160(%r15), %r2
+ mvcos -1(%r1), 160(%r15), %r2
+ mvcos 4096(%r1), 160(%r15), %r2
+ mvcos 0(%r1), -1(%r15), %r2
+ mvcos 0(%r1), 4096(%r15), %r2
+
+#CHECK: error: invalid use of length addressing
+#CHECK: mvcp 0(%r1,%r1), 0(2,%r1), %r3
+#CHECK: error: invalid operand
+#CHECK: mvcp -1(%r1,%r1), 0(%r1), %r3
+#CHECK: error: invalid operand
+#CHECK: mvcp 4096(%r1,%r1), 0(%r1), %r3
+#CHECK: error: invalid operand
+#CHECK: mvcp 0(%r1,%r1), -1(%r1), %r3
+#CHECK: error: invalid operand
+#CHECK: mvcp 0(%r1,%r1), 4096(%r1), %r3
+#CHECK: error: %r0 used in an address
+#CHECK: mvcp 0(%r1,%r0), 0(%r1), %r3
+#CHECK: error: %r0 used in an address
+#CHECK: mvcp 0(%r1,%r1), 0(%r0), %r3
+#CHECK: error: invalid use of indexed addressing
+#CHECK: mvcp 0(%r1,%r2), 0(%r1,%r2), %r3
+#CHECK: error: unknown token in expression
+#CHECK: mvcp 0(-), 0, %r3
+
+ mvcp 0(%r1,%r1), 0(2,%r1), %r3
+ mvcp -1(%r1,%r1), 0(%r1), %r3
+ mvcp 4096(%r1,%r1), 0(%r1), %r3
+ mvcp 0(%r1,%r1), -1(%r1), %r3
+ mvcp 0(%r1,%r1), 4096(%r1), %r3
+ mvcp 0(%r1,%r0), 0(%r1), %r3
+ mvcp 0(%r1,%r1), 0(%r0), %r3
+ mvcp 0(%r1,%r2), 0(%r1,%r2), %r3
+ mvcp 0(-), 0, %r3
+
+#CHECK: error: invalid use of length addressing
+#CHECK: mvcs 0(%r1,%r1), 0(2,%r1), %r3
+#CHECK: error: invalid operand
+#CHECK: mvcs -1(%r1,%r1), 0(%r1), %r3
+#CHECK: error: invalid operand
+#CHECK: mvcs 4096(%r1,%r1), 0(%r1), %r3
+#CHECK: error: invalid operand
+#CHECK: mvcs 0(%r1,%r1), -1(%r1), %r3
+#CHECK: error: invalid operand
+#CHECK: mvcs 0(%r1,%r1), 4096(%r1), %r3
+#CHECK: error: %r0 used in an address
+#CHECK: mvcs 0(%r1,%r0), 0(%r1), %r3
+#CHECK: error: %r0 used in an address
+#CHECK: mvcs 0(%r1,%r1), 0(%r0), %r3
+#CHECK: error: invalid use of indexed addressing
+#CHECK: mvcs 0(%r1,%r2), 0(%r1,%r2), %r3
+#CHECK: error: unknown token in expression
+#CHECK: mvcs 0(-), 0, %r3
+
+ mvcs 0(%r1,%r1), 0(2,%r1), %r3
+ mvcs -1(%r1,%r1), 0(%r1), %r3
+ mvcs 4096(%r1,%r1), 0(%r1), %r3
+ mvcs 0(%r1,%r1), -1(%r1), %r3
+ mvcs 0(%r1,%r1), 4096(%r1), %r3
+ mvcs 0(%r1,%r0), 0(%r1), %r3
+ mvcs 0(%r1,%r1), 0(%r0), %r3
+ mvcs 0(%r1,%r2), 0(%r1,%r2), %r3
+ mvcs 0(-), 0, %r3
+
+#CHECK: error: invalid use of indexed addressing
+#CHECK: mvcsk 160(%r1,%r15),160(%r15)
+#CHECK: error: invalid operand
+#CHECK: mvcsk -1(%r1),160(%r15)
+#CHECK: error: invalid operand
+#CHECK: mvcsk 4096(%r1),160(%r15)
+#CHECK: error: invalid operand
+#CHECK: mvcsk 0(%r1),-1(%r15)
+#CHECK: error: invalid operand
+#CHECK: mvcsk 0(%r1),4096(%r15)
+
+ mvcsk 160(%r1,%r15),160(%r15)
+ mvcsk -1(%r1),160(%r15)
+ mvcsk 4096(%r1),160(%r15)
+ mvcsk 0(%r1),-1(%r15)
+ mvcsk 0(%r1),4096(%r15)
+
#CHECK: error: invalid operand
#CHECK: mvghi -1, 0
#CHECK: error: invalid operand
@@ -5008,11 +5362,27 @@
pack 0(1,%r2), 0(%r1,%r2)
pack 0(-), 0(1)
+#CHECK: error: invalid operand
+#CHECK: pc -1
+#CHECK: error: invalid operand
+#CHECK: pc 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: pc 0(%r1,%r2)
+
+ pc -1
+ pc 4096
+ pc 0(%r1,%r2)
+
#CHECK: error: instruction requires: message-security-assist-extension4
#CHECK: pcc
pcc
+#CHECK: error: instruction requires: message-security-assist-extension3
+#CHECK: pckmo
+
+ pckmo
+
#CHECK: error: invalid operand
#CHECK: pfd -1, 0
#CHECK: error: invalid operand
@@ -5187,6 +5557,28 @@
qaxtr %f2, %f0, %f0, 0
#CHECK: error: invalid operand
+#CHECK: qctri -1
+#CHECK: error: invalid operand
+#CHECK: qctri 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: qctri 0(%r1,%r2)
+
+ qctri -1
+ qctri 4096
+ qctri 0(%r1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: qsi -1
+#CHECK: error: invalid operand
+#CHECK: qsi 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: qsi 0(%r1,%r2)
+
+ qsi -1
+ qsi 4096
+ qsi 0(%r1,%r2)
+
+#CHECK: error: invalid operand
#CHECK: risbg %r0,%r0,0,0,-1
#CHECK: error: invalid operand
#CHECK: risbg %r0,%r0,0,0,64
@@ -5285,6 +5677,22 @@
rosbg %r0,%r0,256,0,0
#CHECK: error: invalid operand
+#CHECK: rp -1
+#CHECK: error: invalid operand
+#CHECK: rp 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: rp 0(%r1,%r2)
+
+ rp -1
+ rp 4096
+ rp 0(%r1,%r2)
+
+#CHECK: error: instruction requires: reset-reference-bits-multiple
+#CHECK: rrbm %r0, %r0
+
+ rrbm %r0, %r0
+
+#CHECK: error: invalid operand
#CHECK: rrdtr %f0, %f0, %f0, -1
#CHECK: error: invalid operand
#CHECK: rrdtr %f0, %f0, %f0, 16
@@ -5338,6 +5746,50 @@
s %r0, 4096
#CHECK: error: invalid operand
+#CHECK: sac -1
+#CHECK: error: invalid operand
+#CHECK: sac 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: sac 0(%r1,%r2)
+
+ sac -1
+ sac 4096
+ sac 0(%r1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: sacf -1
+#CHECK: error: invalid operand
+#CHECK: sacf 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: sacf 0(%r1,%r2)
+
+ sacf -1
+ sacf 4096
+ sacf 0(%r1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: sck -1
+#CHECK: error: invalid operand
+#CHECK: sck 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: sck 0(%r1,%r2)
+
+ sck -1
+ sck 4096
+ sck 0(%r1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: sckc -1
+#CHECK: error: invalid operand
+#CHECK: sckc 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: sckc 0(%r1,%r2)
+
+ sckc -1
+ sckc 4096
+ sckc 0(%r1,%r2)
+
+#CHECK: error: invalid operand
#CHECK: sd %f0, -1
#CHECK: error: invalid operand
#CHECK: sd %f0, 4096
@@ -5403,6 +5855,16 @@
sh %r0, -1
sh %r0, 4096
+#CHECK: error: instruction requires: high-word
+#CHECK: shhhr %r0, %r0, %r0
+
+ shhhr %r0, %r0, %r0
+
+#CHECK: error: instruction requires: high-word
+#CHECK: shhlr %r0, %r0, %r0
+
+ shhlr %r0, %r0, %r0
+
#CHECK: error: invalid operand
#CHECK: shy %r0, -524289
#CHECK: error: invalid operand
@@ -5412,6 +5874,39 @@
shy %r0, 524288
#CHECK: error: invalid operand
+#CHECK: sie -1
+#CHECK: error: invalid operand
+#CHECK: sie 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: sie 0(%r1,%r2)
+
+ sie -1
+ sie 4096
+ sie 0(%r1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: siga -1
+#CHECK: error: invalid operand
+#CHECK: siga 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: siga 0(%r1,%r2)
+
+ siga -1
+ siga 4096
+ siga 0(%r1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: sigp %r0, %r0, -1
+#CHECK: error: invalid operand
+#CHECK: sigp %r0, %r0, 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: sigp %r0, %r0, 0(%r1,%r2)
+
+ sigp %r0, %r0, -1
+ sigp %r0, %r0, 4096
+ sigp %r0, %r0, 0(%r1,%r2)
+
+#CHECK: error: invalid operand
#CHECK: sl %r0, -1
#CHECK: error: invalid operand
#CHECK: sl %r0, 4096
@@ -5547,6 +6042,16 @@
slgrk %r2,%r3,%r4
+#CHECK: error: instruction requires: high-word
+#CHECK: slhhhr %r0, %r0, %r0
+
+ slhhhr %r0, %r0, %r0
+
+#CHECK: error: instruction requires: high-word
+#CHECK: slhhlr %r0, %r0, %r0
+
+ slhhlr %r0, %r0, %r0
+
#CHECK: error: invalid operand
#CHECK: sll %r0,-1
#CHECK: error: invalid operand
@@ -5661,6 +6166,39 @@
sp 0(-), 0(1)
#CHECK: error: invalid operand
+#CHECK: spka -1
+#CHECK: error: invalid operand
+#CHECK: spka 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: spka 0(%r1,%r2)
+
+ spka -1
+ spka 4096
+ spka 0(%r1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: spt -1
+#CHECK: error: invalid operand
+#CHECK: spt 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: spt 0(%r1,%r2)
+
+ spt -1
+ spt 4096
+ spt 0(%r1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: spx -1
+#CHECK: error: invalid operand
+#CHECK: spx 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: spx 0(%r1,%r2)
+
+ spx -1
+ spx 4096
+ spx 0(%r1,%r2)
+
+#CHECK: error: invalid operand
#CHECK: sqd %f0, -1
#CHECK: error: invalid operand
#CHECK: sqd %f0, 4096
@@ -5913,6 +6451,36 @@
srxt %f2, %f0, 0
#CHECK: error: invalid operand
+#CHECK: ssch -1
+#CHECK: error: invalid operand
+#CHECK: ssch 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: ssch 0(%r1,%r2)
+
+ ssch -1
+ ssch 4096
+ ssch 0(%r1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: sske %r0, %r0, -1
+#CHECK: error: invalid operand
+#CHECK: sske %r0, %r0, 16
+
+ sske %r0, %r0, -1
+ sske %r0, %r0, 16
+
+#CHECK: error: invalid operand
+#CHECK: ssm -1
+#CHECK: error: invalid operand
+#CHECK: ssm 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: ssm 0(%r1,%r2)
+
+ ssm -1
+ ssm 4096
+ ssm 0(%r1,%r2)
+
+#CHECK: error: invalid operand
#CHECK: st %r0, -1
#CHECK: error: invalid operand
#CHECK: st %r0, 4096
@@ -5940,6 +6508,17 @@
stamy %a0, %a0, 0(%r1,%r2)
#CHECK: error: invalid operand
+#CHECK: stap -1
+#CHECK: error: invalid operand
+#CHECK: stap 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: stap 0(%r1,%r2)
+
+ stap -1
+ stap 4096
+ stap 0(%r1,%r2)
+
+#CHECK: error: invalid operand
#CHECK: stc %r0, -1
#CHECK: error: invalid operand
#CHECK: stc %r0, 4096
@@ -5953,6 +6532,50 @@
stch %r0, 0
#CHECK: error: invalid operand
+#CHECK: stck -1
+#CHECK: error: invalid operand
+#CHECK: stck 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: stck 0(%r1,%r2)
+
+ stck -1
+ stck 4096
+ stck 0(%r1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: stckc -1
+#CHECK: error: invalid operand
+#CHECK: stckc 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: stckc 0(%r1,%r2)
+
+ stckc -1
+ stckc 4096
+ stckc 0(%r1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: stcke -1
+#CHECK: error: invalid operand
+#CHECK: stcke 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: stcke 0(%r1,%r2)
+
+ stcke -1
+ stcke 4096
+ stcke 0(%r1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: stckf -1
+#CHECK: error: invalid operand
+#CHECK: stckf 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: stckf 0(%r1,%r2)
+
+ stckf -1
+ stckf 4096
+ stckf 0(%r1,%r2)
+
+#CHECK: error: invalid operand
#CHECK: stcm %r0, 0, -1
#CHECK: error: invalid operand
#CHECK: stcm %r0, 0, 4096
@@ -5995,6 +6618,50 @@
stcmy %r0, 16, 0
#CHECK: error: invalid operand
+#CHECK: stcps -1
+#CHECK: error: invalid operand
+#CHECK: stcps 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: stcps 0(%r1,%r2)
+
+ stcps -1
+ stcps 4096
+ stcps 0(%r1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: stcrw -1
+#CHECK: error: invalid operand
+#CHECK: stcrw 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: stcrw 0(%r1,%r2)
+
+ stcrw -1
+ stcrw 4096
+ stcrw 0(%r1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: stctg %c0, %c0, -524289
+#CHECK: error: invalid operand
+#CHECK: stctg %c0, %c0, 524288
+#CHECK: error: invalid use of indexed addressing
+#CHECK: stctg %c0, %c0, 0(%r1,%r2)
+
+ stctg %c0, %c0, -524289
+ stctg %c0, %c0, 524288
+ stctg %c0, %c0, 0(%r1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: stctl %c0, %c0, -1
+#CHECK: error: invalid operand
+#CHECK: stctl %c0, %c0, 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: stctl %c0, %c0, 0(%r1,%r2)
+
+ stctl %c0, %c0, -1
+ stctl %c0, %c0, 4096
+ stctl %c0, %c0, 0(%r1,%r2)
+
+#CHECK: error: invalid operand
#CHECK: stcy %r0, -524289
#CHECK: error: invalid operand
#CHECK: stcy %r0, 524288
@@ -6040,6 +6707,28 @@
stfh %r0, 0
#CHECK: error: invalid operand
+#CHECK: stfl -1
+#CHECK: error: invalid operand
+#CHECK: stfl 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: stfl 0(%r1,%r2)
+
+ stfl -1
+ stfl 4096
+ stfl 0(%r1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: stfle -1
+#CHECK: error: invalid operand
+#CHECK: stfle 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: stfle 0(%r1,%r2)
+
+ stfle -1
+ stfle 4096
+ stfle 0(%r1,%r2)
+
+#CHECK: error: invalid operand
#CHECK: stfpc -1
#CHECK: error: invalid operand
#CHECK: stfpc 4096
@@ -6108,6 +6797,17 @@
sthy %r0, 524288
#CHECK: error: invalid operand
+#CHECK: stidp -1
+#CHECK: error: invalid operand
+#CHECK: stidp 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: stidp 0(%r1,%r2)
+
+ stidp -1
+ stidp 4096
+ stidp 0(%r1,%r2)
+
+#CHECK: error: invalid operand
#CHECK: stm %r0, %r0, 4096
#CHECK: error: invalid use of indexed addressing
#CHECK: stm %r0, %r0, 0(%r1,%r2)
@@ -6148,6 +6848,51 @@
stmy %r0, %r0, 524288
stmy %r0, %r0, 0(%r1,%r2)
+#CHECK: error: invalid operand
+#CHECK: stnsm -1, 0
+#CHECK: error: invalid operand
+#CHECK: stnsm 4096, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: stnsm 0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: stnsm 0, -1
+#CHECK: error: invalid operand
+#CHECK: stnsm 0, 256
+
+ stnsm -1, 0
+ stnsm 4096, 0
+ stnsm 0(%r1,%r2), 0
+ stnsm 0, -1
+ stnsm 0, 256
+
+#CHECK: error: invalid operand
+#CHECK: stosm -1, 0
+#CHECK: error: invalid operand
+#CHECK: stosm 4096, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: stosm 0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: stosm 0, -1
+#CHECK: error: invalid operand
+#CHECK: stosm 0, 256
+
+ stosm -1, 0
+ stosm 4096, 0
+ stosm 0(%r1,%r2), 0
+ stosm 0, -1
+ stosm 0, 256
+
+#CHECK: error: invalid operand
+#CHECK: stpt -1
+#CHECK: error: invalid operand
+#CHECK: stpt 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: stpt 0(%r1,%r2)
+
+ stpt -1
+ stpt 4096
+ stpt 0(%r1,%r2)
+
#CHECK: error: invalid register pair
#CHECK: stpq %r1, 0
#CHECK: error: invalid operand
@@ -6159,6 +6904,17 @@
stpq %r0, -524289
stpq %r0, 524288
+#CHECK: error: invalid operand
+#CHECK: stpx -1
+#CHECK: error: invalid operand
+#CHECK: stpx 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: stpx 0(%r1,%r2)
+
+ stpx -1
+ stpx 4096
+ stpx 0(%r1,%r2)
+
#CHECK: error: invalid use of indexed addressing
#CHECK: strag 160(%r1,%r15),160(%r15)
#CHECK: error: invalid operand
@@ -6207,6 +6963,28 @@
strvg %r0, 524288
#CHECK: error: invalid operand
+#CHECK: stsch -1
+#CHECK: error: invalid operand
+#CHECK: stsch 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: stsch 0(%r1,%r2)
+
+ stsch -1
+ stsch 4096
+ stsch 0(%r1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: stsi -1
+#CHECK: error: invalid operand
+#CHECK: stsi 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: stsi 0(%r1,%r2)
+
+ stsi -1
+ stsi 4096
+ stsi 0(%r1,%r2)
+
+#CHECK: error: invalid operand
#CHECK: sty %r0, -524289
#CHECK: error: invalid operand
#CHECK: sty %r0, 524288
@@ -6475,6 +7253,34 @@
tp 0(%r1,%r2)
tp 0(-)
+#CHECK: error: invalid operand
+#CHECK: tpi -1
+#CHECK: error: invalid operand
+#CHECK: tpi 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: tpi 0(%r1,%r2)
+
+ tpi -1
+ tpi 4096
+ tpi 0(%r1,%r2)
+
+#CHECK: error: invalid use of indexed addressing
+#CHECK: tprot 160(%r1,%r15),160(%r15)
+#CHECK: error: invalid operand
+#CHECK: tprot -1(%r1),160(%r15)
+#CHECK: error: invalid operand
+#CHECK: tprot 4096(%r1),160(%r15)
+#CHECK: error: invalid operand
+#CHECK: tprot 0(%r1),-1(%r15)
+#CHECK: error: invalid operand
+#CHECK: tprot 0(%r1),4096(%r15)
+
+ tprot 160(%r1,%r15),160(%r15)
+ tprot -1(%r1),160(%r15)
+ tprot 4096(%r1),160(%r15)
+ tprot 0(%r1),-1(%r15)
+ tprot 0(%r1),4096(%r15)
+
#CHECK: error: missing length in address
#CHECK: tr 0, 0
#CHECK: error: missing length in address
@@ -6519,6 +7325,39 @@
tr 0(1,%r2), 0(%r1,%r2)
tr 0(-), 0
+#CHECK: error: invalid operand
+#CHECK: trace %r0, %r0, -1
+#CHECK: error: invalid operand
+#CHECK: trace %r0, %r0, 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: trace %r0, %r0, 0(%r1,%r2)
+
+ trace %r0, %r0, -1
+ trace %r0, %r0, 4096
+ trace %r0, %r0, 0(%r1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: tracg %r0, %r0, -524289
+#CHECK: error: invalid operand
+#CHECK: tracg %r0, %r0, 524288
+#CHECK: error: invalid use of indexed addressing
+#CHECK: tracg %r0, %r0, 0(%r1,%r2)
+
+ tracg %r0, %r0, -524289
+ tracg %r0, %r0, 524288
+ tracg %r0, %r0, 0(%r1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: trap4 -1
+#CHECK: error: invalid operand
+#CHECK: trap4 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: trap4 0(%r1,%r2)
+
+ trap4 -1
+ trap4 4096
+ trap4 0(%r1,%r2)
+
#CHECK: error: invalid register pair
#CHECK: tre %r1, %r0
@@ -6689,6 +7528,17 @@
ts 4096
ts 0(%r1,%r2)
+#CHECK: error: invalid operand
+#CHECK: tsch -1
+#CHECK: error: invalid operand
+#CHECK: tsch 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: tsch 0(%r1,%r2)
+
+ tsch -1
+ tsch 4096
+ tsch 0(%r1,%r2)
+
#CHECK: error: missing length in address
#CHECK: unpk 0, 0(1)
#CHECK: error: missing length in address
diff --git a/test/MC/SystemZ/insn-good-z196.s b/test/MC/SystemZ/insn-good-z196.s
index 31d257d7448d..a3a662857048 100644
--- a/test/MC/SystemZ/insn-good-z196.s
+++ b/test/MC/SystemZ/insn-good-z196.s
@@ -46,6 +46,30 @@
agrk %r15,%r0,%r0
agrk %r7,%r8,%r9
+#CHECK: ahhhr %r0, %r0, %r0 # encoding: [0xb9,0xc8,0x00,0x00]
+#CHECK: ahhhr %r0, %r0, %r15 # encoding: [0xb9,0xc8,0xf0,0x00]
+#CHECK: ahhhr %r0, %r15, %r0 # encoding: [0xb9,0xc8,0x00,0x0f]
+#CHECK: ahhhr %r15, %r0, %r0 # encoding: [0xb9,0xc8,0x00,0xf0]
+#CHECK: ahhhr %r7, %r8, %r9 # encoding: [0xb9,0xc8,0x90,0x78]
+
+ ahhhr %r0, %r0, %r0
+ ahhhr %r0, %r0, %r15
+ ahhhr %r0, %r15, %r0
+ ahhhr %r15, %r0, %r0
+ ahhhr %r7, %r8, %r9
+
+#CHECK: ahhlr %r0, %r0, %r0 # encoding: [0xb9,0xd8,0x00,0x00]
+#CHECK: ahhlr %r0, %r0, %r15 # encoding: [0xb9,0xd8,0xf0,0x00]
+#CHECK: ahhlr %r0, %r15, %r0 # encoding: [0xb9,0xd8,0x00,0x0f]
+#CHECK: ahhlr %r15, %r0, %r0 # encoding: [0xb9,0xd8,0x00,0xf0]
+#CHECK: ahhlr %r7, %r8, %r9 # encoding: [0xb9,0xd8,0x90,0x78]
+
+ ahhlr %r0, %r0, %r0
+ ahhlr %r0, %r0, %r15
+ ahhlr %r0, %r15, %r0
+ ahhlr %r15, %r0, %r0
+ ahhlr %r7, %r8, %r9
+
#CHECK: ahik %r0, %r0, -32768 # encoding: [0xec,0x00,0x80,0x00,0x00,0xd8]
#CHECK: ahik %r0, %r0, -1 # encoding: [0xec,0x00,0xff,0xff,0x00,0xd8]
#CHECK: ahik %r0, %r0, 0 # encoding: [0xec,0x00,0x00,0x00,0x00,0xd8]
@@ -108,6 +132,30 @@
algrk %r15,%r0,%r0
algrk %r7,%r8,%r9
+#CHECK: alhhhr %r0, %r0, %r0 # encoding: [0xb9,0xca,0x00,0x00]
+#CHECK: alhhhr %r0, %r0, %r15 # encoding: [0xb9,0xca,0xf0,0x00]
+#CHECK: alhhhr %r0, %r15, %r0 # encoding: [0xb9,0xca,0x00,0x0f]
+#CHECK: alhhhr %r15, %r0, %r0 # encoding: [0xb9,0xca,0x00,0xf0]
+#CHECK: alhhhr %r7, %r8, %r9 # encoding: [0xb9,0xca,0x90,0x78]
+
+ alhhhr %r0, %r0, %r0
+ alhhhr %r0, %r0, %r15
+ alhhhr %r0, %r15, %r0
+ alhhhr %r15, %r0, %r0
+ alhhhr %r7, %r8, %r9
+
+#CHECK: alhhlr %r0, %r0, %r0 # encoding: [0xb9,0xda,0x00,0x00]
+#CHECK: alhhlr %r0, %r0, %r15 # encoding: [0xb9,0xda,0xf0,0x00]
+#CHECK: alhhlr %r0, %r15, %r0 # encoding: [0xb9,0xda,0x00,0x0f]
+#CHECK: alhhlr %r15, %r0, %r0 # encoding: [0xb9,0xda,0x00,0xf0]
+#CHECK: alhhlr %r7, %r8, %r9 # encoding: [0xb9,0xda,0x90,0x78]
+
+ alhhlr %r0, %r0, %r0
+ alhhlr %r0, %r0, %r15
+ alhhlr %r0, %r15, %r0
+ alhhlr %r15, %r0, %r0
+ alhhlr %r7, %r8, %r9
+
#CHECK: alhsik %r0, %r0, -32768 # encoding: [0xec,0x00,0x80,0x00,0x00,0xda]
#CHECK: alhsik %r0, %r0, -1 # encoding: [0xec,0x00,0xff,0xff,0x00,0xda]
#CHECK: alhsik %r0, %r0, 0 # encoding: [0xec,0x00,0x00,0x00,0x00,0xda]
@@ -138,6 +186,34 @@
alrk %r15,%r0,%r0
alrk %r7,%r8,%r9
+#CHECK: alsih %r0, -2147483648 # encoding: [0xcc,0x0a,0x80,0x00,0x00,0x00]
+#CHECK: alsih %r0, -1 # encoding: [0xcc,0x0a,0xff,0xff,0xff,0xff]
+#CHECK: alsih %r0, 0 # encoding: [0xcc,0x0a,0x00,0x00,0x00,0x00]
+#CHECK: alsih %r0, 1 # encoding: [0xcc,0x0a,0x00,0x00,0x00,0x01]
+#CHECK: alsih %r0, 2147483647 # encoding: [0xcc,0x0a,0x7f,0xff,0xff,0xff]
+#CHECK: alsih %r15, 0 # encoding: [0xcc,0xfa,0x00,0x00,0x00,0x00]
+
+ alsih %r0, -1 << 31
+ alsih %r0, -1
+ alsih %r0, 0
+ alsih %r0, 1
+ alsih %r0, (1 << 31) - 1
+ alsih %r15, 0
+
+#CHECK: alsihn %r0, -2147483648 # encoding: [0xcc,0x0b,0x80,0x00,0x00,0x00]
+#CHECK: alsihn %r0, -1 # encoding: [0xcc,0x0b,0xff,0xff,0xff,0xff]
+#CHECK: alsihn %r0, 0 # encoding: [0xcc,0x0b,0x00,0x00,0x00,0x00]
+#CHECK: alsihn %r0, 1 # encoding: [0xcc,0x0b,0x00,0x00,0x00,0x01]
+#CHECK: alsihn %r0, 2147483647 # encoding: [0xcc,0x0b,0x7f,0xff,0xff,0xff]
+#CHECK: alsihn %r15, 0 # encoding: [0xcc,0xfb,0x00,0x00,0x00,0x00]
+
+ alsihn %r0, -1 << 31
+ alsihn %r0, -1
+ alsihn %r0, 0
+ alsihn %r0, 1
+ alsihn %r0, (1 << 31) - 1
+ alsihn %r15, 0
+
#CHECK: ark %r0, %r0, %r0 # encoding: [0xb9,0xf8,0x00,0x00]
#CHECK: ark %r0, %r0, %r15 # encoding: [0xb9,0xf8,0xf0,0x00]
#CHECK: ark %r0, %r15, %r0 # encoding: [0xb9,0xf8,0x00,0x0f]
@@ -531,6 +607,26 @@
chf %r0, 524287(%r15,%r1)
chf %r15, 0
+#CHECK: chhr %r0, %r0 # encoding: [0xb9,0xcd,0x00,0x00]
+#CHECK: chhr %r0, %r15 # encoding: [0xb9,0xcd,0x00,0x0f]
+#CHECK: chhr %r15, %r0 # encoding: [0xb9,0xcd,0x00,0xf0]
+#CHECK: chhr %r7, %r8 # encoding: [0xb9,0xcd,0x00,0x78]
+
+ chhr %r0,%r0
+ chhr %r0,%r15
+ chhr %r15,%r0
+ chhr %r7,%r8
+
+#CHECK: chlr %r0, %r0 # encoding: [0xb9,0xdd,0x00,0x00]
+#CHECK: chlr %r0, %r15 # encoding: [0xb9,0xdd,0x00,0x0f]
+#CHECK: chlr %r15, %r0 # encoding: [0xb9,0xdd,0x00,0xf0]
+#CHECK: chlr %r7, %r8 # encoding: [0xb9,0xdd,0x00,0x78]
+
+ chlr %r0,%r0
+ chlr %r0,%r15
+ chlr %r15,%r0
+ chlr %r7,%r8
+
#CHECK: cih %r0, -2147483648 # encoding: [0xcc,0x0d,0x80,0x00,0x00,0x00]
#CHECK: cih %r0, -1 # encoding: [0xcc,0x0d,0xff,0xff,0xff,0xff]
#CHECK: cih %r0, 0 # encoding: [0xcc,0x0d,0x00,0x00,0x00,0x00]
@@ -707,6 +803,26 @@
clhf %r0, 524287(%r15,%r1)
clhf %r15, 0
+#CHECK: clhhr %r0, %r0 # encoding: [0xb9,0xcf,0x00,0x00]
+#CHECK: clhhr %r0, %r15 # encoding: [0xb9,0xcf,0x00,0x0f]
+#CHECK: clhhr %r15, %r0 # encoding: [0xb9,0xcf,0x00,0xf0]
+#CHECK: clhhr %r7, %r8 # encoding: [0xb9,0xcf,0x00,0x78]
+
+ clhhr %r0,%r0
+ clhhr %r0,%r15
+ clhhr %r15,%r0
+ clhhr %r7,%r8
+
+#CHECK: clhlr %r0, %r0 # encoding: [0xb9,0xdf,0x00,0x00]
+#CHECK: clhlr %r0, %r15 # encoding: [0xb9,0xdf,0x00,0x0f]
+#CHECK: clhlr %r15, %r0 # encoding: [0xb9,0xdf,0x00,0xf0]
+#CHECK: clhlr %r7, %r8 # encoding: [0xb9,0xdf,0x00,0x78]
+
+ clhlr %r0,%r0
+ clhlr %r0,%r15
+ clhlr %r15,%r0
+ clhlr %r7,%r8
+
#CHECK: clih %r0, 0 # encoding: [0xcc,0x0f,0x00,0x00,0x00,0x00]
#CHECK: clih %r0, 1 # encoding: [0xcc,0x0f,0x00,0x00,0x00,0x01]
#CHECK: clih %r0, 4294967295 # encoding: [0xcc,0x0f,0xff,0xff,0xff,0xff]
@@ -1645,6 +1761,10 @@
pcc
+#CHECK: pckmo # encoding: [0xb9,0x28,0x00,0x00]
+
+ pckmo
+
#CHECK: popcnt %r0, %r0 # encoding: [0xb9,0xe1,0x00,0x00]
#CHECK: popcnt %r0, %r15 # encoding: [0xb9,0xe1,0x00,0x0f]
#CHECK: popcnt %r15, %r0 # encoding: [0xb9,0xe1,0x00,0xf0]
@@ -1687,6 +1807,18 @@
risblg %r15,%r0,0,0,0
risblg %r4,%r5,6,7,8
+#CHECK: rrbm %r0, %r0 # encoding: [0xb9,0xae,0x00,0x00]
+#CHECK: rrbm %r0, %r15 # encoding: [0xb9,0xae,0x00,0x0f]
+#CHECK: rrbm %r15, %r0 # encoding: [0xb9,0xae,0x00,0xf0]
+#CHECK: rrbm %r7, %r8 # encoding: [0xb9,0xae,0x00,0x78]
+#CHECK: rrbm %r15, %r15 # encoding: [0xb9,0xae,0x00,0xff]
+
+ rrbm %r0,%r0
+ rrbm %r0,%r15
+ rrbm %r15,%r0
+ rrbm %r7,%r8
+ rrbm %r15,%r15
+
#CHECK: sdtra %f0, %f0, %f0, 0 # encoding: [0xb3,0xd3,0x00,0x00]
#CHECK: sdtra %f0, %f0, %f0, 15 # encoding: [0xb3,0xd3,0x0f,0x00]
#CHECK: sdtra %f0, %f0, %f15, 0 # encoding: [0xb3,0xd3,0xf0,0x00]
@@ -1713,6 +1845,30 @@
sgrk %r15,%r0,%r0
sgrk %r7,%r8,%r9
+#CHECK: shhhr %r0, %r0, %r0 # encoding: [0xb9,0xc9,0x00,0x00]
+#CHECK: shhhr %r0, %r0, %r15 # encoding: [0xb9,0xc9,0xf0,0x00]
+#CHECK: shhhr %r0, %r15, %r0 # encoding: [0xb9,0xc9,0x00,0x0f]
+#CHECK: shhhr %r15, %r0, %r0 # encoding: [0xb9,0xc9,0x00,0xf0]
+#CHECK: shhhr %r7, %r8, %r9 # encoding: [0xb9,0xc9,0x90,0x78]
+
+ shhhr %r0, %r0, %r0
+ shhhr %r0, %r0, %r15
+ shhhr %r0, %r15, %r0
+ shhhr %r15, %r0, %r0
+ shhhr %r7, %r8, %r9
+
+#CHECK: shhlr %r0, %r0, %r0 # encoding: [0xb9,0xd9,0x00,0x00]
+#CHECK: shhlr %r0, %r0, %r15 # encoding: [0xb9,0xd9,0xf0,0x00]
+#CHECK: shhlr %r0, %r15, %r0 # encoding: [0xb9,0xd9,0x00,0x0f]
+#CHECK: shhlr %r15, %r0, %r0 # encoding: [0xb9,0xd9,0x00,0xf0]
+#CHECK: shhlr %r7, %r8, %r9 # encoding: [0xb9,0xd9,0x90,0x78]
+
+ shhlr %r0, %r0, %r0
+ shhlr %r0, %r0, %r15
+ shhlr %r0, %r15, %r0
+ shhlr %r15, %r0, %r0
+ shhlr %r7, %r8, %r9
+
#CHECK: slak %r0, %r0, 0 # encoding: [0xeb,0x00,0x00,0x00,0x00,0xdd]
#CHECK: slak %r15, %r1, 0 # encoding: [0xeb,0xf1,0x00,0x00,0x00,0xdd]
#CHECK: slak %r1, %r15, 0 # encoding: [0xeb,0x1f,0x00,0x00,0x00,0xdd]
@@ -1751,6 +1907,30 @@
slgrk %r15,%r0,%r0
slgrk %r7,%r8,%r9
+#CHECK: slhhhr %r0, %r0, %r0 # encoding: [0xb9,0xcb,0x00,0x00]
+#CHECK: slhhhr %r0, %r0, %r15 # encoding: [0xb9,0xcb,0xf0,0x00]
+#CHECK: slhhhr %r0, %r15, %r0 # encoding: [0xb9,0xcb,0x00,0x0f]
+#CHECK: slhhhr %r15, %r0, %r0 # encoding: [0xb9,0xcb,0x00,0xf0]
+#CHECK: slhhhr %r7, %r8, %r9 # encoding: [0xb9,0xcb,0x90,0x78]
+
+ slhhhr %r0, %r0, %r0
+ slhhhr %r0, %r0, %r15
+ slhhhr %r0, %r15, %r0
+ slhhhr %r15, %r0, %r0
+ slhhhr %r7, %r8, %r9
+
+#CHECK: slhhlr %r0, %r0, %r0 # encoding: [0xb9,0xdb,0x00,0x00]
+#CHECK: slhhlr %r0, %r0, %r15 # encoding: [0xb9,0xdb,0xf0,0x00]
+#CHECK: slhhlr %r0, %r15, %r0 # encoding: [0xb9,0xdb,0x00,0x0f]
+#CHECK: slhhlr %r15, %r0, %r0 # encoding: [0xb9,0xdb,0x00,0xf0]
+#CHECK: slhhlr %r7, %r8, %r9 # encoding: [0xb9,0xdb,0x90,0x78]
+
+ slhhlr %r0, %r0, %r0
+ slhhlr %r0, %r0, %r15
+ slhhlr %r0, %r15, %r0
+ slhhlr %r15, %r0, %r0
+ slhhlr %r7, %r8, %r9
+
#CHECK: sllk %r0, %r0, 0 # encoding: [0xeb,0x00,0x00,0x00,0x00,0xdf]
#CHECK: sllk %r15, %r1, 0 # encoding: [0xeb,0xf1,0x00,0x00,0x00,0xdf]
#CHECK: sllk %r1, %r15, 0 # encoding: [0xeb,0x1f,0x00,0x00,0x00,0xdf]
diff --git a/test/MC/SystemZ/insn-good-zEC12.s b/test/MC/SystemZ/insn-good-zEC12.s
index 2fe6c46ad908..1f1bfb883bdd 100644
--- a/test/MC/SystemZ/insn-good-zEC12.s
+++ b/test/MC/SystemZ/insn-good-zEC12.s
@@ -198,6 +198,20 @@
clgtnl %r0, 0(%r15)
clgtnh %r0, 0(%r15)
+#CHECK: crdte %r0, %r0, %r0 # encoding: [0xb9,0x8f,0x00,0x00]
+#CHECK: crdte %r0, %r0, %r14 # encoding: [0xb9,0x8f,0x00,0x0e]
+#CHECK: crdte %r0, %r15, %r0 # encoding: [0xb9,0x8f,0xf0,0x00]
+#CHECK: crdte %r14, %r0, %r0 # encoding: [0xb9,0x8f,0x00,0xe0]
+#CHECK: crdte %r0, %r0, %r0, 15 # encoding: [0xb9,0x8f,0x0f,0x00]
+#CHECK: crdte %r4, %r5, %r6, 7 # encoding: [0xb9,0x8f,0x57,0x46]
+
+ crdte %r0, %r0, %r0
+ crdte %r0, %r0, %r14
+ crdte %r0, %r15, %r0
+ crdte %r14, %r0, %r0
+ crdte %r0, %r0, %r0, 15
+ crdte %r4, %r5, %r6, 7
+
#CHECK: cxzt %f0, 0(1), 0 # encoding: [0xed,0x00,0x00,0x00,0x00,0xab]
#CHECK: cxzt %f13, 0(1), 0 # encoding: [0xed,0x00,0x00,0x00,0xd0,0xab]
#CHECK: cxzt %f0, 0(1), 15 # encoding: [0xed,0x00,0x00,0x00,0x0f,0xab]
diff --git a/test/MC/SystemZ/insn-good.s b/test/MC/SystemZ/insn-good.s
index 73162e4eea71..5b93ef917fd3 100644
--- a/test/MC/SystemZ/insn-good.s
+++ b/test/MC/SystemZ/insn-good.s
@@ -741,6 +741,16 @@
ay %r0, 524287(%r15,%r1)
ay %r15, 0
+#CHECK: bakr %r0, %r0 # encoding: [0xb2,0x40,0x00,0x00]
+#CHECK: bakr %r0, %r15 # encoding: [0xb2,0x40,0x00,0x0f]
+#CHECK: bakr %r15, %r0 # encoding: [0xb2,0x40,0x00,0xf0]
+#CHECK: bakr %r7, %r8 # encoding: [0xb2,0x40,0x00,0x78]
+
+ bakr %r0,%r0
+ bakr %r0,%r15
+ bakr %r15,%r0
+ bakr %r7,%r8
+
#CHECK: bal %r0, 0 # encoding: [0x45,0x00,0x00,0x00]
#CHECK: bal %r1, 4095 # encoding: [0x45,0x10,0x0f,0xff]
#CHECK: bal %r2, 0(%r1) # encoding: [0x45,0x20,0x10,0x00]
@@ -799,6 +809,26 @@
bassm %r14,%r9
bassm %r15,%r1
+#CHECK: bsa %r0, %r0 # encoding: [0xb2,0x5a,0x00,0x00]
+#CHECK: bsa %r0, %r15 # encoding: [0xb2,0x5a,0x00,0x0f]
+#CHECK: bsa %r15, %r0 # encoding: [0xb2,0x5a,0x00,0xf0]
+#CHECK: bsa %r7, %r8 # encoding: [0xb2,0x5a,0x00,0x78]
+
+ bsa %r0,%r0
+ bsa %r0,%r15
+ bsa %r15,%r0
+ bsa %r7,%r8
+
+#CHECK: bsg %r0, %r0 # encoding: [0xb2,0x58,0x00,0x00]
+#CHECK: bsg %r0, %r15 # encoding: [0xb2,0x58,0x00,0x0f]
+#CHECK: bsg %r15, %r0 # encoding: [0xb2,0x58,0x00,0xf0]
+#CHECK: bsg %r7, %r8 # encoding: [0xb2,0x58,0x00,0x78]
+
+ bsg %r0,%r0
+ bsg %r0,%r15
+ bsg %r15,%r0
+ bsg %r7,%r8
+
#CHECK: bsm %r0, %r1 # encoding: [0x0b,0x01]
#CHECK: bsm %r0, %r15 # encoding: [0x0b,0x0f]
#CHECK: bsm %r14, %r9 # encoding: [0x0b,0xe9]
@@ -6257,6 +6287,10 @@
cs %r0, %r15, 0
cs %r15, %r0, 0
+#CHECK: csch # encoding: [0xb2,0x30,0x00,0x00]
+
+ csch
+
#CHECK: csdtr %r0, %f0, 0 # encoding: [0xb3,0xe3,0x00,0x00]
#CHECK: csdtr %r0, %f15, 0 # encoding: [0xb3,0xe3,0x00,0x0f]
#CHECK: csdtr %r0, %f0, 15 # encoding: [0xb3,0xe3,0x0f,0x00]
@@ -6293,6 +6327,26 @@
csg %r0, %r15, 0
csg %r15, %r0, 0
+#CHECK: csp %r0, %r0 # encoding: [0xb2,0x50,0x00,0x00]
+#CHECK: csp %r0, %r15 # encoding: [0xb2,0x50,0x00,0x0f]
+#CHECK: csp %r14, %r0 # encoding: [0xb2,0x50,0x00,0xe0]
+#CHECK: csp %r6, %r8 # encoding: [0xb2,0x50,0x00,0x68]
+
+ csp %r0,%r0
+ csp %r0,%r15
+ csp %r14,%r0
+ csp %r6,%r8
+
+#CHECK: cspg %r0, %r0 # encoding: [0xb9,0x8a,0x00,0x00]
+#CHECK: cspg %r0, %r15 # encoding: [0xb9,0x8a,0x00,0x0f]
+#CHECK: cspg %r14, %r0 # encoding: [0xb9,0x8a,0x00,0xe0]
+#CHECK: cspg %r6, %r8 # encoding: [0xb9,0x8a,0x00,0x68]
+
+ cspg %r0,%r0
+ cspg %r0,%r15
+ cspg %r14,%r0
+ cspg %r6,%r8
+
#CHECK: csst 0, 0, %r0 # encoding: [0xc8,0x02,0x00,0x00,0x00,0x00]
#CHECK: csst 0(%r1), 0(%r15), %r2 # encoding: [0xc8,0x22,0x10,0x00,0xf0,0x00]
#CHECK: csst 1(%r1), 0(%r15), %r2 # encoding: [0xc8,0x22,0x10,0x01,0xf0,0x00]
@@ -6869,6 +6923,28 @@
der %f7, %f8
der %f15, %f0
+#CHECK: diag %r0, %r0, 0 # encoding: [0x83,0x00,0x00,0x00]
+#CHECK: diag %r0, %r15, 0 # encoding: [0x83,0x0f,0x00,0x00]
+#CHECK: diag %r14, %r15, 0 # encoding: [0x83,0xef,0x00,0x00]
+#CHECK: diag %r15, %r15, 0 # encoding: [0x83,0xff,0x00,0x00]
+#CHECK: diag %r0, %r0, 4095 # encoding: [0x83,0x00,0x0f,0xff]
+#CHECK: diag %r0, %r0, 1 # encoding: [0x83,0x00,0x00,0x01]
+#CHECK: diag %r0, %r0, 0(%r1) # encoding: [0x83,0x00,0x10,0x00]
+#CHECK: diag %r0, %r0, 0(%r15) # encoding: [0x83,0x00,0xf0,0x00]
+#CHECK: diag %r0, %r0, 4095(%r1) # encoding: [0x83,0x00,0x1f,0xff]
+#CHECK: diag %r0, %r0, 4095(%r15) # encoding: [0x83,0x00,0xff,0xff]
+
+ diag %r0,%r0,0
+ diag %r0,%r15,0
+ diag %r14,%r15,0
+ diag %r15,%r15,0
+ diag %r0,%r0,4095
+ diag %r0,%r0,1
+ diag %r0,%r0,0(%r1)
+ diag %r0,%r0,0(%r15)
+ diag %r0,%r0,4095(%r1)
+ diag %r0,%r0,4095(%r15)
+
#CHECK: didbr %f0, %f0, %f0, 0 # encoding: [0xb3,0x5b,0x00,0x00]
#CHECK: didbr %f0, %f0, %f0, 15 # encoding: [0xb3,0x5b,0x0f,0x00]
#CHECK: didbr %f0, %f0, %f15, 0 # encoding: [0xb3,0x5b,0x00,0x0f]
@@ -7137,6 +7213,26 @@
ecag %r0,%r0,524287(%r1)
ecag %r0,%r0,524287(%r15)
+#CHECK: ecctr %r0, %r0 # encoding: [0xb2,0xe4,0x00,0x00]
+#CHECK: ecctr %r0, %r15 # encoding: [0xb2,0xe4,0x00,0x0f]
+#CHECK: ecctr %r15, %r0 # encoding: [0xb2,0xe4,0x00,0xf0]
+#CHECK: ecctr %r7, %r8 # encoding: [0xb2,0xe4,0x00,0x78]
+
+ ecctr %r0,%r0
+ ecctr %r0,%r15
+ ecctr %r15,%r0
+ ecctr %r7,%r8
+
+#CHECK: ecpga %r0, %r0 # encoding: [0xb2,0xed,0x00,0x00]
+#CHECK: ecpga %r0, %r15 # encoding: [0xb2,0xed,0x00,0x0f]
+#CHECK: ecpga %r15, %r0 # encoding: [0xb2,0xed,0x00,0xf0]
+#CHECK: ecpga %r7, %r8 # encoding: [0xb2,0xed,0x00,0x78]
+
+ ecpga %r0,%r0
+ ecpga %r0,%r15
+ ecpga %r15,%r0
+ ecpga %r7,%r8
+
#CHECK: ectg 0, 0, %r0 # encoding: [0xc8,0x01,0x00,0x00,0x00,0x00]
#CHECK: ectg 0(%r1), 0(%r15), %r2 # encoding: [0xc8,0x21,0x10,0x00,0xf0,0x00]
#CHECK: ectg 1(%r1), 0(%r15), %r2 # encoding: [0xc8,0x21,0x10,0x01,0xf0,0x00]
@@ -7231,6 +7327,32 @@
efpc %r1
efpc %r15
+#CHECK: epar %r0 # encoding: [0xb2,0x26,0x00,0x00]
+#CHECK: epar %r1 # encoding: [0xb2,0x26,0x00,0x10]
+#CHECK: epar %r15 # encoding: [0xb2,0x26,0x00,0xf0]
+
+ epar %r0
+ epar %r1
+ epar %r15
+
+#CHECK: epair %r0 # encoding: [0xb9,0x9a,0x00,0x00]
+#CHECK: epair %r1 # encoding: [0xb9,0x9a,0x00,0x10]
+#CHECK: epair %r15 # encoding: [0xb9,0x9a,0x00,0xf0]
+
+ epair %r0
+ epair %r1
+ epair %r15
+
+#CHECK: epctr %r0, %r0 # encoding: [0xb2,0xe5,0x00,0x00]
+#CHECK: epctr %r0, %r15 # encoding: [0xb2,0xe5,0x00,0x0f]
+#CHECK: epctr %r15, %r0 # encoding: [0xb2,0xe5,0x00,0xf0]
+#CHECK: epctr %r7, %r8 # encoding: [0xb2,0xe5,0x00,0x78]
+
+ epctr %r0,%r0
+ epctr %r0,%r15
+ epctr %r15,%r0
+ epctr %r7,%r8
+
#CHECK: epsw %r0, %r8 # encoding: [0xb9,0x8d,0x00,0x08]
#CHECK: epsw %r0, %r15 # encoding: [0xb9,0x8d,0x00,0x0f]
#CHECK: epsw %r15, %r0 # encoding: [0xb9,0x8d,0x00,0xf0]
@@ -7241,6 +7363,42 @@
epsw %r15, %r0
epsw %r15, %r8
+#CHECK: ereg %r0, %r0 # encoding: [0xb2,0x49,0x00,0x00]
+#CHECK: ereg %r0, %r15 # encoding: [0xb2,0x49,0x00,0x0f]
+#CHECK: ereg %r15, %r0 # encoding: [0xb2,0x49,0x00,0xf0]
+#CHECK: ereg %r7, %r8 # encoding: [0xb2,0x49,0x00,0x78]
+
+ ereg %r0,%r0
+ ereg %r0,%r15
+ ereg %r15,%r0
+ ereg %r7,%r8
+
+#CHECK: eregg %r0, %r0 # encoding: [0xb9,0x0e,0x00,0x00]
+#CHECK: eregg %r0, %r15 # encoding: [0xb9,0x0e,0x00,0x0f]
+#CHECK: eregg %r15, %r0 # encoding: [0xb9,0x0e,0x00,0xf0]
+#CHECK: eregg %r7, %r8 # encoding: [0xb9,0x0e,0x00,0x78]
+
+ eregg %r0,%r0
+ eregg %r0,%r15
+ eregg %r15,%r0
+ eregg %r7,%r8
+
+#CHECK: esar %r0 # encoding: [0xb2,0x27,0x00,0x00]
+#CHECK: esar %r1 # encoding: [0xb2,0x27,0x00,0x10]
+#CHECK: esar %r15 # encoding: [0xb2,0x27,0x00,0xf0]
+
+ esar %r0
+ esar %r1
+ esar %r15
+
+#CHECK: esair %r0 # encoding: [0xb9,0x9b,0x00,0x00]
+#CHECK: esair %r1 # encoding: [0xb9,0x9b,0x00,0x10]
+#CHECK: esair %r15 # encoding: [0xb9,0x9b,0x00,0xf0]
+
+ esair %r0
+ esair %r1
+ esair %r15
+
#CHECK: esdtr %f0, %f9 # encoding: [0xb3,0xe7,0x00,0x09]
#CHECK: esdtr %f0, %f15 # encoding: [0xb3,0xe7,0x00,0x0f]
#CHECK: esdtr %f15, %f0 # encoding: [0xb3,0xe7,0x00,0xf0]
@@ -7251,6 +7409,24 @@
esdtr %f15,%f0
esdtr %f15,%f9
+#CHECK: esea %r0 # encoding: [0xb9,0x9d,0x00,0x00]
+#CHECK: esea %r1 # encoding: [0xb9,0x9d,0x00,0x10]
+#CHECK: esea %r15 # encoding: [0xb9,0x9d,0x00,0xf0]
+
+ esea %r0
+ esea %r1
+ esea %r15
+
+#CHECK: esta %r0, %r0 # encoding: [0xb2,0x4a,0x00,0x00]
+#CHECK: esta %r0, %r15 # encoding: [0xb2,0x4a,0x00,0x0f]
+#CHECK: esta %r14, %r0 # encoding: [0xb2,0x4a,0x00,0xe0]
+#CHECK: esta %r6, %r8 # encoding: [0xb2,0x4a,0x00,0x68]
+
+ esta %r0,%r0
+ esta %r0,%r15
+ esta %r14,%r0
+ esta %r6,%r8
+
#CHECK: esxtr %f0, %f8 # encoding: [0xb3,0xef,0x00,0x08]
#CHECK: esxtr %f0, %f13 # encoding: [0xb3,0xef,0x00,0x0d]
#CHECK: esxtr %f13, %f0 # encoding: [0xb3,0xef,0x00,0xd0]
@@ -7438,6 +7614,18 @@
her %f7, %f8
her %f15, %f0
+#CHECK: hsch # encoding: [0xb2,0x31,0x00,0x00]
+
+ hsch
+
+#CHECK: iac %r0 # encoding: [0xb2,0x24,0x00,0x00]
+#CHECK: iac %r1 # encoding: [0xb2,0x24,0x00,0x10]
+#CHECK: iac %r15 # encoding: [0xb2,0x24,0x00,0xf0]
+
+ iac %r0
+ iac %r1
+ iac %r15
+
#CHECK: ic %r0, 0 # encoding: [0x43,0x00,0x00,0x00]
#CHECK: ic %r0, 4095 # encoding: [0x43,0x00,0x0f,0xff]
#CHECK: ic %r0, 0(%r1) # encoding: [0x43,0x00,0x10,0x00]
@@ -7536,6 +7724,20 @@
icy %r0, 524287(%r15,%r1)
icy %r15, 0
+#CHECK: idte %r0, %r0, %r0 # encoding: [0xb9,0x8e,0x00,0x00]
+#CHECK: idte %r0, %r0, %r15 # encoding: [0xb9,0x8e,0x00,0x0f]
+#CHECK: idte %r0, %r15, %r0 # encoding: [0xb9,0x8e,0xf0,0x00]
+#CHECK: idte %r15, %r0, %r0 # encoding: [0xb9,0x8e,0x00,0xf0]
+#CHECK: idte %r0, %r0, %r0, 15 # encoding: [0xb9,0x8e,0x0f,0x00]
+#CHECK: idte %r4, %r5, %r6, 7 # encoding: [0xb9,0x8e,0x57,0x46]
+
+ idte %r0, %r0, %r0
+ idte %r0, %r0, %r15
+ idte %r0, %r15, %r0
+ idte %r15, %r0, %r0
+ idte %r0, %r0, %r0, 15
+ idte %r4, %r5, %r6, 7
+
#CHECK: iedtr %f0, %f0, %f0 # encoding: [0xb3,0xf6,0x00,0x00]
#CHECK: iedtr %f0, %f0, %f15 # encoding: [0xb3,0xf6,0x00,0x0f]
#CHECK: iedtr %f0, %f15, %f0 # encoding: [0xb3,0xf6,0xf0,0x00]
@@ -7620,6 +7822,10 @@
iill %r0, 0xffff
iill %r15, 0
+#CHECK: ipk # encoding: [0xb2,0x0b,0x00,0x00]
+
+ ipk
+
#CHECK: ipm %r0 # encoding: [0xb2,0x22,0x00,0x00]
#CHECK: ipm %r1 # encoding: [0xb2,0x22,0x00,0x10]
#CHECK: ipm %r15 # encoding: [0xb2,0x22,0x00,0xf0]
@@ -7628,6 +7834,40 @@
ipm %r1
ipm %r15
+#CHECK: ipte %r0, %r0 # encoding: [0xb2,0x21,0x00,0x00]
+#CHECK: ipte %r0, %r15 # encoding: [0xb2,0x21,0x00,0x0f]
+#CHECK: ipte %r15, %r0 # encoding: [0xb2,0x21,0x00,0xf0]
+#CHECK: ipte %r0, %r0, %r15 # encoding: [0xb2,0x21,0xf0,0x00]
+#CHECK: ipte %r0, %r0, %r0, 15 # encoding: [0xb2,0x21,0x0f,0x00]
+#CHECK: ipte %r7, %r8, %r9, 10 # encoding: [0xb2,0x21,0x9a,0x78]
+
+ ipte %r0, %r0
+ ipte %r0, %r15
+ ipte %r15, %r0
+ ipte %r0, %r0, %r15
+ ipte %r0, %r0, %r0, 15
+ ipte %r7, %r8, %r9, 10
+
+#CHECK: iske %r0, %r0 # encoding: [0xb2,0x29,0x00,0x00]
+#CHECK: iske %r0, %r15 # encoding: [0xb2,0x29,0x00,0x0f]
+#CHECK: iske %r15, %r0 # encoding: [0xb2,0x29,0x00,0xf0]
+#CHECK: iske %r7, %r8 # encoding: [0xb2,0x29,0x00,0x78]
+
+ iske %r0,%r0
+ iske %r0,%r15
+ iske %r15,%r0
+ iske %r7,%r8
+
+#CHECK: ivsk %r0, %r0 # encoding: [0xb2,0x23,0x00,0x00]
+#CHECK: ivsk %r0, %r15 # encoding: [0xb2,0x23,0x00,0x0f]
+#CHECK: ivsk %r15, %r0 # encoding: [0xb2,0x23,0x00,0xf0]
+#CHECK: ivsk %r7, %r8 # encoding: [0xb2,0x23,0x00,0x78]
+
+ ivsk %r0,%r0
+ ivsk %r0,%r15
+ ivsk %r15,%r0
+ ivsk %r7,%r8
+
#CHECK: kdb %f0, 0 # encoding: [0xed,0x00,0x00,0x00,0x00,0x18]
#CHECK: kdb %f0, 4095 # encoding: [0xed,0x00,0x0f,0xff,0x00,0x18]
#CHECK: kdb %f0, 0(%r1) # encoding: [0xed,0x00,0x10,0x00,0x00,0x18]
@@ -7917,6 +8157,28 @@
larl %r7,frob@PLT
larl %r8,frob@PLT
+#CHECK: lasp 0, 0 # encoding: [0xe5,0x00,0x00,0x00,0x00,0x00]
+#CHECK: lasp 0(%r1), 0(%r2) # encoding: [0xe5,0x00,0x10,0x00,0x20,0x00]
+#CHECK: lasp 160(%r1), 320(%r15) # encoding: [0xe5,0x00,0x10,0xa0,0xf1,0x40]
+#CHECK: lasp 0(%r1), 4095 # encoding: [0xe5,0x00,0x10,0x00,0x0f,0xff]
+#CHECK: lasp 0(%r1), 4095(%r2) # encoding: [0xe5,0x00,0x10,0x00,0x2f,0xff]
+#CHECK: lasp 0(%r1), 4095(%r15) # encoding: [0xe5,0x00,0x10,0x00,0xff,0xff]
+#CHECK: lasp 0(%r1), 0 # encoding: [0xe5,0x00,0x10,0x00,0x00,0x00]
+#CHECK: lasp 0(%r15), 0 # encoding: [0xe5,0x00,0xf0,0x00,0x00,0x00]
+#CHECK: lasp 4095(%r1), 0 # encoding: [0xe5,0x00,0x1f,0xff,0x00,0x00]
+#CHECK: lasp 4095(%r15), 0 # encoding: [0xe5,0x00,0xff,0xff,0x00,0x00]
+
+ lasp 0, 0
+ lasp 0(%r1), 0(%r2)
+ lasp 160(%r1), 320(%r15)
+ lasp 0(%r1), 4095
+ lasp 0(%r1), 4095(%r2)
+ lasp 0(%r1), 4095(%r15)
+ lasp 0(%r1), 0
+ lasp 0(%r15), 0
+ lasp 4095(%r1), 0
+ lasp 4095(%r15), 0
+
#CHECK: lay %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x71]
#CHECK: lay %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x71]
#CHECK: lay %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x71]
@@ -7969,6 +8231,20 @@
lbr %r7, %r8
lbr %r15, %r0
+#CHECK: lcctl 0 # encoding: [0xb2,0x84,0x00,0x00]
+#CHECK: lcctl 0(%r1) # encoding: [0xb2,0x84,0x10,0x00]
+#CHECK: lcctl 0(%r15) # encoding: [0xb2,0x84,0xf0,0x00]
+#CHECK: lcctl 4095 # encoding: [0xb2,0x84,0x0f,0xff]
+#CHECK: lcctl 4095(%r1) # encoding: [0xb2,0x84,0x1f,0xff]
+#CHECK: lcctl 4095(%r15) # encoding: [0xb2,0x84,0xff,0xff]
+
+ lcctl 0
+ lcctl 0(%r1)
+ lcctl 0(%r15)
+ lcctl 4095
+ lcctl 4095(%r1)
+ lcctl 4095(%r15)
+
#CHECK: lcdbr %f0, %f9 # encoding: [0xb3,0x13,0x00,0x09]
#CHECK: lcdbr %f0, %f15 # encoding: [0xb3,0x13,0x00,0x0f]
#CHECK: lcdbr %f15, %f0 # encoding: [0xb3,0x13,0x00,0xf0]
@@ -8039,6 +8315,56 @@
lcr %r15,%r0
lcr %r7,%r8
+#CHECK: lctl %c0, %c0, 0 # encoding: [0xb7,0x00,0x00,0x00]
+#CHECK: lctl %c0, %c15, 0 # encoding: [0xb7,0x0f,0x00,0x00]
+#CHECK: lctl %c14, %c15, 0 # encoding: [0xb7,0xef,0x00,0x00]
+#CHECK: lctl %c15, %c15, 0 # encoding: [0xb7,0xff,0x00,0x00]
+#CHECK: lctl %c0, %c0, 4095 # encoding: [0xb7,0x00,0x0f,0xff]
+#CHECK: lctl %c0, %c0, 1 # encoding: [0xb7,0x00,0x00,0x01]
+#CHECK: lctl %c0, %c0, 0(%r1) # encoding: [0xb7,0x00,0x10,0x00]
+#CHECK: lctl %c0, %c0, 0(%r15) # encoding: [0xb7,0x00,0xf0,0x00]
+#CHECK: lctl %c0, %c0, 4095(%r1) # encoding: [0xb7,0x00,0x1f,0xff]
+#CHECK: lctl %c0, %c0, 4095(%r15) # encoding: [0xb7,0x00,0xff,0xff]
+
+ lctl %c0,%c0,0
+ lctl %c0,%c15,0
+ lctl %c14,%c15,0
+ lctl %c15,%c15,0
+ lctl %c0,%c0,4095
+ lctl %c0,%c0,1
+ lctl %c0,%c0,0(%r1)
+ lctl %c0,%c0,0(%r15)
+ lctl %c0,%c0,4095(%r1)
+ lctl %c0,%c0,4095(%r15)
+
+#CHECK: lctlg %c0, %c0, 0 # encoding: [0xeb,0x00,0x00,0x00,0x00,0x2f]
+#CHECK: lctlg %c0, %c15, 0 # encoding: [0xeb,0x0f,0x00,0x00,0x00,0x2f]
+#CHECK: lctlg %c14, %c15, 0 # encoding: [0xeb,0xef,0x00,0x00,0x00,0x2f]
+#CHECK: lctlg %c15, %c15, 0 # encoding: [0xeb,0xff,0x00,0x00,0x00,0x2f]
+#CHECK: lctlg %c0, %c0, -524288 # encoding: [0xeb,0x00,0x00,0x00,0x80,0x2f]
+#CHECK: lctlg %c0, %c0, -1 # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x2f]
+#CHECK: lctlg %c0, %c0, 0 # encoding: [0xeb,0x00,0x00,0x00,0x00,0x2f]
+#CHECK: lctlg %c0, %c0, 1 # encoding: [0xeb,0x00,0x00,0x01,0x00,0x2f]
+#CHECK: lctlg %c0, %c0, 524287 # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x2f]
+#CHECK: lctlg %c0, %c0, 0(%r1) # encoding: [0xeb,0x00,0x10,0x00,0x00,0x2f]
+#CHECK: lctlg %c0, %c0, 0(%r15) # encoding: [0xeb,0x00,0xf0,0x00,0x00,0x2f]
+#CHECK: lctlg %c0, %c0, 524287(%r1) # encoding: [0xeb,0x00,0x1f,0xff,0x7f,0x2f]
+#CHECK: lctlg %c0, %c0, 524287(%r15) # encoding: [0xeb,0x00,0xff,0xff,0x7f,0x2f]
+
+ lctlg %c0,%c0,0
+ lctlg %c0,%c15,0
+ lctlg %c14,%c15,0
+ lctlg %c15,%c15,0
+ lctlg %c0,%c0,-524288
+ lctlg %c0,%c0,-1
+ lctlg %c0,%c0,0
+ lctlg %c0,%c0,1
+ lctlg %c0,%c0,524287
+ lctlg %c0,%c0,0(%r1)
+ lctlg %c0,%c0,0(%r15)
+ lctlg %c0,%c0,524287(%r1)
+ lctlg %c0,%c0,524287(%r15)
+
#CHECK: lcxbr %f0, %f8 # encoding: [0xb3,0x43,0x00,0x08]
#CHECK: lcxbr %f0, %f13 # encoding: [0xb3,0x43,0x00,0x0d]
#CHECK: lcxbr %f13, %f0 # encoding: [0xb3,0x43,0x00,0xd0]
@@ -9291,6 +9617,20 @@
lnxr %f13,%f0
lnxr %f13,%f9
+#CHECK: lpctl 0 # encoding: [0xb2,0x85,0x00,0x00]
+#CHECK: lpctl 0(%r1) # encoding: [0xb2,0x85,0x10,0x00]
+#CHECK: lpctl 0(%r15) # encoding: [0xb2,0x85,0xf0,0x00]
+#CHECK: lpctl 4095 # encoding: [0xb2,0x85,0x0f,0xff]
+#CHECK: lpctl 4095(%r1) # encoding: [0xb2,0x85,0x1f,0xff]
+#CHECK: lpctl 4095(%r15) # encoding: [0xb2,0x85,0xff,0xff]
+
+ lpctl 0
+ lpctl 0(%r1)
+ lpctl 0(%r15)
+ lpctl 4095
+ lpctl 4095(%r1)
+ lpctl 4095(%r15)
+
#CHECK: lpdbr %f0, %f9 # encoding: [0xb3,0x10,0x00,0x09]
#CHECK: lpdbr %f0, %f15 # encoding: [0xb3,0x10,0x00,0x0f]
#CHECK: lpdbr %f15, %f0 # encoding: [0xb3,0x10,0x00,0xf0]
@@ -9351,6 +9691,20 @@
lpgr %r15,%r0
lpgr %r7,%r8
+#CHECK: lpp 0 # encoding: [0xb2,0x80,0x00,0x00]
+#CHECK: lpp 0(%r1) # encoding: [0xb2,0x80,0x10,0x00]
+#CHECK: lpp 0(%r15) # encoding: [0xb2,0x80,0xf0,0x00]
+#CHECK: lpp 4095 # encoding: [0xb2,0x80,0x0f,0xff]
+#CHECK: lpp 4095(%r1) # encoding: [0xb2,0x80,0x1f,0xff]
+#CHECK: lpp 4095(%r15) # encoding: [0xb2,0x80,0xff,0xff]
+
+ lpp 0
+ lpp 0(%r1)
+ lpp 0(%r15)
+ lpp 4095
+ lpp 4095(%r1)
+ lpp 4095(%r15)
+
#CHECK: lpq %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x8f]
#CHECK: lpq %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x8f]
#CHECK: lpq %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x8f]
@@ -9383,6 +9737,48 @@
lpr %r15,%r0
lpr %r7,%r8
+#CHECK: lpsw 0 # encoding: [0x82,0x00,0x00,0x00]
+#CHECK: lpsw 0(%r1) # encoding: [0x82,0x00,0x10,0x00]
+#CHECK: lpsw 0(%r15) # encoding: [0x82,0x00,0xf0,0x00]
+#CHECK: lpsw 4095 # encoding: [0x82,0x00,0x0f,0xff]
+#CHECK: lpsw 4095(%r1) # encoding: [0x82,0x00,0x1f,0xff]
+#CHECK: lpsw 4095(%r15) # encoding: [0x82,0x00,0xff,0xff]
+
+ lpsw 0
+ lpsw 0(%r1)
+ lpsw 0(%r15)
+ lpsw 4095
+ lpsw 4095(%r1)
+ lpsw 4095(%r15)
+
+#CHECK: lpswe 0 # encoding: [0xb2,0xb2,0x00,0x00]
+#CHECK: lpswe 0(%r1) # encoding: [0xb2,0xb2,0x10,0x00]
+#CHECK: lpswe 0(%r15) # encoding: [0xb2,0xb2,0xf0,0x00]
+#CHECK: lpswe 4095 # encoding: [0xb2,0xb2,0x0f,0xff]
+#CHECK: lpswe 4095(%r1) # encoding: [0xb2,0xb2,0x1f,0xff]
+#CHECK: lpswe 4095(%r15) # encoding: [0xb2,0xb2,0xff,0xff]
+
+ lpswe 0
+ lpswe 0(%r1)
+ lpswe 0(%r15)
+ lpswe 4095
+ lpswe 4095(%r1)
+ lpswe 4095(%r15)
+
+#CHECK: lptea %r0, %r0, %r0, 0 # encoding: [0xb9,0xaa,0x00,0x00]
+#CHECK: lptea %r0, %r0, %r0, 15 # encoding: [0xb9,0xaa,0x0f,0x00]
+#CHECK: lptea %r0, %r0, %r15, 0 # encoding: [0xb9,0xaa,0x00,0x0f]
+#CHECK: lptea %r0, %r15, %r0, 0 # encoding: [0xb9,0xaa,0xf0,0x00]
+#CHECK: lptea %r4, %r5, %r6, 7 # encoding: [0xb9,0xaa,0x57,0x46]
+#CHECK: lptea %r15, %r0, %r0, 0 # encoding: [0xb9,0xaa,0x00,0xf0]
+
+ lptea %r0, %r0, %r0, 0
+ lptea %r0, %r0, %r0, 15
+ lptea %r0, %r0, %r15, 0
+ lptea %r0, %r15, %r0, 0
+ lptea %r4, %r5, %r6, 7
+ lptea %r15, %r0, %r0, 0
+
#CHECK: lpxbr %f0, %f8 # encoding: [0xb3,0x40,0x00,0x08]
#CHECK: lpxbr %f0, %f13 # encoding: [0xb3,0x40,0x00,0x0d]
#CHECK: lpxbr %f13, %f0 # encoding: [0xb3,0x40,0x00,0xd0]
@@ -9413,6 +9809,66 @@
lr %r15,%r0
lr %r15,%r9
+#CHECK: lra %r0, 0 # encoding: [0xb1,0x00,0x00,0x00]
+#CHECK: lra %r0, 4095 # encoding: [0xb1,0x00,0x0f,0xff]
+#CHECK: lra %r0, 0(%r1) # encoding: [0xb1,0x00,0x10,0x00]
+#CHECK: lra %r0, 0(%r15) # encoding: [0xb1,0x00,0xf0,0x00]
+#CHECK: lra %r0, 4095(%r1,%r15) # encoding: [0xb1,0x01,0xff,0xff]
+#CHECK: lra %r0, 4095(%r15,%r1) # encoding: [0xb1,0x0f,0x1f,0xff]
+#CHECK: lra %r15, 0 # encoding: [0xb1,0xf0,0x00,0x00]
+
+ lra %r0, 0
+ lra %r0, 4095
+ lra %r0, 0(%r1)
+ lra %r0, 0(%r15)
+ lra %r0, 4095(%r1,%r15)
+ lra %r0, 4095(%r15,%r1)
+ lra %r15, 0
+
+#CHECK: lrag %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x03]
+#CHECK: lrag %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x03]
+#CHECK: lrag %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x03]
+#CHECK: lrag %r0, 1 # encoding: [0xe3,0x00,0x00,0x01,0x00,0x03]
+#CHECK: lrag %r0, 524287 # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x03]
+#CHECK: lrag %r0, 0(%r1) # encoding: [0xe3,0x00,0x10,0x00,0x00,0x03]
+#CHECK: lrag %r0, 0(%r15) # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x03]
+#CHECK: lrag %r0, 524287(%r1,%r15) # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x03]
+#CHECK: lrag %r0, 524287(%r15,%r1) # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x03]
+#CHECK: lrag %r15, 0 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x03]
+
+ lrag %r0, -524288
+ lrag %r0, -1
+ lrag %r0, 0
+ lrag %r0, 1
+ lrag %r0, 524287
+ lrag %r0, 0(%r1)
+ lrag %r0, 0(%r15)
+ lrag %r0, 524287(%r1,%r15)
+ lrag %r0, 524287(%r15,%r1)
+ lrag %r15, 0
+
+#CHECK: lray %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x13]
+#CHECK: lray %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x13]
+#CHECK: lray %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x13]
+#CHECK: lray %r0, 1 # encoding: [0xe3,0x00,0x00,0x01,0x00,0x13]
+#CHECK: lray %r0, 524287 # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x13]
+#CHECK: lray %r0, 0(%r1) # encoding: [0xe3,0x00,0x10,0x00,0x00,0x13]
+#CHECK: lray %r0, 0(%r15) # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x13]
+#CHECK: lray %r0, 524287(%r1,%r15) # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x13]
+#CHECK: lray %r0, 524287(%r15,%r1) # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x13]
+#CHECK: lray %r15, 0 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x13]
+
+ lray %r0, -524288
+ lray %r0, -1
+ lray %r0, 0
+ lray %r0, 1
+ lray %r0, 524287
+ lray %r0, 0(%r1)
+ lray %r0, 0(%r15)
+ lray %r0, 524287(%r1,%r15)
+ lray %r0, 524287(%r15,%r1)
+ lray %r15, 0
+
#CHECK: lrdr %f0, %f0 # encoding: [0x25,0x00]
#CHECK: lrdr %f0, %f13 # encoding: [0x25,0x0d]
#CHECK: lrdr %f7, %f8 # encoding: [0x25,0x78]
@@ -9564,6 +10020,20 @@
lrvr %r7,%r8
lrvr %r15,%r15
+#CHECK: lsctl 0 # encoding: [0xb2,0x87,0x00,0x00]
+#CHECK: lsctl 0(%r1) # encoding: [0xb2,0x87,0x10,0x00]
+#CHECK: lsctl 0(%r15) # encoding: [0xb2,0x87,0xf0,0x00]
+#CHECK: lsctl 4095 # encoding: [0xb2,0x87,0x0f,0xff]
+#CHECK: lsctl 4095(%r1) # encoding: [0xb2,0x87,0x1f,0xff]
+#CHECK: lsctl 4095(%r15) # encoding: [0xb2,0x87,0xff,0xff]
+
+ lsctl 0
+ lsctl 0(%r1)
+ lsctl 0(%r15)
+ lsctl 4095
+ lsctl 4095(%r1)
+ lsctl 4095(%r15)
+
#CHECK: lt %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x12]
#CHECK: lt %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x12]
#CHECK: lt %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x12]
@@ -9740,6 +10210,26 @@
ltxtr %f13,%f0
ltxtr %f13,%f9
+#CHECK: lura %r0, %r0 # encoding: [0xb2,0x4b,0x00,0x00]
+#CHECK: lura %r0, %r15 # encoding: [0xb2,0x4b,0x00,0x0f]
+#CHECK: lura %r15, %r0 # encoding: [0xb2,0x4b,0x00,0xf0]
+#CHECK: lura %r7, %r8 # encoding: [0xb2,0x4b,0x00,0x78]
+
+ lura %r0,%r0
+ lura %r0,%r15
+ lura %r15,%r0
+ lura %r7,%r8
+
+#CHECK: lurag %r0, %r0 # encoding: [0xb9,0x05,0x00,0x00]
+#CHECK: lurag %r0, %r15 # encoding: [0xb9,0x05,0x00,0x0f]
+#CHECK: lurag %r15, %r0 # encoding: [0xb9,0x05,0x00,0xf0]
+#CHECK: lurag %r7, %r8 # encoding: [0xb9,0x05,0x00,0x78]
+
+ lurag %r0,%r0
+ lurag %r0,%r15
+ lurag %r15,%r0
+ lurag %r7,%r8
+
#CHECK: lxd %f0, 4095 # encoding: [0xed,0x00,0x0f,0xff,0x00,0x25]
#CHECK: lxd %f0, 0(%r1) # encoding: [0xed,0x00,0x10,0x00,0x00,0x25]
#CHECK: lxd %f0, 0(%r15) # encoding: [0xed,0x00,0xf0,0x00,0x00,0x25]
@@ -10578,6 +11068,20 @@
ms %r0, 4095(%r15,%r1)
ms %r15, 0
+#CHECK: msch 0 # encoding: [0xb2,0x32,0x00,0x00]
+#CHECK: msch 0(%r1) # encoding: [0xb2,0x32,0x10,0x00]
+#CHECK: msch 0(%r15) # encoding: [0xb2,0x32,0xf0,0x00]
+#CHECK: msch 4095 # encoding: [0xb2,0x32,0x0f,0xff]
+#CHECK: msch 4095(%r1) # encoding: [0xb2,0x32,0x1f,0xff]
+#CHECK: msch 4095(%r15) # encoding: [0xb2,0x32,0xff,0xff]
+
+ msch 0
+ msch 0(%r1)
+ msch 0(%r15)
+ msch 4095
+ msch 4095(%r1)
+ msch 4095(%r15)
+
#CHECK: msd %f0, %f0, 0 # encoding: [0xed,0x00,0x00,0x00,0x00,0x3f]
#CHECK: msd %f0, %f0, 4095 # encoding: [0xed,0x00,0x0f,0xff,0x00,0x3f]
#CHECK: msd %f0, %f0, 0(%r1) # encoding: [0xed,0x00,0x10,0x00,0x00,0x3f]
@@ -10816,6 +11320,14 @@
msr %r15,%r0
msr %r7,%r8
+#CHECK: msta %r0 # encoding: [0xb2,0x47,0x00,0x00]
+#CHECK: msta %r2 # encoding: [0xb2,0x47,0x00,0x20]
+#CHECK: msta %r14 # encoding: [0xb2,0x47,0x00,0xe0]
+
+ msta %r0
+ msta %r2
+ msta %r14
+
#CHECK: msy %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x51]
#CHECK: msy %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x51]
#CHECK: msy %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x51]
@@ -10864,6 +11376,28 @@
mvc 0(256,%r1), 0
mvc 0(256,%r15), 0
+#CHECK: mvcdk 0, 0 # encoding: [0xe5,0x0f,0x00,0x00,0x00,0x00]
+#CHECK: mvcdk 0(%r1), 0(%r2) # encoding: [0xe5,0x0f,0x10,0x00,0x20,0x00]
+#CHECK: mvcdk 160(%r1), 320(%r15) # encoding: [0xe5,0x0f,0x10,0xa0,0xf1,0x40]
+#CHECK: mvcdk 0(%r1), 4095 # encoding: [0xe5,0x0f,0x10,0x00,0x0f,0xff]
+#CHECK: mvcdk 0(%r1), 4095(%r2) # encoding: [0xe5,0x0f,0x10,0x00,0x2f,0xff]
+#CHECK: mvcdk 0(%r1), 4095(%r15) # encoding: [0xe5,0x0f,0x10,0x00,0xff,0xff]
+#CHECK: mvcdk 0(%r1), 0 # encoding: [0xe5,0x0f,0x10,0x00,0x00,0x00]
+#CHECK: mvcdk 0(%r15), 0 # encoding: [0xe5,0x0f,0xf0,0x00,0x00,0x00]
+#CHECK: mvcdk 4095(%r1), 0 # encoding: [0xe5,0x0f,0x1f,0xff,0x00,0x00]
+#CHECK: mvcdk 4095(%r15), 0 # encoding: [0xe5,0x0f,0xff,0xff,0x00,0x00]
+
+ mvcdk 0, 0
+ mvcdk 0(%r1), 0(%r2)
+ mvcdk 160(%r1), 320(%r15)
+ mvcdk 0(%r1), 4095
+ mvcdk 0(%r1), 4095(%r2)
+ mvcdk 0(%r1), 4095(%r15)
+ mvcdk 0(%r1), 0
+ mvcdk 0(%r15), 0
+ mvcdk 4095(%r1), 0
+ mvcdk 4095(%r15), 0
+
#CHECK: mvcin 0(1), 0 # encoding: [0xe8,0x00,0x00,0x00,0x00,0x00]
#CHECK: mvcin 0(1), 0(%r1) # encoding: [0xe8,0x00,0x00,0x00,0x10,0x00]
#CHECK: mvcin 0(1), 0(%r15) # encoding: [0xe8,0x00,0x00,0x00,0xf0,0x00]
@@ -10966,6 +11500,98 @@
mvclu %r0, %r0, 524287(%r1)
mvclu %r14, %r0, 0
+#CHECK: mvcos 0, 0, %r0 # encoding: [0xc8,0x00,0x00,0x00,0x00,0x00]
+#CHECK: mvcos 0(%r1), 0(%r15), %r2 # encoding: [0xc8,0x20,0x10,0x00,0xf0,0x00]
+#CHECK: mvcos 1(%r1), 0(%r15), %r2 # encoding: [0xc8,0x20,0x10,0x01,0xf0,0x00]
+#CHECK: mvcos 4095(%r1), 0(%r15), %r2 # encoding: [0xc8,0x20,0x1f,0xff,0xf0,0x00]
+#CHECK: mvcos 0(%r1), 1(%r15), %r2 # encoding: [0xc8,0x20,0x10,0x00,0xf0,0x01]
+#CHECK: mvcos 0(%r1), 4095(%r15), %r2 # encoding: [0xc8,0x20,0x10,0x00,0xff,0xff]
+
+ mvcos 0, 0, %r0
+ mvcos 0(%r1), 0(%r15), %r2
+ mvcos 1(%r1), 0(%r15), %r2
+ mvcos 4095(%r1), 0(%r15), %r2
+ mvcos 0(%r1), 1(%r15), %r2
+ mvcos 0(%r1), 4095(%r15), %r2
+
+#CHECK: mvcp 0(%r0), 0, %r3 # encoding: [0xda,0x03,0x00,0x00,0x00,0x00]
+#CHECK: mvcp 0(%r1), 0, %r3 # encoding: [0xda,0x13,0x00,0x00,0x00,0x00]
+#CHECK: mvcp 0(%r1), 0(%r1), %r3 # encoding: [0xda,0x13,0x00,0x00,0x10,0x00]
+#CHECK: mvcp 0(%r1), 0(%r15), %r3 # encoding: [0xda,0x13,0x00,0x00,0xf0,0x00]
+#CHECK: mvcp 0(%r1), 4095, %r3 # encoding: [0xda,0x13,0x00,0x00,0x0f,0xff]
+#CHECK: mvcp 0(%r1), 4095(%r1), %r3 # encoding: [0xda,0x13,0x00,0x00,0x1f,0xff]
+#CHECK: mvcp 0(%r1), 4095(%r15), %r3 # encoding: [0xda,0x13,0x00,0x00,0xff,0xff]
+#CHECK: mvcp 0(%r2,%r1), 0, %r3 # encoding: [0xda,0x23,0x10,0x00,0x00,0x00]
+#CHECK: mvcp 0(%r2,%r15), 0, %r3 # encoding: [0xda,0x23,0xf0,0x00,0x00,0x00]
+#CHECK: mvcp 4095(%r2,%r1), 0, %r3 # encoding: [0xda,0x23,0x1f,0xff,0x00,0x00]
+#CHECK: mvcp 4095(%r2,%r15), 0, %r3 # encoding: [0xda,0x23,0xff,0xff,0x00,0x00]
+#CHECK: mvcp 0(%r2,%r1), 0, %r3 # encoding: [0xda,0x23,0x10,0x00,0x00,0x00]
+#CHECK: mvcp 0(%r2,%r15), 0, %r3 # encoding: [0xda,0x23,0xf0,0x00,0x00,0x00]
+
+ mvcp 0(%r0), 0, %r3
+ mvcp 0(%r1), 0, %r3
+ mvcp 0(%r1), 0(%r1), %r3
+ mvcp 0(%r1), 0(%r15), %r3
+ mvcp 0(%r1), 4095, %r3
+ mvcp 0(%r1), 4095(%r1), %r3
+ mvcp 0(%r1), 4095(%r15), %r3
+ mvcp 0(%r2,%r1), 0, %r3
+ mvcp 0(%r2,%r15), 0, %r3
+ mvcp 4095(%r2,%r1), 0, %r3
+ mvcp 4095(%r2,%r15), 0, %r3
+ mvcp 0(%r2,%r1), 0, %r3
+ mvcp 0(%r2,%r15), 0, %r3
+
+#CHECK: mvcs 0(%r0), 0, %r3 # encoding: [0xdb,0x03,0x00,0x00,0x00,0x00]
+#CHECK: mvcs 0(%r1), 0, %r3 # encoding: [0xdb,0x13,0x00,0x00,0x00,0x00]
+#CHECK: mvcs 0(%r1), 0(%r1), %r3 # encoding: [0xdb,0x13,0x00,0x00,0x10,0x00]
+#CHECK: mvcs 0(%r1), 0(%r15), %r3 # encoding: [0xdb,0x13,0x00,0x00,0xf0,0x00]
+#CHECK: mvcs 0(%r1), 4095, %r3 # encoding: [0xdb,0x13,0x00,0x00,0x0f,0xff]
+#CHECK: mvcs 0(%r1), 4095(%r1), %r3 # encoding: [0xdb,0x13,0x00,0x00,0x1f,0xff]
+#CHECK: mvcs 0(%r1), 4095(%r15), %r3 # encoding: [0xdb,0x13,0x00,0x00,0xff,0xff]
+#CHECK: mvcs 0(%r2,%r1), 0, %r3 # encoding: [0xdb,0x23,0x10,0x00,0x00,0x00]
+#CHECK: mvcs 0(%r2,%r15), 0, %r3 # encoding: [0xdb,0x23,0xf0,0x00,0x00,0x00]
+#CHECK: mvcs 4095(%r2,%r1), 0, %r3 # encoding: [0xdb,0x23,0x1f,0xff,0x00,0x00]
+#CHECK: mvcs 4095(%r2,%r15), 0, %r3 # encoding: [0xdb,0x23,0xff,0xff,0x00,0x00]
+#CHECK: mvcs 0(%r2,%r1), 0, %r3 # encoding: [0xdb,0x23,0x10,0x00,0x00,0x00]
+#CHECK: mvcs 0(%r2,%r15), 0, %r3 # encoding: [0xdb,0x23,0xf0,0x00,0x00,0x00]
+
+ mvcs 0(%r0), 0, %r3
+ mvcs 0(%r1), 0, %r3
+ mvcs 0(%r1), 0(%r1), %r3
+ mvcs 0(%r1), 0(%r15), %r3
+ mvcs 0(%r1), 4095, %r3
+ mvcs 0(%r1), 4095(%r1), %r3
+ mvcs 0(%r1), 4095(%r15), %r3
+ mvcs 0(%r2,%r1), 0, %r3
+ mvcs 0(%r2,%r15), 0, %r3
+ mvcs 4095(%r2,%r1), 0, %r3
+ mvcs 4095(%r2,%r15), 0, %r3
+ mvcs 0(%r2,%r1), 0, %r3
+ mvcs 0(%r2,%r15), 0, %r3
+
+#CHECK: mvcsk 0, 0 # encoding: [0xe5,0x0e,0x00,0x00,0x00,0x00]
+#CHECK: mvcsk 0(%r1), 0(%r2) # encoding: [0xe5,0x0e,0x10,0x00,0x20,0x00]
+#CHECK: mvcsk 160(%r1), 320(%r15) # encoding: [0xe5,0x0e,0x10,0xa0,0xf1,0x40]
+#CHECK: mvcsk 0(%r1), 4095 # encoding: [0xe5,0x0e,0x10,0x00,0x0f,0xff]
+#CHECK: mvcsk 0(%r1), 4095(%r2) # encoding: [0xe5,0x0e,0x10,0x00,0x2f,0xff]
+#CHECK: mvcsk 0(%r1), 4095(%r15) # encoding: [0xe5,0x0e,0x10,0x00,0xff,0xff]
+#CHECK: mvcsk 0(%r1), 0 # encoding: [0xe5,0x0e,0x10,0x00,0x00,0x00]
+#CHECK: mvcsk 0(%r15), 0 # encoding: [0xe5,0x0e,0xf0,0x00,0x00,0x00]
+#CHECK: mvcsk 4095(%r1), 0 # encoding: [0xe5,0x0e,0x1f,0xff,0x00,0x00]
+#CHECK: mvcsk 4095(%r15), 0 # encoding: [0xe5,0x0e,0xff,0xff,0x00,0x00]
+
+ mvcsk 0, 0
+ mvcsk 0(%r1), 0(%r2)
+ mvcsk 160(%r1), 320(%r15)
+ mvcsk 0(%r1), 4095
+ mvcsk 0(%r1), 4095(%r2)
+ mvcsk 0(%r1), 4095(%r15)
+ mvcsk 0(%r1), 0
+ mvcsk 0(%r15), 0
+ mvcsk 4095(%r1), 0
+ mvcsk 4095(%r15), 0
+
#CHECK: mvghi 0, 0 # encoding: [0xe5,0x48,0x00,0x00,0x00,0x00]
#CHECK: mvghi 4095, 0 # encoding: [0xe5,0x48,0x0f,0xff,0x00,0x00]
#CHECK: mvghi 0, -32768 # encoding: [0xe5,0x48,0x00,0x00,0x80,0x00]
@@ -11132,6 +11758,16 @@
mvo 0(1), 0(16,%r1)
mvo 0(1), 0(16,%r15)
+#CHECK: mvpg %r0, %r0 # encoding: [0xb2,0x54,0x00,0x00]
+#CHECK: mvpg %r0, %r15 # encoding: [0xb2,0x54,0x00,0x0f]
+#CHECK: mvpg %r15, %r0 # encoding: [0xb2,0x54,0x00,0xf0]
+#CHECK: mvpg %r7, %r8 # encoding: [0xb2,0x54,0x00,0x78]
+
+ mvpg %r0,%r0
+ mvpg %r0,%r15
+ mvpg %r15,%r0
+ mvpg %r7,%r8
+
#CHECK: mvst %r0, %r0 # encoding: [0xb2,0x55,0x00,0x00]
#CHECK: mvst %r0, %r15 # encoding: [0xb2,0x55,0x00,0x0f]
#CHECK: mvst %r15, %r0 # encoding: [0xb2,0x55,0x00,0xf0]
@@ -11790,6 +12426,24 @@
pack 0(1), 0(16,%r1)
pack 0(1), 0(16,%r15)
+#CHECK: palb # encoding: [0xb2,0x48,0x00,0x00]
+
+ palb
+
+#CHECK: pc 0 # encoding: [0xb2,0x18,0x00,0x00]
+#CHECK: pc 0(%r1) # encoding: [0xb2,0x18,0x10,0x00]
+#CHECK: pc 0(%r15) # encoding: [0xb2,0x18,0xf0,0x00]
+#CHECK: pc 4095 # encoding: [0xb2,0x18,0x0f,0xff]
+#CHECK: pc 4095(%r1) # encoding: [0xb2,0x18,0x1f,0xff]
+#CHECK: pc 4095(%r15) # encoding: [0xb2,0x18,0xff,0xff]
+
+ pc 0
+ pc 0(%r1)
+ pc 0(%r15)
+ pc 4095
+ pc 4095(%r1)
+ pc 4095(%r15)
+
#CHECK: pfd 0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x36]
#CHECK: pfd 0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x36]
#CHECK: pfd 0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x36]
@@ -11849,9 +12503,45 @@
pfdrl 7, frob@PLT
pfdrl 8, frob@PLT
+#CHECK: pfmf %r0, %r0 # encoding: [0xb9,0xaf,0x00,0x00]
+#CHECK: pfmf %r0, %r15 # encoding: [0xb9,0xaf,0x00,0x0f]
+#CHECK: pfmf %r15, %r0 # encoding: [0xb9,0xaf,0x00,0xf0]
+#CHECK: pfmf %r7, %r8 # encoding: [0xb9,0xaf,0x00,0x78]
+#CHECK: pfmf %r15, %r15 # encoding: [0xb9,0xaf,0x00,0xff]
+
+ pfmf %r0,%r0
+ pfmf %r0,%r15
+ pfmf %r15,%r0
+ pfmf %r7,%r8
+ pfmf %r15,%r15
+
#CHECK: pfpo # encoding: [0x01,0x0a]
pfpo
+#CHECK: pgin %r0, %r0 # encoding: [0xb2,0x2e,0x00,0x00]
+#CHECK: pgin %r0, %r15 # encoding: [0xb2,0x2e,0x00,0x0f]
+#CHECK: pgin %r15, %r0 # encoding: [0xb2,0x2e,0x00,0xf0]
+#CHECK: pgin %r7, %r8 # encoding: [0xb2,0x2e,0x00,0x78]
+#CHECK: pgin %r15, %r15 # encoding: [0xb2,0x2e,0x00,0xff]
+
+ pgin %r0,%r0
+ pgin %r0,%r15
+ pgin %r15,%r0
+ pgin %r7,%r8
+ pgin %r15,%r15
+
+#CHECK: pgout %r0, %r0 # encoding: [0xb2,0x2f,0x00,0x00]
+#CHECK: pgout %r0, %r15 # encoding: [0xb2,0x2f,0x00,0x0f]
+#CHECK: pgout %r15, %r0 # encoding: [0xb2,0x2f,0x00,0xf0]
+#CHECK: pgout %r7, %r8 # encoding: [0xb2,0x2f,0x00,0x78]
+#CHECK: pgout %r15, %r15 # encoding: [0xb2,0x2f,0x00,0xff]
+
+ pgout %r0,%r0
+ pgout %r0,%r15
+ pgout %r15,%r0
+ pgout %r7,%r8
+ pgout %r15,%r15
+
#CHECK: pka 0, 0(1) # encoding: [0xe9,0x00,0x00,0x00,0x00,0x00]
#CHECK: pka 0, 0(1,%r1) # encoding: [0xe9,0x00,0x00,0x00,0x10,0x00]
#CHECK: pka 0, 0(1,%r15) # encoding: [0xe9,0x00,0x00,0x00,0xf0,0x00]
@@ -11919,8 +12609,45 @@
plo %r2, 0(%r1), %r4, 4095(%r15)
#CHECK: pr # encoding: [0x01,0x01]
+
pr
+#CHECK: pt %r0, %r0 # encoding: [0xb2,0x28,0x00,0x00]
+#CHECK: pt %r0, %r15 # encoding: [0xb2,0x28,0x00,0x0f]
+#CHECK: pt %r15, %r0 # encoding: [0xb2,0x28,0x00,0xf0]
+#CHECK: pt %r7, %r8 # encoding: [0xb2,0x28,0x00,0x78]
+
+ pt %r0,%r0
+ pt %r0,%r15
+ pt %r15,%r0
+ pt %r7,%r8
+
+#CHECK: ptf %r0 # encoding: [0xb9,0xa2,0x00,0x00]
+#CHECK: ptf %r1 # encoding: [0xb9,0xa2,0x00,0x10]
+#CHECK: ptf %r15 # encoding: [0xb9,0xa2,0x00,0xf0]
+
+ ptf %r0
+ ptf %r1
+ ptf %r15
+
+#CHECK: ptff # encoding: [0x01,0x04]
+
+ ptff
+
+#CHECK: pti %r0, %r0 # encoding: [0xb9,0x9e,0x00,0x00]
+#CHECK: pti %r0, %r15 # encoding: [0xb9,0x9e,0x00,0x0f]
+#CHECK: pti %r15, %r0 # encoding: [0xb9,0x9e,0x00,0xf0]
+#CHECK: pti %r7, %r8 # encoding: [0xb9,0x9e,0x00,0x78]
+
+ pti %r0,%r0
+ pti %r0,%r15
+ pti %r15,%r0
+ pti %r7,%r8
+
+#CHECK: ptlb # encoding: [0xb2,0x0d,0x00,0x00]
+
+ ptlb
+
#CHECK: qadtr %f0, %f0, %f0, 0 # encoding: [0xb3,0xf5,0x00,0x00]
#CHECK: qadtr %f0, %f0, %f0, 15 # encoding: [0xb3,0xf5,0x0f,0x00]
#CHECK: qadtr %f0, %f0, %f15, 0 # encoding: [0xb3,0xf5,0x00,0x0f]
@@ -11949,6 +12676,38 @@
qaxtr %f8, %f8, %f8, 8
qaxtr %f13, %f0, %f0, 0
+#CHECK: qctri 0 # encoding: [0xb2,0x8e,0x00,0x00]
+#CHECK: qctri 0(%r1) # encoding: [0xb2,0x8e,0x10,0x00]
+#CHECK: qctri 0(%r15) # encoding: [0xb2,0x8e,0xf0,0x00]
+#CHECK: qctri 4095 # encoding: [0xb2,0x8e,0x0f,0xff]
+#CHECK: qctri 4095(%r1) # encoding: [0xb2,0x8e,0x1f,0xff]
+#CHECK: qctri 4095(%r15) # encoding: [0xb2,0x8e,0xff,0xff]
+
+ qctri 0
+ qctri 0(%r1)
+ qctri 0(%r15)
+ qctri 4095
+ qctri 4095(%r1)
+ qctri 4095(%r15)
+
+#CHECK: qsi 0 # encoding: [0xb2,0x86,0x00,0x00]
+#CHECK: qsi 0(%r1) # encoding: [0xb2,0x86,0x10,0x00]
+#CHECK: qsi 0(%r15) # encoding: [0xb2,0x86,0xf0,0x00]
+#CHECK: qsi 4095 # encoding: [0xb2,0x86,0x0f,0xff]
+#CHECK: qsi 4095(%r1) # encoding: [0xb2,0x86,0x1f,0xff]
+#CHECK: qsi 4095(%r15) # encoding: [0xb2,0x86,0xff,0xff]
+
+ qsi 0
+ qsi 0(%r1)
+ qsi 0(%r15)
+ qsi 4095
+ qsi 4095(%r1)
+ qsi 4095(%r15)
+
+#CHECK: rchp # encoding: [0xb2,0x3b,0x00,0x00]
+
+ rchp
+
#CHECK: risbg %r0, %r0, 0, 0, 0 # encoding: [0xec,0x00,0x00,0x00,0x00,0x55]
#CHECK: risbg %r0, %r0, 0, 0, 63 # encoding: [0xec,0x00,0x00,0x00,0x3f,0x55]
#CHECK: risbg %r0, %r0, 0, 255, 0 # encoding: [0xec,0x00,0x00,0xff,0x00,0x55]
@@ -12049,6 +12808,32 @@
rosbg %r15,%r0,0,0,0
rosbg %r4,%r5,6,7,8
+#CHECK: rp 0 # encoding: [0xb2,0x77,0x00,0x00]
+#CHECK: rp 0(%r1) # encoding: [0xb2,0x77,0x10,0x00]
+#CHECK: rp 0(%r15) # encoding: [0xb2,0x77,0xf0,0x00]
+#CHECK: rp 4095 # encoding: [0xb2,0x77,0x0f,0xff]
+#CHECK: rp 4095(%r1) # encoding: [0xb2,0x77,0x1f,0xff]
+#CHECK: rp 4095(%r15) # encoding: [0xb2,0x77,0xff,0xff]
+
+ rp 0
+ rp 0(%r1)
+ rp 0(%r15)
+ rp 4095
+ rp 4095(%r1)
+ rp 4095(%r15)
+
+#CHECK: rrbe %r0, %r0 # encoding: [0xb2,0x2a,0x00,0x00]
+#CHECK: rrbe %r0, %r15 # encoding: [0xb2,0x2a,0x00,0x0f]
+#CHECK: rrbe %r15, %r0 # encoding: [0xb2,0x2a,0x00,0xf0]
+#CHECK: rrbe %r7, %r8 # encoding: [0xb2,0x2a,0x00,0x78]
+#CHECK: rrbe %r15, %r15 # encoding: [0xb2,0x2a,0x00,0xff]
+
+ rrbe %r0,%r0
+ rrbe %r0,%r15
+ rrbe %r15,%r0
+ rrbe %r7,%r8
+ rrbe %r15,%r15
+
#CHECK: rrdtr %f0, %f0, %f0, 0 # encoding: [0xb3,0xf7,0x00,0x00]
#CHECK: rrdtr %f0, %f0, %f0, 15 # encoding: [0xb3,0xf7,0x0f,0x00]
#CHECK: rrdtr %f0, %f0, %f15, 0 # encoding: [0xb3,0xf7,0x00,0x0f]
@@ -12077,6 +12862,10 @@
rrxtr %f8, %f8, %f8, 8
rrxtr %f13, %f0, %f0, 0
+#CHECK: rsch # encoding: [0xb2,0x38,0x00,0x00]
+
+ rsch
+
#CHECK: rxsbg %r0, %r0, 0, 0, 0 # encoding: [0xec,0x00,0x00,0x00,0x00,0x57]
#CHECK: rxsbg %r0, %r0, 0, 0, 63 # encoding: [0xec,0x00,0x00,0x00,0x3f,0x57]
#CHECK: rxsbg %r0, %r0, 0, 255, 0 # encoding: [0xec,0x00,0x00,0xff,0x00,0x57]
@@ -12109,6 +12898,38 @@
s %r0, 4095(%r15,%r1)
s %r15, 0
+#CHECK: sac 0 # encoding: [0xb2,0x19,0x00,0x00]
+#CHECK: sac 0(%r1) # encoding: [0xb2,0x19,0x10,0x00]
+#CHECK: sac 0(%r15) # encoding: [0xb2,0x19,0xf0,0x00]
+#CHECK: sac 4095 # encoding: [0xb2,0x19,0x0f,0xff]
+#CHECK: sac 4095(%r1) # encoding: [0xb2,0x19,0x1f,0xff]
+#CHECK: sac 4095(%r15) # encoding: [0xb2,0x19,0xff,0xff]
+
+ sac 0
+ sac 0(%r1)
+ sac 0(%r15)
+ sac 4095
+ sac 4095(%r1)
+ sac 4095(%r15)
+
+#CHECK: sacf 0 # encoding: [0xb2,0x79,0x00,0x00]
+#CHECK: sacf 0(%r1) # encoding: [0xb2,0x79,0x10,0x00]
+#CHECK: sacf 0(%r15) # encoding: [0xb2,0x79,0xf0,0x00]
+#CHECK: sacf 4095 # encoding: [0xb2,0x79,0x0f,0xff]
+#CHECK: sacf 4095(%r1) # encoding: [0xb2,0x79,0x1f,0xff]
+#CHECK: sacf 4095(%r15) # encoding: [0xb2,0x79,0xff,0xff]
+
+ sacf 0
+ sacf 0(%r1)
+ sacf 0(%r15)
+ sacf 4095
+ sacf 4095(%r1)
+ sacf 4095(%r15)
+
+#CHECK: sal # encoding: [0xb2,0x37,0x00,0x00]
+
+ sal
+
#CHECK: sam24 # encoding: [0x01,0x0c]
#CHECK: sam31 # encoding: [0x01,0x0d]
#CHECK: sam64 # encoding: [0x01,0x0e]
@@ -12129,6 +12950,52 @@
sar %a7, %r8
sar %a15, %r15
+#CHECK: scctr %r0, %r0 # encoding: [0xb2,0xe0,0x00,0x00]
+#CHECK: scctr %r0, %r15 # encoding: [0xb2,0xe0,0x00,0x0f]
+#CHECK: scctr %r15, %r0 # encoding: [0xb2,0xe0,0x00,0xf0]
+#CHECK: scctr %r7, %r8 # encoding: [0xb2,0xe0,0x00,0x78]
+
+ scctr %r0,%r0
+ scctr %r0,%r15
+ scctr %r15,%r0
+ scctr %r7,%r8
+
+#CHECK: schm # encoding: [0xb2,0x3c,0x00,0x00]
+
+ schm
+
+#CHECK: sck 0 # encoding: [0xb2,0x04,0x00,0x00]
+#CHECK: sck 0(%r1) # encoding: [0xb2,0x04,0x10,0x00]
+#CHECK: sck 0(%r15) # encoding: [0xb2,0x04,0xf0,0x00]
+#CHECK: sck 4095 # encoding: [0xb2,0x04,0x0f,0xff]
+#CHECK: sck 4095(%r1) # encoding: [0xb2,0x04,0x1f,0xff]
+#CHECK: sck 4095(%r15) # encoding: [0xb2,0x04,0xff,0xff]
+
+ sck 0
+ sck 0(%r1)
+ sck 0(%r15)
+ sck 4095
+ sck 4095(%r1)
+ sck 4095(%r15)
+
+#CHECK: sckc 0 # encoding: [0xb2,0x06,0x00,0x00]
+#CHECK: sckc 0(%r1) # encoding: [0xb2,0x06,0x10,0x00]
+#CHECK: sckc 0(%r15) # encoding: [0xb2,0x06,0xf0,0x00]
+#CHECK: sckc 4095 # encoding: [0xb2,0x06,0x0f,0xff]
+#CHECK: sckc 4095(%r1) # encoding: [0xb2,0x06,0x1f,0xff]
+#CHECK: sckc 4095(%r15) # encoding: [0xb2,0x06,0xff,0xff]
+
+ sckc 0
+ sckc 0(%r1)
+ sckc 0(%r15)
+ sckc 4095
+ sckc 4095(%r1)
+ sckc 4095(%r15)
+
+#CHECK: sckpf # encoding: [0x01,0x07]
+
+ sckpf
+
#CHECK: sd %f0, 0 # encoding: [0x6b,0x00,0x00,0x00]
#CHECK: sd %f0, 4095 # encoding: [0x6b,0x00,0x0f,0xff]
#CHECK: sd %f0, 0(%r1) # encoding: [0x6b,0x00,0x10,0x00]
@@ -12363,6 +13230,56 @@
shy %r0, 524287(%r15,%r1)
shy %r15, 0
+#CHECK: sie 0 # encoding: [0xb2,0x14,0x00,0x00]
+#CHECK: sie 0(%r1) # encoding: [0xb2,0x14,0x10,0x00]
+#CHECK: sie 0(%r15) # encoding: [0xb2,0x14,0xf0,0x00]
+#CHECK: sie 4095 # encoding: [0xb2,0x14,0x0f,0xff]
+#CHECK: sie 4095(%r1) # encoding: [0xb2,0x14,0x1f,0xff]
+#CHECK: sie 4095(%r15) # encoding: [0xb2,0x14,0xff,0xff]
+
+ sie 0
+ sie 0(%r1)
+ sie 0(%r15)
+ sie 4095
+ sie 4095(%r1)
+ sie 4095(%r15)
+
+#CHECK: siga 0 # encoding: [0xb2,0x74,0x00,0x00]
+#CHECK: siga 0(%r1) # encoding: [0xb2,0x74,0x10,0x00]
+#CHECK: siga 0(%r15) # encoding: [0xb2,0x74,0xf0,0x00]
+#CHECK: siga 4095 # encoding: [0xb2,0x74,0x0f,0xff]
+#CHECK: siga 4095(%r1) # encoding: [0xb2,0x74,0x1f,0xff]
+#CHECK: siga 4095(%r15) # encoding: [0xb2,0x74,0xff,0xff]
+
+ siga 0
+ siga 0(%r1)
+ siga 0(%r15)
+ siga 4095
+ siga 4095(%r1)
+ siga 4095(%r15)
+
+#CHECK: sigp %r0, %r0, 0 # encoding: [0xae,0x00,0x00,0x00]
+#CHECK: sigp %r0, %r15, 0 # encoding: [0xae,0x0f,0x00,0x00]
+#CHECK: sigp %r14, %r15, 0 # encoding: [0xae,0xef,0x00,0x00]
+#CHECK: sigp %r15, %r15, 0 # encoding: [0xae,0xff,0x00,0x00]
+#CHECK: sigp %r0, %r0, 4095 # encoding: [0xae,0x00,0x0f,0xff]
+#CHECK: sigp %r0, %r0, 1 # encoding: [0xae,0x00,0x00,0x01]
+#CHECK: sigp %r0, %r0, 0(%r1) # encoding: [0xae,0x00,0x10,0x00]
+#CHECK: sigp %r0, %r0, 0(%r15) # encoding: [0xae,0x00,0xf0,0x00]
+#CHECK: sigp %r0, %r0, 4095(%r1) # encoding: [0xae,0x00,0x1f,0xff]
+#CHECK: sigp %r0, %r0, 4095(%r15) # encoding: [0xae,0x00,0xff,0xff]
+
+ sigp %r0,%r0,0
+ sigp %r0,%r15,0
+ sigp %r14,%r15,0
+ sigp %r15,%r15,0
+ sigp %r0,%r0,4095
+ sigp %r0,%r0,1
+ sigp %r0,%r0,0(%r1)
+ sigp %r0,%r0,0(%r15)
+ sigp %r0,%r0,4095(%r1)
+ sigp %r0,%r0,4095(%r15)
+
#CHECK: sl %r0, 0 # encoding: [0x5f,0x00,0x00,0x00]
#CHECK: sl %r0, 4095 # encoding: [0x5f,0x00,0x0f,0xff]
#CHECK: sl %r0, 0(%r1) # encoding: [0x5f,0x00,0x10,0x00]
@@ -12749,6 +13666,30 @@
sp 0(1), 0(16,%r1)
sp 0(1), 0(16,%r15)
+#CHECK: spctr %r0, %r0 # encoding: [0xb2,0xe1,0x00,0x00]
+#CHECK: spctr %r0, %r15 # encoding: [0xb2,0xe1,0x00,0x0f]
+#CHECK: spctr %r15, %r0 # encoding: [0xb2,0xe1,0x00,0xf0]
+#CHECK: spctr %r7, %r8 # encoding: [0xb2,0xe1,0x00,0x78]
+
+ spctr %r0,%r0
+ spctr %r0,%r15
+ spctr %r15,%r0
+ spctr %r7,%r8
+
+#CHECK: spka 0 # encoding: [0xb2,0x0a,0x00,0x00]
+#CHECK: spka 0(%r1) # encoding: [0xb2,0x0a,0x10,0x00]
+#CHECK: spka 0(%r15) # encoding: [0xb2,0x0a,0xf0,0x00]
+#CHECK: spka 4095 # encoding: [0xb2,0x0a,0x0f,0xff]
+#CHECK: spka 4095(%r1) # encoding: [0xb2,0x0a,0x1f,0xff]
+#CHECK: spka 4095(%r15) # encoding: [0xb2,0x0a,0xff,0xff]
+
+ spka 0
+ spka 0(%r1)
+ spka 0(%r15)
+ spka 4095
+ spka 4095(%r1)
+ spka 4095(%r15)
+
#CHECK: spm %r0 # encoding: [0x04,0x00]
#CHECK: spm %r1 # encoding: [0x04,0x10]
#CHECK: spm %r15 # encoding: [0x04,0xf0]
@@ -12757,6 +13698,34 @@
spm %r1
spm %r15
+#CHECK: spt 0 # encoding: [0xb2,0x08,0x00,0x00]
+#CHECK: spt 0(%r1) # encoding: [0xb2,0x08,0x10,0x00]
+#CHECK: spt 0(%r15) # encoding: [0xb2,0x08,0xf0,0x00]
+#CHECK: spt 4095 # encoding: [0xb2,0x08,0x0f,0xff]
+#CHECK: spt 4095(%r1) # encoding: [0xb2,0x08,0x1f,0xff]
+#CHECK: spt 4095(%r15) # encoding: [0xb2,0x08,0xff,0xff]
+
+ spt 0
+ spt 0(%r1)
+ spt 0(%r15)
+ spt 4095
+ spt 4095(%r1)
+ spt 4095(%r15)
+
+#CHECK: spx 0 # encoding: [0xb2,0x10,0x00,0x00]
+#CHECK: spx 0(%r1) # encoding: [0xb2,0x10,0x10,0x00]
+#CHECK: spx 0(%r15) # encoding: [0xb2,0x10,0xf0,0x00]
+#CHECK: spx 4095 # encoding: [0xb2,0x10,0x0f,0xff]
+#CHECK: spx 4095(%r1) # encoding: [0xb2,0x10,0x1f,0xff]
+#CHECK: spx 4095(%r15) # encoding: [0xb2,0x10,0xff,0xff]
+
+ spx 0
+ spx 0(%r1)
+ spx 0(%r15)
+ spx 4095
+ spx 4095(%r1)
+ spx 4095(%r15)
+
#CHECK: sqd %f0, 0 # encoding: [0xed,0x00,0x00,0x00,0x00,0x35]
#CHECK: sqd %f0, 4095 # encoding: [0xed,0x00,0x0f,0xff,0x00,0x35]
#CHECK: sqd %f0, 0(%r1) # encoding: [0xed,0x00,0x10,0x00,0x00,0x35]
@@ -13131,6 +14100,62 @@
srxt %f13, %f0, 0
srxt %f13, %f13, 0
+#CHECK: ssar %r0 # encoding: [0xb2,0x25,0x00,0x00]
+#CHECK: ssar %r1 # encoding: [0xb2,0x25,0x00,0x10]
+#CHECK: ssar %r15 # encoding: [0xb2,0x25,0x00,0xf0]
+
+ ssar %r0
+ ssar %r1
+ ssar %r15
+
+#CHECK: ssair %r0 # encoding: [0xb9,0x9f,0x00,0x00]
+#CHECK: ssair %r1 # encoding: [0xb9,0x9f,0x00,0x10]
+#CHECK: ssair %r15 # encoding: [0xb9,0x9f,0x00,0xf0]
+
+ ssair %r0
+ ssair %r1
+ ssair %r15
+
+#CHECK: ssch 0 # encoding: [0xb2,0x33,0x00,0x00]
+#CHECK: ssch 0(%r1) # encoding: [0xb2,0x33,0x10,0x00]
+#CHECK: ssch 0(%r15) # encoding: [0xb2,0x33,0xf0,0x00]
+#CHECK: ssch 4095 # encoding: [0xb2,0x33,0x0f,0xff]
+#CHECK: ssch 4095(%r1) # encoding: [0xb2,0x33,0x1f,0xff]
+#CHECK: ssch 4095(%r15) # encoding: [0xb2,0x33,0xff,0xff]
+
+ ssch 0
+ ssch 0(%r1)
+ ssch 0(%r15)
+ ssch 4095
+ ssch 4095(%r1)
+ ssch 4095(%r15)
+
+#CHECK: sske %r0, %r0 # encoding: [0xb2,0x2b,0x00,0x00]
+#CHECK: sske %r0, %r15 # encoding: [0xb2,0x2b,0x00,0x0f]
+#CHECK: sske %r15, %r0 # encoding: [0xb2,0x2b,0x00,0xf0]
+#CHECK: sske %r0, %r0, 15 # encoding: [0xb2,0x2b,0xf0,0x00]
+#CHECK: sske %r4, %r6, 7 # encoding: [0xb2,0x2b,0x70,0x46]
+
+ sske %r0, %r0
+ sske %r0, %r15
+ sske %r15, %r0
+ sske %r0, %r0, 15
+ sske %r4, %r6, 7
+
+#CHECK: ssm 0 # encoding: [0x80,0x00,0x00,0x00]
+#CHECK: ssm 0(%r1) # encoding: [0x80,0x00,0x10,0x00]
+#CHECK: ssm 0(%r15) # encoding: [0x80,0x00,0xf0,0x00]
+#CHECK: ssm 4095 # encoding: [0x80,0x00,0x0f,0xff]
+#CHECK: ssm 4095(%r1) # encoding: [0x80,0x00,0x1f,0xff]
+#CHECK: ssm 4095(%r15) # encoding: [0x80,0x00,0xff,0xff]
+
+ ssm 0
+ ssm 0(%r1)
+ ssm 0(%r15)
+ ssm 4095
+ ssm 4095(%r1)
+ ssm 4095(%r15)
+
#CHECK: st %r0, 0 # encoding: [0x50,0x00,0x00,0x00]
#CHECK: st %r0, 4095 # encoding: [0x50,0x00,0x0f,0xff]
#CHECK: st %r0, 0(%r1) # encoding: [0x50,0x00,0x10,0x00]
@@ -13197,6 +14222,20 @@
stamy %a0,%a0,524287(%r1)
stamy %a0,%a0,524287(%r15)
+#CHECK: stap 0 # encoding: [0xb2,0x12,0x00,0x00]
+#CHECK: stap 0(%r1) # encoding: [0xb2,0x12,0x10,0x00]
+#CHECK: stap 0(%r15) # encoding: [0xb2,0x12,0xf0,0x00]
+#CHECK: stap 4095 # encoding: [0xb2,0x12,0x0f,0xff]
+#CHECK: stap 4095(%r1) # encoding: [0xb2,0x12,0x1f,0xff]
+#CHECK: stap 4095(%r15) # encoding: [0xb2,0x12,0xff,0xff]
+
+ stap 0
+ stap 0(%r1)
+ stap 0(%r15)
+ stap 4095
+ stap 4095(%r1)
+ stap 4095(%r15)
+
#CHECK: stc %r0, 0 # encoding: [0x42,0x00,0x00,0x00]
#CHECK: stc %r0, 4095 # encoding: [0x42,0x00,0x0f,0xff]
#CHECK: stc %r0, 0(%r1) # encoding: [0x42,0x00,0x10,0x00]
@@ -13227,6 +14266,20 @@
stck 4095(%r1)
stck 4095(%r15)
+#CHECK: stckc 0 # encoding: [0xb2,0x07,0x00,0x00]
+#CHECK: stckc 0(%r1) # encoding: [0xb2,0x07,0x10,0x00]
+#CHECK: stckc 0(%r15) # encoding: [0xb2,0x07,0xf0,0x00]
+#CHECK: stckc 4095 # encoding: [0xb2,0x07,0x0f,0xff]
+#CHECK: stckc 4095(%r1) # encoding: [0xb2,0x07,0x1f,0xff]
+#CHECK: stckc 4095(%r15) # encoding: [0xb2,0x07,0xff,0xff]
+
+ stckc 0
+ stckc 0(%r1)
+ stckc 0(%r15)
+ stckc 4095
+ stckc 4095(%r1)
+ stckc 4095(%r15)
+
#CHECK: stcke 0 # encoding: [0xb2,0x78,0x00,0x00]
#CHECK: stcke 0(%r1) # encoding: [0xb2,0x78,0x10,0x00]
#CHECK: stcke 0(%r15) # encoding: [0xb2,0x78,0xf0,0x00]
@@ -13315,6 +14368,84 @@
stcmy %r0, 0, 524287(%r1)
stcmy %r15, 0, 0
+#CHECK: stcps 0 # encoding: [0xb2,0x3a,0x00,0x00]
+#CHECK: stcps 0(%r1) # encoding: [0xb2,0x3a,0x10,0x00]
+#CHECK: stcps 0(%r15) # encoding: [0xb2,0x3a,0xf0,0x00]
+#CHECK: stcps 4095 # encoding: [0xb2,0x3a,0x0f,0xff]
+#CHECK: stcps 4095(%r1) # encoding: [0xb2,0x3a,0x1f,0xff]
+#CHECK: stcps 4095(%r15) # encoding: [0xb2,0x3a,0xff,0xff]
+
+ stcps 0
+ stcps 0(%r1)
+ stcps 0(%r15)
+ stcps 4095
+ stcps 4095(%r1)
+ stcps 4095(%r15)
+
+#CHECK: stcrw 0 # encoding: [0xb2,0x39,0x00,0x00]
+#CHECK: stcrw 0(%r1) # encoding: [0xb2,0x39,0x10,0x00]
+#CHECK: stcrw 0(%r15) # encoding: [0xb2,0x39,0xf0,0x00]
+#CHECK: stcrw 4095 # encoding: [0xb2,0x39,0x0f,0xff]
+#CHECK: stcrw 4095(%r1) # encoding: [0xb2,0x39,0x1f,0xff]
+#CHECK: stcrw 4095(%r15) # encoding: [0xb2,0x39,0xff,0xff]
+
+ stcrw 0
+ stcrw 0(%r1)
+ stcrw 0(%r15)
+ stcrw 4095
+ stcrw 4095(%r1)
+ stcrw 4095(%r15)
+
+#CHECK: stctg %c0, %c0, 0 # encoding: [0xeb,0x00,0x00,0x00,0x00,0x25]
+#CHECK: stctg %c0, %c15, 0 # encoding: [0xeb,0x0f,0x00,0x00,0x00,0x25]
+#CHECK: stctg %c14, %c15, 0 # encoding: [0xeb,0xef,0x00,0x00,0x00,0x25]
+#CHECK: stctg %c15, %c15, 0 # encoding: [0xeb,0xff,0x00,0x00,0x00,0x25]
+#CHECK: stctg %c0, %c0, -524288 # encoding: [0xeb,0x00,0x00,0x00,0x80,0x25]
+#CHECK: stctg %c0, %c0, -1 # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x25]
+#CHECK: stctg %c0, %c0, 0 # encoding: [0xeb,0x00,0x00,0x00,0x00,0x25]
+#CHECK: stctg %c0, %c0, 1 # encoding: [0xeb,0x00,0x00,0x01,0x00,0x25]
+#CHECK: stctg %c0, %c0, 524287 # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x25]
+#CHECK: stctg %c0, %c0, 0(%r1) # encoding: [0xeb,0x00,0x10,0x00,0x00,0x25]
+#CHECK: stctg %c0, %c0, 0(%r15) # encoding: [0xeb,0x00,0xf0,0x00,0x00,0x25]
+#CHECK: stctg %c0, %c0, 524287(%r1) # encoding: [0xeb,0x00,0x1f,0xff,0x7f,0x25]
+#CHECK: stctg %c0, %c0, 524287(%r15) # encoding: [0xeb,0x00,0xff,0xff,0x7f,0x25]
+
+ stctg %c0,%c0,0
+ stctg %c0,%c15,0
+ stctg %c14,%c15,0
+ stctg %c15,%c15,0
+ stctg %c0,%c0,-524288
+ stctg %c0,%c0,-1
+ stctg %c0,%c0,0
+ stctg %c0,%c0,1
+ stctg %c0,%c0,524287
+ stctg %c0,%c0,0(%r1)
+ stctg %c0,%c0,0(%r15)
+ stctg %c0,%c0,524287(%r1)
+ stctg %c0,%c0,524287(%r15)
+
+#CHECK: stctl %c0, %c0, 0 # encoding: [0xb6,0x00,0x00,0x00]
+#CHECK: stctl %c0, %c15, 0 # encoding: [0xb6,0x0f,0x00,0x00]
+#CHECK: stctl %c14, %c15, 0 # encoding: [0xb6,0xef,0x00,0x00]
+#CHECK: stctl %c15, %c15, 0 # encoding: [0xb6,0xff,0x00,0x00]
+#CHECK: stctl %c0, %c0, 4095 # encoding: [0xb6,0x00,0x0f,0xff]
+#CHECK: stctl %c0, %c0, 1 # encoding: [0xb6,0x00,0x00,0x01]
+#CHECK: stctl %c0, %c0, 0(%r1) # encoding: [0xb6,0x00,0x10,0x00]
+#CHECK: stctl %c0, %c0, 0(%r15) # encoding: [0xb6,0x00,0xf0,0x00]
+#CHECK: stctl %c0, %c0, 4095(%r1) # encoding: [0xb6,0x00,0x1f,0xff]
+#CHECK: stctl %c0, %c0, 4095(%r15) # encoding: [0xb6,0x00,0xff,0xff]
+
+ stctl %c0,%c0,0
+ stctl %c0,%c15,0
+ stctl %c14,%c15,0
+ stctl %c15,%c15,0
+ stctl %c0,%c0,4095
+ stctl %c0,%c0,1
+ stctl %c0,%c0,0(%r1)
+ stctl %c0,%c0,0(%r15)
+ stctl %c0,%c0,4095(%r1)
+ stctl %c0,%c0,4095(%r15)
+
#CHECK: stcy %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x72]
#CHECK: stcy %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x72]
#CHECK: stcy %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x72]
@@ -13413,6 +14544,20 @@
stey %f0, 524287(%r15,%r1)
stey %f15, 0
+#CHECK: stfl 0 # encoding: [0xb2,0xb1,0x00,0x00]
+#CHECK: stfl 0(%r1) # encoding: [0xb2,0xb1,0x10,0x00]
+#CHECK: stfl 0(%r15) # encoding: [0xb2,0xb1,0xf0,0x00]
+#CHECK: stfl 4095 # encoding: [0xb2,0xb1,0x0f,0xff]
+#CHECK: stfl 4095(%r1) # encoding: [0xb2,0xb1,0x1f,0xff]
+#CHECK: stfl 4095(%r15) # encoding: [0xb2,0xb1,0xff,0xff]
+
+ stfl 0
+ stfl 0(%r1)
+ stfl 0(%r15)
+ stfl 4095
+ stfl 4095(%r1)
+ stfl 4095(%r15)
+
#CHECK: stfle 0 # encoding: [0xb2,0xb0,0x00,0x00]
#CHECK: stfle 0(%r1) # encoding: [0xb2,0xb0,0x10,0x00]
#CHECK: stfle 0(%r15) # encoding: [0xb2,0xb0,0xf0,0x00]
@@ -13575,6 +14720,20 @@
sthy %r0, 524287(%r15,%r1)
sthy %r15, 0
+#CHECK: stidp 0 # encoding: [0xb2,0x02,0x00,0x00]
+#CHECK: stidp 0(%r1) # encoding: [0xb2,0x02,0x10,0x00]
+#CHECK: stidp 0(%r15) # encoding: [0xb2,0x02,0xf0,0x00]
+#CHECK: stidp 4095 # encoding: [0xb2,0x02,0x0f,0xff]
+#CHECK: stidp 4095(%r1) # encoding: [0xb2,0x02,0x1f,0xff]
+#CHECK: stidp 4095(%r15) # encoding: [0xb2,0x02,0xff,0xff]
+
+ stidp 0
+ stidp 0(%r1)
+ stidp 0(%r15)
+ stidp 4095
+ stidp 4095(%r1)
+ stidp 4095(%r15)
+
#CHECK: stm %r0, %r0, 0 # encoding: [0x90,0x00,0x00,0x00]
#CHECK: stm %r0, %r15, 0 # encoding: [0x90,0x0f,0x00,0x00]
#CHECK: stm %r14, %r15, 0 # encoding: [0x90,0xef,0x00,0x00]
@@ -13681,6 +14840,52 @@
stmy %r0,%r0,524287(%r1)
stmy %r0,%r0,524287(%r15)
+#CHECK: stnsm 0, 0 # encoding: [0xac,0x00,0x00,0x00]
+#CHECK: stnsm 4095, 0 # encoding: [0xac,0x00,0x0f,0xff]
+#CHECK: stnsm 0, 255 # encoding: [0xac,0xff,0x00,0x00]
+#CHECK: stnsm 0(%r1), 42 # encoding: [0xac,0x2a,0x10,0x00]
+#CHECK: stnsm 0(%r15), 42 # encoding: [0xac,0x2a,0xf0,0x00]
+#CHECK: stnsm 4095(%r1), 42 # encoding: [0xac,0x2a,0x1f,0xff]
+#CHECK: stnsm 4095(%r15), 42 # encoding: [0xac,0x2a,0xff,0xff]
+
+ stnsm 0, 0
+ stnsm 4095, 0
+ stnsm 0, 255
+ stnsm 0(%r1), 42
+ stnsm 0(%r15), 42
+ stnsm 4095(%r1), 42
+ stnsm 4095(%r15), 42
+
+#CHECK: stosm 0, 0 # encoding: [0xad,0x00,0x00,0x00]
+#CHECK: stosm 4095, 0 # encoding: [0xad,0x00,0x0f,0xff]
+#CHECK: stosm 0, 255 # encoding: [0xad,0xff,0x00,0x00]
+#CHECK: stosm 0(%r1), 42 # encoding: [0xad,0x2a,0x10,0x00]
+#CHECK: stosm 0(%r15), 42 # encoding: [0xad,0x2a,0xf0,0x00]
+#CHECK: stosm 4095(%r1), 42 # encoding: [0xad,0x2a,0x1f,0xff]
+#CHECK: stosm 4095(%r15), 42 # encoding: [0xad,0x2a,0xff,0xff]
+
+ stosm 0, 0
+ stosm 4095, 0
+ stosm 0, 255
+ stosm 0(%r1), 42
+ stosm 0(%r15), 42
+ stosm 4095(%r1), 42
+ stosm 4095(%r15), 42
+
+#CHECK: stpt 0 # encoding: [0xb2,0x09,0x00,0x00]
+#CHECK: stpt 0(%r1) # encoding: [0xb2,0x09,0x10,0x00]
+#CHECK: stpt 0(%r15) # encoding: [0xb2,0x09,0xf0,0x00]
+#CHECK: stpt 4095 # encoding: [0xb2,0x09,0x0f,0xff]
+#CHECK: stpt 4095(%r1) # encoding: [0xb2,0x09,0x1f,0xff]
+#CHECK: stpt 4095(%r15) # encoding: [0xb2,0x09,0xff,0xff]
+
+ stpt 0
+ stpt 0(%r1)
+ stpt 0(%r15)
+ stpt 4095
+ stpt 4095(%r1)
+ stpt 4095(%r15)
+
#CHECK: stpq %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x8e]
#CHECK: stpq %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x8e]
#CHECK: stpq %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x8e]
@@ -13703,6 +14908,20 @@
stpq %r0, 524287(%r15,%r1)
stpq %r14, 0
+#CHECK: stpx 0 # encoding: [0xb2,0x11,0x00,0x00]
+#CHECK: stpx 0(%r1) # encoding: [0xb2,0x11,0x10,0x00]
+#CHECK: stpx 0(%r15) # encoding: [0xb2,0x11,0xf0,0x00]
+#CHECK: stpx 4095 # encoding: [0xb2,0x11,0x0f,0xff]
+#CHECK: stpx 4095(%r1) # encoding: [0xb2,0x11,0x1f,0xff]
+#CHECK: stpx 4095(%r15) # encoding: [0xb2,0x11,0xff,0xff]
+
+ stpx 0
+ stpx 0(%r1)
+ stpx 0(%r15)
+ stpx 4095
+ stpx 4095(%r1)
+ stpx 4095(%r15)
+
#CHECK: strag 0, 0 # encoding: [0xe5,0x02,0x00,0x00,0x00,0x00]
#CHECK: strag 0(%r1), 0(%r2) # encoding: [0xe5,0x02,0x10,0x00,0x20,0x00]
#CHECK: strag 160(%r1), 320(%r15) # encoding: [0xe5,0x02,0x10,0xa0,0xf1,0x40]
@@ -13828,6 +15047,54 @@
strvh %r0,524287(%r15,%r1)
strvh %r15,0
+#CHECK: stsch 0 # encoding: [0xb2,0x34,0x00,0x00]
+#CHECK: stsch 0(%r1) # encoding: [0xb2,0x34,0x10,0x00]
+#CHECK: stsch 0(%r15) # encoding: [0xb2,0x34,0xf0,0x00]
+#CHECK: stsch 4095 # encoding: [0xb2,0x34,0x0f,0xff]
+#CHECK: stsch 4095(%r1) # encoding: [0xb2,0x34,0x1f,0xff]
+#CHECK: stsch 4095(%r15) # encoding: [0xb2,0x34,0xff,0xff]
+
+ stsch 0
+ stsch 0(%r1)
+ stsch 0(%r15)
+ stsch 4095
+ stsch 4095(%r1)
+ stsch 4095(%r15)
+
+#CHECK: stsi 0 # encoding: [0xb2,0x7d,0x00,0x00]
+#CHECK: stsi 0(%r1) # encoding: [0xb2,0x7d,0x10,0x00]
+#CHECK: stsi 0(%r15) # encoding: [0xb2,0x7d,0xf0,0x00]
+#CHECK: stsi 4095 # encoding: [0xb2,0x7d,0x0f,0xff]
+#CHECK: stsi 4095(%r1) # encoding: [0xb2,0x7d,0x1f,0xff]
+#CHECK: stsi 4095(%r15) # encoding: [0xb2,0x7d,0xff,0xff]
+
+ stsi 0
+ stsi 0(%r1)
+ stsi 0(%r15)
+ stsi 4095
+ stsi 4095(%r1)
+ stsi 4095(%r15)
+
+#CHECK: stura %r0, %r0 # encoding: [0xb2,0x46,0x00,0x00]
+#CHECK: stura %r0, %r15 # encoding: [0xb2,0x46,0x00,0x0f]
+#CHECK: stura %r15, %r0 # encoding: [0xb2,0x46,0x00,0xf0]
+#CHECK: stura %r7, %r8 # encoding: [0xb2,0x46,0x00,0x78]
+
+ stura %r0,%r0
+ stura %r0,%r15
+ stura %r15,%r0
+ stura %r7,%r8
+
+#CHECK: sturg %r0, %r0 # encoding: [0xb9,0x25,0x00,0x00]
+#CHECK: sturg %r0, %r15 # encoding: [0xb9,0x25,0x00,0x0f]
+#CHECK: sturg %r15, %r0 # encoding: [0xb9,0x25,0x00,0xf0]
+#CHECK: sturg %r7, %r8 # encoding: [0xb9,0x25,0x00,0x78]
+
+ sturg %r0,%r0
+ sturg %r0,%r15
+ sturg %r15,%r0
+ sturg %r7,%r8
+
#CHECK: sty %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x50]
#CHECK: sty %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x50]
#CHECK: sty %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x50]
@@ -13970,6 +15237,28 @@
tam
+#CHECK: tar %a0, %r0 # encoding: [0xb2,0x4c,0x00,0x00]
+#CHECK: tar %a0, %r15 # encoding: [0xb2,0x4c,0x00,0x0f]
+#CHECK: tar %a15, %r0 # encoding: [0xb2,0x4c,0x00,0xf0]
+#CHECK: tar %a7, %r8 # encoding: [0xb2,0x4c,0x00,0x78]
+
+ tar %a0,%r0
+ tar %a0,%r15
+ tar %a15,%r0
+ tar %a7,%r8
+
+#CHECK: tb %r0, %r0 # encoding: [0xb2,0x2c,0x00,0x00]
+#CHECK: tb %r0, %r15 # encoding: [0xb2,0x2c,0x00,0x0f]
+#CHECK: tb %r15, %r0 # encoding: [0xb2,0x2c,0x00,0xf0]
+#CHECK: tb %r7, %r8 # encoding: [0xb2,0x2c,0x00,0x78]
+#CHECK: tb %r15, %r15 # encoding: [0xb2,0x2c,0x00,0xff]
+
+ tb %r0,%r0
+ tb %r0,%r15
+ tb %r15,%r0
+ tb %r7,%r8
+ tb %r15,%r15
+
#CHECK: tbdr %f0, 0, %f0 # encoding: [0xb3,0x51,0x00,0x00]
#CHECK: tbdr %f0, 0, %f15 # encoding: [0xb3,0x51,0x00,0x0f]
#CHECK: tbdr %f0, 15, %f0 # encoding: [0xb3,0x51,0xf0,0x00]
@@ -14272,6 +15561,42 @@
tp 0(16,%r1)
tp 0(16,%r15)
+#CHECK: tpi 0 # encoding: [0xb2,0x36,0x00,0x00]
+#CHECK: tpi 0(%r1) # encoding: [0xb2,0x36,0x10,0x00]
+#CHECK: tpi 0(%r15) # encoding: [0xb2,0x36,0xf0,0x00]
+#CHECK: tpi 4095 # encoding: [0xb2,0x36,0x0f,0xff]
+#CHECK: tpi 4095(%r1) # encoding: [0xb2,0x36,0x1f,0xff]
+#CHECK: tpi 4095(%r15) # encoding: [0xb2,0x36,0xff,0xff]
+
+ tpi 0
+ tpi 0(%r1)
+ tpi 0(%r15)
+ tpi 4095
+ tpi 4095(%r1)
+ tpi 4095(%r15)
+
+#CHECK: tprot 0, 0 # encoding: [0xe5,0x01,0x00,0x00,0x00,0x00]
+#CHECK: tprot 0(%r1), 0(%r2) # encoding: [0xe5,0x01,0x10,0x00,0x20,0x00]
+#CHECK: tprot 160(%r1), 320(%r15) # encoding: [0xe5,0x01,0x10,0xa0,0xf1,0x40]
+#CHECK: tprot 0(%r1), 4095 # encoding: [0xe5,0x01,0x10,0x00,0x0f,0xff]
+#CHECK: tprot 0(%r1), 4095(%r2) # encoding: [0xe5,0x01,0x10,0x00,0x2f,0xff]
+#CHECK: tprot 0(%r1), 4095(%r15) # encoding: [0xe5,0x01,0x10,0x00,0xff,0xff]
+#CHECK: tprot 0(%r1), 0 # encoding: [0xe5,0x01,0x10,0x00,0x00,0x00]
+#CHECK: tprot 0(%r15), 0 # encoding: [0xe5,0x01,0xf0,0x00,0x00,0x00]
+#CHECK: tprot 4095(%r1), 0 # encoding: [0xe5,0x01,0x1f,0xff,0x00,0x00]
+#CHECK: tprot 4095(%r15), 0 # encoding: [0xe5,0x01,0xff,0xff,0x00,0x00]
+
+ tprot 0, 0
+ tprot 0(%r1), 0(%r2)
+ tprot 160(%r1), 320(%r15)
+ tprot 0(%r1), 4095
+ tprot 0(%r1), 4095(%r2)
+ tprot 0(%r1), 4095(%r15)
+ tprot 0(%r1), 0
+ tprot 0(%r15), 0
+ tprot 4095(%r1), 0
+ tprot 4095(%r15), 0
+
#CHECK: tr 0(1), 0 # encoding: [0xdc,0x00,0x00,0x00,0x00,0x00]
#CHECK: tr 0(1), 0(%r1) # encoding: [0xdc,0x00,0x00,0x00,0x10,0x00]
#CHECK: tr 0(1), 0(%r15) # encoding: [0xdc,0x00,0x00,0x00,0xf0,0x00]
@@ -14298,6 +15623,74 @@
tr 0(256,%r1), 0
tr 0(256,%r15), 0
+#CHECK: trace %r0, %r0, 0 # encoding: [0x99,0x00,0x00,0x00]
+#CHECK: trace %r0, %r15, 0 # encoding: [0x99,0x0f,0x00,0x00]
+#CHECK: trace %r14, %r15, 0 # encoding: [0x99,0xef,0x00,0x00]
+#CHECK: trace %r15, %r15, 0 # encoding: [0x99,0xff,0x00,0x00]
+#CHECK: trace %r0, %r0, 4095 # encoding: [0x99,0x00,0x0f,0xff]
+#CHECK: trace %r0, %r0, 1 # encoding: [0x99,0x00,0x00,0x01]
+#CHECK: trace %r0, %r0, 0(%r1) # encoding: [0x99,0x00,0x10,0x00]
+#CHECK: trace %r0, %r0, 0(%r15) # encoding: [0x99,0x00,0xf0,0x00]
+#CHECK: trace %r0, %r0, 4095(%r1) # encoding: [0x99,0x00,0x1f,0xff]
+#CHECK: trace %r0, %r0, 4095(%r15) # encoding: [0x99,0x00,0xff,0xff]
+
+ trace %r0,%r0,0
+ trace %r0,%r15,0
+ trace %r14,%r15,0
+ trace %r15,%r15,0
+ trace %r0,%r0,4095
+ trace %r0,%r0,1
+ trace %r0,%r0,0(%r1)
+ trace %r0,%r0,0(%r15)
+ trace %r0,%r0,4095(%r1)
+ trace %r0,%r0,4095(%r15)
+
+#CHECK: tracg %r0, %r0, 0 # encoding: [0xeb,0x00,0x00,0x00,0x00,0x0f]
+#CHECK: tracg %r0, %r15, 0 # encoding: [0xeb,0x0f,0x00,0x00,0x00,0x0f]
+#CHECK: tracg %r14, %r15, 0 # encoding: [0xeb,0xef,0x00,0x00,0x00,0x0f]
+#CHECK: tracg %r15, %r15, 0 # encoding: [0xeb,0xff,0x00,0x00,0x00,0x0f]
+#CHECK: tracg %r0, %r0, -524288 # encoding: [0xeb,0x00,0x00,0x00,0x80,0x0f]
+#CHECK: tracg %r0, %r0, -1 # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x0f]
+#CHECK: tracg %r0, %r0, 0 # encoding: [0xeb,0x00,0x00,0x00,0x00,0x0f]
+#CHECK: tracg %r0, %r0, 1 # encoding: [0xeb,0x00,0x00,0x01,0x00,0x0f]
+#CHECK: tracg %r0, %r0, 524287 # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x0f]
+#CHECK: tracg %r0, %r0, 0(%r1) # encoding: [0xeb,0x00,0x10,0x00,0x00,0x0f]
+#CHECK: tracg %r0, %r0, 0(%r15) # encoding: [0xeb,0x00,0xf0,0x00,0x00,0x0f]
+#CHECK: tracg %r0, %r0, 524287(%r1) # encoding: [0xeb,0x00,0x1f,0xff,0x7f,0x0f]
+#CHECK: tracg %r0, %r0, 524287(%r15) # encoding: [0xeb,0x00,0xff,0xff,0x7f,0x0f]
+
+ tracg %r0,%r0,0
+ tracg %r0,%r15,0
+ tracg %r14,%r15,0
+ tracg %r15,%r15,0
+ tracg %r0,%r0,-524288
+ tracg %r0,%r0,-1
+ tracg %r0,%r0,0
+ tracg %r0,%r0,1
+ tracg %r0,%r0,524287
+ tracg %r0,%r0,0(%r1)
+ tracg %r0,%r0,0(%r15)
+ tracg %r0,%r0,524287(%r1)
+ tracg %r0,%r0,524287(%r15)
+
+#CHECK: trap2 # encoding: [0x01,0xff]
+
+ trap2
+
+#CHECK: trap4 0 # encoding: [0xb2,0xff,0x00,0x00]
+#CHECK: trap4 0(%r1) # encoding: [0xb2,0xff,0x10,0x00]
+#CHECK: trap4 0(%r15) # encoding: [0xb2,0xff,0xf0,0x00]
+#CHECK: trap4 4095 # encoding: [0xb2,0xff,0x0f,0xff]
+#CHECK: trap4 4095(%r1) # encoding: [0xb2,0xff,0x1f,0xff]
+#CHECK: trap4 4095(%r15) # encoding: [0xb2,0xff,0xff,0xff]
+
+ trap4 0
+ trap4 0(%r1)
+ trap4 0(%r15)
+ trap4 4095
+ trap4 4095(%r1)
+ trap4 4095(%r15)
+
#CHECK: tre %r0, %r0 # encoding: [0xb2,0xa5,0x00,0x00]
#CHECK: tre %r0, %r15 # encoding: [0xb2,0xa5,0x00,0x0f]
#CHECK: tre %r14, %r0 # encoding: [0xb2,0xa5,0x00,0xe0]
@@ -14458,6 +15851,20 @@
ts 4095(%r1)
ts 4095(%r15)
+#CHECK: tsch 0 # encoding: [0xb2,0x35,0x00,0x00]
+#CHECK: tsch 0(%r1) # encoding: [0xb2,0x35,0x10,0x00]
+#CHECK: tsch 0(%r15) # encoding: [0xb2,0x35,0xf0,0x00]
+#CHECK: tsch 4095 # encoding: [0xb2,0x35,0x0f,0xff]
+#CHECK: tsch 4095(%r1) # encoding: [0xb2,0x35,0x1f,0xff]
+#CHECK: tsch 4095(%r15) # encoding: [0xb2,0x35,0xff,0xff]
+
+ tsch 0
+ tsch 0(%r1)
+ tsch 0(%r15)
+ tsch 4095
+ tsch 4095(%r1)
+ tsch 4095(%r15)
+
#CHECK: unpk 0(1), 0(1) # encoding: [0xf3,0x00,0x00,0x00,0x00,0x00]
#CHECK: unpk 0(1), 0(1,%r1) # encoding: [0xf3,0x00,0x00,0x00,0x10,0x00]
#CHECK: unpk 0(1), 0(1,%r15) # encoding: [0xf3,0x00,0x00,0x00,0xf0,0x00]
@@ -14682,6 +16089,10 @@
xr %r15,%r0
xr %r7,%r8
+#CHECK: xsch # encoding: [0xb2,0x76,0x00,0x00]
+
+ xsch
+
#CHECK: xy %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x57]
#CHECK: xy %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x57]
#CHECK: xy %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x57]
diff --git a/test/MC/SystemZ/regs-bad.s b/test/MC/SystemZ/regs-bad.s
index 37c83dd4b8ff..f4cdb69821d2 100644
--- a/test/MC/SystemZ/regs-bad.s
+++ b/test/MC/SystemZ/regs-bad.s
@@ -8,18 +8,24 @@
#CHECK: error: invalid operand for instruction
#CHECK: lr %a0,%r1
#CHECK: error: invalid operand for instruction
+#CHECK: lr %c0,%r1
+#CHECK: error: invalid operand for instruction
#CHECK: lr %r0,%f1
#CHECK: error: invalid operand for instruction
#CHECK: lr %r0,%a1
#CHECK: error: invalid operand for instruction
+#CHECK: lr %r0,%c1
+#CHECK: error: invalid operand for instruction
#CHECK: lr %r0,0
#CHECK: error: invalid operand for instruction
#CHECK: lr %r0,0(%r1)
lr %f0,%r1
lr %a0,%r1
+ lr %c0,%r1
lr %r0,%f1
lr %r0,%a1
+ lr %r0,%c1
lr %r0,0
lr %r0,0(%r1)
@@ -30,18 +36,24 @@
#CHECK: error: invalid operand for instruction
#CHECK: lgr %a0,%r1
#CHECK: error: invalid operand for instruction
+#CHECK: lgr %c0,%r1
+#CHECK: error: invalid operand for instruction
#CHECK: lgr %r0,%f1
#CHECK: error: invalid operand for instruction
#CHECK: lgr %r0,%a1
#CHECK: error: invalid operand for instruction
+#CHECK: lgr %r0,%c1
+#CHECK: error: invalid operand for instruction
#CHECK: lgr %r0,0
#CHECK: error: invalid operand for instruction
#CHECK: lgr %r0,0(%r1)
lgr %f0,%r1
lgr %a0,%r1
+ lgr %c0,%r1
lgr %r0,%f1
lgr %r0,%a1
+ lgr %r0,%c1
lgr %r0,0
lgr %r0,0(%r1)
@@ -68,10 +80,14 @@
#CHECK: error: invalid operand for instruction
#CHECK: dlr %a0,%r1
#CHECK: error: invalid operand for instruction
+#CHECK: dlr %c0,%r1
+#CHECK: error: invalid operand for instruction
#CHECK: dlr %r0,%f1
#CHECK: error: invalid operand for instruction
#CHECK: dlr %r0,%a1
#CHECK: error: invalid operand for instruction
+#CHECK: dlr %r0,%c1
+#CHECK: error: invalid operand for instruction
#CHECK: dlr %r0,0
#CHECK: error: invalid operand for instruction
#CHECK: dlr %r0,0(%r1)
@@ -86,8 +102,10 @@
dlr %r15,%r0
dlr %f0,%r1
dlr %a0,%r1
+ dlr %c0,%r1
dlr %r0,%f1
dlr %r0,%a1
+ dlr %r0,%c1
dlr %r0,0
dlr %r0,0(%r1)
@@ -98,18 +116,24 @@
#CHECK: error: invalid operand for instruction
#CHECK: ler %a0,%f1
#CHECK: error: invalid operand for instruction
+#CHECK: ler %c0,%f1
+#CHECK: error: invalid operand for instruction
#CHECK: ler %f0,%r1
#CHECK: error: invalid operand for instruction
#CHECK: ler %f0,%a1
#CHECK: error: invalid operand for instruction
+#CHECK: ler %f0,%c1
+#CHECK: error: invalid operand for instruction
#CHECK: ler %f0,0
#CHECK: error: invalid operand for instruction
#CHECK: ler %f0,0(%r1)
ler %r0,%f1
ler %a0,%f1
+ ler %c0,%f1
ler %f0,%r1
ler %f0,%a1
+ ler %f0,%c1
ler %f0,0
ler %f0,0(%r1)
@@ -120,18 +144,24 @@
#CHECK: error: invalid operand for instruction
#CHECK: ldr %a0,%f1
#CHECK: error: invalid operand for instruction
+#CHECK: ldr %c0,%f1
+#CHECK: error: invalid operand for instruction
#CHECK: ldr %f0,%r1
#CHECK: error: invalid operand for instruction
#CHECK: ldr %f0,%a1
#CHECK: error: invalid operand for instruction
+#CHECK: ldr %f0,%c1
+#CHECK: error: invalid operand for instruction
#CHECK: ldr %f0,0
#CHECK: error: invalid operand for instruction
#CHECK: ldr %f0,0(%r1)
ldr %r0,%f1
ldr %a0,%f1
+ ldr %c0,%f1
ldr %f0,%r1
ldr %f0,%a1
+ ldr %f0,%c1
ldr %f0,0
ldr %f0,0(%r1)
@@ -158,10 +188,14 @@
#CHECK: error: invalid operand for instruction
#CHECK: lxr %a0,%f1
#CHECK: error: invalid operand for instruction
+#CHECK: lxr %c0,%f1
+#CHECK: error: invalid operand for instruction
#CHECK: lxr %f0,%r1
#CHECK: error: invalid operand for instruction
#CHECK: lxr %f0,%a1
#CHECK: error: invalid operand for instruction
+#CHECK: lxr %f0,%c1
+#CHECK: error: invalid operand for instruction
#CHECK: lxr %f0,0
#CHECK: error: invalid operand for instruction
#CHECK: lxr %f0,0(%r1)
@@ -176,8 +210,10 @@
lxr %f0,%f15
lxr %r0,%f1
lxr %a0,%f1
+ lxr %c0,%f1
lxr %f0,%r1
lxr %f0,%a1
+ lxr %f0,%c1
lxr %f0,0
lxr %f0,0(%r1)
@@ -188,15 +224,37 @@
#CHECK: error: invalid operand for instruction
#CHECK: ear %r0,%f0
#CHECK: error: invalid operand for instruction
+#CHECK: ear %r0,%c0
+#CHECK: error: invalid operand for instruction
#CHECK: ear %r0,0
#CHECK: error: invalid operand for instruction
#CHECK: ear %r0,0(%r1)
ear %r0,%r0
ear %r0,%f0
+ ear %r0,%c0
ear %r0,0
ear %r0,0(%r1)
+# Test control register operands
+#
+#CHECK: error: invalid operand for instruction
+#CHECK: lctl %c0,%r0,0
+#CHECK: error: invalid operand for instruction
+#CHECK: lctl %c0,%f0,0
+#CHECK: error: invalid operand for instruction
+#CHECK: lctl %c0,%a0,0
+#CHECK: error: invalid operand for instruction
+#CHECK: lctl %c0,0,0
+#CHECK: error: invalid operand for instruction
+#CHECK: lctl %c0,0(%r1),0
+
+ lctl %c0,%r0,0
+ lctl %c0,%f0,0
+ lctl %c0,%a0,0
+ lctl %c0,0,0
+ lctl %c0,0(%r1),0
+
.cfi_startproc
# Test general register parsing, with no predetermined class in mind.
@@ -212,9 +266,9 @@
#CHECK: error: invalid register
#CHECK: .cfi_offset %a,0
#CHECK: error: invalid register
-#CHECK: .cfi_offset %0,0
+#CHECK: .cfi_offset %c,0
#CHECK: error: invalid register
-#CHECK: .cfi_offset %c0,0
+#CHECK: .cfi_offset %0,0
#CHECK: error: invalid register
#CHECK: .cfi_offset %r16,0
#CHECK: error: invalid register
@@ -222,6 +276,8 @@
#CHECK: error: invalid register
#CHECK: .cfi_offset %a16,0
#CHECK: error: invalid register
+#CHECK: .cfi_offset %c16,0
+#CHECK: error: invalid register
#CHECK: .cfi_offset %reef,0
#CHECK: error: invalid register
#CHECK: .cfi_offset %arid,0
@@ -231,11 +287,12 @@
.cfi_offset %r,0
.cfi_offset %f,0
.cfi_offset %a,0
+ .cfi_offset %c,0
.cfi_offset %0,0
- .cfi_offset %c0,0
.cfi_offset %r16,0
.cfi_offset %f16,0
.cfi_offset %a16,0
+ .cfi_offset %c16,0
.cfi_offset %reef,0
.cfi_offset %arid,0
diff --git a/test/MC/SystemZ/regs-good.s b/test/MC/SystemZ/regs-good.s
index 4047579bcbb3..c20301133d87 100644
--- a/test/MC/SystemZ/regs-good.s
+++ b/test/MC/SystemZ/regs-good.s
@@ -118,6 +118,25 @@
cpya %a12,%a13
cpya %a14,%a15
+#CHECK: lctl %c0, %c1, 0 # encoding: [0xb7,0x01,0x00,0x00]
+#CHECK: lctl %c2, %c3, 0 # encoding: [0xb7,0x23,0x00,0x00]
+#CHECK: lctl %c4, %c5, 0 # encoding: [0xb7,0x45,0x00,0x00]
+#CHECK: lctl %c6, %c7, 0 # encoding: [0xb7,0x67,0x00,0x00]
+#CHECK: lctl %c8, %c9, 0 # encoding: [0xb7,0x89,0x00,0x00]
+#CHECK: lctl %c10, %c11, 0 # encoding: [0xb7,0xab,0x00,0x00]
+#CHECK: lctl %c12, %c13, 0 # encoding: [0xb7,0xcd,0x00,0x00]
+#CHECK: lctl %c14, %c15, 0 # encoding: [0xb7,0xef,0x00,0x00]
+
+ lctl %c0,%c1,0
+ lctl %c2,%c3,0
+ lctl %c4,%c5,0
+ lctl %c6,%c7,0
+ lctl %c8,%c9,0
+ lctl %c10,%c11,0
+ lctl %c12,%c13,0
+ lctl %c14,%c15,0
+
+
#CHECK: .cfi_offset %r0, 0
#CHECK: .cfi_offset %r1, 8
#CHECK: .cfi_offset %r2, 16
@@ -166,6 +185,22 @@
#CHECK: .cfi_offset %a13, 308
#CHECK: .cfi_offset %a14, 312
#CHECK: .cfi_offset %a15, 316
+#CHECK: .cfi_offset %c0, 318
+#CHECK: .cfi_offset %c1, 326
+#CHECK: .cfi_offset %c2, 334
+#CHECK: .cfi_offset %c3, 342
+#CHECK: .cfi_offset %c4, 350
+#CHECK: .cfi_offset %c5, 358
+#CHECK: .cfi_offset %c6, 366
+#CHECK: .cfi_offset %c7, 374
+#CHECK: .cfi_offset %c8, 382
+#CHECK: .cfi_offset %c9, 390
+#CHECK: .cfi_offset %c10, 398
+#CHECK: .cfi_offset %c11, 406
+#CHECK: .cfi_offset %c12, 414
+#CHECK: .cfi_offset %c13, 422
+#CHECK: .cfi_offset %c14, 430
+#CHECK: .cfi_offset %c15, 438
.cfi_startproc
.cfi_offset %r0,0
@@ -216,4 +251,20 @@
.cfi_offset %a13,308
.cfi_offset %a14,312
.cfi_offset %a15,316
+ .cfi_offset %c0,318
+ .cfi_offset %c1,326
+ .cfi_offset %c2,334
+ .cfi_offset %c3,342
+ .cfi_offset %c4,350
+ .cfi_offset %c5,358
+ .cfi_offset %c6,366
+ .cfi_offset %c7,374
+ .cfi_offset %c8,382
+ .cfi_offset %c9,390
+ .cfi_offset %c10,398
+ .cfi_offset %c11,406
+ .cfi_offset %c12,414
+ .cfi_offset %c13,422
+ .cfi_offset %c14,430
+ .cfi_offset %c15,438
.cfi_endproc
diff --git a/test/MC/WebAssembly/unnamed-data.ll b/test/MC/WebAssembly/unnamed-data.ll
index 77a7c08f6594..fd985088c1d2 100644
--- a/test/MC/WebAssembly/unnamed-data.ll
+++ b/test/MC/WebAssembly/unnamed-data.ll
@@ -7,47 +7,53 @@
@b = global i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str2, i32 0, i32 0), align 8
-; CHECK: - Type: GLOBAL
-; CHECK: Globals:
-; CHECK: - Type: I32
-; CHECK: Mutable: false
-; CHECK: InitExpr:
-; CHECK: Opcode: I32_CONST
-; CHECK: Value: 0
-; CHECK: - Type: I32
-; CHECK: Mutable: false
-; CHECK: InitExpr:
-; CHECK: Opcode: I32_CONST
-; CHECK: Value: 6
-; CHECK: - Type: I32
-; CHECK: Mutable: false
-; CHECK: InitExpr:
-; CHECK: Opcode: I32_CONST
-; CHECK: Value: 16
-; CHECK: - Type: I32
-; CHECK: Mutable: false
-; CHECK: InitExpr:
-; CHECK: Opcode: I32_CONST
-; CHECK: Value: 24
-; CHECK: - Type: EXPORT
-; CHECK: Exports:
-; CHECK: - Name: a
-; CHECK: Kind: GLOBAL
-; CHECK: Index: 2
-; CHECK: - Name: b
-; CHECK: Kind: GLOBAL
-; CHECK: Index: 3
-; CHECK: - Type: DATA
-; CHECK: Relocations:
-; CHECK: - Type: R_WEBASSEMBLY_GLOBAL_ADDR_I32
-; CHECK: Index: 0
-; CHECK: Offset: 0x00000016
-; CHECK: - Type: R_WEBASSEMBLY_GLOBAL_ADDR_I32
-; CHECK: Index: 1
-; CHECK: Offset: 0x0000001E
-; CHECK: Segments:
-; CHECK: - Index: 0
-; CHECK: Offset:
-; CHECK: Opcode: I32_CONST
-; CHECK: Value: 0
-; CHECK: Content: 68656C6C6F00776F726C640000000000000000000000000006000000
+; CHECK: - Type: GLOBAL
+; CHECK-NEXT: Globals:
+; CHECK-NEXT: - Type: I32
+; CHECK-NEXT: Mutable: false
+; CHECK-NEXT: InitExpr:
+; CHECK-NEXT: Opcode: I32_CONST
+; CHECK-NEXT: Value: 0
+; CHECK-NEXT: - Type: I32
+; CHECK-NEXT: Mutable: false
+; CHECK-NEXT: InitExpr:
+; CHECK-NEXT: Opcode: I32_CONST
+; CHECK-NEXT: Value: 6
+; CHECK-NEXT: - Type: I32
+; CHECK-NEXT: Mutable: false
+; CHECK-NEXT: InitExpr:
+; CHECK-NEXT: Opcode: I32_CONST
+; CHECK-NEXT: Value: 16
+; CHECK-NEXT: - Type: I32
+; CHECK-NEXT: Mutable: false
+; CHECK-NEXT: InitExpr:
+; CHECK-NEXT: Opcode: I32_CONST
+; CHECK-NEXT: Value: 24
+; CHECK-NEXT: - Type: EXPORT
+; CHECK-NEXT: Exports:
+; CHECK-NEXT: - Name: a
+; CHECK-NEXT: Kind: GLOBAL
+; CHECK-NEXT: Index: 2
+; CHECK-NEXT: - Name: b
+; CHECK-NEXT: Kind: GLOBAL
+; CHECK-NEXT: Index: 3
+; CHECK-NEXT: - Type: DATA
+; CHECK-NEXT: Relocations:
+; CHECK-NEXT: - Type: R_WEBASSEMBLY_GLOBAL_ADDR_I32
+; CHECK-NEXT: Index: 0
+; CHECK-NEXT: Offset: 0x00000016
+; CHECK-NEXT: - Type: R_WEBASSEMBLY_GLOBAL_ADDR_I32
+; CHECK-NEXT: Index: 1
+; CHECK-NEXT: Offset: 0x0000001E
+; CHECK-NEXT: Segments:
+; CHECK-NEXT: - Index: 0
+; CHECK-NEXT: Offset:
+; CHECK-NEXT: Opcode: I32_CONST
+; CHECK-NEXT: Value: 0
+; CHECK-NEXT: Content: 68656C6C6F00776F726C640000000000000000000000000006000000
+; CHECK-NEXT: - Type: CUSTOM
+; CHECK-NEXT: Name: linking
+; CHECK-NEXT: DataSize: 28
+; CHECK-NEXT: DataAlignment: 8
+; CHECK-NEXT: SymbolInfo:
+; CHECK-NEXT: ...
diff --git a/test/MC/WebAssembly/weak-alias.ll b/test/MC/WebAssembly/weak-alias.ll
new file mode 100644
index 000000000000..6e2b8631d2b1
--- /dev/null
+++ b/test/MC/WebAssembly/weak-alias.ll
@@ -0,0 +1,35 @@
+; RUN: llc -mtriple wasm32-unknown-unknown-wasm -filetype=obj %s -o - | obj2yaml | FileCheck %s
+
+; The foo_alias() function is a weak alias of the function foo().
+; This generates two exports of the same function, one of them weak.
+
+@foo_alias = weak hidden alias i32 (...), bitcast (i32 ()* @foo to i32 (...)*)
+
+define hidden i32 @foo() #0 {
+entry:
+ ret i32 0
+}
+
+; CHECK: - Type: EXPORT
+; CHECK-NEXT: Exports:
+; CHECK-NEXT: - Name: foo
+; CHECK-NEXT: Kind: FUNCTION
+; CHECK-NEXT: Index: 0
+; CHECK-NEXT: - Name: foo_alias
+; CHECK-NEXT: Kind: FUNCTION
+; CHECK-NEXT: Index: 0
+
+
+; CHECK: - Type: CUSTOM
+; CHECK-NEXT: Name: name
+; CHECK-NEXT: FunctionNames:
+; CHECK-NEXT: - Index: 0
+; CHECK-NEXT: Name: foo
+; CHECK-NEXT: - Type: CUSTOM
+; CHECK-NEXT: Name: linking
+; CHECK-NEXT: DataSize: 0
+; CHECK-NEXT: DataAlignment: 0
+; CHECK-NEXT: SymbolInfo:
+; CHECK-NEXT: - Name: foo_alias
+; CHECK-NEXT: Flags: 1
+; CHECK-NEXT: ...
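A weak alias of this shape typically originates from source-level attributes rather than hand-written IR. A minimal C sketch (hypothetical, for illustration only — the identifiers mirror the test, but this source is not part of the patch), assuming GCC/Clang alias and visibility attributes:

    /* weak-alias.c -- hypothetical source that lowers to a weak hidden alias */
    __attribute__((visibility("hidden")))
    int foo(void) { return 0; }

    /* foo_alias resolves to foo unless a strong definition overrides it. */
    __attribute__((weak, alias("foo"), visibility("hidden")))
    int foo_alias(void);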
diff --git a/test/MC/WebAssembly/weak.ll b/test/MC/WebAssembly/weak.ll
new file mode 100644
index 000000000000..1bc06fec5910
--- /dev/null
+++ b/test/MC/WebAssembly/weak.ll
@@ -0,0 +1,36 @@
+; RUN: llc -mtriple wasm32-unknown-unknown-wasm -filetype=obj %s -o - | obj2yaml | FileCheck %s
+
+; Weak external data reference
+@weak_external_data = extern_weak global i32, align 4
+
+; Weak function definition
+define weak hidden i32 @weak_function() local_unnamed_addr #0 {
+entry:
+ %0 = load i32, i32* @weak_external_data, align 4
+ ret i32 %0
+}
+
+; CHECK: - Type: IMPORT
+; CHECK-NEXT: Imports:
+; CHECK-NEXT: - Module: env
+; CHECK-NEXT: Field: weak_external_data
+; CHECK-NEXT: Kind: GLOBAL
+; CHECK-NEXT: GlobalType: I32
+; CHECK-NEXT: GlobalMutable: false
+
+
+; CHECK: - Type: CUSTOM
+; CHECK-NEXT: Name: name
+; CHECK-NEXT: FunctionNames:
+; CHECK-NEXT: - Index: 0
+; CHECK-NEXT: Name: weak_function
+; CHECK-NEXT: - Type: CUSTOM
+; CHECK-NEXT: Name: linking
+; CHECK-NEXT: DataSize: 0
+; CHECK-NEXT: DataAlignment: 0
+; CHECK-NEXT: SymbolInfo:
+; CHECK-NEXT: - Name: weak_external_data
+; CHECK-NEXT: Flags: 1
+; CHECK-NEXT: - Name: weak_function
+; CHECK-NEXT: Flags: 1
+; CHECK-NEXT: ...
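As with the alias test above, this IR corresponds to ordinary weak declarations at the C level. A minimal sketch (hypothetical, not part of the patch), assuming GCC/Clang weak attributes:

    /* weak.c -- hypothetical source for an extern_weak reference plus a weak definition */
    extern int weak_external_data __attribute__((weak));

    __attribute__((weak, visibility("hidden")))
    int weak_function(void) { return weak_external_data; }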
diff --git a/test/MC/X86/intel-syntax-bitwise-ops.s b/test/MC/X86/intel-syntax-bitwise-ops.s
index 6d4df609c061..a0b25800f976 100644
--- a/test/MC/X86/intel-syntax-bitwise-ops.s
+++ b/test/MC/X86/intel-syntax-bitwise-ops.s
@@ -56,3 +56,20 @@
add eax, 6 XOR 3
// CHECK: addl $5, %eax
add eax, 6 XOR 3 shl 1 SHR 1
+// CHECK: movl $-9, %eax
+ mov eax, not(1 shl 3)
+// CHECK: movl $-2, %eax
+ mov eax, ~(0x8 shr 3)
+// CHECK: movl $-4, %eax
+ mov eax, not(1 or 3)
+// CHECK: movl $-2, %eax
+ mov eax, -(1 xor 3)
+// CHECK: movl $-2, %eax
+ mov eax, not(1 and 3)
+// CHECK: movl $3, %eax
+ mov eax, not(not 3)
+// CHECK: movl $-3, %eax
+ mov eax, ~(5 mod 3)
+// CHECK: movl $-2, %eax
+ mov eax, (-5 mod 3)
+
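Each of the new assertions is a constant the assembler folds at parse time in 32-bit two's complement, e.g. not(1 shl 3) = ~8 = -9, and -5 mod 3 = -2 under truncating division. Since C's operators share these semantics, a small C program (an illustration only, not part of the test suite) can cross-check every expected value:

    /* fold.c -- hypothetical cross-check of the constants asserted above */
    #include <stdio.h>

    int main(void) {
        printf("%d\n", ~(1 << 3));   /* -9, matches not(1 shl 3)   */
        printf("%d\n", ~(0x8 >> 3)); /* -2, matches ~(0x8 shr 3)   */
        printf("%d\n", ~(1 | 3));    /* -4, matches not(1 or 3)    */
        printf("%d\n", -(1 ^ 3));    /* -2, matches -(1 xor 3)     */
        printf("%d\n", ~(1 & 3));    /* -2, matches not(1 and 3)   */
        printf("%d\n", ~(~3));       /*  3, matches not(not 3)     */
        printf("%d\n", ~(5 % 3));    /* -3, matches ~(5 mod 3)     */
        printf("%d\n", -5 % 3);      /* -2, C's % truncates toward zero */
        return 0;
    }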
diff --git a/test/MC/X86/signed-coff-pcrel.s b/test/MC/X86/signed-coff-pcrel.s
new file mode 100644
index 000000000000..768947bbf803
--- /dev/null
+++ b/test/MC/X86/signed-coff-pcrel.s
@@ -0,0 +1,12 @@
+// RUN: llvm-mc -triple i686-unknown-windows-msvc -filetype obj -o %t.o %s
+// RUN: llvm-objdump -r %t.o | FileCheck %s
+
+// CHECK: 00000004 IMAGE_REL_I386_REL32 twop32
+
+ .section .rdata,"rd"
+twop32:
+ .quad 0x41f0000000000000
+
+ .text
+0:
+ mulsd twop32-0b(%eax), %xmm1
diff --git a/test/Object/X86/irsymtab-asm.ll b/test/Object/X86/irsymtab-asm.ll
new file mode 100644
index 000000000000..487dc37b6571
--- /dev/null
+++ b/test/Object/X86/irsymtab-asm.ll
@@ -0,0 +1,17 @@
+; Check that we correctly handle the case where the module contains inline asm
+; and the target is not registered; in that case we should not emit an irsymtab.
+
+; RUN: llvm-as -o %t %s
+; RUN: llvm-bcanalyzer -dump %t | FileCheck --check-prefix=AS %s
+
+; AS-NOT: <SYMTAB_BLOCK
+
+; RUN: opt -o %t2 %s
+; RUN: llvm-bcanalyzer -dump %t2 | FileCheck --check-prefix=OPT %s
+
+; OPT: <SYMTAB_BLOCK
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+module asm "ret"
diff --git a/test/Object/X86/irsymtab-bad-alias.ll b/test/Object/X86/irsymtab-bad-alias.ll
new file mode 100644
index 000000000000..c54436d59219
--- /dev/null
+++ b/test/Object/X86/irsymtab-bad-alias.ll
@@ -0,0 +1,15 @@
+; Check that we do not create an irsymtab for modules whose IR the symbol table cannot represent.
+
+; RUN: opt -o %t %s
+; RUN: llvm-bcanalyzer -dump %t | FileCheck %s
+
+; CHECK-NOT: <SYMTAB_BLOCK
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@g1 = global i32 1
+@g2 = global i32 2
+
+@a = alias i32, inttoptr(i32 sub (i32 ptrtoint (i32* @g1 to i32),
+ i32 ptrtoint (i32* @g2 to i32)) to i32*)
diff --git a/test/Object/X86/irsymtab.ll b/test/Object/X86/irsymtab.ll
new file mode 100644
index 000000000000..053756d4fc6b
--- /dev/null
+++ b/test/Object/X86/irsymtab.ll
@@ -0,0 +1,33 @@
+; RUN: env LLVM_OVERRIDE_PRODUCER=producer opt -o %t %s
+; RUN: llvm-bcanalyzer -dump -show-binary-blobs %t | FileCheck --check-prefix=BCA %s
+
+; Same producer, does not require upgrade.
+; RUN: env LLVM_OVERRIDE_PRODUCER=producer llvm-lto2 dump-symtab %t | FileCheck --check-prefix=SYMTAB %s
+
+; Different producer, requires upgrade.
+; RUN: env LLVM_OVERRIDE_PRODUCER=consumer llvm-lto2 dump-symtab %t | FileCheck --check-prefix=SYMTAB %s
+
+; BCA: <SYMTAB_BLOCK
+; Version stored at offset 0.
+; BCA-NEXT: <BLOB abbrevid=4/> blob data = '\x00\x00\x00\x00\x06\x00\x00\x00\x08\x00\x00\x00D\x00\x00\x00\x01\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00P\x00\x00\x00\x02\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x0E\x00\x00\x00\x18\x00\x00\x00&\x00\x00\x00\x0B\x00\x00\x001\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\xFF\xFF\xFF\xFF\x00$\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\xFF\xFF\xFF\xFF\x08$\x00\x00'
+; BCA-NEXT: </SYMTAB_BLOCK>
+; BCA-NEXT: <STRTAB_BLOCK
+; BCA-NEXT: <BLOB abbrevid=4/> blob data = 'foobarproducerx86_64-unknown-linux-gnuirsymtab.ll'
+; BCA-NEXT: </STRTAB_BLOCK>
+
+; SYMTAB: version: 0
+; SYMTAB-NEXT: producer: producer
+; SYMTAB-NEXT: target triple: x86_64-unknown-linux-gnu
+; SYMTAB-NEXT: source filename: irsymtab.ll
+; SYMTAB-NEXT: D------X foo
+; SYMTAB-NEXT: DU-----X bar
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+source_filename = "irsymtab.ll"
+
+define void @foo() {
+ ret void
+}
+
+declare void @bar()
diff --git a/test/Object/X86/yaml-elf-x86-rel-broken.yaml b/test/Object/X86/yaml-elf-x86-rel-broken.yaml
new file mode 100644
index 000000000000..edd5dbce1236
--- /dev/null
+++ b/test/Object/X86/yaml-elf-x86-rel-broken.yaml
@@ -0,0 +1,29 @@
+# RUN: yaml2obj %s > %t
+# RUN: obj2yaml %t | FileCheck %s
+
+# CHECK: Relocations:
+# CHECK-NEXT: - Offset:
+# CHECK-NEXT: Symbol:
+# CHECK-NEXT: Type: 0x000000FF
+
+!ELF
+FileHeader:
+ Class: ELFCLASS32
+ Data: ELFDATA2LSB
+ Type: ET_REL
+ Machine: EM_386
+Sections:
+ - Type: SHT_PROGBITS
+ Name: .text
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ AddressAlign: 0x04
+ Content: 0000000000000000
+ - Type: SHT_REL
+ Name: .rel.text
+ Link: .symtab
+ Info: .text
+ AddressAlign: 0x04
+ Relocations:
+ - Offset: 0
+ Symbol: main
+ Type: 0xFF
diff --git a/test/ObjectYAML/wasm/weak_symbols.yaml b/test/ObjectYAML/wasm/weak_symbols.yaml
index 0ae8c9bec2a2..ab80c1e50290 100644
--- a/test/ObjectYAML/wasm/weak_symbols.yaml
+++ b/test/ObjectYAML/wasm/weak_symbols.yaml
@@ -3,6 +3,19 @@
FileHeader:
Version: 0x00000001
Sections:
+ - Type: TYPE
+ Signatures:
+ - ReturnType: I32
+ ParamTypes:
+ - Type: FUNCTION
+ FunctionTypes: [ 0, 0 ]
+ - Type: GLOBAL
+ Globals:
+ - Type: I32
+ Mutable: false
+ InitExpr:
+ Opcode: I32_CONST
+ Value: 1
- Type: EXPORT
Exports:
- Name: function_export
@@ -10,9 +23,11 @@ Sections:
Index: 1
- Name: global_export
Kind: GLOBAL
- Index: 2
+ Index: 0
- Type: CUSTOM
Name: linking
+ DataSize: 10
+ DataAlignment: 2
SymbolInfo:
- Name: function_export
Flags: 1
@@ -30,9 +45,11 @@ Sections:
# CHECK: Index: 1
# CHECK: - Name: global_export
# CHECK: Kind: GLOBAL
-# CHECK: Index: 2
+# CHECK: Index: 0
# CHECK: - Type: CUSTOM
# CHECK: Name: linking
+# CHECK: DataSize: 10
+# CHECK: DataAlignment: 2
# CHECK: SymbolInfo:
# CHECK: - Name: function_export
# CHECK: Flags: 1
diff --git a/test/Other/new-pm-defaults.ll b/test/Other/new-pm-defaults.ll
index c5d10a0a67e3..fbecb34aa4b7 100644
--- a/test/Other/new-pm-defaults.ll
+++ b/test/Other/new-pm-defaults.ll
@@ -74,6 +74,7 @@
; CHECK-O-NEXT: Starting llvm::Function pass manager run.
; CHECK-O-NEXT: Running pass: SROA
; CHECK-O-NEXT: Running pass: EarlyCSEPass
+; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis
; CHECK-O-NEXT: Running pass: SpeculativeExecutionPass
; CHECK-O-NEXT: Running pass: JumpThreadingPass
; CHECK-O-NEXT: Running analysis: LazyValueAnalysis
diff --git a/test/Other/new-pm-thinlto-defaults.ll b/test/Other/new-pm-thinlto-defaults.ll
index 52f475b0397d..f5625d96d703 100644
--- a/test/Other/new-pm-thinlto-defaults.ll
+++ b/test/Other/new-pm-thinlto-defaults.ll
@@ -9,19 +9,19 @@
;
; Prelink pipelines:
; RUN: opt -disable-verify -debug-pass-manager \
-; RUN: -passes='thinlto-pre-link<O1>' -S %s 2>&1 \
+; RUN: -passes='thinlto-pre-link<O1>,name-anon-globals' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O1,CHECK-PRELINK-O,CHECK-PRELINK-O1
; RUN: opt -disable-verify -debug-pass-manager \
-; RUN: -passes='thinlto-pre-link<O2>' -S %s 2>&1 \
+; RUN: -passes='thinlto-pre-link<O2>,name-anon-globals' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-PRELINK-O,CHECK-PRELINK-O2
; RUN: opt -disable-verify -debug-pass-manager \
-; RUN: -passes='thinlto-pre-link<O3>' -S %s 2>&1 \
+; RUN: -passes='thinlto-pre-link<O3>,name-anon-globals' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-PRELINK-O,CHECK-PRELINK-O3
; RUN: opt -disable-verify -debug-pass-manager \
-; RUN: -passes='thinlto-pre-link<Os>' -S %s 2>&1 \
+; RUN: -passes='thinlto-pre-link<Os>,name-anon-globals' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Os,CHECK-PRELINK-O,CHECK-PRELINK-Os
; RUN: opt -disable-verify -debug-pass-manager \
-; RUN: -passes='thinlto-pre-link<Oz>' -S %s 2>&1 \
+; RUN: -passes='thinlto-pre-link<Oz>,name-anon-globals' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Oz,CHECK-PRELINK-O,CHECK-PRELINK-Oz
;
; Postlink pipelines:
@@ -90,6 +90,7 @@
; CHECK-O-NEXT: Starting llvm::Function pass manager run.
; CHECK-O-NEXT: Running pass: SROA
; CHECK-O-NEXT: Running pass: EarlyCSEPass
+; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis
; CHECK-O-NEXT: Running pass: SpeculativeExecutionPass
; CHECK-O-NEXT: Running pass: JumpThreadingPass
; CHECK-O-NEXT: Running analysis: LazyValueAnalysis
@@ -153,7 +154,6 @@
; CHECK-O-NEXT: Finished CGSCC pass manager run.
; CHECK-O-NEXT: Finished llvm::Module pass manager run.
; CHECK-PRELINK-O-NEXT: Running pass: GlobalOptPass
-; CHECK-PRELINK-O-NEXT: Running pass: NameAnonGlobalPass
; CHECK-POSTLINK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}>
; CHECK-POSTLINK-O-NEXT: Starting llvm::Module pass manager run.
; CHECK-POSTLINK-O-NEXT: Running pass: GlobalOptPass
@@ -187,6 +187,7 @@
; CHECK-POSTLINK-O-NEXT: Running pass: ConstantMergePass
; CHECK-POSTLINK-O-NEXT: Finished llvm::Module pass manager run.
; CHECK-O-NEXT: Finished llvm::Module pass manager run.
+; CHECK-PRELINK-O-NEXT: Running pass: NameAnonGlobalPass
; CHECK-O-NEXT: Running pass: PrintModulePass
; Make sure we get the IR back out without changes when we print the module.
diff --git a/test/ThinLTO/X86/autoupgrade.ll b/test/ThinLTO/X86/autoupgrade.ll
index cbbe833d262a..2188d031c439 100644
--- a/test/ThinLTO/X86/autoupgrade.ll
+++ b/test/ThinLTO/X86/autoupgrade.ll
@@ -10,7 +10,7 @@
; RUN: | llvm-bcanalyzer -dump | FileCheck %s
; CHECK: <STRTAB_BLOCK
-; CHECK-NEXT: blob data = 'mainglobalfunc1llvm.invariant.start.p0i8'
+; CHECK-NEXT: blob data = 'mainglobalfunc1llvm.invariant.start.p0i8{{.*}}'
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.11.0"
diff --git a/test/Transforms/BBVectorize/X86/cmp-types.ll b/test/Transforms/BBVectorize/X86/cmp-types.ll
deleted file mode 100644
index fc1da1b0c609..000000000000
--- a/test/Transforms/BBVectorize/X86/cmp-types.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -S | FileCheck %s
-
-%"struct.btSoftBody" = type { float, float, float*, i8 }
-
-define void @test1(%"struct.btSoftBody"* %n1, %"struct.btSoftBody"* %n2) uwtable align 2 {
-entry:
- %tobool15 = icmp ne %"struct.btSoftBody"* %n1, null
- %cond16 = zext i1 %tobool15 to i32
- %tobool21 = icmp ne %"struct.btSoftBody"* %n2, null
- %cond22 = zext i1 %tobool21 to i32
- ret void
-; CHECK-LABEL: @test1(
-}
-
diff --git a/test/Transforms/BBVectorize/X86/loop1.ll b/test/Transforms/BBVectorize/X86/loop1.ll
deleted file mode 100644
index a533713609a7..000000000000
--- a/test/Transforms/BBVectorize/X86/loop1.ll
+++ /dev/null
@@ -1,61 +0,0 @@
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -basicaa -loop-unroll -unroll-partial-threshold=45 -unroll-allow-partial -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-UNRL
-; The second check covers the use of alias analysis (with loop unrolling).
-
-define void @test1(double* noalias %out, double* noalias %in1, double* noalias %in2) nounwind uwtable {
-entry:
- br label %for.body
-; CHECK-LABEL: @test1(
-; CHECK-UNRL-LABEL: @test1(
-
-for.body: ; preds = %for.body, %entry
- %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %arrayidx = getelementptr inbounds double, double* %in1, i64 %indvars.iv
- %0 = load double, double* %arrayidx, align 8
- %arrayidx2 = getelementptr inbounds double, double* %in2, i64 %indvars.iv
- %1 = load double, double* %arrayidx2, align 8
- %mul = fmul double %0, %0
- %mul3 = fmul double %0, %1
- %add = fadd double %mul, %mul3
- %add4 = fadd double %1, %1
- %add5 = fadd double %add4, %0
- %mul6 = fmul double %0, %add5
- %add7 = fadd double %add, %mul6
- %mul8 = fmul double %1, %1
- %add9 = fadd double %0, %0
- %add10 = fadd double %add9, %0
- %mul11 = fmul double %mul8, %add10
- %add12 = fadd double %add7, %mul11
- %arrayidx14 = getelementptr inbounds double, double* %out, i64 %indvars.iv
- store double %add12, double* %arrayidx14, align 8
- %indvars.iv.next = add i64 %indvars.iv, 1
- %lftr.wideiv = trunc i64 %indvars.iv.next to i32
- %exitcond = icmp eq i32 %lftr.wideiv, 10
- br i1 %exitcond, label %for.end, label %for.body
-; CHECK: insertelement
-; CHECK-NEXT: insertelement
-; CHECK-NEXT: fadd <2 x double>
-; CHECK-NEXT: insertelement
-; CHECK-NEXT: shufflevector
-; CHECK-NEXT: fadd <2 x double>
-; CHECK-NEXT: insertelement
-; CHECK-NEXT: fmul <2 x double>
-
-; CHECK-UNRL: %mul = fmul <2 x double> %2, %2
-; CHECK-UNRL: %mul3 = fmul <2 x double> %2, %3
-; CHECK-UNRL: %add = fadd <2 x double> %mul, %mul3
-; CHECK-UNRL: %add4 = fadd <2 x double> %3, %3
-; CHECK-UNRL: %add5 = fadd <2 x double> %add4, %2
-; CHECK-UNRL: %mul6 = fmul <2 x double> %2, %add5
-; CHECK-UNRL: %add7 = fadd <2 x double> %add, %mul6
-; CHECK-UNRL: %mul8 = fmul <2 x double> %3, %3
-; CHECK-UNRL: %add9 = fadd <2 x double> %2, %2
-; CHECK-UNRL: %add10 = fadd <2 x double> %add9, %2
-; CHECK-UNRL: %mul11 = fmul <2 x double> %mul8, %add10
-; CHECK-UNRL: %add12 = fadd <2 x double> %add7, %mul11
-
-for.end: ; preds = %for.body
- ret void
-}
diff --git a/test/Transforms/BBVectorize/X86/pr15289.ll b/test/Transforms/BBVectorize/X86/pr15289.ll
deleted file mode 100644
index a383a260fafd..000000000000
--- a/test/Transforms/BBVectorize/X86/pr15289.ll
+++ /dev/null
@@ -1,95 +0,0 @@
-; RUN: opt < %s -basicaa -bb-vectorize -disable-output
-; This is a bugpoint-reduced test case. It did not always assert, but does reproduce the bug
-; and running under valgrind (or some similar tool) will catch the error.
-
-target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-darwin12.2.0"
-
-%0 = type { [10 x { float, float }], [10 x { float, float }], [10 x { float, float }], [10 x { float, float }], [10 x { float, float }] }
-%1 = type { [10 x [8 x i8]] }
-%2 = type { i64, i64 }
-%3 = type { [10 x i64], i64, i64, i64, i64, i64 }
-%4 = type { i64, i64, i64, i64, i64, i64 }
-%5 = type { [10 x i64] }
-%6 = type { [10 x float], [10 x float], [10 x float], [10 x float] }
-%struct.__st_parameter_dt.1.3.5.7 = type { %struct.__st_parameter_common.0.2.4.6, i64, i64*, i64*, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i8*, [256 x i8], i32*, i64, i8*, i32, i32, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i8*, i8*, i32, [4 x i8] }
-%struct.__st_parameter_common.0.2.4.6 = type { i32, i32, i8*, i32, i32, i8*, i32* }
-
-@cctenso_ = external unnamed_addr global %0, align 32
-@ctenso_ = external unnamed_addr global %1, align 32
-@i_dim_ = external unnamed_addr global %2, align 16
-@itenso1_ = external unnamed_addr global %3, align 32
-@itenso2_ = external unnamed_addr global %4, align 32
-@ltenso_ = external unnamed_addr global %5, align 32
-@rtenso_ = external unnamed_addr global %6, align 32
-@.cst = external unnamed_addr constant [8 x i8], align 8
-@.cst1 = external unnamed_addr constant [3 x i8], align 8
-@.cst2 = external unnamed_addr constant [29 x i8], align 8
-@.cst3 = external unnamed_addr constant [32 x i8], align 64
-
-define void @cart_to_dc2y_(double* noalias nocapture %xx, double* noalias nocapture %yy, double* noalias nocapture %zz, [5 x { double, double }]* noalias nocapture %c2ten) nounwind uwtable {
-entry:
- %0 = fmul double undef, undef
- %1 = fmul double undef, undef
- %2 = fadd double undef, undef
- %3 = fmul double undef, 0x3FE8B8B76E3E9919
- %4 = fsub double %0, %1
- %5 = fsub double -0.000000e+00, undef
- %6 = fmul double undef, undef
- %7 = fmul double %4, %6
- %8 = fmul double undef, 2.000000e+00
- %9 = fmul double %8, undef
- %10 = fmul double undef, %9
- %11 = fmul double %10, undef
- %12 = fsub double undef, %7
- %13 = fmul double %3, %12
- %14 = fmul double %3, undef
- %15 = getelementptr inbounds [5 x { double, double }], [5 x { double, double }]* %c2ten, i64 0, i64 0, i32 0
- store double %13, double* %15, align 8
- %16 = getelementptr inbounds [5 x { double, double }], [5 x { double, double }]* %c2ten, i64 0, i64 0, i32 1
- %17 = fmul double undef, %8
- %18 = fmul double %17, undef
- %19 = fmul double undef, %18
- %20 = fadd double undef, undef
- %21 = fmul double %3, %19
- %22 = fsub double -0.000000e+00, %21
- %23 = getelementptr inbounds [5 x { double, double }], [5 x { double, double }]* %c2ten, i64 0, i64 1, i32 0
- store double %22, double* %23, align 8
- %24 = getelementptr inbounds [5 x { double, double }], [5 x { double, double }]* %c2ten, i64 0, i64 1, i32 1
- %25 = fmul double undef, 0x3FE42F601A8C6794
- %26 = fmul double undef, 2.000000e+00
- %27 = fsub double %26, %0
- %28 = fmul double %6, undef
- %29 = fsub double undef, %28
- %30 = getelementptr inbounds [5 x { double, double }], [5 x { double, double }]* %c2ten, i64 0, i64 2, i32 0
- store double undef, double* %30, align 8
- %31 = getelementptr inbounds [5 x { double, double }], [5 x { double, double }]* %c2ten, i64 0, i64 2, i32 1
- %32 = fmul double undef, %17
- %33 = fmul double undef, %17
- %34 = fmul double undef, %32
- %35 = fmul double undef, %33
- %36 = fsub double undef, %35
- %37 = fmul double %3, %34
- %38 = getelementptr inbounds [5 x { double, double }], [5 x { double, double }]* %c2ten, i64 0, i64 3, i32 0
- store double %37, double* %38, align 8
- %39 = getelementptr inbounds [5 x { double, double }], [5 x { double, double }]* %c2ten, i64 0, i64 3, i32 1
- %40 = fmul double undef, %8
- %41 = fmul double undef, %40
- %42 = fmul double undef, %41
- %43 = fsub double undef, %42
- %44 = fmul double %3, %43
- %45 = getelementptr inbounds [5 x { double, double }], [5 x { double, double }]* %c2ten, i64 0, i64 4, i32 0
- store double %13, double* %45, align 8
- %46 = getelementptr inbounds [5 x { double, double }], [5 x { double, double }]* %c2ten, i64 0, i64 4, i32 1
- %47 = fsub double -0.000000e+00, %14
- store double %47, double* %16, align 8
- store double undef, double* %24, align 8
- store double -0.000000e+00, double* %31, align 8
- store double undef, double* %39, align 8
- store double undef, double* %46, align 8
- ret void
-}
-
-attributes #0 = { nounwind uwtable }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind }
diff --git a/test/Transforms/BBVectorize/X86/sh-rec.ll b/test/Transforms/BBVectorize/X86/sh-rec.ll
deleted file mode 100644
index 2cb9dbded224..000000000000
--- a/test/Transforms/BBVectorize/X86/sh-rec.ll
+++ /dev/null
@@ -1,54 +0,0 @@
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -S | FileCheck %s
-
-define void @ptoa() nounwind uwtable {
-entry:
- %call = call i8* @malloc() nounwind
- br i1 undef, label %return, label %if.end10
-
-if.end10: ; preds = %entry
- %incdec.ptr = getelementptr inbounds i8, i8* %call, i64 undef
- %call17 = call i32 @ptou() nounwind
- %incdec.ptr26.1 = getelementptr inbounds i8, i8* %incdec.ptr, i64 -2
- store i8 undef, i8* %incdec.ptr26.1, align 1
- %div27.1 = udiv i32 %call17, 100
- %rem.2 = urem i32 %div27.1, 10
- %add2230.2 = or i32 %rem.2, 48
- %conv25.2 = trunc i32 %add2230.2 to i8
- %incdec.ptr26.2 = getelementptr inbounds i8, i8* %incdec.ptr, i64 -3
- store i8 %conv25.2, i8* %incdec.ptr26.2, align 1
- %incdec.ptr26.3 = getelementptr inbounds i8, i8* %incdec.ptr, i64 -4
- store i8 undef, i8* %incdec.ptr26.3, align 1
- %div27.3 = udiv i32 %call17, 10000
- %rem.4 = urem i32 %div27.3, 10
- %add2230.4 = or i32 %rem.4, 48
- %conv25.4 = trunc i32 %add2230.4 to i8
- %incdec.ptr26.4 = getelementptr inbounds i8, i8* %incdec.ptr, i64 -5
- store i8 %conv25.4, i8* %incdec.ptr26.4, align 1
- %div27.4 = udiv i32 %call17, 100000
- %rem.5 = urem i32 %div27.4, 10
- %add2230.5 = or i32 %rem.5, 48
- %conv25.5 = trunc i32 %add2230.5 to i8
- %incdec.ptr26.5 = getelementptr inbounds i8, i8* %incdec.ptr, i64 -6
- store i8 %conv25.5, i8* %incdec.ptr26.5, align 1
- %incdec.ptr26.6 = getelementptr inbounds i8, i8* %incdec.ptr, i64 -7
- store i8 0, i8* %incdec.ptr26.6, align 1
- %incdec.ptr26.7 = getelementptr inbounds i8, i8* %incdec.ptr, i64 -8
- store i8 undef, i8* %incdec.ptr26.7, align 1
- %div27.7 = udiv i32 %call17, 100000000
- %rem.8 = urem i32 %div27.7, 10
- %add2230.8 = or i32 %rem.8, 48
- %conv25.8 = trunc i32 %add2230.8 to i8
- %incdec.ptr26.8 = getelementptr inbounds i8, i8* %incdec.ptr, i64 -9
- store i8 %conv25.8, i8* %incdec.ptr26.8, align 1
- unreachable
-
-return: ; preds = %entry
- ret void
-; CHECK-LABEL: @ptoa(
-}
-
-declare noalias i8* @malloc() nounwind
-
-declare i32 @ptou()
diff --git a/test/Transforms/BBVectorize/X86/sh-rec2.ll b/test/Transforms/BBVectorize/X86/sh-rec2.ll
deleted file mode 100644
index d7a004c21384..000000000000
--- a/test/Transforms/BBVectorize/X86/sh-rec2.ll
+++ /dev/null
@@ -1,85 +0,0 @@
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-; RUN: opt < %s -basicaa -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -S | FileCheck %s
-
-%struct.gsm_state.2.8.14.15.16.17.19.22.23.25.26.28.29.31.32.33.35.36.37.38.40.41.42.44.45.47.48.50.52.53.54.56.57.58.59.60.61.62.63.66.73.83.84.89.90.91.92.93.94.95.96.99.100.101.102.103.104.106.107.114.116.121.122.129.130.135.136.137.138.139.140.141.142.143.144.147.148.149.158.159.160.161.164.165.166.167.168.169.172.179.181.182.183.188.195.200.201.202.203.204.205.208.209.210.212.213.214.215.222.223.225.226.230.231.232.233.234.235.236.237.238.239.240.241.242.243.244.352 = type { [280 x i16], i16, i64, i32, [8 x i16], [2 x [8 x i16]], i16, i16, [9 x i16], i16, i8, i8 }
-
-define void @gsm_encode(%struct.gsm_state.2.8.14.15.16.17.19.22.23.25.26.28.29.31.32.33.35.36.37.38.40.41.42.44.45.47.48.50.52.53.54.56.57.58.59.60.61.62.63.66.73.83.84.89.90.91.92.93.94.95.96.99.100.101.102.103.104.106.107.114.116.121.122.129.130.135.136.137.138.139.140.141.142.143.144.147.148.149.158.159.160.161.164.165.166.167.168.169.172.179.181.182.183.188.195.200.201.202.203.204.205.208.209.210.212.213.214.215.222.223.225.226.230.231.232.233.234.235.236.237.238.239.240.241.242.243.244.352* %s, i16* %source, i8* %c) nounwind uwtable {
-entry:
- %xmc = alloca [52 x i16], align 16
- %arraydecay5 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 0
- call void @Gsm_Coder(%struct.gsm_state.2.8.14.15.16.17.19.22.23.25.26.28.29.31.32.33.35.36.37.38.40.41.42.44.45.47.48.50.52.53.54.56.57.58.59.60.61.62.63.66.73.83.84.89.90.91.92.93.94.95.96.99.100.101.102.103.104.106.107.114.116.121.122.129.130.135.136.137.138.139.140.141.142.143.144.147.148.149.158.159.160.161.164.165.166.167.168.169.172.179.181.182.183.188.195.200.201.202.203.204.205.208.209.210.212.213.214.215.222.223.225.226.230.231.232.233.234.235.236.237.238.239.240.241.242.243.244.352* %s, i16* %source, i16* undef, i16* null, i16* undef, i16* undef, i16* undef, i16* %arraydecay5) nounwind
- %incdec.ptr136 = getelementptr inbounds i8, i8* %c, i64 10
- %incdec.ptr157 = getelementptr inbounds i8, i8* %c, i64 11
- store i8 0, i8* %incdec.ptr136, align 1
- %arrayidx162 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 11
- %0 = load i16, i16* %arrayidx162, align 2
- %conv1631 = trunc i16 %0 to i8
- %and164 = shl i8 %conv1631, 3
- %shl165 = and i8 %and164, 56
- %incdec.ptr172 = getelementptr inbounds i8, i8* %c, i64 12
- store i8 %shl165, i8* %incdec.ptr157, align 1
- %1 = load i16, i16* inttoptr (i64 2 to i16*), align 2
- %conv1742 = trunc i16 %1 to i8
- %and175 = shl i8 %conv1742, 1
- %incdec.ptr183 = getelementptr inbounds i8, i8* %c, i64 13
- store i8 %and175, i8* %incdec.ptr172, align 1
- %incdec.ptr199 = getelementptr inbounds i8, i8* %c, i64 14
- store i8 0, i8* %incdec.ptr183, align 1
- %arrayidx214 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 15
- %incdec.ptr220 = getelementptr inbounds i8, i8* %c, i64 15
- store i8 0, i8* %incdec.ptr199, align 1
- %2 = load i16, i16* %arrayidx214, align 2
- %conv2223 = trunc i16 %2 to i8
- %and223 = shl i8 %conv2223, 6
- %incdec.ptr235 = getelementptr inbounds i8, i8* %c, i64 16
- store i8 %and223, i8* %incdec.ptr220, align 1
- %arrayidx240 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 19
- %3 = load i16, i16* %arrayidx240, align 2
- %conv2414 = trunc i16 %3 to i8
- %and242 = shl i8 %conv2414, 2
- %shl243 = and i8 %and242, 28
- %incdec.ptr251 = getelementptr inbounds i8, i8* %c, i64 17
- store i8 %shl243, i8* %incdec.ptr235, align 1
- %incdec.ptr272 = getelementptr inbounds i8, i8* %c, i64 18
- store i8 0, i8* %incdec.ptr251, align 1
- %arrayidx282 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 25
- %4 = load i16, i16* %arrayidx282, align 2
- %conv2835 = trunc i16 %4 to i8
- %and284 = and i8 %conv2835, 7
- %incdec.ptr287 = getelementptr inbounds i8, i8* %c, i64 19
- store i8 %and284, i8* %incdec.ptr272, align 1
- %incdec.ptr298 = getelementptr inbounds i8, i8* %c, i64 20
- store i8 0, i8* %incdec.ptr287, align 1
- %incdec.ptr314 = getelementptr inbounds i8, i8* %c, i64 21
- store i8 0, i8* %incdec.ptr298, align 1
- %arrayidx319 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 26
- %5 = load i16, i16* %arrayidx319, align 4
- %conv3206 = trunc i16 %5 to i8
- %and321 = shl i8 %conv3206, 4
- %shl322 = and i8 %and321, 112
- %incdec.ptr335 = getelementptr inbounds i8, i8* %c, i64 22
- store i8 %shl322, i8* %incdec.ptr314, align 1
- %arrayidx340 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 29
- %6 = load i16, i16* %arrayidx340, align 2
- %conv3417 = trunc i16 %6 to i8
- %and342 = shl i8 %conv3417, 3
- %shl343 = and i8 %and342, 56
- %incdec.ptr350 = getelementptr inbounds i8, i8* %c, i64 23
- store i8 %shl343, i8* %incdec.ptr335, align 1
- %incdec.ptr366 = getelementptr inbounds i8, i8* %c, i64 24
- store i8 0, i8* %incdec.ptr350, align 1
- %arrayidx381 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 36
- %incdec.ptr387 = getelementptr inbounds i8, i8* %c, i64 25
- store i8 0, i8* %incdec.ptr366, align 1
- %7 = load i16, i16* %arrayidx381, align 8
- %conv3898 = trunc i16 %7 to i8
- %and390 = shl i8 %conv3898, 6
- store i8 %and390, i8* %incdec.ptr387, align 1
- unreachable
-; CHECK-LABEL: @gsm_encode(
-}
-
-declare void @Gsm_Coder(%struct.gsm_state.2.8.14.15.16.17.19.22.23.25.26.28.29.31.32.33.35.36.37.38.40.41.42.44.45.47.48.50.52.53.54.56.57.58.59.60.61.62.63.66.73.83.84.89.90.91.92.93.94.95.96.99.100.101.102.103.104.106.107.114.116.121.122.129.130.135.136.137.138.139.140.141.142.143.144.147.148.149.158.159.160.161.164.165.166.167.168.169.172.179.181.182.183.188.195.200.201.202.203.204.205.208.209.210.212.213.214.215.222.223.225.226.230.231.232.233.234.235.236.237.238.239.240.241.242.243.244.352*, i16*, i16*, i16*, i16*, i16*, i16*, i16*)
-
-declare void @llvm.trap() noreturn nounwind
diff --git a/test/Transforms/BBVectorize/X86/sh-rec3.ll b/test/Transforms/BBVectorize/X86/sh-rec3.ll
deleted file mode 100644
index 2096deb08a90..000000000000
--- a/test/Transforms/BBVectorize/X86/sh-rec3.ll
+++ /dev/null
@@ -1,170 +0,0 @@
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-; RUN: opt < %s -basicaa -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -S | FileCheck %s
-
-%struct.gsm_state.2.8.39.44.45.55.56.57.58.59.62.63.64.65.74.75.76.77.80.87.92.93.94.95.96.97.110.111.112.113.114.128.130.135.136.137.138.139.140.141.142.143.144.145.148.149.150.151.152.169.170.177.178.179.184.185.186.187.188.201.208.209.219.220.221.223.224.225.230.231.232.233.235.236.237.238.245.246.248.249.272.274.279.280.281.282.283.286.293.298.299.314.315.316.317.318.319.320.321.322.323.324.325.326.327.328.329.330.331.332.333.334.335.336.337.338.339.340.341.342.343.344.345.346.347.348.349.350.351.352.353.565 = type { [280 x i16], i16, i64, i32, [8 x i16], [2 x [8 x i16]], i16, i16, [9 x i16], i16, i8, i8 }
-
-define void @gsm_encode(%struct.gsm_state.2.8.39.44.45.55.56.57.58.59.62.63.64.65.74.75.76.77.80.87.92.93.94.95.96.97.110.111.112.113.114.128.130.135.136.137.138.139.140.141.142.143.144.145.148.149.150.151.152.169.170.177.178.179.184.185.186.187.188.201.208.209.219.220.221.223.224.225.230.231.232.233.235.236.237.238.245.246.248.249.272.274.279.280.281.282.283.286.293.298.299.314.315.316.317.318.319.320.321.322.323.324.325.326.327.328.329.330.331.332.333.334.335.336.337.338.339.340.341.342.343.344.345.346.347.348.349.350.351.352.353.565* %s, i16* %source, i8* %c) nounwind uwtable {
-entry:
- %LARc28 = alloca [2 x i64], align 16
- %LARc28.sub = getelementptr inbounds [2 x i64], [2 x i64]* %LARc28, i64 0, i64 0
- %tmpcast = bitcast [2 x i64]* %LARc28 to [8 x i16]*
- %Nc = alloca [4 x i16], align 2
- %Mc = alloca [4 x i16], align 2
- %bc = alloca [4 x i16], align 2
- %xmc = alloca [52 x i16], align 16
- %arraydecay = bitcast [2 x i64]* %LARc28 to i16*
- %arraydecay1 = getelementptr inbounds [4 x i16], [4 x i16]* %Nc, i64 0, i64 0
- %arraydecay2 = getelementptr inbounds [4 x i16], [4 x i16]* %bc, i64 0, i64 0
- %arraydecay3 = getelementptr inbounds [4 x i16], [4 x i16]* %Mc, i64 0, i64 0
- %arraydecay5 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 0
- call void @Gsm_Coder(%struct.gsm_state.2.8.39.44.45.55.56.57.58.59.62.63.64.65.74.75.76.77.80.87.92.93.94.95.96.97.110.111.112.113.114.128.130.135.136.137.138.139.140.141.142.143.144.145.148.149.150.151.152.169.170.177.178.179.184.185.186.187.188.201.208.209.219.220.221.223.224.225.230.231.232.233.235.236.237.238.245.246.248.249.272.274.279.280.281.282.283.286.293.298.299.314.315.316.317.318.319.320.321.322.323.324.325.326.327.328.329.330.331.332.333.334.335.336.337.338.339.340.341.342.343.344.345.346.347.348.349.350.351.352.353.565* %s, i16* %source, i16* %arraydecay, i16* %arraydecay1, i16* %arraydecay2, i16* %arraydecay3, i16* undef, i16* %arraydecay5) nounwind
- %0 = load i64, i64* %LARc28.sub, align 16
- %1 = trunc i64 %0 to i32
- %conv1 = lshr i32 %1, 2
- %and = and i32 %conv1, 15
- %or = or i32 %and, 208
- %conv6 = trunc i32 %or to i8
- %incdec.ptr = getelementptr inbounds i8, i8* %c, i64 1
- store i8 %conv6, i8* %c, align 1
- %conv84 = trunc i64 %0 to i8
- %and9 = shl i8 %conv84, 6
- %incdec.ptr15 = getelementptr inbounds i8, i8* %c, i64 2
- store i8 %and9, i8* %incdec.ptr, align 1
- %2 = lshr i64 %0, 50
- %shr226.tr = trunc i64 %2 to i8
- %conv25 = and i8 %shr226.tr, 7
- %incdec.ptr26 = getelementptr inbounds i8, i8* %c, i64 3
- store i8 %conv25, i8* %incdec.ptr15, align 1
- %incdec.ptr42 = getelementptr inbounds i8, i8* %c, i64 4
- store i8 0, i8* %incdec.ptr26, align 1
- %arrayidx52 = getelementptr inbounds [8 x i16], [8 x i16]* %tmpcast, i64 0, i64 7
- %3 = load i16, i16* %arrayidx52, align 2
- %conv537 = trunc i16 %3 to i8
- %and54 = and i8 %conv537, 7
- %incdec.ptr57 = getelementptr inbounds i8, i8* %c, i64 5
- store i8 %and54, i8* %incdec.ptr42, align 1
- %incdec.ptr68 = getelementptr inbounds i8, i8* %c, i64 6
- store i8 0, i8* %incdec.ptr57, align 1
- %4 = load i16, i16* %arraydecay3, align 2
- %conv748 = trunc i16 %4 to i8
- %and75 = shl i8 %conv748, 5
- %shl76 = and i8 %and75, 96
- %incdec.ptr84 = getelementptr inbounds i8, i8* %c, i64 7
- store i8 %shl76, i8* %incdec.ptr68, align 1
- %arrayidx94 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 1
- %5 = load i16, i16* %arrayidx94, align 2
- %conv959 = trunc i16 %5 to i8
- %and96 = shl i8 %conv959, 1
- %shl97 = and i8 %and96, 14
- %or103 = or i8 %shl97, 1
- %incdec.ptr105 = getelementptr inbounds i8, i8* %c, i64 8
- store i8 %or103, i8* %incdec.ptr84, align 1
- %arrayidx115 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 4
- %6 = bitcast i16* %arrayidx115 to i32*
- %7 = load i32, i32* %6, align 8
- %conv11610 = trunc i32 %7 to i8
- %and117 = and i8 %conv11610, 7
- %incdec.ptr120 = getelementptr inbounds i8, i8* %c, i64 9
- store i8 %and117, i8* %incdec.ptr105, align 1
- %8 = lshr i32 %7, 16
- %and12330 = shl nuw nsw i32 %8, 5
- %and123 = trunc i32 %and12330 to i8
- %incdec.ptr136 = getelementptr inbounds i8, i8* %c, i64 10
- store i8 %and123, i8* %incdec.ptr120, align 1
- %incdec.ptr157 = getelementptr inbounds i8, i8* %c, i64 11
- store i8 0, i8* %incdec.ptr136, align 1
- %incdec.ptr172 = getelementptr inbounds i8, i8* %c, i64 12
- store i8 0, i8* %incdec.ptr157, align 1
- %arrayidx173 = getelementptr inbounds [4 x i16], [4 x i16]* %Nc, i64 0, i64 1
- %9 = load i16, i16* %arrayidx173, align 2
- %conv17412 = zext i16 %9 to i32
- %and175 = shl nuw nsw i32 %conv17412, 1
- %arrayidx177 = getelementptr inbounds [4 x i16], [4 x i16]* %bc, i64 0, i64 1
- %10 = load i16, i16* %arrayidx177, align 2
- %conv17826 = zext i16 %10 to i32
- %shr17913 = lshr i32 %conv17826, 1
- %and180 = and i32 %shr17913, 1
- %or181 = or i32 %and175, %and180
- %conv182 = trunc i32 %or181 to i8
- %incdec.ptr183 = getelementptr inbounds i8, i8* %c, i64 13
- store i8 %conv182, i8* %incdec.ptr172, align 1
- %arrayidx188 = getelementptr inbounds [4 x i16], [4 x i16]* %Mc, i64 0, i64 1
- %11 = load i16, i16* %arrayidx188, align 2
- %conv18914 = trunc i16 %11 to i8
- %and190 = shl i8 %conv18914, 5
- %shl191 = and i8 %and190, 96
- %incdec.ptr199 = getelementptr inbounds i8, i8* %c, i64 14
- store i8 %shl191, i8* %incdec.ptr183, align 1
- %arrayidx209 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 14
- %12 = load i16, i16* %arrayidx209, align 4
- %conv21015 = trunc i16 %12 to i8
- %and211 = shl i8 %conv21015, 1
- %shl212 = and i8 %and211, 14
- %or218 = or i8 %shl212, 1
- %incdec.ptr220 = getelementptr inbounds i8, i8* %c, i64 15
- store i8 %or218, i8* %incdec.ptr199, align 1
- %arrayidx225 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 16
- %13 = bitcast i16* %arrayidx225 to i64*
- %14 = load i64, i64* %13, align 16
- %conv22616 = trunc i64 %14 to i8
- %and227 = shl i8 %conv22616, 3
- %shl228 = and i8 %and227, 56
- %incdec.ptr235 = getelementptr inbounds i8, i8* %c, i64 16
- store i8 %shl228, i8* %incdec.ptr220, align 1
- %15 = lshr i64 %14, 32
- %and23832 = shl nuw nsw i64 %15, 5
- %and238 = trunc i64 %and23832 to i8
- %incdec.ptr251 = getelementptr inbounds i8, i8* %c, i64 17
- store i8 %and238, i8* %incdec.ptr235, align 1
- %arrayidx266 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 23
- %incdec.ptr272 = getelementptr inbounds i8, i8* %c, i64 18
- store i8 0, i8* %incdec.ptr251, align 1
- %16 = load i16, i16* %arrayidx266, align 2
- %conv27418 = trunc i16 %16 to i8
- %and275 = shl i8 %conv27418, 6
- %incdec.ptr287 = getelementptr inbounds i8, i8* %c, i64 19
- store i8 %and275, i8* %incdec.ptr272, align 1
- %arrayidx288 = getelementptr inbounds [4 x i16], [4 x i16]* %Nc, i64 0, i64 2
- %17 = load i16, i16* %arrayidx288, align 2
- %conv28919 = zext i16 %17 to i32
- %and290 = shl nuw nsw i32 %conv28919, 1
- %arrayidx292 = getelementptr inbounds [4 x i16], [4 x i16]* %bc, i64 0, i64 2
- %18 = load i16, i16* %arrayidx292, align 2
- %conv29327 = zext i16 %18 to i32
- %shr29420 = lshr i32 %conv29327, 1
- %and295 = and i32 %shr29420, 1
- %or296 = or i32 %and290, %and295
- %conv297 = trunc i32 %or296 to i8
- %incdec.ptr298 = getelementptr inbounds i8, i8* %c, i64 20
- store i8 %conv297, i8* %incdec.ptr287, align 1
- %conv30021 = trunc i16 %18 to i8
- %and301 = shl i8 %conv30021, 7
- %incdec.ptr314 = getelementptr inbounds i8, i8* %c, i64 21
- store i8 %and301, i8* %incdec.ptr298, align 1
- %incdec.ptr335 = getelementptr inbounds i8, i8* %c, i64 22
- store i8 0, i8* %incdec.ptr314, align 1
- %arrayidx340 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 29
- %19 = load i16, i16* %arrayidx340, align 2
- %conv34122 = trunc i16 %19 to i8
- %and342 = shl i8 %conv34122, 3
- %shl343 = and i8 %and342, 56
- %incdec.ptr350 = getelementptr inbounds i8, i8* %c, i64 23
- store i8 %shl343, i8* %incdec.ptr335, align 1
- %arrayidx355 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 32
- %20 = bitcast i16* %arrayidx355 to i32*
- %21 = load i32, i32* %20, align 16
- %conv35623 = shl i32 %21, 2
- %shl358 = and i32 %conv35623, 28
- %22 = lshr i32 %21, 17
- %and363 = and i32 %22, 3
- %or364 = or i32 %shl358, %and363
- %conv365 = trunc i32 %or364 to i8
- store i8 %conv365, i8* %incdec.ptr350, align 1
- unreachable
-; CHECK-LABEL: @gsm_encode(
-}
-
-declare void @Gsm_Coder(%struct.gsm_state.2.8.39.44.45.55.56.57.58.59.62.63.64.65.74.75.76.77.80.87.92.93.94.95.96.97.110.111.112.113.114.128.130.135.136.137.138.139.140.141.142.143.144.145.148.149.150.151.152.169.170.177.178.179.184.185.186.187.188.201.208.209.219.220.221.223.224.225.230.231.232.233.235.236.237.238.245.246.248.249.272.274.279.280.281.282.283.286.293.298.299.314.315.316.317.318.319.320.321.322.323.324.325.326.327.328.329.330.331.332.333.334.335.336.337.338.339.340.341.342.343.344.345.346.347.348.349.350.351.352.353.565*, i16*, i16*, i16*, i16*, i16*, i16*, i16*)
-
-declare void @llvm.trap() noreturn nounwind
diff --git a/test/Transforms/BBVectorize/X86/sh-types.ll b/test/Transforms/BBVectorize/X86/sh-types.ll
deleted file mode 100644
index fbff2fb86eb0..000000000000
--- a/test/Transforms/BBVectorize/X86/sh-types.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -S | FileCheck %s
-
-define <4 x float> @test7(<4 x float> %A1, <4 x float> %B1, double %C1, double %C2, double %D1, double %D2) {
- %A2 = shufflevector <4 x float> %A1, <4 x float> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
- %B2 = shufflevector <4 x float> %B1, <4 x float> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
- %X1 = shufflevector <4 x float> %A2, <4 x float> undef, <2 x i32> <i32 0, i32 1>
- %X2 = shufflevector <4 x float> %B2, <4 x float> undef, <2 x i32> <i32 2, i32 3>
- %Y1 = shufflevector <2 x float> %X1, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
- %Y2 = shufflevector <2 x float> %X2, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-
- %M1 = fsub double %C1, %D1
- %M2 = fsub double %C2, %D2
- %N1 = fmul double %M1, %C1
- %N2 = fmul double %M2, %C2
- %Z1 = fadd double %N1, %D1
- %Z2 = fadd double %N2, %D2
-
- %R = fmul <4 x float> %Y1, %Y2
- ret <4 x float> %R
-; CHECK-LABEL: @test7(
-; CHECK-NOT: <8 x float>
-; CHECK: ret <4 x float>
-}
-
diff --git a/test/Transforms/BBVectorize/X86/simple-int.ll b/test/Transforms/BBVectorize/X86/simple-int.ll
deleted file mode 100644
index 7842ec85b6c8..000000000000
--- a/test/Transforms/BBVectorize/X86/simple-int.ll
+++ /dev/null
@@ -1,79 +0,0 @@
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
-
-declare double @llvm.fma.f64(double, double, double)
-declare double @llvm.fmuladd.f64(double, double, double)
-declare double @llvm.cos.f64(double)
-declare double @llvm.powi.f64(double, i32)
-
-; Basic depth-3 chain with fma
-define double @test1(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
- %X1 = fsub double %A1, %B1
- %X2 = fsub double %A2, %B2
- %Y1 = call double @llvm.fma.f64(double %X1, double %A1, double %C1)
- %Y2 = call double @llvm.fma.f64(double %X2, double %A2, double %C2)
- %Z1 = fadd double %Y1, %B1
- %Z2 = fadd double %Y2, %B2
- %R = fmul double %Z1, %Z2
- ret double %R
-; CHECK-LABEL: @test1(
-; CHECK: ret double %R
-}
-
-; Basic depth-3 chain with fmuladd
-define double @test1a(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
- %X1 = fsub double %A1, %B1
- %X2 = fsub double %A2, %B2
- %Y1 = call double @llvm.fmuladd.f64(double %X1, double %A1, double %C1)
- %Y2 = call double @llvm.fmuladd.f64(double %X2, double %A2, double %C2)
- %Z1 = fadd double %Y1, %B1
- %Z2 = fadd double %Y2, %B2
- %R = fmul double %Z1, %Z2
- ret double %R
-; CHECK-LABEL: @test1a(
-; CHECK: ret double %R
-}
-
-; Basic depth-3 chain with cos
-define double @test2(double %A1, double %A2, double %B1, double %B2) {
- %X1 = fsub double %A1, %B1
- %X2 = fsub double %A2, %B2
- %Y1 = call double @llvm.cos.f64(double %X1)
- %Y2 = call double @llvm.cos.f64(double %X2)
- %Z1 = fadd double %Y1, %B1
- %Z2 = fadd double %Y2, %B2
- %R = fmul double %Z1, %Z2
- ret double %R
-; CHECK-LABEL: @test2(
-; CHECK: ret double %R
-}
-
-; Basic depth-3 chain with powi
-define double @test3(double %A1, double %A2, double %B1, double %B2, i32 %P) {
- %X1 = fsub double %A1, %B1
- %X2 = fsub double %A2, %B2
- %Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
- %Y2 = call double @llvm.powi.f64(double %X2, i32 %P)
- %Z1 = fadd double %Y1, %B1
- %Z2 = fadd double %Y2, %B2
- %R = fmul double %Z1, %Z2
- ret double %R
-; CHECK-LABEL: @test3(
-; CHECK: ret double %R
-}
-
-; Basic depth-3 chain with powi (different powers: should not vectorize)
-define double @test4(double %A1, double %A2, double %B1, double %B2, i32 %P) {
- %X1 = fsub double %A1, %B1
- %X2 = fsub double %A2, %B2
- %P2 = add i32 %P, 1
- %Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
- %Y2 = call double @llvm.powi.f64(double %X2, i32 %P2)
- %Z1 = fadd double %Y1, %B1
- %Z2 = fadd double %Y2, %B2
- %R = fmul double %Z1, %Z2
- ret double %R
-; CHECK-LABEL: @test4(
-; CHECK: ret double %R
-}
-
diff --git a/test/Transforms/BBVectorize/X86/simple-ldstr.ll b/test/Transforms/BBVectorize/X86/simple-ldstr.ll
deleted file mode 100644
index 2c05f30d0818..000000000000
--- a/test/Transforms/BBVectorize/X86/simple-ldstr.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
-
-; Simple 3-pair chain with loads and stores
-define void @test1(double* %a, double* %b, double* %c) nounwind uwtable readonly {
-entry:
- %i0 = load double, double* %a, align 8
- %i1 = load double, double* %b, align 8
- %mul = fmul double %i0, %i1
- %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
- %i3 = load double, double* %arrayidx3, align 8
- %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
- %i4 = load double, double* %arrayidx4, align 8
- %mul5 = fmul double %i3, %i4
- store double %mul, double* %c, align 8
- %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
- store double %mul5, double* %arrayidx5, align 8
- ret void
-; CHECK-LABEL: @test1(
-; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
-; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
-; CHECK: %i0 = load <2 x double>, <2 x double>* %i0.v.i0, align 8
-; CHECK: %i1 = load <2 x double>, <2 x double>* %i1.v.i0, align 8
-; CHECK: %mul = fmul <2 x double> %i0, %i1
-; CHECK: %0 = bitcast double* %c to <2 x double>*
-; CHECK: store <2 x double> %mul, <2 x double>* %0, align 8
-; CHECK: ret void
-}
-
diff --git a/test/Transforms/BBVectorize/X86/simple.ll b/test/Transforms/BBVectorize/X86/simple.ll
deleted file mode 100644
index a11e3090f205..000000000000
--- a/test/Transforms/BBVectorize/X86/simple.ll
+++ /dev/null
@@ -1,120 +0,0 @@
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
-
-; Basic depth-3 chain
-define double @test1(double %A1, double %A2, double %B1, double %B2) {
- %X1 = fsub double %A1, %B1
- %X2 = fsub double %A2, %B2
- %Y1 = fmul double %X1, %A1
- %Y2 = fmul double %X2, %A2
- %Z1 = fadd double %Y1, %B1
- %Z2 = fadd double %Y2, %B2
- %R = fmul double %Z1, %Z2
- ret double %R
-; CHECK-LABEL: @test1(
-; CHECK: fsub <2 x double>
-; CHECK: fmul <2 x double>
-; CHECK: fadd <2 x double>
-; CHECK: extract
-; CHECK: extract
-; CHECK: ret double %R
-}
-
-; Basic chain
-define double @test1a(double %A1, double %A2, double %B1, double %B2) {
- %X1 = fsub double %A1, %B1
- %X2 = fsub double %A2, %B2
- %Y1 = fmul double %X1, %A1
- %Y2 = fmul double %X2, %A2
- %Z1 = fadd double %Y1, %B1
- %Z2 = fadd double %Y2, %B2
- %W1 = fadd double %Y1, %Z1
- %W2 = fadd double %Y2, %Z2
- %V1 = fadd double %W1, %Z1
- %V2 = fadd double %W2, %Z2
- %Q1 = fadd double %W1, %V1
- %Q2 = fadd double %W2, %V2
- %S1 = fadd double %W1, %Q1
- %S2 = fadd double %W2, %Q2
- %R = fmul double %S1, %S2
- ret double %R
-; CHECK-LABEL: @test1a(
-; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
-; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
-; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
-; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
-; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
-; CHECK: %W1 = fadd <2 x double> %Y1, %Z1
-; CHECK: %V1 = fadd <2 x double> %W1, %Z1
-; CHECK: %Q1 = fadd <2 x double> %W1, %V1
-; CHECK: %S1 = fadd <2 x double> %W1, %Q1
-; CHECK: %S1.v.r1 = extractelement <2 x double> %S1, i32 0
-; CHECK: %S1.v.r2 = extractelement <2 x double> %S1, i32 1
-; CHECK: %R = fmul double %S1.v.r1, %S1.v.r2
-; CHECK: ret double %R
-}
-
-; Basic depth-3 chain (last pair permuted)
-define double @test2(double %A1, double %A2, double %B1, double %B2) {
- %X1 = fsub double %A1, %B1
- %X2 = fsub double %A2, %B2
- %Y1 = fmul double %X1, %A1
- %Y2 = fmul double %X2, %A2
- %Z1 = fadd double %Y2, %B1
- %Z2 = fadd double %Y1, %B2
- %R = fmul double %Z1, %Z2
- ret double %R
-; CHECK-LABEL: @test2(
-; CHECK: insertelement
-; CHECK: insertelement
-; CHECK: insertelement
-; CHECK: insertelement
-; CHECK: fsub <2 x double>
-; CHECK: fmul <2 x double>
-; CHECK: ret double %R
-}
-
-; Basic depth-4 chain (internal permutation)
-define double @test4(double %A1, double %A2, double %B1, double %B2) {
- %X1 = fsub double %A1, %B1
- %X2 = fsub double %A2, %B2
- %Y1 = fmul double %X1, %A1
- %Y2 = fmul double %X2, %A2
- %Z1 = fadd double %Y2, %B1
- %Z2 = fadd double %Y1, %B2
- %W1 = fadd double %Y2, %Z1
- %W2 = fadd double %Y1, %Z2
- %R = fmul double %Z1, %Z2
- ret double %R
-; CHECK-LABEL: @test4(
-; CHECK: insertelement
-; CHECK: insertelement
-; CHECK: insertelement
-; CHECK: insertelement
-; CHECK: fsub <2 x double>
-; CHECK: fmul <2 x double>
-; CHECK: insertelement
-; CHECK: insertelement
-; CHECK: fadd <2 x double>
-; CHECK: ret double %R
-}
-
-; Basic chain with shuffles
-define <8 x i8> @test6(<8 x i8> %A1, <8 x i8> %A2, <8 x i8> %B1, <8 x i8> %B2) {
- %X1 = sub <8 x i8> %A1, %B1
- %X2 = sub <8 x i8> %A2, %B2
- %Y1 = mul <8 x i8> %X1, %A1
- %Y2 = mul <8 x i8> %X2, %A2
- %Z1 = add <8 x i8> %Y1, %B1
- %Z2 = add <8 x i8> %Y2, %B2
- %Q1 = shufflevector <8 x i8> %Z1, <8 x i8> %Z2, <8 x i32> <i32 15, i32 8, i32 6, i32 1, i32 13, i32 10, i32 4, i32 3>
- %Q2 = shufflevector <8 x i8> %Z2, <8 x i8> %Z2, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 2, i32 4, i32 4, i32 1>
- %R = mul <8 x i8> %Q1, %Q2
- ret <8 x i8> %R
-; CHECK-LABEL: @test6(
-; CHECK-NOT: sub <16 x i8>
-; CHECK: ret <8 x i8>
-}
-
diff --git a/test/Transforms/BBVectorize/X86/vs-cast.ll b/test/Transforms/BBVectorize/X86/vs-cast.ll
deleted file mode 100644
index 0c666b11976c..000000000000
--- a/test/Transforms/BBVectorize/X86/vs-cast.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -S | FileCheck %s
-
-define void @main() nounwind uwtable {
-entry:
- %0 = bitcast <2 x i64> undef to i128
- %1 = bitcast <2 x i64> undef to i128
- ret void
-; CHECK-LABEL: @main(
-}
-
diff --git a/test/Transforms/BBVectorize/X86/wr-aliases.ll b/test/Transforms/BBVectorize/X86/wr-aliases.ll
deleted file mode 100644
index e34414988f32..000000000000
--- a/test/Transforms/BBVectorize/X86/wr-aliases.ll
+++ /dev/null
@@ -1,144 +0,0 @@
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -disable-basicaa -bb-vectorize -S < %s | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-%class.QBezier.15 = type { double, double, double, double, double, double, double, double }
-
-; Function Attrs: nounwind
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #0
-
-; Function Attrs: uwtable
-declare fastcc void @_ZL12printQBezier7QBezier(%class.QBezier.15* byval nocapture readonly align 8) #1
-
-; Function Attrs: nounwind
-declare void @llvm.lifetime.start(i64, i8* nocapture) #0
-
-; Function Attrs: nounwind
-declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #0
-
-define void @main_arrayctor.cont([10 x %class.QBezier.15]* %beziers, %class.QBezier.15* %agg.tmp.i, %class.QBezier.15* %agg.tmp55.i, %class.QBezier.15* %agg.tmp56.i) {
-newFuncRoot:
- br label %arrayctor.cont
-
-arrayctor.cont.ret.exitStub: ; preds = %arrayctor.cont
- ret void
-
-; CHECK-LABEL: @main_arrayctor.cont
-; CHECK: <2 x double>
-; CHECK: @_ZL12printQBezier7QBezier
-; CHECK: store double %mul8.i, double* %x3.i, align 16
-; CHECK: load double, double* %x3.i, align 16
-; CHECK: ret
-
-arrayctor.cont: ; preds = %newFuncRoot
- %ref.tmp.sroa.0.0.idx = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 0
- store double 1.000000e+01, double* %ref.tmp.sroa.0.0.idx, align 16
- %ref.tmp.sroa.2.0.idx1 = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 1
- store double 2.000000e+01, double* %ref.tmp.sroa.2.0.idx1, align 8
- %ref.tmp.sroa.3.0.idx2 = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 2
- store double 3.000000e+01, double* %ref.tmp.sroa.3.0.idx2, align 16
- %ref.tmp.sroa.4.0.idx3 = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 3
- store double 4.000000e+01, double* %ref.tmp.sroa.4.0.idx3, align 8
- %ref.tmp.sroa.5.0.idx4 = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 4
- store double 5.000000e+01, double* %ref.tmp.sroa.5.0.idx4, align 16
- %ref.tmp.sroa.6.0.idx5 = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 5
- store double 6.000000e+01, double* %ref.tmp.sroa.6.0.idx5, align 8
- %ref.tmp.sroa.7.0.idx6 = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 6
- store double 7.000000e+01, double* %ref.tmp.sroa.7.0.idx6, align 16
- %ref.tmp.sroa.8.0.idx7 = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 7
- store double 8.000000e+01, double* %ref.tmp.sroa.8.0.idx7, align 8
- %add.ptr = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 1
- %v0 = bitcast %class.QBezier.15* %agg.tmp.i to i8*
- call void @llvm.lifetime.start(i64 64, i8* %v0)
- %v1 = bitcast %class.QBezier.15* %agg.tmp55.i to i8*
- call void @llvm.lifetime.start(i64 64, i8* %v1)
- %v2 = bitcast %class.QBezier.15* %agg.tmp56.i to i8*
- call void @llvm.lifetime.start(i64 64, i8* %v2)
- %v3 = bitcast [10 x %class.QBezier.15]* %beziers to i8*
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* %v0, i8* %v3, i64 64, i32 8, i1 false)
- call fastcc void @_ZL12printQBezier7QBezier(%class.QBezier.15* byval align 8 %agg.tmp.i)
- %x2.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 2
- %v4 = load double, double* %x2.i, align 16
- %x3.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 4
- %v5 = load double, double* %x3.i, align 16
- %add.i = fadd double %v4, %v5
- %mul.i = fmul double 5.000000e-01, %add.i
- %x1.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 0
- %v6 = load double, double* %x1.i, align 16
- %add3.i = fadd double %v4, %v6
- %mul4.i = fmul double 5.000000e-01, %add3.i
- %x25.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 2
- store double %mul4.i, double* %x25.i, align 16
- %v7 = load double, double* %x3.i, align 16
- %x4.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 6
- %v8 = load double, double* %x4.i, align 16
- %add7.i = fadd double %v7, %v8
- %mul8.i = fmul double 5.000000e-01, %add7.i
- store double %mul8.i, double* %x3.i, align 16
- %v9 = load double, double* %x1.i, align 16
- %x111.i = getelementptr inbounds %class.QBezier.15, %class.QBezier.15* %add.ptr, i64 0, i32 0
- store double %v9, double* %x111.i, align 16
- %v10 = load double, double* %x25.i, align 16
- %add15.i = fadd double %mul.i, %v10
- %mul16.i = fmul double 5.000000e-01, %add15.i
- %x317.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 4
- store double %mul16.i, double* %x317.i, align 16
- %v11 = load double, double* %x3.i, align 16
- %add19.i = fadd double %mul.i, %v11
- %mul20.i = fmul double 5.000000e-01, %add19.i
- store double %mul20.i, double* %x2.i, align 16
- %v12 = load double, double* %x317.i, align 16
- %add24.i = fadd double %v12, %mul20.i
- %mul25.i = fmul double 5.000000e-01, %add24.i
- store double %mul25.i, double* %x1.i, align 16
- %x427.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 6
- store double %mul25.i, double* %x427.i, align 16
- %y2.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 3
- %v13 = load double, double* %y2.i, align 8
- %y3.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 5
- %v14 = load double, double* %y3.i, align 8
- %add28.i = fadd double %v13, %v14
- %div.i = fmul double 5.000000e-01, %add28.i
- %y1.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 1
- %v15 = load double, double* %y1.i, align 8
- %add30.i = fadd double %v13, %v15
- %mul31.i = fmul double 5.000000e-01, %add30.i
- %y232.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 3
- store double %mul31.i, double* %y232.i, align 8
- %v16 = load double, double* %y3.i, align 8
- %y4.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 7
- %v17 = load double, double* %y4.i, align 8
- %add34.i = fadd double %v16, %v17
- %mul35.i = fmul double 5.000000e-01, %add34.i
- store double %mul35.i, double* %y3.i, align 8
- %v18 = load double, double* %y1.i, align 8
- %y138.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 1
- store double %v18, double* %y138.i, align 8
- %v19 = load double, double* %y232.i, align 8
- %add42.i = fadd double %div.i, %v19
- %mul43.i = fmul double 5.000000e-01, %add42.i
- %y344.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 5
- store double %mul43.i, double* %y344.i, align 8
- %v20 = load double, double* %y3.i, align 8
- %add46.i = fadd double %div.i, %v20
- %mul47.i = fmul double 5.000000e-01, %add46.i
- store double %mul47.i, double* %y2.i, align 8
- %v21 = load double, double* %y344.i, align 8
- %add51.i = fadd double %v21, %mul47.i
- %mul52.i = fmul double 5.000000e-01, %add51.i
- store double %mul52.i, double* %y1.i, align 8
- %y454.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 7
- store double %mul52.i, double* %y454.i, align 8
- %v22 = bitcast %class.QBezier.15* %add.ptr to i8*
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* %v1, i8* %v22, i64 64, i32 8, i1 false)
- call fastcc void @_ZL12printQBezier7QBezier(%class.QBezier.15* byval align 8 %agg.tmp55.i)
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* %v2, i8* %v3, i64 64, i32 8, i1 false)
- call fastcc void @_ZL12printQBezier7QBezier(%class.QBezier.15* byval align 8 %agg.tmp56.i)
- call void @llvm.lifetime.end.p0i8(i64 64, i8* %v0)
- call void @llvm.lifetime.end.p0i8(i64 64, i8* %v1)
- call void @llvm.lifetime.end.p0i8(i64 64, i8* %v2)
- br label %arrayctor.cont.ret.exitStub
-}
-
-attributes #0 = { nounwind }
-attributes #1 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Transforms/BBVectorize/cycle.ll b/test/Transforms/BBVectorize/cycle.ll
deleted file mode 100644
index 6bfa625ea5f0..000000000000
--- a/test/Transforms/BBVectorize/cycle.ll
+++ /dev/null
@@ -1,112 +0,0 @@
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
-
-; This test checks the non-trivial pairing-induced cycle avoidance. Without this
-; cycle avoidance, the algorithm would select the pairs:
-; %div77 = fdiv double %sub74, %mul76.v.r1 <-> %div125 = fdiv double %mul121, %mul76.v.r2 (div125 depends on mul117)
-; %add84 = fadd double %sub83, 2.000000e+00 <-> %add127 = fadd double %mul126, 1.000000e+00 (add127 depends on div77)
-; %mul95 = fmul double %sub45.v.r1, %sub36.v.r1 <-> %mul88 = fmul double %sub36.v.r1, %sub87 (mul88 depends on add84)
-; %mul117 = fmul double %sub39.v.r1, %sub116 <-> %mul97 = fmul double %mul96, %sub39.v.r1 (mul97 depends on mul95)
-; and a dependency cycle would be created.
-
-declare double @fabs(double) nounwind readnone
-define void @test1(double %a, double %b, double %c, double %add80, double %mul1, double %mul2.v.r1, double %mul73, double %sub, double %sub65, double %F.0, i32 %n.0, double %Bnm3.0, double %Bnm2.0, double %Bnm1.0, double %Anm3.0, double %Anm2.0, double %Anm1.0) {
-entry:
- br label %go
-go:
- %conv = sitofp i32 %n.0 to double
- %add35 = fadd double %conv, %a
- %sub36 = fadd double %add35, -1.000000e+00
- %add38 = fadd double %conv, %b
- %sub39 = fadd double %add38, -1.000000e+00
- %add41 = fadd double %conv, %c
- %sub42 = fadd double %add41, -1.000000e+00
- %sub45 = fadd double %add35, -2.000000e+00
- %sub48 = fadd double %add38, -2.000000e+00
- %sub51 = fadd double %add41, -2.000000e+00
- %mul52 = shl nsw i32 %n.0, 1
- %sub53 = add nsw i32 %mul52, -1
- %conv54 = sitofp i32 %sub53 to double
- %sub56 = add nsw i32 %mul52, -3
- %conv57 = sitofp i32 %sub56 to double
- %sub59 = add nsw i32 %mul52, -5
- %conv60 = sitofp i32 %sub59 to double
- %mul61 = mul nsw i32 %n.0, %n.0
- %conv62 = sitofp i32 %mul61 to double
- %mul63 = fmul double %conv62, 3.000000e+00
- %mul67 = fmul double %sub65, %conv
- %add68 = fadd double %mul63, %mul67
- %add69 = fadd double %add68, 2.000000e+00
- %sub71 = fsub double %add69, %mul2.v.r1
- %sub74 = fsub double %sub71, %mul73
- %mul75 = fmul double %conv57, 2.000000e+00
- %mul76 = fmul double %mul75, %sub42
- %div77 = fdiv double %sub74, %mul76
- %mul82 = fmul double %add80, %conv
- %sub83 = fsub double %mul63, %mul82
- %add84 = fadd double %sub83, 2.000000e+00
- %sub86 = fsub double %add84, %mul2.v.r1
- %sub87 = fsub double -0.000000e+00, %sub86
- %mul88 = fmul double %sub36, %sub87
- %mul89 = fmul double %mul88, %sub39
- %mul90 = fmul double %conv54, 4.000000e+00
- %mul91 = fmul double %mul90, %conv57
- %mul92 = fmul double %mul91, %sub51
- %mul93 = fmul double %mul92, %sub42
- %div94 = fdiv double %mul89, %mul93
- %mul95 = fmul double %sub45, %sub36
- %mul96 = fmul double %mul95, %sub48
- %mul97 = fmul double %mul96, %sub39
- %sub99 = fsub double %conv, %a
- %sub100 = fadd double %sub99, -2.000000e+00
- %mul101 = fmul double %mul97, %sub100
- %sub103 = fsub double %conv, %b
- %sub104 = fadd double %sub103, -2.000000e+00
- %mul105 = fmul double %mul101, %sub104
- %mul106 = fmul double %conv57, 8.000000e+00
- %mul107 = fmul double %mul106, %conv57
- %mul108 = fmul double %mul107, %conv60
- %sub111 = fadd double %add41, -3.000000e+00
- %mul112 = fmul double %mul108, %sub111
- %mul113 = fmul double %mul112, %sub51
- %mul114 = fmul double %mul113, %sub42
- %div115 = fdiv double %mul105, %mul114
- %sub116 = fsub double -0.000000e+00, %sub36
- %mul117 = fmul double %sub39, %sub116
- %sub119 = fsub double %conv, %c
- %sub120 = fadd double %sub119, -1.000000e+00
- %mul121 = fmul double %mul117, %sub120
- %mul123 = fmul double %mul75, %sub51
- %mul124 = fmul double %mul123, %sub42
- %div125 = fdiv double %mul121, %mul124
- %mul126 = fmul double %div77, %sub
- %add127 = fadd double %mul126, 1.000000e+00
- %mul128 = fmul double %add127, %Anm1.0
- %mul129 = fmul double %div94, %sub
- %add130 = fadd double %div125, %mul129
- %mul131 = fmul double %add130, %sub
- %mul132 = fmul double %mul131, %Anm2.0
- %add133 = fadd double %mul128, %mul132
- %mul134 = fmul double %div115, %mul1
- %mul135 = fmul double %mul134, %Anm3.0
- %add136 = fadd double %add133, %mul135
- %mul139 = fmul double %add127, %Bnm1.0
- %mul143 = fmul double %mul131, %Bnm2.0
- %add144 = fadd double %mul139, %mul143
- %mul146 = fmul double %mul134, %Bnm3.0
- %add147 = fadd double %add144, %mul146
- %div148 = fdiv double %add136, %add147
- %sub149 = fsub double %F.0, %div148
- %div150 = fdiv double %sub149, %F.0
- %call = tail call double @fabs(double %div150) nounwind readnone
- %cmp = fcmp olt double %call, 0x3CB0000000000000
- %cmp152 = icmp sgt i32 %n.0, 20000
- %or.cond = or i1 %cmp, %cmp152
- br i1 %or.cond, label %done, label %go
-done:
- ret void
-; CHECK-LABEL: @test1(
-; CHECK: go:
-; CHECK: %conv.v.i0.1 = insertelement <2 x i32> undef, i32 %n.0, i32 0
-; FIXME: When tree pruning is deterministic, include the entire output.
-}
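The deleted comment above describes how pairing two individually legal candidate pairs can create a dependency cycle. A minimal hypothetical sketch of the same situation (function and value names are illustrative, not taken from the deleted test): fusing the pair {%a1, %a2} into one vector add and {%b1, %b2} into one vector multiply is impossible, because %b1 needs %a1 (the fused multiply waits on the fused add) while %a2 needs %b1 (the fused add waits on the fused multiply).

define double @pairing_cycle(double %x, double %y) {
  %a1 = fadd double %x, 1.0
  %b1 = fmul double %a1, %y      ; pair B depends on pair A via %a1
  %a2 = fadd double %b1, 2.0     ; pair A depends on pair B via %b1
  %b2 = fmul double %a2, %x
  %r = fadd double %b1, %b2
  ret double %r
}

The cycle checker rejects one of the two pairings so that a topological order of the fused instructions still exists.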
diff --git a/test/Transforms/BBVectorize/func-alias.ll b/test/Transforms/BBVectorize/func-alias.ll
deleted file mode 100644
index ab72ec0e1991..000000000000
--- a/test/Transforms/BBVectorize/func-alias.ll
+++ /dev/null
@@ -1,244 +0,0 @@
-target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64"
-target triple = "x86_64-unknown-linux-gnu"
-; RUN: opt < %s -basicaa -bb-vectorize -bb-vectorize-req-chain-depth=2 -instcombine -gvn -S | FileCheck %s
-; The required chain depth is set to 2 so that some vectorization occurs; check that the order of the function calls is unchanged.
-
-%struct.descriptor_dimension = type { i64, i64, i64 }
-%struct.__st_parameter_common = type { i32, i32, i8*, i32, i32, i8*, i32* }
-%struct.__st_parameter_dt = type { %struct.__st_parameter_common, i64, i64*, i64*, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i8*, [256 x i8], i32*, i64, i8*, i32, i32, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i8*, i8*, i32, [4 x i8] }
-%"struct.array4_real(kind=4)" = type { i8*, i64, i64, [4 x %struct.descriptor_dimension] }
-%"struct.array4_integer(kind=4).73" = type { i8*, i64, i64, [4 x %struct.descriptor_dimension] }
-%struct.array4_unknown = type { i8*, i64, i64, [4 x %struct.descriptor_dimension] }
-
-@.cst4 = external unnamed_addr constant [11 x i8], align 8
-@.cst823 = external unnamed_addr constant [214 x i8], align 64
-@j.4580 = external global i32
-@j1.4581 = external global i32
-@nty1.4590 = external global [2 x i8]
-@nty2.4591 = external global [2 x i8]
-@xr1.4592 = external global float
-@xr2.4593 = external global float
-@yr1.4594 = external global float
-@yr2.4595 = external global float
-
-@__main1_MOD_iave = external unnamed_addr global i32
-@__main1_MOD_igrp = external global i32
-@__main1_MOD_iounit = external global i32
-@__main1_MOD_ityp = external global i32
-@__main1_MOD_mclmsg = external unnamed_addr global %struct.array4_unknown, align 32
-@__main1_MOD_mxdate = external unnamed_addr global %"struct.array4_integer(kind=4).73", align 32
-@__main1_MOD_rmxval = external unnamed_addr global %"struct.array4_real(kind=4)", align 32
-
-declare void @_gfortran_st_write(%struct.__st_parameter_dt*)
-declare void @_gfortran_st_write_done(%struct.__st_parameter_dt*)
-declare void @_gfortran_transfer_character_write(%struct.__st_parameter_dt*, i8*, i32)
-declare void @_gfortran_transfer_integer_write(%struct.__st_parameter_dt*, i8*, i32)
-declare void @_gfortran_transfer_real_write(%struct.__st_parameter_dt*, i8*, i32)
-
-define i1 @"prtmax__<bb 3>_<bb 34>"(%struct.__st_parameter_dt* %memtmp3, i32 %D.4627_188.reload) nounwind {
-; CHECK: prtmax__
-newFuncRoot:
- br label %"<bb 34>"
-
-codeRepl80.exitStub: ; preds = %"<bb 34>"
- ret i1 true
-
-"<bb 34>.<bb 25>_crit_edge.exitStub": ; preds = %"<bb 34>"
- ret i1 false
-
-"<bb 34>": ; preds = %newFuncRoot
- %tmp128 = getelementptr inbounds %struct.__st_parameter_dt, %struct.__st_parameter_dt* %memtmp3, i32 0, i32 0
- %tmp129 = getelementptr inbounds %struct.__st_parameter_common, %struct.__st_parameter_common* %tmp128, i32 0, i32 2
- store i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.cst4, i64 0, i64 0), i8** %tmp129, align 8
- %tmp130 = getelementptr inbounds %struct.__st_parameter_dt, %struct.__st_parameter_dt* %memtmp3, i32 0, i32 0
- %tmp131 = getelementptr inbounds %struct.__st_parameter_common, %struct.__st_parameter_common* %tmp130, i32 0, i32 3
- store i32 31495, i32* %tmp131, align 4
- %tmp132 = getelementptr inbounds %struct.__st_parameter_dt, %struct.__st_parameter_dt* %memtmp3, i32 0, i32 5
- store i8* getelementptr inbounds ([214 x i8], [214 x i8]* @.cst823, i64 0, i64 0), i8** %tmp132, align 8
- %tmp133 = getelementptr inbounds %struct.__st_parameter_dt, %struct.__st_parameter_dt* %memtmp3, i32 0, i32 6
- store i32 214, i32* %tmp133, align 4
- %tmp134 = getelementptr inbounds %struct.__st_parameter_dt, %struct.__st_parameter_dt* %memtmp3, i32 0, i32 0
- %tmp135 = getelementptr inbounds %struct.__st_parameter_common, %struct.__st_parameter_common* %tmp134, i32 0, i32 0
- store i32 4096, i32* %tmp135, align 4
- %iounit.8748_288 = load i32, i32* @__main1_MOD_iounit, align 4
- %tmp136 = getelementptr inbounds %struct.__st_parameter_dt, %struct.__st_parameter_dt* %memtmp3, i32 0, i32 0
- %tmp137 = getelementptr inbounds %struct.__st_parameter_common, %struct.__st_parameter_common* %tmp136, i32 0, i32 1
- store i32 %iounit.8748_288, i32* %tmp137, align 4
- call void @_gfortran_st_write(%struct.__st_parameter_dt* %memtmp3) nounwind
- call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_integer_write to void (%struct.__st_parameter_dt*, i32*, i32)*)(%struct.__st_parameter_dt* %memtmp3, i32* @j.4580, i32 4) nounwind
-; CHECK: @_gfortran_transfer_integer_write
- %D.75807_289 = load i8*, i8** getelementptr inbounds (%"struct.array4_real(kind=4)", %"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 0), align 8
- %j.8758_290 = load i32, i32* @j.4580, align 4
- %D.75760_291 = sext i32 %j.8758_290 to i64
- %iave.8736_292 = load i32, i32* @__main1_MOD_iave, align 4
- %D.75620_293 = sext i32 %iave.8736_292 to i64
- %D.75808_294 = load i64, i64* getelementptr inbounds (%"struct.array4_real(kind=4)", %"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 2, i32 0), align 8
- %D.75809_295 = mul nsw i64 %D.75620_293, %D.75808_294
- %igrp.8737_296 = load i32, i32* @__main1_MOD_igrp, align 4
- %D.75635_297 = sext i32 %igrp.8737_296 to i64
- %D.75810_298 = load i64, i64* getelementptr inbounds (%"struct.array4_real(kind=4)", %"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 1, i32 0), align 8
- %D.75811_299 = mul nsw i64 %D.75635_297, %D.75810_298
- %D.75812_300 = add nsw i64 %D.75809_295, %D.75811_299
- %D.75813_301 = add nsw i64 %D.75760_291, %D.75812_300
- %ityp.8750_302 = load i32, i32* @__main1_MOD_ityp, align 4
- %D.75704_303 = sext i32 %ityp.8750_302 to i64
- %D.75814_304 = load i64, i64* getelementptr inbounds (%"struct.array4_real(kind=4)", %"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 3, i32 0), align 8
- %D.75815_305 = mul nsw i64 %D.75704_303, %D.75814_304
- %D.75816_306 = add nsw i64 %D.75813_301, %D.75815_305
- %D.75817_307 = load i64, i64* getelementptr inbounds (%"struct.array4_real(kind=4)", %"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 1), align 8
- %D.75818_308 = add nsw i64 %D.75816_306, %D.75817_307
- %tmp138 = bitcast i8* %D.75807_289 to [0 x float]*
- %tmp139 = bitcast [0 x float]* %tmp138 to float*
- %D.75819_309 = getelementptr inbounds float, float* %tmp139, i64 %D.75818_308
- call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* %D.75819_309, i32 4) nounwind
-; CHECK: @_gfortran_transfer_real_write
- %D.75820_310 = load i8*, i8** getelementptr inbounds (%struct.array4_unknown, %struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 0), align 8
- %j.8758_311 = load i32, i32* @j.4580, align 4
- %D.75760_312 = sext i32 %j.8758_311 to i64
- %iave.8736_313 = load i32, i32* @__main1_MOD_iave, align 4
- %D.75620_314 = sext i32 %iave.8736_313 to i64
- %D.75821_315 = load i64, i64* getelementptr inbounds (%struct.array4_unknown, %struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 2, i32 0), align 8
- %D.75822_316 = mul nsw i64 %D.75620_314, %D.75821_315
- %igrp.8737_317 = load i32, i32* @__main1_MOD_igrp, align 4
- %D.75635_318 = sext i32 %igrp.8737_317 to i64
- %D.75823_319 = load i64, i64* getelementptr inbounds (%struct.array4_unknown, %struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 1, i32 0), align 8
- %D.75824_320 = mul nsw i64 %D.75635_318, %D.75823_319
- %D.75825_321 = add nsw i64 %D.75822_316, %D.75824_320
- %D.75826_322 = add nsw i64 %D.75760_312, %D.75825_321
- %ityp.8750_323 = load i32, i32* @__main1_MOD_ityp, align 4
- %D.75704_324 = sext i32 %ityp.8750_323 to i64
- %D.75827_325 = load i64, i64* getelementptr inbounds (%struct.array4_unknown, %struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 3, i32 0), align 8
- %D.75828_326 = mul nsw i64 %D.75704_324, %D.75827_325
- %D.75829_327 = add nsw i64 %D.75826_322, %D.75828_326
- %D.75830_328 = load i64, i64* getelementptr inbounds (%struct.array4_unknown, %struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 1), align 8
- %D.75831_329 = add nsw i64 %D.75829_327, %D.75830_328
- %tmp140 = bitcast i8* %D.75820_310 to [0 x [1 x i8]]*
- %tmp141 = bitcast [0 x [1 x i8]]* %tmp140 to [1 x i8]*
- %D.75832_330 = getelementptr inbounds [1 x i8], [1 x i8]* %tmp141, i64 %D.75831_329
- call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_character_write to void (%struct.__st_parameter_dt*, [1 x i8]*, i32)*)(%struct.__st_parameter_dt* %memtmp3, [1 x i8]* %D.75832_330, i32 1) nounwind
-; CHECK: @_gfortran_transfer_character_write
- %D.75833_331 = load i8*, i8** getelementptr inbounds (%"struct.array4_integer(kind=4).73", %"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 0), align 8
- %j.8758_332 = load i32, i32* @j.4580, align 4
- %D.75760_333 = sext i32 %j.8758_332 to i64
- %iave.8736_334 = load i32, i32* @__main1_MOD_iave, align 4
- %D.75620_335 = sext i32 %iave.8736_334 to i64
- %D.75834_336 = load i64, i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73", %"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 2, i32 0), align 8
- %D.75835_337 = mul nsw i64 %D.75620_335, %D.75834_336
- %igrp.8737_338 = load i32, i32* @__main1_MOD_igrp, align 4
- %D.75635_339 = sext i32 %igrp.8737_338 to i64
- %D.75836_340 = load i64, i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73", %"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 1, i32 0), align 8
- %D.75837_341 = mul nsw i64 %D.75635_339, %D.75836_340
- %D.75838_342 = add nsw i64 %D.75835_337, %D.75837_341
- %D.75839_343 = add nsw i64 %D.75760_333, %D.75838_342
- %ityp.8750_344 = load i32, i32* @__main1_MOD_ityp, align 4
- %D.75704_345 = sext i32 %ityp.8750_344 to i64
- %D.75840_346 = load i64, i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73", %"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 3, i32 0), align 8
- %D.75841_347 = mul nsw i64 %D.75704_345, %D.75840_346
- %D.75842_348 = add nsw i64 %D.75839_343, %D.75841_347
- %D.75843_349 = load i64, i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73", %"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 1), align 8
- %D.75844_350 = add nsw i64 %D.75842_348, %D.75843_349
- %tmp142 = bitcast i8* %D.75833_331 to [0 x i32]*
- %tmp143 = bitcast [0 x i32]* %tmp142 to i32*
- %D.75845_351 = getelementptr inbounds i32, i32* %tmp143, i64 %D.75844_350
- call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_integer_write to void (%struct.__st_parameter_dt*, i32*, i32)*)(%struct.__st_parameter_dt* %memtmp3, i32* %D.75845_351, i32 4) nounwind
-; CHECK: @_gfortran_transfer_integer_write
- call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* @xr1.4592, i32 4) nounwind
-; CHECK: @_gfortran_transfer_real_write
- call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* @yr1.4594, i32 4) nounwind
-; CHECK: @_gfortran_transfer_real_write
- call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_character_write to void (%struct.__st_parameter_dt*, [2 x i8]*, i32)*)(%struct.__st_parameter_dt* %memtmp3, [2 x i8]* @nty1.4590, i32 2) nounwind
-; CHECK: @_gfortran_transfer_character_write
- call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_integer_write to void (%struct.__st_parameter_dt*, i32*, i32)*)(%struct.__st_parameter_dt* %memtmp3, i32* @j1.4581, i32 4) nounwind
-; CHECK: @_gfortran_transfer_integer_write
- %D.75807_352 = load i8*, i8** getelementptr inbounds (%"struct.array4_real(kind=4)", %"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 0), align 8
- %j1.8760_353 = load i32, i32* @j1.4581, align 4
- %D.75773_354 = sext i32 %j1.8760_353 to i64
- %iave.8736_355 = load i32, i32* @__main1_MOD_iave, align 4
- %D.75620_356 = sext i32 %iave.8736_355 to i64
- %D.75808_357 = load i64, i64* getelementptr inbounds (%"struct.array4_real(kind=4)", %"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 2, i32 0), align 8
- %D.75809_358 = mul nsw i64 %D.75620_356, %D.75808_357
- %igrp.8737_359 = load i32, i32* @__main1_MOD_igrp, align 4
- %D.75635_360 = sext i32 %igrp.8737_359 to i64
- %D.75810_361 = load i64, i64* getelementptr inbounds (%"struct.array4_real(kind=4)", %"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 1, i32 0), align 8
- %D.75811_362 = mul nsw i64 %D.75635_360, %D.75810_361
- %D.75812_363 = add nsw i64 %D.75809_358, %D.75811_362
- %D.75846_364 = add nsw i64 %D.75773_354, %D.75812_363
- %ityp.8750_365 = load i32, i32* @__main1_MOD_ityp, align 4
- %D.75704_366 = sext i32 %ityp.8750_365 to i64
- %D.75814_367 = load i64, i64* getelementptr inbounds (%"struct.array4_real(kind=4)", %"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 3, i32 0), align 8
- %D.75815_368 = mul nsw i64 %D.75704_366, %D.75814_367
- %D.75847_369 = add nsw i64 %D.75846_364, %D.75815_368
- %D.75817_370 = load i64, i64* getelementptr inbounds (%"struct.array4_real(kind=4)", %"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 1), align 8
- %D.75848_371 = add nsw i64 %D.75847_369, %D.75817_370
- %tmp144 = bitcast i8* %D.75807_352 to [0 x float]*
- %tmp145 = bitcast [0 x float]* %tmp144 to float*
- %D.75849_372 = getelementptr inbounds float, float* %tmp145, i64 %D.75848_371
- call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* %D.75849_372, i32 4) nounwind
-; CHECK: @_gfortran_transfer_real_write
- %D.75820_373 = load i8*, i8** getelementptr inbounds (%struct.array4_unknown, %struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 0), align 8
- %j1.8760_374 = load i32, i32* @j1.4581, align 4
- %D.75773_375 = sext i32 %j1.8760_374 to i64
- %iave.8736_376 = load i32, i32* @__main1_MOD_iave, align 4
- %D.75620_377 = sext i32 %iave.8736_376 to i64
- %D.75821_378 = load i64, i64* getelementptr inbounds (%struct.array4_unknown, %struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 2, i32 0), align 8
- %D.75822_379 = mul nsw i64 %D.75620_377, %D.75821_378
- %igrp.8737_380 = load i32, i32* @__main1_MOD_igrp, align 4
- %D.75635_381 = sext i32 %igrp.8737_380 to i64
- %D.75823_382 = load i64, i64* getelementptr inbounds (%struct.array4_unknown, %struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 1, i32 0), align 8
- %D.75824_383 = mul nsw i64 %D.75635_381, %D.75823_382
- %D.75825_384 = add nsw i64 %D.75822_379, %D.75824_383
- %D.75850_385 = add nsw i64 %D.75773_375, %D.75825_384
- %ityp.8750_386 = load i32, i32* @__main1_MOD_ityp, align 4
- %D.75704_387 = sext i32 %ityp.8750_386 to i64
- %D.75827_388 = load i64, i64* getelementptr inbounds (%struct.array4_unknown, %struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 3, i32 0), align 8
- %D.75828_389 = mul nsw i64 %D.75704_387, %D.75827_388
- %D.75851_390 = add nsw i64 %D.75850_385, %D.75828_389
- %D.75830_391 = load i64, i64* getelementptr inbounds (%struct.array4_unknown, %struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 1), align 8
- %D.75852_392 = add nsw i64 %D.75851_390, %D.75830_391
- %tmp146 = bitcast i8* %D.75820_373 to [0 x [1 x i8]]*
- %tmp147 = bitcast [0 x [1 x i8]]* %tmp146 to [1 x i8]*
- %D.75853_393 = getelementptr inbounds [1 x i8], [1 x i8]* %tmp147, i64 %D.75852_392
- call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_character_write to void (%struct.__st_parameter_dt*, [1 x i8]*, i32)*)(%struct.__st_parameter_dt* %memtmp3, [1 x i8]* %D.75853_393, i32 1) nounwind
-; CHECK: @_gfortran_transfer_character_write
- %D.75833_394 = load i8*, i8** getelementptr inbounds (%"struct.array4_integer(kind=4).73", %"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 0), align 8
- %j1.8760_395 = load i32, i32* @j1.4581, align 4
- %D.75773_396 = sext i32 %j1.8760_395 to i64
- %iave.8736_397 = load i32, i32* @__main1_MOD_iave, align 4
- %D.75620_398 = sext i32 %iave.8736_397 to i64
- %D.75834_399 = load i64, i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73", %"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 2, i32 0), align 8
- %D.75835_400 = mul nsw i64 %D.75620_398, %D.75834_399
- %igrp.8737_401 = load i32, i32* @__main1_MOD_igrp, align 4
- %D.75635_402 = sext i32 %igrp.8737_401 to i64
- %D.75836_403 = load i64, i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73", %"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 1, i32 0), align 8
- %D.75837_404 = mul nsw i64 %D.75635_402, %D.75836_403
- %D.75838_405 = add nsw i64 %D.75835_400, %D.75837_404
- %D.75854_406 = add nsw i64 %D.75773_396, %D.75838_405
- %ityp.8750_407 = load i32, i32* @__main1_MOD_ityp, align 4
- %D.75704_408 = sext i32 %ityp.8750_407 to i64
- %D.75840_409 = load i64, i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73", %"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 3, i32 0), align 8
- %D.75841_410 = mul nsw i64 %D.75704_408, %D.75840_409
- %D.75855_411 = add nsw i64 %D.75854_406, %D.75841_410
- %D.75843_412 = load i64, i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73", %"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 1), align 8
- %D.75856_413 = add nsw i64 %D.75855_411, %D.75843_412
- %tmp148 = bitcast i8* %D.75833_394 to [0 x i32]*
- %tmp149 = bitcast [0 x i32]* %tmp148 to i32*
- %D.75857_414 = getelementptr inbounds i32, i32* %tmp149, i64 %D.75856_413
- call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_integer_write to void (%struct.__st_parameter_dt*, i32*, i32)*)(%struct.__st_parameter_dt* %memtmp3, i32* %D.75857_414, i32 4) nounwind
-; CHECK: @_gfortran_transfer_integer_write
- call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* @xr2.4593, i32 4) nounwind
-; CHECK: @_gfortran_transfer_real_write
- call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* @yr2.4595, i32 4) nounwind
-; CHECK: @_gfortran_transfer_real_write
- call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_character_write to void (%struct.__st_parameter_dt*, [2 x i8]*, i32)*)(%struct.__st_parameter_dt* %memtmp3, [2 x i8]* @nty2.4591, i32 2) nounwind
-; CHECK: @_gfortran_transfer_character_write
- call void @_gfortran_st_write_done(%struct.__st_parameter_dt* %memtmp3) nounwind
-; CHECK: @_gfortran_st_write_done
- %j.8758_415 = load i32, i32* @j.4580, align 4
- %D.4634_416 = icmp eq i32 %j.8758_415, %D.4627_188.reload
- %j.8758_417 = load i32, i32* @j.4580, align 4
- %j.8770_418 = add nsw i32 %j.8758_417, 1
- store i32 %j.8770_418, i32* @j.4580, align 4
- %tmp150 = icmp ne i1 %D.4634_416, false
- br i1 %tmp150, label %codeRepl80.exitStub, label %"<bb 34>.<bb 25>_crit_edge.exitStub"
-}
-
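The long run of CHECK lines in the deleted test above leans on the fact that FileCheck matches plain CHECK directives strictly in order: each directive must match at or after the position of the previous match. A minimal sketch of the idiom (@first and @second are hypothetical functions, not from the test): if a pass reordered the two calls, the second directive would find no later match and the test would fail.

define void @order(i32* %p) {
  call void @first(i32* %p)
; CHECK: @first
  call void @second(i32* %p)
; CHECK: @second
  ret void
}
declare void @first(i32*)
declare void @second(i32*)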
diff --git a/test/Transforms/BBVectorize/ld1.ll b/test/Transforms/BBVectorize/ld1.ll
deleted file mode 100644
index 368c38aa5ce7..000000000000
--- a/test/Transforms/BBVectorize/ld1.ll
+++ /dev/null
@@ -1,41 +0,0 @@
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
-
-define double @test1(double* %a, double* %b, double* %c) nounwind uwtable readonly {
-entry:
- %i0 = load double, double* %a, align 8
- %i1 = load double, double* %b, align 8
- %mul = fmul double %i0, %i1
- %i2 = load double, double* %c, align 8
- %add = fadd double %mul, %i2
- %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
- %i3 = load double, double* %arrayidx3, align 8
- %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
- %i4 = load double, double* %arrayidx4, align 8
- %mul5 = fmul double %i3, %i4
- %arrayidx6 = getelementptr inbounds double, double* %c, i64 1
- %i5 = load double, double* %arrayidx6, align 8
- %add7 = fadd double %mul5, %i5
- %mul9 = fmul double %add, %i1
- %add11 = fadd double %mul9, %i2
- %mul13 = fmul double %add7, %i4
- %add15 = fadd double %mul13, %i5
- %mul16 = fmul double %add11, %add15
- ret double %mul16
-; CHECK-LABEL: @test1(
-; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
-; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
-; CHECK: %i2.v.i0 = bitcast double* %c to <2 x double>*
-; CHECK: %i0 = load <2 x double>, <2 x double>* %i0.v.i0, align 8
-; CHECK: %i1 = load <2 x double>, <2 x double>* %i1.v.i0, align 8
-; CHECK: %mul = fmul <2 x double> %i0, %i1
-; CHECK: %i2 = load <2 x double>, <2 x double>* %i2.v.i0, align 8
-; CHECK: %add = fadd <2 x double> %mul, %i2
-; CHECK: %mul9 = fmul <2 x double> %add, %i1
-; CHECK: %add11 = fadd <2 x double> %mul9, %i2
-; CHECK: %add11.v.r1 = extractelement <2 x double> %add11, i32 0
-; CHECK: %add11.v.r2 = extractelement <2 x double> %add11, i32 1
-; CHECK: %mul16 = fmul double %add11.v.r1, %add11.v.r2
-; CHECK: ret double %mul16
-}
-
diff --git a/test/Transforms/BBVectorize/lit.local.cfg b/test/Transforms/BBVectorize/lit.local.cfg
deleted file mode 100644
index e71f3cc4c41e..000000000000
--- a/test/Transforms/BBVectorize/lit.local.cfg
+++ /dev/null
@@ -1,3 +0,0 @@
-if 'X86' not in config.root.targets:
- config.unsupported = True
-
diff --git a/test/Transforms/BBVectorize/loop1.ll b/test/Transforms/BBVectorize/loop1.ll
deleted file mode 100644
index 8ff5953cf46a..000000000000
--- a/test/Transforms/BBVectorize/loop1.ll
+++ /dev/null
@@ -1,93 +0,0 @@
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
-; RUN: opt < %s -dont-improve-non-negative-phi-bits=false -basicaa -loop-unroll -unroll-threshold=45 -unroll-partial-threshold=45 -unroll-allow-partial -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-UNRL
-; The second check covers the use of alias analysis (with loop unrolling).
-
-define void @test1(double* noalias %out, double* noalias %in1, double* noalias %in2) nounwind uwtable {
-entry:
- br label %for.body
-; CHECK-LABEL: @test1(
-; CHECK-UNRL-LABEL: @test1(
-
-for.body: ; preds = %for.body, %entry
- %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %arrayidx = getelementptr inbounds double, double* %in1, i64 %indvars.iv
- %0 = load double, double* %arrayidx, align 8
- %arrayidx2 = getelementptr inbounds double, double* %in2, i64 %indvars.iv
- %1 = load double, double* %arrayidx2, align 8
- %mul = fmul double %0, %0
- %mul3 = fmul double %0, %1
- %add = fadd double %mul, %mul3
- %add4 = fadd double %1, %1
- %add5 = fadd double %add4, %0
- %mul6 = fmul double %0, %add5
- %add7 = fadd double %add, %mul6
- %mul8 = fmul double %1, %1
- %add9 = fadd double %0, %0
- %add10 = fadd double %add9, %0
- %mul11 = fmul double %mul8, %add10
- %add12 = fadd double %add7, %mul11
- %arrayidx14 = getelementptr inbounds double, double* %out, i64 %indvars.iv
- store double %add12, double* %arrayidx14, align 8
- %indvars.iv.next = add i64 %indvars.iv, 1
- %lftr.wideiv = trunc i64 %indvars.iv.next to i32
- %exitcond = icmp eq i32 %lftr.wideiv, 10
- br i1 %exitcond, label %for.end, label %for.body
-; CHECK: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-; CHECK: %arrayidx = getelementptr inbounds double, double* %in1, i64 %indvars.iv
-; CHECK: %0 = load double, double* %arrayidx, align 8
-; CHECK: %arrayidx2 = getelementptr inbounds double, double* %in2, i64 %indvars.iv
-; CHECK: %1 = load double, double* %arrayidx2, align 8
-; CHECK: %mul = fmul double %0, %0
-; CHECK: %mul3 = fmul double %0, %1
-; CHECK: %add = fadd double %mul, %mul3
-; CHECK: %mul8 = fmul double %1, %1
-; CHECK: %add4.v.i1.1 = insertelement <2 x double> undef, double %1, i32 0
-; CHECK: %add4.v.i1.2 = insertelement <2 x double> %add4.v.i1.1, double %0, i32 1
-; CHECK: %add4 = fadd <2 x double> %add4.v.i1.2, %add4.v.i1.2
-; CHECK: %2 = insertelement <2 x double> undef, double %0, i32 0
-; CHECK: %add5.v.i1.2 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> zeroinitializer
-; CHECK: %add5 = fadd <2 x double> %add4, %add5.v.i1.2
-; CHECK: %mul6.v.i0.2 = insertelement <2 x double> %2, double %mul8, i32 1
-; CHECK: %mul6 = fmul <2 x double> %mul6.v.i0.2, %add5
-; CHECK: %mul6.v.r1 = extractelement <2 x double> %mul6, i32 0
-; CHECK: %mul6.v.r2 = extractelement <2 x double> %mul6, i32 1
-; CHECK: %add7 = fadd double %add, %mul6.v.r1
-; CHECK: %add12 = fadd double %add7, %mul6.v.r2
-; CHECK: %arrayidx14 = getelementptr inbounds double, double* %out, i64 %indvars.iv
-; CHECK: store double %add12, double* %arrayidx14, align 8
-; CHECK: %indvars.iv.next = add i64 %indvars.iv, 1
-; CHECK: %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-; CHECK: %exitcond = icmp eq i32 %lftr.wideiv, 10
-; CHECK: br i1 %exitcond, label %for.end, label %for.body
-; CHECK-UNRL: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next.1, %for.body ]
-; CHECK-UNRL: %arrayidx = getelementptr inbounds double, double* %in1, i64 %indvars.iv
-; CHECK-UNRL: %0 = bitcast double* %arrayidx to <2 x double>*
-; CHECK-UNRL: %arrayidx2 = getelementptr inbounds double, double* %in2, i64 %indvars.iv
-; CHECK-UNRL: %1 = bitcast double* %arrayidx2 to <2 x double>*
-; CHECK-UNRL: %arrayidx14 = getelementptr inbounds double, double* %out, i64 %indvars.iv
-; CHECK-UNRL: %2 = load <2 x double>, <2 x double>* %0, align 8
-; CHECK-UNRL: %3 = load <2 x double>, <2 x double>* %1, align 8
-; CHECK-UNRL: %mul = fmul <2 x double> %2, %2
-; CHECK-UNRL: %mul3 = fmul <2 x double> %2, %3
-; CHECK-UNRL: %add = fadd <2 x double> %mul, %mul3
-; CHECK-UNRL: %add4 = fadd <2 x double> %3, %3
-; CHECK-UNRL: %add5 = fadd <2 x double> %add4, %2
-; CHECK-UNRL: %mul6 = fmul <2 x double> %2, %add5
-; CHECK-UNRL: %add7 = fadd <2 x double> %add, %mul6
-; CHECK-UNRL: %mul8 = fmul <2 x double> %3, %3
-; CHECK-UNRL: %add9 = fadd <2 x double> %2, %2
-; CHECK-UNRL: %add10 = fadd <2 x double> %add9, %2
-; CHECK-UNRL: %mul11 = fmul <2 x double> %mul8, %add10
-; CHECK-UNRL: %add12 = fadd <2 x double> %add7, %mul11
-; CHECK-UNRL: %4 = bitcast double* %arrayidx14 to <2 x double>*
-; CHECK-UNRL: store <2 x double> %add12, <2 x double>* %4, align 8
-; CHECK-UNRL: %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv, 2
-; CHECK-UNRL: %lftr.wideiv.1 = trunc i64 %indvars.iv.next.1 to i32
-; CHECK-UNRL: %exitcond.1 = icmp eq i32 %lftr.wideiv.1, 10
-; CHECK-UNRL: br i1 %exitcond.1, label %for.end, label %for.body
-
-for.end: ; preds = %for.body
- ret void
-}
diff --git a/test/Transforms/BBVectorize/mem-op-depth.ll b/test/Transforms/BBVectorize/mem-op-depth.ll
deleted file mode 100644
index 732043b7f8ec..000000000000
--- a/test/Transforms/BBVectorize/mem-op-depth.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-unknown-linux-gnu"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=6 -instcombine -gvn -S | FileCheck %s
-
-@A = common global [1024 x float] zeroinitializer, align 16
-@B = common global [1024 x float] zeroinitializer, align 16
-
-define i32 @test1() nounwind {
-; CHECK-LABEL: @test1(
- %V1 = load float, float* getelementptr inbounds ([1024 x float], [1024 x float]* @A, i64 0, i64 0), align 16
- %V2 = load float, float* getelementptr inbounds ([1024 x float], [1024 x float]* @A, i64 0, i64 1), align 4
- %V3 = load float, float* getelementptr inbounds ([1024 x float], [1024 x float]* @A, i64 0, i64 2), align 8
- %V4 = load float, float* getelementptr inbounds ([1024 x float], [1024 x float]* @A, i64 0, i64 3), align 4
-; CHECK: %V1 = load <4 x float>, <4 x float>* bitcast ([1024 x float]* @A to <4 x float>*), align 16
- store float %V1, float* getelementptr inbounds ([1024 x float], [1024 x float]* @B, i64 0, i64 0), align 16
- store float %V2, float* getelementptr inbounds ([1024 x float], [1024 x float]* @B, i64 0, i64 1), align 4
- store float %V3, float* getelementptr inbounds ([1024 x float], [1024 x float]* @B, i64 0, i64 2), align 8
- store float %V4, float* getelementptr inbounds ([1024 x float], [1024 x float]* @B, i64 0, i64 3), align 4
-; CHECK-NEXT: store <4 x float> %V1, <4 x float>* bitcast ([1024 x float]* @B to <4 x float>*), align 16
- ret i32 0
-; CHECK-NEXT: ret i32 0
-}
diff --git a/test/Transforms/BBVectorize/metadata.ll b/test/Transforms/BBVectorize/metadata.ll
deleted file mode 100644
index f5580a888616..000000000000
--- a/test/Transforms/BBVectorize/metadata.ll
+++ /dev/null
@@ -1,49 +0,0 @@
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -S | FileCheck %s
-
-; Simple 3-pair chain with loads and stores (with fpmath)
-define void @test1(double* %a, double* %b, double* %c) nounwind uwtable readonly {
-entry:
- %i0 = load double, double* %a, align 8
- %i1 = load double, double* %b, align 8
- %mul = fmul double %i0, %i1, !fpmath !2
- %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
- %i3 = load double, double* %arrayidx3, align 8
- %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
- %i4 = load double, double* %arrayidx4, align 8
- %mul5 = fmul double %i3, %i4, !fpmath !3
- store double %mul, double* %c, align 8
- %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
- store double %mul5, double* %arrayidx5, align 8
- ret void
-; CHECK-LABEL: @test1(
-; CHECK: !fpmath
-; CHECK: ret void
-}
-
-; Simple 3-pair chain with loads and stores (ints with range)
-define void @test2(i64* %a, i64* %b, i64* %c) nounwind uwtable readonly {
-entry:
- %i0 = load i64, i64* %a, align 8, !range !0
- %i1 = load i64, i64* %b, align 8
- %mul = mul i64 %i0, %i1
- %arrayidx3 = getelementptr inbounds i64, i64* %a, i64 1
- %i3 = load i64, i64* %arrayidx3, align 8, !range !1
- %arrayidx4 = getelementptr inbounds i64, i64* %b, i64 1
- %i4 = load i64, i64* %arrayidx4, align 8
- %mul5 = mul i64 %i3, %i4
- store i64 %mul, i64* %c, align 8
- %arrayidx5 = getelementptr inbounds i64, i64* %c, i64 1
- store i64 %mul5, i64* %arrayidx5, align 8
- ret void
-; CHECK-LABEL: @test2(
-; CHECK-NOT: !range
-; CHECK: ret void
-}
-
-!0 = !{i64 0, i64 2}
-!1 = !{i64 3, i64 5}
-
-!2 = !{ float 5.0 }
-!3 = !{ float 2.5 }
-
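As the CHECK lines of the deleted test above show, vectorization keeps !fpmath but conservatively drops !range. For reference, a short sketch of what the two scalar annotations mean (metadata values taken from the nodes in the file; variable names illustrative):

  %mul = fmul double %a, %b, !fpmath !2        ; !2 = !{float 5.0}: result may err by at most 5.0 ULPs
  %i0 = load i64, i64* %p, align 8, !range !0  ; !0 = !{i64 0, i64 2}: loaded value lies in [0, 2)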
diff --git a/test/Transforms/BBVectorize/no-ldstr-conn.ll b/test/Transforms/BBVectorize/no-ldstr-conn.ll
deleted file mode 100644
index a84cd6585602..000000000000
--- a/test/Transforms/BBVectorize/no-ldstr-conn.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=2 -instcombine -gvn -S | FileCheck %s
-
-; Make sure that instructions (specifically getelementptr) are not connected to
-; loads and stores via the address operand (which would be bad because the
-; address is really a scalar even after vectorization).
-define i64 @test2(i64 %a) nounwind uwtable readonly {
-entry:
- %a1 = inttoptr i64 %a to i64*
- %a2 = getelementptr i64, i64* %a1, i64 1
- %a3 = getelementptr i64, i64* %a1, i64 2
- %v2 = load i64, i64* %a2, align 8
- %v3 = load i64, i64* %a3, align 8
- %v2a = add i64 %v2, 5
- %v3a = add i64 %v3, 7
- store i64 %v2a, i64* %a2, align 8
- store i64 %v3a, i64* %a3, align 8
- %r = add i64 %v2, %v3
- ret i64 %r
-; CHECK-LABEL: @test2(
-; CHECK-NOT: getelementptr i64, <2 x i64*>
-}
-
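The comment in the deleted test above explains why address operands are excluded when pairing. A hypothetical illustration of the rejected shape (this is not output of the pass; %base.splat and the indices are invented): pairing the two scalar getelementptrs would produce a vector-of-pointers GEP whose lanes must immediately be extracted again to feed the scalar loads and stores, so nothing is gained:

  %addrs = getelementptr i64, <2 x i64*> %base.splat, <2 x i64> <i64 1, i64 2>
  %a2 = extractelement <2 x i64*> %addrs, i32 0
  %a3 = extractelement <2 x i64*> %addrs, i32 1

This is exactly the "getelementptr i64, <2 x i64*>" pattern that the CHECK-NOT line guards against.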
diff --git a/test/Transforms/BBVectorize/req-depth.ll b/test/Transforms/BBVectorize/req-depth.ll
deleted file mode 100644
index 2675354183a6..000000000000
--- a/test/Transforms/BBVectorize/req-depth.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth 3 -bb-vectorize-ignore-target-info -S | FileCheck %s -check-prefix=CHECK-RD3
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth 2 -bb-vectorize-ignore-target-info -S | FileCheck %s -check-prefix=CHECK-RD2
-
-define double @test1(double %A1, double %A2, double %B1, double %B2) {
- %X1 = fsub double %A1, %B1
- %X2 = fsub double %A2, %B2
- %Y1 = fmul double %X1, %A1
- %Y2 = fmul double %X2, %A2
- %R = fmul double %Y1, %Y2
- ret double %R
-; CHECK-RD3-LABEL: @test1(
-; CHECK-RD2-LABEL: @test1(
-; CHECK-RD3-NOT: <2 x double>
-; CHECK-RD2: <2 x double>
-}
-
diff --git a/test/Transforms/BBVectorize/search-limit.ll b/test/Transforms/BBVectorize/search-limit.ll
deleted file mode 100644
index be38d3402603..000000000000
--- a/test/Transforms/BBVectorize/search-limit.ll
+++ /dev/null
@@ -1,46 +0,0 @@
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-search-limit=4 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-SL4
-
-define double @test1(double %A1, double %A2, double %B1, double %B2) {
-; CHECK-LABEL: @test1(
-; CHECK-SL4-LABEL: @test1(
-; CHECK-SL4-NOT: <2 x double>
-; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
-; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
- %X1 = fsub double %A1, %B1
- %X2 = fsub double %A2, %B2
-; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
- %Y1 = fmul double %X1, %A1
- %Y2 = fmul double %X2, %A2
-; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
- %Z1 = fadd double %Y1, %B1
- ; Here we have a dependency chain: the short search limit cannot see
- ; past this chain, and so it never finds the second half of the pair
- ; to vectorize.
- %mul41 = fmul double %Z1, %Y2
- %sub48 = fsub double %Z1, %mul41
- %mul62 = fmul double %Z1, %sub48
- %sub69 = fsub double %Z1, %mul62
- %mul83 = fmul double %Z1, %sub69
- %sub90 = fsub double %Z1, %mul83
- %mul104 = fmul double %Z1, %sub90
- %sub111 = fsub double %Z1, %mul104
- %mul125 = fmul double %Z1, %sub111
- %sub132 = fsub double %Z1, %mul125
- %mul146 = fmul double %Z1, %sub132
- %sub153 = fsub double %Z1, %mul146
- ; end of chain.
- %Z2 = fadd double %Y2, %B2
-; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
- %R1 = fdiv double %Z1, %Z2
- %R = fmul double %R1, %sub153
-; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
-; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
-; CHECK: %R1 = fdiv double %Z1.v.r1, %Z1.v.r2
- ret double %R
-; CHECK: ret double %R
-}
-
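A compact sketch of the windowing behaviour the deleted comment describes (a hypothetical function; the exact instruction counting inside the pass is an assumption here): with -bb-vectorize-search-limit=4, the scan for %X1's partner gives up before reaching %X2, so the otherwise vectorizable %X1/%X2 pair is never formed.

define double @window(double %A1, double %A2, double %B1, double %B2) {
  %X1 = fsub double %A1, %B1
  %p1 = fadd double %X1, 1.0   ; five instructions of filler, more
  %p2 = fadd double %p1, 1.0   ; than the search limit of four
  %p3 = fadd double %p2, 1.0
  %p4 = fadd double %p3, 1.0
  %p5 = fadd double %p4, 1.0
  %X2 = fsub double %A2, %B2   ; partner lies outside the window
  %r = fmul double %p5, %X2
  ret double %r
}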
diff --git a/test/Transforms/BBVectorize/simple-int.ll b/test/Transforms/BBVectorize/simple-int.ll
deleted file mode 100644
index b7f87fe1db0e..000000000000
--- a/test/Transforms/BBVectorize/simple-int.ll
+++ /dev/null
@@ -1,506 +0,0 @@
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
-
-declare double @llvm.fma.f64(double, double, double)
-declare double @llvm.fmuladd.f64(double, double, double)
-declare double @llvm.cos.f64(double)
-declare double @llvm.powi.f64(double, i32)
-declare double @llvm.round.f64(double)
-declare double @llvm.copysign.f64(double, double)
-declare double @llvm.ceil.f64(double)
-declare double @llvm.nearbyint.f64(double)
-declare double @llvm.rint.f64(double)
-declare double @llvm.trunc.f64(double)
-declare double @llvm.floor.f64(double)
-declare double @llvm.fabs.f64(double)
-declare i64 @llvm.bswap.i64(i64)
-declare i64 @llvm.ctpop.i64(i64)
-declare i64 @llvm.ctlz.i64(i64, i1)
-declare i64 @llvm.cttz.i64(i64, i1)
-
-; Basic depth-3 chain with fma
-define double @test1(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
- %X1 = fsub double %A1, %B1
- %X2 = fsub double %A2, %B2
- %Y1 = call double @llvm.fma.f64(double %X1, double %A1, double %C1)
- %Y2 = call double @llvm.fma.f64(double %X2, double %A2, double %C2)
- %Z1 = fadd double %Y1, %B1
- %Z2 = fadd double %Y2, %B2
- %R = fmul double %Z1, %Z2
- ret double %R
-; CHECK-LABEL: @test1(
-; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
-; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
-; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
-; CHECK: %Y1.v.i2.1 = insertelement <2 x double> undef, double %C1, i32 0
-; CHECK: %Y1.v.i2.2 = insertelement <2 x double> %Y1.v.i2.1, double %C2, i32 1
-; CHECK: %Y1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %X1, <2 x double> %X1.v.i0.2, <2 x double> %Y1.v.i2.2)
-; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
-; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
-; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
-; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
-; CHECK: ret double %R
-}
-
-; Basic depth-3 chain with fmuladd
-define double @test1a(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
- %X1 = fsub double %A1, %B1
- %X2 = fsub double %A2, %B2
- %Y1 = call double @llvm.fmuladd.f64(double %X1, double %A1, double %C1)
- %Y2 = call double @llvm.fmuladd.f64(double %X2, double %A2, double %C2)
- %Z1 = fadd double %Y1, %B1
- %Z2 = fadd double %Y2, %B2
- %R = fmul double %Z1, %Z2
- ret double %R
-; CHECK-LABEL: @test1a(
-; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
-; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
-; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
-; CHECK: %Y1.v.i2.1 = insertelement <2 x double> undef, double %C1, i32 0
-; CHECK: %Y1.v.i2.2 = insertelement <2 x double> %Y1.v.i2.1, double %C2, i32 1
-; CHECK: %Y1 = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %X1, <2 x double> %X1.v.i0.2, <2 x double> %Y1.v.i2.2)
-; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
-; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
-; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
-; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
-; CHECK: ret double %R
-}
-
-; Basic depth-3 chain with cos
-define double @test2(double %A1, double %A2, double %B1, double %B2) {
- %X1 = fsub double %A1, %B1
- %X2 = fsub double %A2, %B2
- %Y1 = call double @llvm.cos.f64(double %X1)
- %Y2 = call double @llvm.cos.f64(double %X2)
- %Z1 = fadd double %Y1, %B1
- %Z2 = fadd double %Y2, %B2
- %R = fmul double %Z1, %Z2
- ret double %R
-; CHECK-LABEL: @test2(
-; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
-; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
-; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
-; CHECK: %Y1 = call <2 x double> @llvm.cos.v2f64(<2 x double> %X1)
-; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
-; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
-; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
-; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
-; CHECK: ret double %R
-}
-
-; Basic depth-3 chain with powi
-define double @test3(double %A1, double %A2, double %B1, double %B2, i32 %P) {
-
- %X1 = fsub double %A1, %B1
- %X2 = fsub double %A2, %B2
- %Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
- %Y2 = call double @llvm.powi.f64(double %X2, i32 %P)
- %Z1 = fadd double %Y1, %B1
- %Z2 = fadd double %Y2, %B2
- %R = fmul double %Z1, %Z2
- ret double %R
-; CHECK-LABEL: @test3(
-; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
-; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
-; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
-; CHECK: %Y1 = call <2 x double> @llvm.powi.v2f64(<2 x double> %X1, i32 %P)
-; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
-; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
-; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
-; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
-; CHECK: ret double %R
-}
-
-; Basic depth-3 chain with powi (different powers: should not vectorize)
-define double @test4(double %A1, double %A2, double %B1, double %B2, i32 %P) {
-
- %X1 = fsub double %A1, %B1
- %X2 = fsub double %A2, %B2
- %P2 = add i32 %P, 1
- %Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
- %Y2 = call double @llvm.powi.f64(double %X2, i32 %P2)
- %Z1 = fadd double %Y1, %B1
- %Z2 = fadd double %Y2, %B2
- %R = fmul double %Z1, %Z2
- ret double %R
-; CHECK-LABEL: @test4(
-; CHECK-NOT: <2 x double>
-; CHECK: ret double %R
-}
-
-; Basic depth-3 chain with round
-define double @testround(double %A1, double %A2, double %B1, double %B2) {
- %X1 = fsub double %A1, %B1
- %X2 = fsub double %A2, %B2
- %Y1 = call double @llvm.round.f64(double %X1)
- %Y2 = call double @llvm.round.f64(double %X2)
- %Z1 = fadd double %Y1, %B1
- %Z2 = fadd double %Y2, %B2
- %R = fmul double %Z1, %Z2
- ret double %R
-; CHECK: @testround
-; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
-; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
-; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
-; CHECK: %Y1 = call <2 x double> @llvm.round.v2f64(<2 x double> %X1)
-; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
-; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
-; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
-; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
-; CHECK: ret double %R
-
-}
-
-; Basic depth-3 chain with copysign
-define double @testcopysign(double %A1, double %A2, double %B1, double %B2) {
- %X1 = fsub double %A1, %B1
- %X2 = fsub double %A2, %B2
- %Y1 = call double @llvm.copysign.f64(double %X1, double %A1)
- %Y2 = call double @llvm.copysign.f64(double %X2, double %A1)
- %Z1 = fadd double %Y1, %B1
- %Z2 = fadd double %Y2, %B2
- %R = fmul double %Z1, %Z2
- ret double %R
-; CHECK: @testcopysign
-; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
-; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
-; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
-; CHECK: %Y1.v.i1.2 = shufflevector <2 x double> %X1.v.i0.1, <2 x double> undef, <2 x i32> zeroinitializer
-; CHECK: %Y1 = call <2 x double> @llvm.copysign.v2f64(<2 x double> %X1, <2 x double> %Y1.v.i1.2)
-; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
-; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
-; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
-; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
-; CHECK: ret double %R
-
-}
-
-; Basic depth-3 chain with ceil
-define double @testceil(double %A1, double %A2, double %B1, double %B2) {
- %X1 = fsub double %A1, %B1
- %X2 = fsub double %A2, %B2
- %Y1 = call double @llvm.ceil.f64(double %X1)
- %Y2 = call double @llvm.ceil.f64(double %X2)
- %Z1 = fadd double %Y1, %B1
- %Z2 = fadd double %Y2, %B2
- %R = fmul double %Z1, %Z2
- ret double %R
-; CHECK: @testceil
-; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
-; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
-; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
-; CHECK: %Y1 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %X1)
-; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
-; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
-; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
-; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
-; CHECK: ret double %R
-
-}
-
-; Basic depth-3 chain with nearbyint
-define double @testnearbyint(double %A1, double %A2, double %B1, double %B2) {
- %X1 = fsub double %A1, %B1
- %X2 = fsub double %A2, %B2
- %Y1 = call double @llvm.nearbyint.f64(double %X1)
- %Y2 = call double @llvm.nearbyint.f64(double %X2)
- %Z1 = fadd double %Y1, %B1
- %Z2 = fadd double %Y2, %B2
- %R = fmul double %Z1, %Z2
- ret double %R
-; CHECK: @testnearbyint
-; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
-; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
-; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
-; CHECK: %Y1 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %X1)
-; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
-; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
-; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
-; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
-; CHECK: ret double %R
-
-}
-
-; Basic depth-3 chain with rint
-define double @testrint(double %A1, double %A2, double %B1, double %B2) {
- %X1 = fsub double %A1, %B1
- %X2 = fsub double %A2, %B2
- %Y1 = call double @llvm.rint.f64(double %X1)
- %Y2 = call double @llvm.rint.f64(double %X2)
- %Z1 = fadd double %Y1, %B1
- %Z2 = fadd double %Y2, %B2
- %R = fmul double %Z1, %Z2
- ret double %R
-; CHECK: @testrint
-; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
-; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
-; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
-; CHECK: %Y1 = call <2 x double> @llvm.rint.v2f64(<2 x double> %X1)
-; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
-; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
-; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
-; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
-; CHECK: ret double %R
-
-}
-
-; Basic depth-3 chain with trunc
-define double @testtrunc(double %A1, double %A2, double %B1, double %B2) {
- %X1 = fsub double %A1, %B1
- %X2 = fsub double %A2, %B2
- %Y1 = call double @llvm.trunc.f64(double %X1)
- %Y2 = call double @llvm.trunc.f64(double %X2)
- %Z1 = fadd double %Y1, %B1
- %Z2 = fadd double %Y2, %B2
- %R = fmul double %Z1, %Z2
- ret double %R
-; CHECK: @testtrunc
-; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
-; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
-; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
-; CHECK: %Y1 = call <2 x double> @llvm.trunc.v2f64(<2 x double> %X1)
-; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
-; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
-; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
-; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
-; CHECK: ret double %R
-
-}
-
-; Basic depth-3 chain with floor
-define double @testfloor(double %A1, double %A2, double %B1, double %B2) {
- %X1 = fsub double %A1, %B1
- %X2 = fsub double %A2, %B2
- %Y1 = call double @llvm.floor.f64(double %X1)
- %Y2 = call double @llvm.floor.f64(double %X2)
- %Z1 = fadd double %Y1, %B1
- %Z2 = fadd double %Y2, %B2
- %R = fmul double %Z1, %Z2
- ret double %R
-; CHECK: @testfloor
-; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
-; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
-; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
-; CHECK: %Y1 = call <2 x double> @llvm.floor.v2f64(<2 x double> %X1)
-; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
-; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
-; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
-; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
-; CHECK: ret double %R
-
-}
-
-; Basic depth-3 chain with fabs
-define double @testfabs(double %A1, double %A2, double %B1, double %B2) {
- %X1 = fsub double %A1, %B1
- %X2 = fsub double %A2, %B2
- %Y1 = call double @llvm.fabs.f64(double %X1)
- %Y2 = call double @llvm.fabs.f64(double %X2)
- %Z1 = fadd double %Y1, %B1
- %Z2 = fadd double %Y2, %B2
- %R = fmul double %Z1, %Z2
- ret double %R
-; CHECK: @testfabs
-; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
-; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
-; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
-; CHECK: %Y1 = call <2 x double> @llvm.fabs.v2f64(<2 x double> %X1)
-; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
-; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
-; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
-; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
-; CHECK: ret double %R
-
-}
-
-; Basic depth-3 chain with bswap
-define i64 @testbswap(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
- %X1 = sub i64 %A1, %B1
- %X2 = sub i64 %A2, %B2
- %Y1 = call i64 @llvm.bswap.i64(i64 %X1)
- %Y2 = call i64 @llvm.bswap.i64(i64 %X2)
- %Z1 = add i64 %Y1, %B1
- %Z2 = add i64 %Y2, %B2
- %R = mul i64 %Z1, %Z2
- ret i64 %R
-
-; CHECK: @testbswap
-; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0
-; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1
-; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0
-; CHECK: %X1.v.i0.2 = insertelement <2 x i64> %X1.v.i0.1, i64 %A2, i32 1
-; CHECK: %X1 = sub <2 x i64> %X1.v.i0.2, %X1.v.i1.2
-; CHECK: %Y1 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %X1)
-; CHECK: %Z1 = add <2 x i64> %Y1, %X1.v.i1.2
-; CHECK: %Z1.v.r1 = extractelement <2 x i64> %Z1, i32 0
-; CHECK: %Z1.v.r2 = extractelement <2 x i64> %Z1, i32 1
-; CHECK: %R = mul i64 %Z1.v.r1, %Z1.v.r2
-; CHECK: ret i64 %R
-
-}
-
-; Basic depth-3 chain with ctpop
-define i64 @testctpop(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
- %X1 = sub i64 %A1, %B1
- %X2 = sub i64 %A2, %B2
- %Y1 = call i64 @llvm.ctpop.i64(i64 %X1)
- %Y2 = call i64 @llvm.ctpop.i64(i64 %X2)
- %Z1 = add i64 %Y1, %B1
- %Z2 = add i64 %Y2, %B2
- %R = mul i64 %Z1, %Z2
- ret i64 %R
-
-; CHECK: @testctpop
-; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0
-; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1
-; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0
-; CHECK: %X1.v.i0.2 = insertelement <2 x i64> %X1.v.i0.1, i64 %A2, i32 1
-; CHECK: %X1 = sub <2 x i64> %X1.v.i0.2, %X1.v.i1.2
-; CHECK: %Y1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %X1)
-; CHECK: %Z1 = add <2 x i64> %Y1, %X1.v.i1.2
-; CHECK: %Z1.v.r1 = extractelement <2 x i64> %Z1, i32 0
-; CHECK: %Z1.v.r2 = extractelement <2 x i64> %Z1, i32 1
-; CHECK: %R = mul i64 %Z1.v.r1, %Z1.v.r2
-; CHECK: ret i64 %R
-
-}
-
-; Basic depth-3 chain with ctlz
-define i64 @testctlz(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
- %X1 = sub i64 %A1, %B1
- %X2 = sub i64 %A2, %B2
- %Y1 = call i64 @llvm.ctlz.i64(i64 %X1, i1 true)
- %Y2 = call i64 @llvm.ctlz.i64(i64 %X2, i1 true)
- %Z1 = add i64 %Y1, %B1
- %Z2 = add i64 %Y2, %B2
- %R = mul i64 %Z1, %Z2
- ret i64 %R
-
-; CHECK: @testctlz
-; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0
-; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1
-; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0
-; CHECK: %X1.v.i0.2 = insertelement <2 x i64> %X1.v.i0.1, i64 %A2, i32 1
-; CHECK: %X1 = sub <2 x i64> %X1.v.i0.2, %X1.v.i1.2
-; CHECK: %Y1 = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %X1, i1 true)
-; CHECK: %Z1 = add <2 x i64> %Y1, %X1.v.i1.2
-; CHECK: %Z1.v.r1 = extractelement <2 x i64> %Z1, i32 0
-; CHECK: %Z1.v.r2 = extractelement <2 x i64> %Z1, i32 1
-; CHECK: %R = mul i64 %Z1.v.r1, %Z1.v.r2
-; CHECK: ret i64 %R
-
-}
-
-; Basic depth-3 chain with ctlz (mismatched i1 arguments; should not vectorize)
-define i64 @testctlzneg(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
- %X1 = sub i64 %A1, %B1
- %X2 = sub i64 %A2, %B2
- %Y1 = call i64 @llvm.ctlz.i64(i64 %X1, i1 true)
- %Y2 = call i64 @llvm.ctlz.i64(i64 %X2, i1 false)
- %Z1 = add i64 %Y1, %B1
- %Z2 = add i64 %Y2, %B2
- %R = mul i64 %Z1, %Z2
- ret i64 %R
-
-; CHECK: @testctlzneg
-; CHECK: %X1 = sub i64 %A1, %B1
-; CHECK: %X2 = sub i64 %A2, %B2
-; CHECK: %Y1 = call i64 @llvm.ctlz.i64(i64 %X1, i1 true)
-; CHECK: %Y2 = call i64 @llvm.ctlz.i64(i64 %X2, i1 false)
-; CHECK: %Z1 = add i64 %Y1, %B1
-; CHECK: %Z2 = add i64 %Y2, %B2
-; CHECK: %R = mul i64 %Z1, %Z2
-; CHECK: ret i64 %R
-}
-
-; Basic depth-3 chain with cttz
-define i64 @testcttz(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
- %X1 = sub i64 %A1, %B1
- %X2 = sub i64 %A2, %B2
- %Y1 = call i64 @llvm.cttz.i64(i64 %X1, i1 true)
- %Y2 = call i64 @llvm.cttz.i64(i64 %X2, i1 true)
- %Z1 = add i64 %Y1, %B1
- %Z2 = add i64 %Y2, %B2
- %R = mul i64 %Z1, %Z2
- ret i64 %R
-
-; CHECK: @testcttz
-; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0
-; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1
-; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0
-; CHECK: %X1.v.i0.2 = insertelement <2 x i64> %X1.v.i0.1, i64 %A2, i32 1
-; CHECK: %X1 = sub <2 x i64> %X1.v.i0.2, %X1.v.i1.2
-; CHECK: %Y1 = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %X1, i1 true)
-; CHECK: %Z1 = add <2 x i64> %Y1, %X1.v.i1.2
-; CHECK: %Z1.v.r1 = extractelement <2 x i64> %Z1, i32 0
-; CHECK: %Z1.v.r2 = extractelement <2 x i64> %Z1, i32 1
-; CHECK: %R = mul i64 %Z1.v.r1, %Z1.v.r2
-; CHECK: ret i64 %R
-
-}
-
-; Basic depth-3 chain with cttz (mismatched i1 arguments; should not vectorize)
-define i64 @testcttzneg(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
- %X1 = sub i64 %A1, %B1
- %X2 = sub i64 %A2, %B2
- %Y1 = call i64 @llvm.cttz.i64(i64 %X1, i1 true)
- %Y2 = call i64 @llvm.cttz.i64(i64 %X2, i1 false)
- %Z1 = add i64 %Y1, %B1
- %Z2 = add i64 %Y2, %B2
- %R = mul i64 %Z1, %Z2
- ret i64 %R
-
-; CHECK: @testcttzneg
-; CHECK: %X1 = sub i64 %A1, %B1
-; CHECK: %X2 = sub i64 %A2, %B2
-; CHECK: %Y1 = call i64 @llvm.cttz.i64(i64 %X1, i1 true)
-; CHECK: %Y2 = call i64 @llvm.cttz.i64(i64 %X2, i1 false)
-; CHECK: %Z1 = add i64 %Y1, %B1
-; CHECK: %Z2 = add i64 %Y2, %B2
-; CHECK: %R = mul i64 %Z1, %Z2
-; CHECK: ret i64 %R
-}
-
-
-
-; CHECK: declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #0
-; CHECK: declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>) #0
-; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) #0
-; CHECK: declare <2 x double> @llvm.powi.v2f64(<2 x double>, i32) #0
-; CHECK: declare <2 x double> @llvm.round.v2f64(<2 x double>) #0
-; CHECK: declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) #0
-; CHECK: declare <2 x double> @llvm.ceil.v2f64(<2 x double>) #0
-; CHECK: declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) #0
-; CHECK: declare <2 x double> @llvm.rint.v2f64(<2 x double>) #0
-; CHECK: declare <2 x double> @llvm.trunc.v2f64(<2 x double>) #0
-; CHECK: declare <2 x double> @llvm.floor.v2f64(<2 x double>) #0
-; CHECK: declare <2 x double> @llvm.fabs.v2f64(<2 x double>) #0
-; CHECK: declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) #0
-; CHECK: declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) #0
-; CHECK: declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) #0
-; CHECK: declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1) #0
-; CHECK: attributes #0 = { nounwind readnone speculatable }
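Note: every deleted function above exercises one rewrite, a pair of independent scalar intrinsic calls in a depth-3 chain fused into a single call to the matching <2 x ...> intrinsic. A minimal sketch of that pattern, illustrative only and not part of the patch (%X and %Y are hypothetical names for the fused values):
  ; Two scalar calls over the paired values %X1/%X2 ...
  %Y1 = call double @llvm.ceil.f64(double %X1)
  %Y2 = call double @llvm.ceil.f64(double %X2)
  ; ... become one call over the fused vector %X
  %Y = call <2 x double> @llvm.ceil.v2f64(<2 x double> %X)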
diff --git a/test/Transforms/BBVectorize/simple-ldstr-ptrs.ll b/test/Transforms/BBVectorize/simple-ldstr-ptrs.ll
deleted file mode 100644
index fcc0236bae9d..000000000000
--- a/test/Transforms/BBVectorize/simple-ldstr-ptrs.ll
+++ /dev/null
@@ -1,134 +0,0 @@
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-aligned-only -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-AO
-
-; FIXME: re-enable this once pointer vectors work properly
-; XFAIL: *
-
-; Simple 3-pair chain also with loads and stores (using ptrs and gep)
-define double @test1(i64* %a, i64* %b, i64* %c) nounwind uwtable readonly {
-entry:
- %i0 = load i64, i64* %a, align 8
- %i1 = load i64, i64* %b, align 8
- %mul = mul i64 %i0, %i1
- %arrayidx3 = getelementptr inbounds i64, i64* %a, i64 1
- %i3 = load i64, i64* %arrayidx3, align 8
- %arrayidx4 = getelementptr inbounds i64, i64* %b, i64 1
- %i4 = load i64, i64* %arrayidx4, align 8
- %mul5 = mul i64 %i3, %i4
- %ptr = inttoptr i64 %mul to double*
- %ptr5 = inttoptr i64 %mul5 to double*
- %aptr = getelementptr inbounds double, double* %ptr, i64 2
- %aptr5 = getelementptr inbounds double, double* %ptr5, i64 3
- %av = load double, double* %aptr, align 16
- %av5 = load double, double* %aptr5, align 16
- %r = fmul double %av, %av5
- store i64 %mul, i64* %c, align 8
- %arrayidx5 = getelementptr inbounds i64, i64* %c, i64 1
- store i64 %mul5, i64* %arrayidx5, align 8
- ret double %r
-; CHECK-LABEL: @test1(
-; CHECK: %i0.v.i0 = bitcast i64* %a to <2 x i64>*
-; CHECK: %i1.v.i0 = bitcast i64* %b to <2 x i64>*
-; CHECK: %i0 = load <2 x i64>, <2 x i64>* %i0.v.i0, align 8
-; CHECK: %i1 = load <2 x i64>, <2 x i64>* %i1.v.i0, align 8
-; CHECK: %mul = mul <2 x i64> %i0, %i1
-; CHECK: %ptr = inttoptr <2 x i64> %mul to <2 x double*>
-; CHECK: %aptr = getelementptr inbounds double, <2 x double*> %ptr, <2 x i64> <i64 2, i64 3>
-; CHECK: %aptr.v.r1 = extractelement <2 x double*> %aptr, i32 0
-; CHECK: %aptr.v.r2 = extractelement <2 x double*> %aptr, i32 1
-; CHECK: %av = load double, double* %aptr.v.r1, align 16
-; CHECK: %av5 = load double, double* %aptr.v.r2, align 16
-; CHECK: %r = fmul double %av, %av5
-; CHECK: %0 = bitcast i64* %c to <2 x i64>*
-; CHECK: store <2 x i64> %mul, <2 x i64>* %0, align 8
-; CHECK: ret double %r
-; CHECK-AO-LABEL: @test1(
-; CHECK-AO-NOT: load <2 x
-}
-
-; Simple 3-pair chain with loads and stores (using ptrs and gep)
-define void @test2(i64** %a, i64** %b, i64** %c) nounwind uwtable readonly {
-entry:
- %i0 = load i64*, i64** %a, align 8
- %i1 = load i64*, i64** %b, align 8
- %arrayidx3 = getelementptr inbounds i64*, i64** %a, i64 1
- %i3 = load i64*, i64** %arrayidx3, align 8
- %arrayidx4 = getelementptr inbounds i64*, i64** %b, i64 1
- %i4 = load i64*, i64** %arrayidx4, align 8
- %o1 = load i64, i64* %i1, align 8
- %o4 = load i64, i64* %i4, align 8
- %ptr0 = getelementptr inbounds i64, i64* %i0, i64 %o1
- %ptr3 = getelementptr inbounds i64, i64* %i3, i64 %o4
- store i64* %ptr0, i64** %c, align 8
- %arrayidx5 = getelementptr inbounds i64*, i64** %c, i64 1
- store i64* %ptr3, i64** %arrayidx5, align 8
- ret void
-; CHECK-LABEL: @test2(
-; CHECK: %i0.v.i0 = bitcast i64** %a to <2 x i64*>*
-; CHECK: %i1 = load i64*, i64** %b, align 8
-; CHECK: %i0 = load <2 x i64*>, <2 x i64*>* %i0.v.i0, align 8
-; CHECK: %arrayidx4 = getelementptr inbounds i64*, i64** %b, i64 1
-; CHECK: %i4 = load i64*, i64** %arrayidx4, align 8
-; CHECK: %o1 = load i64, i64* %i1, align 8
-; CHECK: %o4 = load i64, i64* %i4, align 8
-; CHECK: %ptr0.v.i1.1 = insertelement <2 x i64> undef, i64 %o1, i32 0
-; CHECK: %ptr0.v.i1.2 = insertelement <2 x i64> %ptr0.v.i1.1, i64 %o4, i32 1
-; CHECK: %ptr0 = getelementptr inbounds i64, <2 x i64*> %i0, <2 x i64> %ptr0.v.i1.2
-; CHECK: %0 = bitcast i64** %c to <2 x i64*>*
-; CHECK: store <2 x i64*> %ptr0, <2 x i64*>* %0, align 8
-; CHECK: ret void
-; CHECK-AO-LABEL: @test2(
-; CHECK-AO-NOT: <2 x
-}
-
-; Simple 3-pair chain with loads and stores (using ptrs and gep)
-; using pointer vectors.
-define void @test3(<2 x i64*>* %a, <2 x i64*>* %b, <2 x i64*>* %c) nounwind uwtable readonly {
-entry:
- %i0 = load <2 x i64*>, <2 x i64*>* %a, align 8
- %i1 = load <2 x i64*>, <2 x i64*>* %b, align 8
- %arrayidx3 = getelementptr inbounds <2 x i64*>, <2 x i64*>* %a, i64 1
- %i3 = load <2 x i64*>, <2 x i64*>* %arrayidx3, align 8
- %arrayidx4 = getelementptr inbounds <2 x i64*>, <2 x i64*>* %b, i64 1
- %i4 = load <2 x i64*>, <2 x i64*>* %arrayidx4, align 8
- %j1 = extractelement <2 x i64*> %i1, i32 0
- %j4 = extractelement <2 x i64*> %i4, i32 0
- %o1 = load i64, i64* %j1, align 8
- %o4 = load i64, i64* %j4, align 8
- %j0 = extractelement <2 x i64*> %i0, i32 0
- %j3 = extractelement <2 x i64*> %i3, i32 0
- %ptr0 = getelementptr inbounds i64, i64* %j0, i64 %o1
- %ptr3 = getelementptr inbounds i64, i64* %j3, i64 %o4
- %qtr0 = insertelement <2 x i64*> undef, i64* %ptr0, i32 0
- %rtr0 = insertelement <2 x i64*> %qtr0, i64* %ptr0, i32 1
- %qtr3 = insertelement <2 x i64*> undef, i64* %ptr3, i32 0
- %rtr3 = insertelement <2 x i64*> %qtr3, i64* %ptr3, i32 1
- store <2 x i64*> %rtr0, <2 x i64*>* %c, align 8
- %arrayidx5 = getelementptr inbounds <2 x i64*>, <2 x i64*>* %c, i64 1
- store <2 x i64*> %rtr3, <2 x i64*>* %arrayidx5, align 8
- ret void
-; CHECK-LABEL: @test3(
-; CHECK: %i0.v.i0 = bitcast <2 x i64*>* %a to <4 x i64*>*
-; CHECK: %i1 = load <2 x i64*>, <2 x i64*>* %b, align 8
-; CHECK: %i0 = load <4 x i64*>, <4 x i64*>* %i0.v.i0, align 8
-; CHECK: %arrayidx4 = getelementptr inbounds <2 x i64*>, <2 x i64*>* %b, i64 1
-; CHECK: %i4 = load <2 x i64*>, <2 x i64*>* %arrayidx4, align 8
-; CHECK: %j1 = extractelement <2 x i64*> %i1, i32 0
-; CHECK: %j4 = extractelement <2 x i64*> %i4, i32 0
-; CHECK: %o1 = load i64, i64* %j1, align 8
-; CHECK: %o4 = load i64, i64* %j4, align 8
-; CHECK: %ptr0.v.i1.1 = insertelement <2 x i64> undef, i64 %o1, i32 0
-; CHECK: %ptr0.v.i1.2 = insertelement <2 x i64> %ptr0.v.i1.1, i64 %o4, i32 1
-; CHECK: %ptr0.v.i0 = shufflevector <4 x i64*> %i0, <4 x i64*> undef, <2 x i32> <i32 0, i32 2>
-; CHECK: %ptr0 = getelementptr inbounds i64, <2 x i64*> %ptr0.v.i0, <2 x i64> %ptr0.v.i1.2
-; CHECK: %rtr0 = shufflevector <2 x i64*> %ptr0, <2 x i64*> undef, <2 x i32> zeroinitializer
-; CHECK: %rtr3 = shufflevector <2 x i64*> %ptr0, <2 x i64*> undef, <2 x i32> <i32 1, i32 1>
-; CHECK: %0 = bitcast <2 x i64*>* %c to <4 x i64*>*
-; CHECK: %1 = shufflevector <2 x i64*> %rtr0, <2 x i64*> %rtr3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK: store <4 x i64*> %1, <4 x i64*>* %0, align 8
-; CHECK: ret void
-; CHECK-AO-LABEL: @test3(
-; CHECK-AO-NOT: <4 x
-}
-
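Note: the file above is XFAIL'd, so it records intended rather than current behavior. The CHECK lines expect BBVectorize to form vectors of pointers and feed them through inttoptr and getelementptr; a sketch of that shape, taken from the expectations above (illustrative only):
  %ptr = inttoptr <2 x i64> %mul to <2 x double*>
  %aptr = getelementptr inbounds double, <2 x double*> %ptr, <2 x i64> <i64 2, i64 3>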
diff --git a/test/Transforms/BBVectorize/simple-ldstr.ll b/test/Transforms/BBVectorize/simple-ldstr.ll
deleted file mode 100644
index 56c1a06b42ea..000000000000
--- a/test/Transforms/BBVectorize/simple-ldstr.ll
+++ /dev/null
@@ -1,170 +0,0 @@
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-aligned-only -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-AO
-
-; Simple 3-pair chain with loads and stores
-define void @test1(double* %a, double* %b, double* %c) nounwind uwtable readonly {
-entry:
- %i0 = load double, double* %a, align 8
- %i1 = load double, double* %b, align 8
- %mul = fmul double %i0, %i1
- %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
- %i3 = load double, double* %arrayidx3, align 8
- %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
- %i4 = load double, double* %arrayidx4, align 8
- %mul5 = fmul double %i3, %i4
- store double %mul, double* %c, align 8
- %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
- store double %mul5, double* %arrayidx5, align 8
- ret void
-; CHECK-LABEL: @test1(
-; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
-; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
-; CHECK: %i0 = load <2 x double>, <2 x double>* %i0.v.i0, align 8
-; CHECK: %i1 = load <2 x double>, <2 x double>* %i1.v.i0, align 8
-; CHECK: %mul = fmul <2 x double> %i0, %i1
-; CHECK: %0 = bitcast double* %c to <2 x double>*
-; CHECK: store <2 x double> %mul, <2 x double>* %0, align 8
-; CHECK: ret void
-; CHECK-AO-LABEL: @test1(
-; CHECK-AO-NOT: <2 x double>
-}
-
-; Simple chain with extending loads and stores
-define void @test2(float* %a, float* %b, double* %c) nounwind uwtable readonly {
-entry:
- %i0f = load float, float* %a, align 4
- %i0 = fpext float %i0f to double
- %i1f = load float, float* %b, align 4
- %i1 = fpext float %i1f to double
- %mul = fmul double %i0, %i1
- %arrayidx3 = getelementptr inbounds float, float* %a, i64 1
- %i3f = load float, float* %arrayidx3, align 4
- %i3 = fpext float %i3f to double
- %arrayidx4 = getelementptr inbounds float, float* %b, i64 1
- %i4f = load float, float* %arrayidx4, align 4
- %i4 = fpext float %i4f to double
- %mul5 = fmul double %i3, %i4
- store double %mul, double* %c, align 8
- %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
- store double %mul5, double* %arrayidx5, align 8
- ret void
-; CHECK-LABEL: @test2(
-; CHECK: %i0f.v.i0 = bitcast float* %a to <2 x float>*
-; CHECK: %i1f.v.i0 = bitcast float* %b to <2 x float>*
-; CHECK: %i0f = load <2 x float>, <2 x float>* %i0f.v.i0, align 4
-; CHECK: %i0 = fpext <2 x float> %i0f to <2 x double>
-; CHECK: %i1f = load <2 x float>, <2 x float>* %i1f.v.i0, align 4
-; CHECK: %i1 = fpext <2 x float> %i1f to <2 x double>
-; CHECK: %mul = fmul <2 x double> %i0, %i1
-; CHECK: %0 = bitcast double* %c to <2 x double>*
-; CHECK: store <2 x double> %mul, <2 x double>* %0, align 8
-; CHECK: ret void
-; CHECK-AO-LABEL: @test2(
-; CHECK-AO-NOT: <2 x double>
-}
-
-; Simple chain with loads and truncating stores
-define void @test3(double* %a, double* %b, float* %c) nounwind uwtable readonly {
-entry:
- %i0 = load double, double* %a, align 8
- %i1 = load double, double* %b, align 8
- %mul = fmul double %i0, %i1
- %mulf = fptrunc double %mul to float
- %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
- %i3 = load double, double* %arrayidx3, align 8
- %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
- %i4 = load double, double* %arrayidx4, align 8
- %mul5 = fmul double %i3, %i4
- %mul5f = fptrunc double %mul5 to float
- store float %mulf, float* %c, align 8
- %arrayidx5 = getelementptr inbounds float, float* %c, i64 1
- store float %mul5f, float* %arrayidx5, align 4
- ret void
-; CHECK-LABEL: @test3(
-; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
-; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
-; CHECK: %i0 = load <2 x double>, <2 x double>* %i0.v.i0, align 8
-; CHECK: %i1 = load <2 x double>, <2 x double>* %i1.v.i0, align 8
-; CHECK: %mul = fmul <2 x double> %i0, %i1
-; CHECK: %mulf = fptrunc <2 x double> %mul to <2 x float>
-; CHECK: %0 = bitcast float* %c to <2 x float>*
-; CHECK: store <2 x float> %mulf, <2 x float>* %0, align 8
-; CHECK: ret void
-; CHECK-AO-LABEL: @test3(
-; CHECK-AO: %i0 = load double, double* %a, align 8
-; CHECK-AO: %i1 = load double, double* %b, align 8
-; CHECK-AO: %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-; CHECK-AO: %i3 = load double, double* %arrayidx3, align 8
-; CHECK-AO: %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-; CHECK-AO: %i4 = load double, double* %arrayidx4, align 8
-; CHECK-AO: %mul.v.i1.1 = insertelement <2 x double> undef, double %i1, i32 0
-; CHECK-AO: %mul.v.i1.2 = insertelement <2 x double> %mul.v.i1.1, double %i4, i32 1
-; CHECK-AO: %mul.v.i0.1 = insertelement <2 x double> undef, double %i0, i32 0
-; CHECK-AO: %mul.v.i0.2 = insertelement <2 x double> %mul.v.i0.1, double %i3, i32 1
-; CHECK-AO: %mul = fmul <2 x double> %mul.v.i0.2, %mul.v.i1.2
-; CHECK-AO: %mulf = fptrunc <2 x double> %mul to <2 x float>
-; CHECK-AO: %0 = bitcast float* %c to <2 x float>*
-; CHECK-AO: store <2 x float> %mulf, <2 x float>* %0, align 8
-; CHECK-AO: ret void
-}
-
-; Simple 3-pair chain with loads and stores (unreachable)
-define void @test4(i1 %bool, double* %a, double* %b, double* %c) nounwind uwtable readonly {
-entry:
- br i1 %bool, label %if.then1, label %if.end
-
-if.then1:
- unreachable
- br label %if.then
-
-if.then:
- %i0 = load double, double* %a, align 8
- %i1 = load double, double* %b, align 8
- %mul = fmul double %i0, %i1
- %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
- %i3 = load double, double* %arrayidx3, align 8
- %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
- %i4 = load double, double* %arrayidx4, align 8
- %mul5 = fmul double %i3, %i4
- store double %mul, double* %c, align 8
- %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
- store double %mul5, double* %arrayidx5, align 8
- br label %if.end
-
-if.end:
- ret void
-; CHECK-LABEL: @test4(
-; CHECK-NOT: <2 x double>
-; CHECK-AO-LABEL: @test4(
-; CHECK-AO-NOT: <2 x double>
-}
-
-; Simple 3-pair chain with loads and stores (stores in reverse order, lower-aligned store)
-define void @test5(double* %a, double* %b, double* %c) nounwind uwtable readonly {
-entry:
- %i0 = load double, double* %a, align 8
- %i1 = load double, double* %b, align 8
- %mul = fmul double %i0, %i1
- %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
- %i3 = load double, double* %arrayidx3, align 8
- %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
- %i4 = load double, double* %arrayidx4, align 8
- %mul5 = fmul double %i3, %i4
- %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
- store double %mul5, double* %arrayidx5, align 8
- store double %mul, double* %c, align 4
- ret void
-; CHECK-LABEL: @test5(
-; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
-; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
-; CHECK: %i0 = load <2 x double>, <2 x double>* %i0.v.i0, align 8
-; CHECK: %i1 = load <2 x double>, <2 x double>* %i1.v.i0, align 8
-; CHECK: %mul = fmul <2 x double> %i0, %i1
-; CHECK: %0 = bitcast double* %c to <2 x double>*
-; CHECK: store <2 x double> %mul, <2 x double>* %0, align 4
-; CHECK: ret void
-; CHECK-AO-LABEL: @test5(
-; CHECK-AO-NOT: <2 x double>
-}
-
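Note: the common thread in the checks above is that two adjacent scalar memory operations are merged by bitcasting the base pointer to a vector pointer, so no insertelement/extractelement traffic is needed. A sketch of the load half (illustrative only):
  ; One wide load covers both %a[0] and %a[1]
  %vp = bitcast double* %a to <2 x double>*
  %v = load <2 x double>, <2 x double>* %vp, align 8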
diff --git a/test/Transforms/BBVectorize/simple-sel.ll b/test/Transforms/BBVectorize/simple-sel.ll
deleted file mode 100644
index 269b07f82d19..000000000000
--- a/test/Transforms/BBVectorize/simple-sel.ll
+++ /dev/null
@@ -1,59 +0,0 @@
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-no-bools -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-NB
-
-; Basic depth-3 chain with select
-define double @test1(double %A1, double %A2, double %B1, double %B2, i1 %C1, i1 %C2) {
-; CHECK-LABEL: @test1(
-; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
-; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
- %X1 = fsub double %A1, %B1
- %X2 = fsub double %A2, %B2
-; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
- %Y1 = fmul double %X1, %A1
- %Y2 = fmul double %X2, %A2
-; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
- %Z1 = select i1 %C1, double %Y1, double %B1
- %Z2 = select i1 %C2, double %Y2, double %B2
-; CHECK: %Z1.v.i0.1 = insertelement <2 x i1> undef, i1 %C1, i32 0
-; CHECK: %Z1.v.i0.2 = insertelement <2 x i1> %Z1.v.i0.1, i1 %C2, i32 1
-; CHECK: %Z1 = select <2 x i1> %Z1.v.i0.2, <2 x double> %Y1, <2 x double> %X1.v.i1.2
- %R = fmul double %Z1, %Z2
-; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
-; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
-; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
- ret double %R
-; CHECK: ret double %R
-}
-
-; Basic depth-3 chain with select (and vect. compare)
-define double @test2(double %A1, double %A2, double %B1, double %B2) {
-; CHECK-LABEL: @test2(
-; CHECK-NB-LABEL: @test2(
-; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
-; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
- %X1 = fsub double %A1, %B1
- %X2 = fsub double %A2, %B2
-; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
- %Y1 = fmul double %X1, %A1
- %Y2 = fmul double %X2, %A2
-; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
- %C1 = fcmp ogt double %X1, %A1
- %C2 = fcmp ogt double %X2, %A2
-; CHECK: %C1 = fcmp ogt <2 x double> %X1, %X1.v.i0.2
-; CHECK-NB: fcmp ogt double
- %Z1 = select i1 %C1, double %Y1, double %B1
- %Z2 = select i1 %C2, double %Y2, double %B2
-; CHECK: %Z1 = select <2 x i1> %C1, <2 x double> %Y1, <2 x double> %X1.v.i1.2
- %R = fmul double %Z1, %Z2
-; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
-; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
-; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
- ret double %R
-; CHECK: ret double %R
-}
-
diff --git a/test/Transforms/BBVectorize/simple-tst.ll b/test/Transforms/BBVectorize/simple-tst.ll
deleted file mode 100644
index 6a88e1b09c1b..000000000000
--- a/test/Transforms/BBVectorize/simple-tst.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
-target triple = "powerpc64-unknown-linux"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-vector-bits=256 -instcombine -gvn -S | FileCheck %s
-
-; Basic depth-3 chain (target-specific type should not vectorize)
-define ppc_fp128 @test7(ppc_fp128 %A1, ppc_fp128 %A2, ppc_fp128 %B1, ppc_fp128 %B2) {
-; CHECK-LABEL: @test7(
-; CHECK-NOT: <2 x ppc_fp128>
- %X1 = fsub ppc_fp128 %A1, %B1
- %X2 = fsub ppc_fp128 %A2, %B2
- %Y1 = fmul ppc_fp128 %X1, %A1
- %Y2 = fmul ppc_fp128 %X2, %A2
- %Z1 = fadd ppc_fp128 %Y1, %B1
- %Z2 = fadd ppc_fp128 %Y2, %B2
- %R = fmul ppc_fp128 %Z1, %Z2
- ret ppc_fp128 %R
-}
-
diff --git a/test/Transforms/BBVectorize/simple.ll b/test/Transforms/BBVectorize/simple.ll
deleted file mode 100644
index 0fe33f17a646..000000000000
--- a/test/Transforms/BBVectorize/simple.ll
+++ /dev/null
@@ -1,199 +0,0 @@
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
-
-; Basic depth-3 chain
-define double @test1(double %A1, double %A2, double %B1, double %B2) {
-; CHECK-LABEL: @test1(
-; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
-; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
- %X1 = fsub double %A1, %B1
- %X2 = fsub double %A2, %B2
-; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
- %Y1 = fmul double %X1, %A1
- %Y2 = fmul double %X2, %A2
-; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
- %Z1 = fadd double %Y1, %B1
- %Z2 = fadd double %Y2, %B2
-; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
- %R = fmul double %Z1, %Z2
-; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
-; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
-; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
- ret double %R
-; CHECK: ret double %R
-}
-
-; Basic depth-3 chain (last pair permuted)
-define double @test2(double %A1, double %A2, double %B1, double %B2) {
-; CHECK-LABEL: @test2(
-; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
-; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
- %X1 = fsub double %A1, %B1
- %X2 = fsub double %A2, %B2
-; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
- %Y1 = fmul double %X1, %A1
- %Y2 = fmul double %X2, %A2
-; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
- %Z1 = fadd double %Y2, %B1
- %Z2 = fadd double %Y1, %B2
-; CHECK: %Z1.v.i1.1 = insertelement <2 x double> undef, double %B2, i32 0
-; CHECK: %Z1.v.i1.2 = insertelement <2 x double> %Z1.v.i1.1, double %B1, i32 1
-; CHECK: %Z2 = fadd <2 x double> %Y1, %Z1.v.i1.2
- %R = fmul double %Z1, %Z2
-; CHECK: %Z2.v.r1 = extractelement <2 x double> %Z2, i32 0
-; CHECK: %Z2.v.r2 = extractelement <2 x double> %Z2, i32 1
-; CHECK: %R = fmul double %Z2.v.r2, %Z2.v.r1
- ret double %R
-; CHECK: ret double %R
-}
-
-; Basic depth-3 chain (last pair first splat)
-define double @test3(double %A1, double %A2, double %B1, double %B2) {
-; CHECK-LABEL: @test3(
-; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
-; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
- %X1 = fsub double %A1, %B1
- %X2 = fsub double %A2, %B2
-; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
- %Y1 = fmul double %X1, %A1
- %Y2 = fmul double %X2, %A2
-; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
- %Z1 = fadd double %Y2, %B1
- %Z2 = fadd double %Y2, %B2
-; CHECK: %Z1.v.i0 = shufflevector <2 x double> %Y1, <2 x double> undef, <2 x i32> <i32 1, i32 1>
-; CHECK: %Z1 = fadd <2 x double> %Z1.v.i0, %X1.v.i1.2
- %R = fmul double %Z1, %Z2
-; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
-; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
-; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
- ret double %R
-; CHECK: ret double %R
-}
-
-; Basic depth-3 chain (last pair second splat)
-define double @test4(double %A1, double %A2, double %B1, double %B2) {
-; CHECK-LABEL: @test4(
-; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
-; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
- %X1 = fsub double %A1, %B1
- %X2 = fsub double %A2, %B2
-; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
- %Y1 = fmul double %X1, %A1
- %Y2 = fmul double %X2, %A2
-; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
- %Z1 = fadd double %Y1, %B1
- %Z2 = fadd double %Y1, %B2
-; CHECK: %Z1.v.i0 = shufflevector <2 x double> %Y1, <2 x double> undef, <2 x i32> zeroinitializer
-; CHECK: %Z1 = fadd <2 x double> %Z1.v.i0, %X1.v.i1.2
- %R = fmul double %Z1, %Z2
-; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
-; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
-; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
- ret double %R
-; CHECK: ret double %R
-}
-
-; Basic depth-3 chain
-define <2 x float> @test5(<2 x float> %A1, <2 x float> %A2, <2 x float> %B1, <2 x float> %B2) {
-; CHECK-LABEL: @test5(
-; CHECK: %X1.v.i1 = shufflevector <2 x float> %B1, <2 x float> %B2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK: %X1.v.i0 = shufflevector <2 x float> %A1, <2 x float> %A2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %X1 = fsub <2 x float> %A1, %B1
- %X2 = fsub <2 x float> %A2, %B2
-; CHECK: %X1 = fsub <4 x float> %X1.v.i0, %X1.v.i1
- %Y1 = fmul <2 x float> %X1, %A1
- %Y2 = fmul <2 x float> %X2, %A2
-; CHECK: %Y1 = fmul <4 x float> %X1, %X1.v.i0
- %Z1 = fadd <2 x float> %Y1, %B1
- %Z2 = fadd <2 x float> %Y2, %B2
-; CHECK: %Z1 = fadd <4 x float> %Y1, %X1.v.i1
- %R = fmul <2 x float> %Z1, %Z2
-; CHECK: %Z1.v.r1 = shufflevector <4 x float> %Z1, <4 x float> undef, <2 x i32> <i32 0, i32 1>
-; CHECK: %Z1.v.r2 = shufflevector <4 x float> %Z1, <4 x float> undef, <2 x i32> <i32 2, i32 3>
-; CHECK: %R = fmul <2 x float> %Z1.v.r1, %Z1.v.r2
- ret <2 x float> %R
-; CHECK: ret <2 x float> %R
-}
-
-; Basic chain with shuffles
-define <8 x i8> @test6(<8 x i8> %A1, <8 x i8> %A2, <8 x i8> %B1, <8 x i8> %B2) {
-; CHECK-LABEL: @test6(
-; CHECK: %X1.v.i1 = shufflevector <8 x i8> %B1, <8 x i8> %B2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK: %X1.v.i0 = shufflevector <8 x i8> %A1, <8 x i8> %A2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %X1 = sub <8 x i8> %A1, %B1
- %X2 = sub <8 x i8> %A2, %B2
-; CHECK: %X1 = sub <16 x i8> %X1.v.i0, %X1.v.i1
- %Y1 = mul <8 x i8> %X1, %A1
- %Y2 = mul <8 x i8> %X2, %A2
-; CHECK: %Y1 = mul <16 x i8> %X1, %X1.v.i0
- %Z1 = add <8 x i8> %Y1, %B1
- %Z2 = add <8 x i8> %Y2, %B2
-; CHECK: %Z1 = add <16 x i8> %Y1, %X1.v.i1
- %Q1 = shufflevector <8 x i8> %Z1, <8 x i8> %Z2, <8 x i32> <i32 15, i32 8, i32 6, i32 1, i32 13, i32 10, i32 4, i32 3>
- %Q2 = shufflevector <8 x i8> %Z2, <8 x i8> %Z2, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 2, i32 4, i32 4, i32 1>
-; CHECK: %Q1.v.i1 = shufflevector <16 x i8> %Z1, <16 x i8> undef, <16 x i32> <i32 8, i32 undef, i32 10, i32 undef, i32 undef, i32 13, i32 undef, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK: %Q1 = shufflevector <16 x i8> %Z1, <16 x i8> %Q1.v.i1, <16 x i32> <i32 23, i32 16, i32 6, i32 1, i32 21, i32 18, i32 4, i32 3, i32 14, i32 15, i32 8, i32 9, i32 10, i32 12, i32 12, i32 9>
- %R = mul <8 x i8> %Q1, %Q2
-; CHECK: %Q1.v.r1 = shufflevector <16 x i8> %Q1, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK: %Q1.v.r2 = shufflevector <16 x i8> %Q1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK: %R = mul <8 x i8> %Q1.v.r1, %Q1.v.r2
- ret <8 x i8> %R
-; CHECK: ret <8 x i8> %R
-}
-
-; Basic depth-3 chain (flipped order)
-define double @test7(double %A1, double %A2, double %B1, double %B2) {
-; CHECK-LABEL: @test7(
-; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
-; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
- %X1 = fsub double %A1, %B1
- %X2 = fsub double %A2, %B2
-; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
- %Y1 = fmul double %X1, %A1
- %Y2 = fmul double %X2, %A2
-; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
- %Z2 = fadd double %Y2, %B2
- %Z1 = fadd double %Y1, %B1
-; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
- %R = fmul double %Z1, %Z2
-; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
-; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
-; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
- ret double %R
-; CHECK: ret double %R
-}
-
-; Basic depth-3 chain (subclass data)
-define i64 @test8(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
-; CHECK-LABEL: @test8(
-; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0
-; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1
-; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0
-; CHECK: %X1.v.i0.2 = insertelement <2 x i64> %X1.v.i0.1, i64 %A2, i32 1
- %X1 = sub nsw i64 %A1, %B1
- %X2 = sub i64 %A2, %B2
-; CHECK: %X1 = sub <2 x i64> %X1.v.i0.2, %X1.v.i1.2
- %Y1 = mul i64 %X1, %A1
- %Y2 = mul i64 %X2, %A2
-; CHECK: %Y1 = mul <2 x i64> %X1, %X1.v.i0.2
- %Z1 = add i64 %Y1, %B1
- %Z2 = add i64 %Y2, %B2
-; CHECK: %Z1 = add <2 x i64> %Y1, %X1.v.i1.2
- %R = mul i64 %Z1, %Z2
-; CHECK: %Z1.v.r1 = extractelement <2 x i64> %Z1, i32 0
-; CHECK: %Z1.v.r2 = extractelement <2 x i64> %Z1, i32 1
-; CHECK: %R = mul i64 %Z1.v.r1, %Z1.v.r2
- ret i64 %R
-; CHECK: ret i64 %R
-}
-
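Note: tests 2 through 4 above show how BBVectorize reconciles mismatched operand pairings: a permuted pair gets a freshly built vector, while a scalar reused by both halves becomes a splat. The splat case, as checked in @test4 (illustrative extract):
  ; %Z1 and %Z2 both consume element 0 of %Y1, so it is splatted first
  %Z1.v.i0 = shufflevector <2 x double> %Y1, <2 x double> undef, <2 x i32> zeroinitializer
  %Z1 = fadd <2 x double> %Z1.v.i0, %X1.v.i1.2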
diff --git a/test/Transforms/BBVectorize/simple3.ll b/test/Transforms/BBVectorize/simple3.ll
deleted file mode 100644
index 6edf7f07ac1d..000000000000
--- a/test/Transforms/BBVectorize/simple3.ll
+++ /dev/null
@@ -1,35 +0,0 @@
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-vector-bits=192 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
-
-; Basic depth-3 chain
-define double @test1(double %A1, double %A2, double %A3, double %B1, double %B2, double %B3) {
-; CHECK-LABEL: @test1(
-; CHECK: %X1.v.i1.11 = insertelement <3 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i1.22 = insertelement <3 x double> %X1.v.i1.11, double %B2, i32 1
-; CHECK: %X1.v.i1 = insertelement <3 x double> %X1.v.i1.22, double %B3, i32 2
-; CHECK: %X1.v.i0.13 = insertelement <3 x double> undef, double %A1, i32 0
-; CHECK: %X1.v.i0.24 = insertelement <3 x double> %X1.v.i0.13, double %A2, i32 1
-; CHECK: %X1.v.i0 = insertelement <3 x double> %X1.v.i0.24, double %A3, i32 2
- %X1 = fsub double %A1, %B1
- %X2 = fsub double %A2, %B2
- %X3 = fsub double %A3, %B3
-; CHECK: %X1 = fsub <3 x double> %X1.v.i0, %X1.v.i1
- %Y1 = fmul double %X1, %A1
- %Y2 = fmul double %X2, %A2
- %Y3 = fmul double %X3, %A3
-; CHECK: %Y1 = fmul <3 x double> %X1, %X1.v.i0
- %Z1 = fadd double %Y1, %B1
- %Z2 = fadd double %Y2, %B2
- %Z3 = fadd double %Y3, %B3
-; CHECK: %Z1 = fadd <3 x double> %Y1, %X1.v.i1
- %R1 = fmul double %Z1, %Z2
- %R = fmul double %R1, %Z3
-; CHECK: %Z1.v.r210 = extractelement <3 x double> %Z1, i32 2
-; CHECK: %Z1.v.r1 = extractelement <3 x double> %Z1, i32 0
-; CHECK: %Z1.v.r2 = extractelement <3 x double> %Z1, i32 1
-; CHECK: %R1 = fmul double %Z1.v.r1, %Z1.v.r2
-; CHECK: %R = fmul double %R1, %Z1.v.r210
- ret double %R
-; CHECK: ret double %R
-}
-
diff --git a/test/Transforms/BBVectorize/vector-sel.ll b/test/Transforms/BBVectorize/vector-sel.ll
deleted file mode 100644
index cb775ceae695..000000000000
--- a/test/Transforms/BBVectorize/vector-sel.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-; RUN: opt < %s -bb-vectorize -S | FileCheck %s
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-@d = external global [1 x [10 x [1 x i16]]], align 16
-
-;CHECK-LABEL: @test
-;CHECK: %0 = select i1 %bool, <4 x i16> <i16 -2, i16 -2, i16 -2, i16 -2>, <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>
-;CHECK: %1 = select i1 %bool, <4 x i16> <i16 -2, i16 -2, i16 -2, i16 -2>, <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>
-;CHECK: %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-;CHECK: %3 = shufflevector <4 x i1> %boolvec, <4 x i1> %boolvec, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-;CHECK: %4 = select <8 x i1> %3, <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>, <8 x i16> %2
-define void @test() {
-entry:
- %bool = icmp ne i32 undef, 0
- %boolvec = icmp ne <4 x i32> undef, zeroinitializer
- br label %body
-
-body:
- %0 = select i1 %bool, <4 x i16> <i16 -2, i16 -2, i16 -2, i16 -2>, <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>
- %1 = select i1 %bool, <4 x i16> <i16 -2, i16 -2, i16 -2, i16 -2>, <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>
- %2 = select <4 x i1> %boolvec, <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>, <4 x i16> %0
- %3 = select <4 x i1> %boolvec, <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>, <4 x i16> %1
- %4 = add nsw <4 x i16> %2, zeroinitializer
- %5 = add nsw <4 x i16> %3, zeroinitializer
- %6 = getelementptr inbounds [1 x [10 x [1 x i16]]], [1 x [10 x [1 x i16]]]* @d, i64 0, i64 0, i64 undef, i64 0
- %7 = bitcast i16* %6 to <4 x i16>*
- store <4 x i16> %4, <4 x i16>* %7, align 2
- %8 = getelementptr [1 x [10 x [1 x i16]]], [1 x [10 x [1 x i16]]]* @d, i64 0, i64 0, i64 undef, i64 4
- %9 = bitcast i16* %8 to <4 x i16>*
- store <4 x i16> %5, <4 x i16>* %9, align 2
- ret void
-}
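Note: the checks above cover two widenings at once. Selects guarded by the same scalar i1 fuse directly, while the <4 x i1>-guarded pair first has its mask doubled to <8 x i1>. A sketch of the mask widening, with %tvals/%fvals standing in for the fused operand vectors (hypothetical names):
  %mask8 = shufflevector <4 x i1> %boolvec, <4 x i1> %boolvec, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %sel8 = select <8 x i1> %mask8, <8 x i16> %tvals, <8 x i16> %fvals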
diff --git a/test/Transforms/BBVectorize/xcore/no-vector-registers.ll b/test/Transforms/BBVectorize/xcore/no-vector-registers.ll
deleted file mode 100644
index 9ebdb7368a35..000000000000
--- a/test/Transforms/BBVectorize/xcore/no-vector-registers.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S -mtriple=xcore | FileCheck %s
-
-target datalayout = "e-p:32:32:32-a0:0:32-n32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f16:16:32-f32:32:32-f64:32:32"
-target triple = "xcore"
-
-; Basic depth-3 chain
-define double @test1(double %A1, double %A2, double %B1, double %B2) {
-; CHECK-LABEL: @test1(
-; CHECK-NOT: <2 x double>
- %X1 = fsub double %A1, %B1
- %X2 = fsub double %A2, %B2
- %Y1 = fmul double %X1, %A1
- %Y2 = fmul double %X2, %A2
- %Z1 = fadd double %Y1, %B1
- %Z2 = fadd double %Y2, %B2
- %R = fmul double %Z1, %Z2
- ret double %R
-}
diff --git a/test/Transforms/CodeExtractor/BlockAddressReference.ll b/test/Transforms/CodeExtractor/BlockAddressReference.ll
new file mode 100644
index 000000000000..91f85bf3ed87
--- /dev/null
+++ b/test/Transforms/CodeExtractor/BlockAddressReference.ll
@@ -0,0 +1,36 @@
+; RUN: opt < %s -loop-extract -S | FileCheck %s
+
+@label = common local_unnamed_addr global i8* null
+
+; CHECK: define
+; no outlined function
+; CHECK-NOT: define
+define i32 @sterix(i32 %n) {
+entry:
+ %tobool = icmp ne i32 %n, 0
+ ; This blockaddress references a basic block that ends up in the extracted loop.
+ %cond = select i1 %tobool, i8* blockaddress(@sterix, %for.cond), i8* blockaddress(@sterix, %exit)
+ store i8* %cond, i8** @label
+ %cmp5 = icmp sgt i32 %n, 0
+ br i1 %cmp5, label %for.body, label %exit
+
+for.cond:
+ %mul = shl nsw i32 %s.06, 1
+ %exitcond = icmp eq i32 %inc, %n
+ br i1 %exitcond, label %exit.loopexit, label %for.body
+
+for.body:
+ %i.07 = phi i32 [ %inc, %for.cond ], [ 0, %entry ]
+ %s.06 = phi i32 [ %mul, %for.cond ], [ 1, %entry ]
+ %inc = add nuw nsw i32 %i.07, 1
+ br label %for.cond
+
+exit.loopexit:
+ %phitmp = icmp ne i32 %s.06, 2
+ %phitmp8 = zext i1 %phitmp to i32
+ br label %exit
+
+exit:
+ %s.1 = phi i32 [ 1, %entry ], [ %phitmp8, %exit.loopexit ]
+ ret i32 %s.1
+}
diff --git a/test/Transforms/CodeExtractor/BlockAddressSelfReference.ll b/test/Transforms/CodeExtractor/BlockAddressSelfReference.ll
new file mode 100644
index 000000000000..7d5a827a358a
--- /dev/null
+++ b/test/Transforms/CodeExtractor/BlockAddressSelfReference.ll
@@ -0,0 +1,50 @@
+; RUN: opt < %s -loop-extract -S | FileCheck %s
+
+@choum.addr = internal unnamed_addr constant [3 x i8*] [i8* blockaddress(@choum, %bb10), i8* blockaddress(@choum, %bb14), i8* blockaddress(@choum, %bb18)]
+
+; CHECK: define
+; no outlined function
+; CHECK-NOT: define
+
+define void @choum(i32 %arg, i32* nocapture %arg1, i32 %arg2) {
+bb:
+ %tmp = icmp sgt i32 %arg, 0
+ br i1 %tmp, label %bb3, label %bb24
+
+bb3: ; preds = %bb
+ %tmp4 = sext i32 %arg2 to i64
+ %tmp5 = getelementptr inbounds [3 x i8*], [3 x i8*]* @choum.addr, i64 0, i64 %tmp4
+ %tmp6 = load i8*, i8** %tmp5
+ %tmp7 = zext i32 %arg to i64
+ br label %bb8
+
+bb8: ; preds = %bb18, %bb3
+ %tmp9 = phi i64 [ 0, %bb3 ], [ %tmp22, %bb18 ]
+ indirectbr i8* %tmp6, [label %bb10, label %bb14, label %bb18]
+
+bb10: ; preds = %bb8
+ %tmp11 = getelementptr inbounds i32, i32* %arg1, i64 %tmp9
+ %tmp12 = load i32, i32* %tmp11
+ %tmp13 = add nsw i32 %tmp12, 1
+ store i32 %tmp13, i32* %tmp11
+ br label %bb14
+
+bb14: ; preds = %bb10, %bb8
+ %tmp15 = getelementptr inbounds i32, i32* %arg1, i64 %tmp9
+ %tmp16 = load i32, i32* %tmp15
+ %tmp17 = shl nsw i32 %tmp16, 1
+ store i32 %tmp17, i32* %tmp15
+ br label %bb18
+
+bb18: ; preds = %bb14, %bb8
+ %tmp19 = getelementptr inbounds i32, i32* %arg1, i64 %tmp9
+ %tmp20 = load i32, i32* %tmp19
+ %tmp21 = add nsw i32 %tmp20, -3
+ store i32 %tmp21, i32* %tmp19
+ %tmp22 = add nuw nsw i64 %tmp9, 1
+ %tmp23 = icmp eq i64 %tmp22, %tmp7
+ br i1 %tmp23, label %bb24, label %bb8
+
+bb24: ; preds = %bb18, %bb
+ ret void
+}
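Note: both new CodeExtractor tests above pin down the same rule: extraction must bail out when a blockaddress taken outside the candidate region (stored to a global in the first test, baked into a constant table in the second) refers to a block inside it, since outlining that block would leave the address dangling. The offending shape, from the first test:
  ; The address of %for.cond escapes; extracting the loop that contains
  ; %for.cond would invalidate it, hence the CHECK-NOT: define above.
  %cond = select i1 %tobool, i8* blockaddress(@sterix, %for.cond), i8* blockaddress(@sterix, %exit)
  store i8* %cond, i8** @label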
diff --git a/test/Transforms/CodeGenPrepare/X86/memcmp.ll b/test/Transforms/CodeGenPrepare/X86/memcmp.ll
index 690e714af261..2435cd7d0a83 100644
--- a/test/Transforms/CodeGenPrepare/X86/memcmp.ll
+++ b/test/Transforms/CodeGenPrepare/X86/memcmp.ll
@@ -4,47 +4,18 @@
declare i32 @memcmp(i8* nocapture, i8* nocapture, i64)
define i32 @cmp2(i8* nocapture readonly %x, i8* nocapture readonly %y) {
-; X32-LABEL: @cmp2(
-; X32-NEXT: loadbb:
-; X32-NEXT: [[TMP0:%.*]] = bitcast i8* %x to i16*
-; X32-NEXT: [[TMP1:%.*]] = bitcast i8* %y to i16*
-; X32-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP0]]
-; X32-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
-; X32-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
-; X32-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
-; X32-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
-; X32-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i32
-; X32-NEXT: [[TMP8:%.*]] = sub i32 [[TMP6]], [[TMP7]]
-; X32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
-; X32-NEXT: br i1 [[TMP9]], label %res_block, label %endblock
-; X32: res_block:
-; X32-NEXT: [[TMP10:%.*]] = icmp ult i32 [[TMP6]], [[TMP7]]
-; X32-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 -1, i32 1
-; X32-NEXT: br label %endblock
-; X32: endblock:
-; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP11]], %res_block ]
-; X32-NEXT: ret i32 [[PHI_RES]]
-;
-; X64-LABEL: @cmp2(
-; X64-NEXT: loadbb:
-; X64-NEXT: [[TMP0:%.*]] = bitcast i8* %x to i16*
-; X64-NEXT: [[TMP1:%.*]] = bitcast i8* %y to i16*
-; X64-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP0]]
-; X64-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
-; X64-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
-; X64-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
-; X64-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i64
-; X64-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i64
-; X64-NEXT: [[TMP8:%.*]] = sub i64 [[TMP6]], [[TMP7]]
-; X64-NEXT: [[TMP9:%.*]] = icmp ne i64 [[TMP8]], 0
-; X64-NEXT: br i1 [[TMP9]], label %res_block, label %endblock
-; X64: res_block:
-; X64-NEXT: [[TMP10:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]]
-; X64-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 -1, i32 1
-; X64-NEXT: br label %endblock
-; X64: endblock:
-; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP11]], %res_block ]
-; X64-NEXT: ret i32 [[PHI_RES]]
+; ALL-LABEL: @cmp2(
+; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* %x to i16*
+; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* %y to i16*
+; ALL-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
+; ALL-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
+; ALL-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; ALL-NEXT: [[TMP6:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; ALL-NEXT: [[TMP7:%.*]] = icmp ne i16 [[TMP5]], [[TMP6]]
+; ALL-NEXT: [[TMP8:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
+; ALL-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 -1, i32 1
+; ALL-NEXT: [[TMP10:%.*]] = select i1 [[TMP7]], i32 [[TMP9]], i32 0
+; ALL-NEXT: ret i32 [[TMP10]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 2)
ret i32 %call
@@ -60,45 +31,18 @@ define i32 @cmp3(i8* nocapture readonly %x, i8* nocapture readonly %y) {
}
define i32 @cmp4(i8* nocapture readonly %x, i8* nocapture readonly %y) {
-; X32-LABEL: @cmp4(
-; X32-NEXT: loadbb:
-; X32-NEXT: [[TMP0:%.*]] = bitcast i8* %x to i32*
-; X32-NEXT: [[TMP1:%.*]] = bitcast i8* %y to i32*
-; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
-; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X32-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
-; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
-; X32-NEXT: [[TMP6:%.*]] = sub i32 [[TMP4]], [[TMP5]]
-; X32-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0
-; X32-NEXT: br i1 [[TMP7]], label %res_block, label %endblock
-; X32: res_block:
-; X32-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP4]], [[TMP5]]
-; X32-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 -1, i32 1
-; X32-NEXT: br label %endblock
-; X32: endblock:
-; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP9]], %res_block ]
-; X32-NEXT: ret i32 [[PHI_RES]]
-;
-; X64-LABEL: @cmp4(
-; X64-NEXT: loadbb:
-; X64-NEXT: [[TMP0:%.*]] = bitcast i8* %x to i32*
-; X64-NEXT: [[TMP1:%.*]] = bitcast i8* %y to i32*
-; X64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
-; X64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X64-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
-; X64-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
-; X64-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
-; X64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64
-; X64-NEXT: [[TMP8:%.*]] = sub i64 [[TMP6]], [[TMP7]]
-; X64-NEXT: [[TMP9:%.*]] = icmp ne i64 [[TMP8]], 0
-; X64-NEXT: br i1 [[TMP9]], label %res_block, label %endblock
-; X64: res_block:
-; X64-NEXT: [[TMP10:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]]
-; X64-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 -1, i32 1
-; X64-NEXT: br label %endblock
-; X64: endblock:
-; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP11]], %res_block ]
-; X64-NEXT: ret i32 [[PHI_RES]]
+; ALL-LABEL: @cmp4(
+; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* %x to i32*
+; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* %y to i32*
+; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; ALL-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; ALL-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; ALL-NEXT: [[TMP6:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; ALL-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP5]], [[TMP6]]
+; ALL-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP5]], [[TMP6]]
+; ALL-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 -1, i32 1
+; ALL-NEXT: [[TMP10:%.*]] = select i1 [[TMP7]], i32 [[TMP9]], i32 0
+; ALL-NEXT: ret i32 [[TMP10]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 4)
ret i32 %call
@@ -137,23 +81,17 @@ define i32 @cmp8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X32-NEXT: ret i32 [[CALL]]
;
; X64-LABEL: @cmp8(
-; X64-NEXT: loadbb:
-; X64-NEXT: [[TMP0:%.*]] = bitcast i8* %x to i64*
-; X64-NEXT: [[TMP1:%.*]] = bitcast i8* %y to i64*
-; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]]
+; X64-NEXT: [[TMP1:%.*]] = bitcast i8* %x to i64*
+; X64-NEXT: [[TMP2:%.*]] = bitcast i8* %y to i64*
; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
; X64-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
-; X64-NEXT: [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]]
-; X64-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0
-; X64-NEXT: br i1 [[TMP7]], label %res_block, label %endblock
-; X64: res_block:
-; X64-NEXT: [[TMP8:%.*]] = icmp ult i64 [[TMP4]], [[TMP5]]
+; X64-NEXT: [[TMP6:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: [[TMP8:%.*]] = icmp ult i64 [[TMP5]], [[TMP6]]
; X64-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 -1, i32 1
-; X64-NEXT: br label %endblock
-; X64: endblock:
-; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP9]], %res_block ]
-; X64-NEXT: ret i32 [[PHI_RES]]
+; X64-NEXT: [[TMP10:%.*]] = select i1 [[TMP7]], i32 [[TMP9]], i32 0
+; X64-NEXT: ret i32 [[TMP10]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 8)
ret i32 %call
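Note: the rewritten CHECK lines above describe a branchless memcmp expansion: the old loadbb/res_block/endblock control flow is replaced by a straight-line three-way compare built from two icmps and two selects. Condensed sketch, with %a.bswap/%b.bswap standing in for the byte-swapped loads (hypothetical names):
  %ne = icmp ne i32 %a.bswap, %b.bswap     ; any difference at all?
  %lt = icmp ult i32 %a.bswap, %b.bswap    ; which side is smaller?
  %sgn = select i1 %lt, i32 -1, i32 1
  %res = select i1 %ne, i32 %sgn, i32 0    ; 0 when equal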
diff --git a/test/Transforms/CodeGenPrepare/nonintegral.ll b/test/Transforms/CodeGenPrepare/nonintegral.ll
new file mode 100644
index 000000000000..06554cc1c9ee
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/nonintegral.ll
@@ -0,0 +1,68 @@
+; RUN: opt -S -codegenprepare < %s | FileCheck %s
+; RUN: opt -S -codegenprepare -addr-sink-using-gep=false < %s | FileCheck %s
+
+; This target data layout is modified to have a non-integral addrspace(1),
+; in order to verify that codegenprepare does not try to introduce illegal
+; inttoptrs.
+target datalayout =
+"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-ni:1"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @test_simple(i1 %cond, i64 addrspace(1)* %base) {
+; CHECK-LABEL: @test_simple
+; CHECK-NOT: inttoptr {{.*}} to i64 addrspace(1)*
+entry:
+ %addr = getelementptr inbounds i64, i64 addrspace(1)* %base, i64 5
+ %casted = bitcast i64 addrspace(1)* %addr to i32 addrspace(1)*
+ br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+ %v = load i32, i32 addrspace(1)* %casted, align 4
+ br label %fallthrough
+
+fallthrough:
+ ret void
+}
+
+
+define void @test_inttoptr_base(i1 %cond, i64 %base) {
+; CHECK-LABEL: @test_inttoptr_base
+; CHECK-NOT: inttoptr {{.*}} to i64 addrspace(1)*
+entry:
+; Doing the inttoptr in the integral addrspace(0) followed by an explicit
+; (frontend-introduced) addrspacecast is fine. We cannot, however, introduce
+; a direct inttoptr to addrspace(1).
+ %baseptr = inttoptr i64 %base to i64*
+ %baseptrni = addrspacecast i64 *%baseptr to i64 addrspace(1)*
+ %addr = getelementptr inbounds i64, i64 addrspace(1)* %baseptrni, i64 5
+ %casted = bitcast i64 addrspace(1)* %addr to i32 addrspace(1)*
+ br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+ %v = load i32, i32 addrspace(1)* %casted, align 4
+ br label %fallthrough
+
+fallthrough:
+ ret void
+}
+
+define void @test_ptrtoint_base(i1 %cond, i64 addrspace(1)* %base) {
+; CHECK-LABEL: @test_ptrtoint_base
+; CHECK-NOT: ptrtoint addrspace(1)* {{.*}} to i64
+entry:
+; This one is inserted by the frontend, so it's fine. We're not allowed to
+; directly ptrtoint %base ourselves though
+ %baseptr0 = addrspacecast i64 addrspace(1)* %base to i64*
+ %toint = ptrtoint i64* %baseptr0 to i64
+ %added = add i64 %toint, 8
+ %toptr = inttoptr i64 %added to i64*
+ %geped = getelementptr i64, i64* %toptr, i64 2
+ br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+ %v = load i64, i64* %geped, align 4
+ br label %fallthrough
+
+fallthrough:
+ ret void
+}
diff --git a/test/Transforms/ConstantHoisting/ARM/gep-struct-index.ll b/test/Transforms/ConstantHoisting/ARM/gep-struct-index.ll
new file mode 100644
index 000000000000..45f4500b37c1
--- /dev/null
+++ b/test/Transforms/ConstantHoisting/ARM/gep-struct-index.ll
@@ -0,0 +1,37 @@
+; RUN: opt -consthoist -S < %s | FileCheck %s
+target triple = "thumbv6m-none-eabi"
+
+%T = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32 }
+
+; Indices for GEPs that index into a struct type should not be hoisted.
+define i32 @test1(%T* %P) nounwind {
+; CHECK-LABEL: @test1
+; CHECK: %const = bitcast i32 256 to i32
+; CHECK: %addr1 = getelementptr %T, %T* %P, i32 %const, i32 256
+; CHECK: %addr2 = getelementptr %T, %T* %P, i32 %const, i32 256
+; The first index into the pointer is hoisted, but the second one into the
+; struct isn't.
+ %addr1 = getelementptr %T, %T* %P, i32 256, i32 256
+ %tmp1 = load i32, i32* %addr1
+ %addr2 = getelementptr %T, %T* %P, i32 256, i32 256
+ %tmp2 = load i32, i32* %addr2
+ %tmp4 = add i32 %tmp1, %tmp2
+ ret i32 %tmp4
+}
+
diff --git a/test/Transforms/Inline/AArch64/inline-target-attr.ll b/test/Transforms/Inline/AArch64/inline-target-attr.ll
new file mode 100644
index 000000000000..af87ff6e7404
--- /dev/null
+++ b/test/Transforms/Inline/AArch64/inline-target-attr.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -S -inline | FileCheck %s
+; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -S -passes='cgscc(inline)' | FileCheck %s
+; Check that we only inline when we have compatible target attributes.
+
+define i32 @foo() #0 {
+entry:
+ %call = call i32 (...) @baz()
+ ret i32 %call
+; CHECK-LABEL: foo
+; CHECK: call i32 (...) @baz()
+}
+declare i32 @baz(...) #0
+
+define i32 @bar() #1 {
+entry:
+ %call = call i32 @foo()
+ ret i32 %call
+; CHECK-LABEL: bar
+; CHECK: call i32 (...) @baz()
+}
+
+define i32 @qux() #0 {
+entry:
+ %call = call i32 @bar()
+ ret i32 %call
+; CHECK-LABEL: qux
+; CHECK: call i32 @bar()
+}
+
+define i32 @strict_align() #2 {
+entry:
+ %call = call i32 @foo()
+ ret i32 %call
+; CHECK-LABEL: strict_align
+; CHECK: call i32 (...) @baz()
+}
+
+attributes #0 = { "target-cpu"="generic" "target-features"="+crc,+neon" }
+attributes #1 = { "target-cpu"="generic" "target-features"="+crc,+neon,+crypto" }
+attributes #2 = { "target-cpu"="generic" "target-features"="+crc,+neon,+strict-align" }
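The attribute sets encode the rule under test: a callee is inlined only when its target features are a subset of the caller's, which is why @bar (with +crypto) absorbs @foo but @qux cannot absorb @bar, and why the caller-side +strict-align in @strict_align is tolerated. Roughly the same subset rule can be sketched at the C level with function target attributes; this assumes a Clang recent enough to accept AArch64 target attributes, and the names are hypothetical:

__attribute__((target("crc,neon")))
static int callee(void) { return 42; }

/* Legal to inline: the caller's feature set ("crc,neon,crypto") is a
   superset of the callee's ("crc,neon"). The reverse direction, a caller
   missing a callee feature, would have to stay an out-of-line call. */
__attribute__((target("crc,neon,crypto")))
int caller(void) { return callee(); }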
diff --git a/test/Transforms/Inline/inline-cold-callsite-pgo.ll b/test/Transforms/Inline/inline-cold-callsite-pgo.ll
new file mode 100644
index 000000000000..26ea8e50eaf1
--- /dev/null
+++ b/test/Transforms/Inline/inline-cold-callsite-pgo.ll
@@ -0,0 +1,54 @@
+; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inline-threshold=100 -inline-cold-callsite-threshold=0 -S | FileCheck %s
+
+; This tests that a cold callsite gets the inline-cold-callsite-threshold
+; and does not get inlined. Another callsite to an identical callee that
+; is not cold gets inlined because cost is below the inline-threshold.
+
+define i32 @callee1(i32 %x) !prof !21 {
+ %x1 = add i32 %x, 1
+ %x2 = add i32 %x1, 1
+ %x3 = add i32 %x2, 1
+ call void @extern()
+ ret i32 %x3
+}
+
+define i32 @caller(i32 %n) !prof !22 {
+; CHECK-LABEL: @caller(
+ %cond = icmp sle i32 %n, 100
+ br i1 %cond, label %cond_true, label %cond_false, !prof !0
+
+cond_true:
+; CHECK-LABEL: cond_true:
+; CHECK-NOT: call i32 @callee1
+; CHECK: ret i32 %x3.i
+ %i = call i32 @callee1(i32 %n)
+ ret i32 %i
+cond_false:
+; CHECK-LABEL: cond_false:
+; CHECK: call i32 @callee1
+; CHECK: ret i32 %j
+ %j = call i32 @callee1(i32 %n)
+ ret i32 %j
+}
+declare void @extern()
+
+!0 = !{!"branch_weights", i32 200, i32 1}
+
+!llvm.module.flags = !{!1}
+!21 = !{!"function_entry_count", i64 200}
+!22 = !{!"function_entry_count", i64 200}
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 1000}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 1000}
+!8 = !{!"NumCounts", i64 3}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 1000, i32 1}
+!13 = !{i32 999000, i64 1000, i32 1}
+!14 = !{i32 999999, i64 1, i32 2}
diff --git a/test/Transforms/Inline/inline-cold-callsite.ll b/test/Transforms/Inline/inline-cold-callsite.ll
index 26ea8e50eaf1..50dd55d62edb 100644
--- a/test/Transforms/Inline/inline-cold-callsite.ll
+++ b/test/Transforms/Inline/inline-cold-callsite.ll
@@ -1,54 +1,47 @@
+
; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inline-threshold=100 -inline-cold-callsite-threshold=0 -S | FileCheck %s
; This tests that a cold callsite gets the inline-cold-callsite-threshold
; and does not get inlined. Another callsite to an identical callee that
; is not cold gets inlined because cost is below the inline-threshold.
-define i32 @callee1(i32 %x) !prof !21 {
- %x1 = add i32 %x, 1
- %x2 = add i32 %x1, 1
- %x3 = add i32 %x2, 1
+define void @callee() {
+ call void @extern()
call void @extern()
- ret i32 %x3
+ ret void
}
-define i32 @caller(i32 %n) !prof !22 {
-; CHECK-LABEL: @caller(
- %cond = icmp sle i32 %n, 100
- br i1 %cond, label %cond_true, label %cond_false, !prof !0
-
-cond_true:
-; CHECK-LABEL: cond_true:
-; CHECK-NOT: call i32 @callee1
-; CHECK: ret i32 %x3.i
- %i = call i32 @callee1(i32 %n)
- ret i32 %i
-cond_false:
-; CHECK-LABEL: cond_false:
-; CHECK: call i32 @callee1
-; CHECK: ret i32 %j
- %j = call i32 @callee1(i32 %n)
- ret i32 %j
-}
declare void @extern()
+declare i1 @ext(i32)
+
+; CHECK-LABEL: caller
+define i32 @caller(i32 %n) {
+entry:
+ %cmp4 = icmp sgt i32 %n, 0
+ br i1 %cmp4, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret i32 0
+
+for.body:
+ %i.05 = phi i32 [ %inc, %for.inc ], [ 0, %entry ]
+; CHECK: %call = tail call
+ %call = tail call zeroext i1 @ext(i32 %i.05)
+; CHECK-NOT: call void @callee
+; CHECK-NEXT: call void @extern
+ call void @callee()
+ br i1 %call, label %cold, label %for.inc, !prof !0
+
+cold:
+; CHECK: call void @callee
+ call void @callee()
+ br label %for.inc
+
+for.inc:
+ %inc = add nuw nsw i32 %i.05, 1
+ %exitcond = icmp eq i32 %inc, %n
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
-!0 = !{!"branch_weights", i32 200, i32 1}
-
-!llvm.module.flags = !{!1}
-!21 = !{!"function_entry_count", i64 200}
-!22 = !{!"function_entry_count", i64 200}
-
-!1 = !{i32 1, !"ProfileSummary", !2}
-!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
-!3 = !{!"ProfileFormat", !"InstrProf"}
-!4 = !{!"TotalCount", i64 10000}
-!5 = !{!"MaxCount", i64 1000}
-!6 = !{!"MaxInternalCount", i64 1}
-!7 = !{!"MaxFunctionCount", i64 1000}
-!8 = !{!"NumCounts", i64 3}
-!9 = !{!"NumFunctions", i64 3}
-!10 = !{!"DetailedSummary", !11}
-!11 = !{!12, !13, !14}
-!12 = !{i32 10000, i64 1000, i32 1}
-!13 = !{i32 999000, i64 1000, i32 1}
-!14 = !{i32 999999, i64 1, i32 2}
+!0 = !{!"branch_weights", i32 1, i32 2000}
diff --git a/test/Transforms/Inline/optimization-remarks-yaml.ll b/test/Transforms/Inline/optimization-remarks-yaml.ll
index 532e443e2170..16783634484f 100644
--- a/test/Transforms/Inline/optimization-remarks-yaml.ll
+++ b/test/Transforms/Inline/optimization-remarks-yaml.ll
@@ -1,8 +1,21 @@
-; RUN: opt < %s -S -inline -pass-remarks-missed=inline -pass-remarks-with-hotness \
+; RUN: opt < %s -S -inline -pass-remarks-missed=inline \
+; RUN: -pass-remarks-with-hotness -pass-remarks-hotness-threshold 15 \
; RUN: -pass-remarks-output=%t 2>&1 | FileCheck %s
; RUN: cat %t | FileCheck -check-prefix=YAML %s
; RUN: opt < %s -S -inline -pass-remarks-with-hotness -pass-remarks-output=%t
; RUN: cat %t | FileCheck -check-prefix=YAML %s
+;
+; Verify that remarks that don't meet the hotness threshold are not output.
+; RUN: opt < %s -S -inline -pass-remarks-missed=inline \
+; RUN: -pass-remarks-with-hotness -pass-remarks-hotness-threshold 100 \
+; RUN: -pass-remarks-output=%t.threshold 2>&1 | \
+; RUN: FileCheck -check-prefix=THRESHOLD %s
+; RUN: test ! -s %t.threshold
+; RUN: opt < %s -S -inline \
+; RUN: -pass-remarks-with-hotness -pass-remarks-hotness-threshold 100 \
+; RUN: -pass-remarks-output=%t.threshold
+; The remarks output file should be empty.
+; RUN: test ! -s %t.threshold
; Check the YAML file generated for inliner remarks for this program:
;
@@ -43,6 +56,9 @@
; YAML-NEXT: - String: ' because its definition is unavailable'
; YAML-NEXT: ...
+; No remarks should be output, since none meet the threshold.
+; THRESHOLD-NOT: remark
+
; ModuleID = '/tmp/s.c'
source_filename = "/tmp/s.c"
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/test/Transforms/Inline/pr33637.ll b/test/Transforms/Inline/pr33637.ll
new file mode 100644
index 000000000000..315feca27bd9
--- /dev/null
+++ b/test/Transforms/Inline/pr33637.ll
@@ -0,0 +1,25 @@
+; RUN: opt -inline < %s
+
+define void @patatino() {
+for.cond:
+ br label %for.body
+
+for.body:
+ %tobool = icmp eq i32 5, 0
+ %sel = select i1 %tobool, i32 0, i32 2
+ br i1 undef, label %cleanup1.thread, label %cleanup1
+
+cleanup1.thread:
+ ret void
+
+cleanup1:
+ %cleanup.dest2 = phi i32 [ %sel, %for.body ]
+ %switch = icmp ult i32 %cleanup.dest2, 1
+ ret void
+}
+
+define void @main() {
+entry:
+ call void @patatino()
+ ret void
+}
diff --git a/test/Transforms/InstCombine/and-or-not.ll b/test/Transforms/InstCombine/and-or-not.ll
index 28881668ca89..1baecb4a13a3 100644
--- a/test/Transforms/InstCombine/and-or-not.ll
+++ b/test/Transforms/InstCombine/and-or-not.ll
@@ -370,7 +370,7 @@ define i32 @xor_to_xor6(float %fa, float %fb) {
; CHECK-LABEL: @xor_to_xor6(
; CHECK-NEXT: [[A:%.*]] = fptosi float %fa to i32
; CHECK-NEXT: [[B:%.*]] = fptosi float %fb to i32
-; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[B]], [[A]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[A]], [[B]]
; CHECK-NEXT: ret i32 [[XOR]]
;
%a = fptosi float %fa to i32
@@ -408,7 +408,7 @@ define i32 @xor_to_xor8(float %fa, float %fb) {
; CHECK-LABEL: @xor_to_xor8(
; CHECK-NEXT: [[A:%.*]] = fptosi float %fa to i32
; CHECK-NEXT: [[B:%.*]] = fptosi float %fb to i32
-; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[B]], [[A]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[A]], [[B]]
; CHECK-NEXT: ret i32 [[XOR]]
;
%a = fptosi float %fa to i32
@@ -446,7 +446,7 @@ define i32 @xor_to_xor10(float %fa, float %fb) {
; CHECK-LABEL: @xor_to_xor10(
; CHECK-NEXT: [[A:%.*]] = fptosi float %fa to i32
; CHECK-NEXT: [[B:%.*]] = fptosi float %fb to i32
-; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[B]], [[A]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[A]], [[B]]
; CHECK-NEXT: ret i32 [[XOR]]
;
%a = fptosi float %fa to i32
@@ -484,7 +484,7 @@ define i32 @xor_to_xor12(float %fa, float %fb) {
; CHECK-LABEL: @xor_to_xor12(
; CHECK-NEXT: [[A:%.*]] = fptosi float %fa to i32
; CHECK-NEXT: [[B:%.*]] = fptosi float %fb to i32
-; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[B]], [[A]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[A]], [[B]]
; CHECK-NEXT: ret i32 [[XOR]]
;
%a = fptosi float %fa to i32
@@ -518,7 +518,7 @@ define i64 @PR32830(i64 %a, i64 %b, i64 %c) {
}
; (~a | b) & (~b | a) --> ~(a ^ b)
-; TODO: this increases instrunction count if the pieces have additional users
+; TODO: this increases instruction count if the pieces have additional users
define i32 @and_to_nxor_multiuse(float %fa, float %fb) {
; CHECK-LABEL: @and_to_nxor_multiuse(
; CHECK-NEXT: [[A:%.*]] = fptosi float [[FA:%.*]] to i32
@@ -545,7 +545,7 @@ define i32 @and_to_nxor_multiuse(float %fa, float %fb) {
}
; (a & b) | ~(a | b) --> ~(a ^ b)
-; TODO: this increases instrunction count if the pieces have additional users
+; TODO: this increases instruction count if the pieces have additional users
define i32 @or_to_nxor_multiuse(i32 %a, i32 %b) {
; CHECK-LABEL: @or_to_nxor_multiuse(
; CHECK-NEXT: [[AND:%.*]] = and i32 [[A:%.*]], [[B:%.*]]
@@ -564,3 +564,87 @@ define i32 @or_to_nxor_multiuse(i32 %a, i32 %b) {
%mul2 = mul i32 %mul1, %or2
ret i32 %mul2
}
+
+; (a | b) ^ (~a | ~b) --> ~(a ^ b)
+define i32 @xor_to_xnor1(float %fa, float %fb) {
+; CHECK-LABEL: @xor_to_xnor1(
+; CHECK-NEXT: [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT: [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT: [[OR1:%.*]] = or i32 [[A]], [[B]]
+; CHECK-NEXT: [[OR2_DEMORGAN:%.*]] = and i32 [[A]], [[B]]
+; CHECK-NEXT: [[OR2:%.*]] = xor i32 [[OR2_DEMORGAN]], -1
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[OR1]], [[OR2]]
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %a = fptosi float %fa to i32
+ %b = fptosi float %fb to i32
+ %nota = xor i32 %a, -1
+ %notb = xor i32 %b, -1
+ %or1 = or i32 %a, %b
+ %or2 = or i32 %nota, %notb
+ %xor = xor i32 %or1, %or2
+ ret i32 %xor
+}
+
+; (a | b) ^ (~b | ~a) --> ~(a ^ b)
+define i32 @xor_to_xnor2(float %fa, float %fb) {
+; CHECK-LABEL: @xor_to_xnor2(
+; CHECK-NEXT: [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT: [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT: [[OR1:%.*]] = or i32 [[A]], [[B]]
+; CHECK-NEXT: [[OR2_DEMORGAN:%.*]] = and i32 [[B]], [[A]]
+; CHECK-NEXT: [[OR2:%.*]] = xor i32 [[OR2_DEMORGAN]], -1
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[OR1]], [[OR2]]
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %a = fptosi float %fa to i32
+ %b = fptosi float %fb to i32
+ %nota = xor i32 %a, -1
+ %notb = xor i32 %b, -1
+ %or1 = or i32 %a, %b
+ %or2 = or i32 %notb, %nota
+ %xor = xor i32 %or1, %or2
+ ret i32 %xor
+}
+
+; (~a | ~b) ^ (a | b) --> ~(a ^ b)
+define i32 @xor_to_xnor3(float %fa, float %fb) {
+; CHECK-LABEL: @xor_to_xnor3(
+; CHECK-NEXT: [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT: [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT: [[OR1_DEMORGAN:%.*]] = and i32 [[A]], [[B]]
+; CHECK-NEXT: [[OR1:%.*]] = xor i32 [[OR1_DEMORGAN]], -1
+; CHECK-NEXT: [[OR2:%.*]] = or i32 [[A]], [[B]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[OR2]], [[OR1]]
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %a = fptosi float %fa to i32
+ %b = fptosi float %fb to i32
+ %nota = xor i32 %a, -1
+ %notb = xor i32 %b, -1
+ %or1 = or i32 %nota, %notb
+ %or2 = or i32 %a, %b
+ %xor = xor i32 %or1, %or2
+ ret i32 %xor
+}
+
+; (~a | ~b) ^ (b | a) --> ~(a ^ b)
+define i32 @xor_to_xnor4(float %fa, float %fb) {
+; CHECK-LABEL: @xor_to_xnor4(
+; CHECK-NEXT: [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT: [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT: [[OR1_DEMORGAN:%.*]] = and i32 [[A]], [[B]]
+; CHECK-NEXT: [[OR1:%.*]] = xor i32 [[OR1_DEMORGAN]], -1
+; CHECK-NEXT: [[OR2:%.*]] = or i32 [[B]], [[A]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[OR2]], [[OR1]]
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %a = fptosi float %fa to i32
+ %b = fptosi float %fb to i32
+ %nota = xor i32 %a, -1
+ %notb = xor i32 %b, -1
+ %or1 = or i32 %nota, %notb
+ %or2 = or i32 %b, %a
+ %xor = xor i32 %or1, %or2
+ ret i32 %xor
+}
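The four xor_to_xnor tests all encode the identity (a | b) ^ (~a | ~b) == ~(a ^ b): on bits where a and b agree, both sides evaluate to 1; on bits where they differ, both sides evaluate to 0. A minimal exhaustive check of the identity in C (illustration only):

#include <assert.h>
#include <stdint.h>

int main(void) {
    for (unsigned a = 0; a < 256; ++a)
        for (unsigned b = 0; b < 256; ++b) {
            uint8_t lhs = (uint8_t)((a | b) ^ (~a | ~b));
            uint8_t rhs = (uint8_t)~(a ^ b);
            assert(lhs == rhs); /* holds for every 8-bit pair */
        }
    return 0;
}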
diff --git a/test/Transforms/InstCombine/clamp-to-minmax.ll b/test/Transforms/InstCombine/clamp-to-minmax.ll
new file mode 100644
index 000000000000..b8cab29d5937
--- /dev/null
+++ b/test/Transforms/InstCombine/clamp-to-minmax.ll
@@ -0,0 +1,500 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; (X < C1) ? C1 : MIN(X, C2)
+define float @clamp_float_fast_ordered_strict_maxmin(float %x) {
+;
+; CHECK-LABEL: @clamp_float_fast_ordered_strict_maxmin(
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp fast olt float [[X:%.*]], 2.550000e+02
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], float [[X]], float 2.550000e+02
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast olt float [[X]], 1.000000e+00
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp fast olt float %x, 255.0
+ %min = select i1 %cmp2, float %x, float 255.0
+ %cmp1 = fcmp fast olt float %x, 1.0
+ %r = select i1 %cmp1, float 1.0, float %min
+ ret float %r
+}
+
+; (X <= C1) ? C1 : MIN(X, C2)
+define float @clamp_float_fast_ordered_nonstrict_maxmin(float %x) {
+;
+; CHECK-LABEL: @clamp_float_fast_ordered_nonstrict_maxmin(
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp fast olt float [[X:%.*]], 2.550000e+02
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], float [[X]], float 2.550000e+02
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast ole float [[X]], 1.000000e+00
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp fast olt float %x, 255.0
+ %min = select i1 %cmp2, float %x, float 255.0
+ %cmp1 = fcmp fast ole float %x, 1.0
+ %r = select i1 %cmp1, float 1.0, float %min
+ ret float %r
+}
+
+; (X > C1) ? C1 : MAX(X, C2)
+define float @clamp_float_fast_ordered_strict_minmax(float %x) {
+;
+; CHECK-LABEL: @clamp_float_fast_ordered_strict_minmax(
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp fast ogt float [[X:%.*]], 1.000000e+00
+; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], float [[X]], float 1.000000e+00
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast ogt float [[X]], 2.550000e+02
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp fast ogt float %x, 1.0
+ %max = select i1 %cmp2, float %x, float 1.0
+ %cmp1 = fcmp fast ogt float %x, 255.0
+ %r = select i1 %cmp1, float 255.0, float %max
+ ret float %r
+}
+
+; (X >= C1) ? C1 : MAX(X, C2)
+define float @clamp_float_fast_ordered_nonstrict_minmax(float %x) {
+;
+; CHECK-LABEL: @clamp_float_fast_ordered_nonstrict_minmax(
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp fast ogt float [[X:%.*]], 1.000000e+00
+; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], float [[X]], float 1.000000e+00
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast oge float [[X]], 2.550000e+02
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp fast ogt float %x, 1.0
+ %max = select i1 %cmp2, float %x, float 1.0
+ %cmp1 = fcmp fast oge float %x, 255.0
+ %r = select i1 %cmp1, float 255.0, float %max
+ ret float %r
+}
+
+
+; The same for unordered
+
+; (X < C1) ? C1 : MIN(X, C2)
+define float @clamp_float_fast_unordered_strict_maxmin(float %x) {
+;
+; CHECK-LABEL: @clamp_float_fast_unordered_strict_maxmin(
+; CHECK-NEXT: [[CMP2_INV:%.*]] = fcmp fast oge float [[X:%.*]], 2.550000e+02
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2_INV]], float 2.550000e+02, float [[X]]
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast ult float [[X]], 1.000000e+00
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp fast ult float %x, 255.0
+ %min = select i1 %cmp2, float %x, float 255.0
+ %cmp1 = fcmp fast ult float %x, 1.0
+ %r = select i1 %cmp1, float 1.0, float %min
+ ret float %r
+}
+
+; (X <= C1) ? C1 : MIN(X, C2)
+define float @clamp_float_fast_unordered_nonstrict_maxmin(float %x) {
+;
+; CHECK-LABEL: @clamp_float_fast_unordered_nonstrict_maxmin(
+; CHECK-NEXT: [[CMP2_INV:%.*]] = fcmp fast oge float [[X:%.*]], 2.550000e+02
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2_INV]], float 2.550000e+02, float [[X]]
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast ule float [[X]], 1.000000e+00
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp fast ult float %x, 255.0
+ %min = select i1 %cmp2, float %x, float 255.0
+ %cmp1 = fcmp fast ule float %x, 1.0
+ %r = select i1 %cmp1, float 1.0, float %min
+ ret float %r
+}
+
+; (X > C1) ? C1 : MAX(X, C2)
+define float @clamp_float_fast_unordered_strict_minmax(float %x) {
+;
+; CHECK-LABEL: @clamp_float_fast_unordered_strict_minmax(
+; CHECK-NEXT: [[CMP2_INV:%.*]] = fcmp fast ole float [[X:%.*]], 1.000000e+00
+; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2_INV]], float 1.000000e+00, float [[X]]
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast ugt float [[X]], 2.550000e+02
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp fast ugt float %x, 1.0
+ %max = select i1 %cmp2, float %x, float 1.0
+ %cmp1 = fcmp fast ugt float %x, 255.0
+ %r = select i1 %cmp1, float 255.0, float %max
+ ret float %r
+}
+
+; (X >= C1) ? C1 : MAX(X, C2)
+define float @clamp_float_fast_unordered_nonstrict_minmax(float %x) {
+;
+; CHECK-LABEL: @clamp_float_fast_unordered_nonstrict_minmax(
+; CHECK-NEXT: [[CMP2_INV:%.*]] = fcmp fast ole float [[X:%.*]], 1.000000e+00
+; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2_INV]], float 1.000000e+00, float [[X]]
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast uge float [[X]], 2.550000e+02
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp fast ugt float %x, 1.0
+ %max = select i1 %cmp2, float %x, float 1.0
+ %cmp1 = fcmp fast uge float %x, 255.0
+ %r = select i1 %cmp1, float 255.0, float %max
+ ret float %r
+}
+
+; Some more checks with fast
+
+; (X > 1.0) ? min(x, 255.0) : 1.0
+define float @clamp_test_1(float %x) {
+; CHECK-LABEL: @clamp_test_1(
+; CHECK-NEXT: [[INNER_CMP_INV:%.*]] = fcmp fast oge float [[X:%.*]], 2.550000e+02
+; CHECK-NEXT: [[INNER_SEL:%.*]] = select i1 [[INNER_CMP_INV]], float 2.550000e+02, float [[X]]
+; CHECK-NEXT: [[OUTER_CMP:%.*]] = fcmp fast ugt float [[X]], 1.000000e+00
+; CHECK-NEXT: [[R:%.*]] = select i1 [[OUTER_CMP]], float [[INNER_SEL]], float 1.000000e+00
+; CHECK-NEXT: ret float [[R]]
+;
+ %inner_cmp = fcmp fast ult float %x, 255.0
+ %inner_sel = select i1 %inner_cmp, float %x, float 255.0
+ %outer_cmp = fcmp fast ugt float %x, 1.0
+ %r = select i1 %outer_cmp, float %inner_sel, float 1.0
+ ret float %r
+}
+
+; And something negative
+
+; Like @clamp_test_1 but HighConst < LowConst
+define float @clamp_negative_wrong_const(float %x) {
+; CHECK-LABEL: @clamp_negative_wrong_const(
+; CHECK-NEXT: [[INNER_CMP_INV:%.*]] = fcmp fast oge float [[X:%.*]], 2.550000e+02
+; CHECK-NEXT: [[INNER_SEL:%.*]] = select i1 [[INNER_CMP_INV]], float 2.550000e+02, float [[X]]
+; CHECK-NEXT: [[OUTER_CMP:%.*]] = fcmp fast ugt float [[X]], 5.120000e+02
+; CHECK-NEXT: [[R:%.*]] = select i1 [[OUTER_CMP]], float [[INNER_SEL]], float 5.120000e+02
+; CHECK-NEXT: ret float [[R]]
+;
+ %inner_cmp = fcmp fast ult float %x, 255.0
+ %inner_sel = select i1 %inner_cmp, float %x, float 255.0
+ %outer_cmp = fcmp fast ugt float %x, 512.0
+ %r = select i1 %outer_cmp, float %inner_sel, float 512.0
+ ret float %r
+}
+
+; Like @clamp_test_1 but both are min
+define float @clamp_negative_same_op(float %x) {
+; CHECK-LABEL: @clamp_negative_same_op(
+; CHECK-NEXT: [[INNER_CMP_INV:%.*]] = fcmp fast oge float [[X:%.*]], 2.550000e+02
+; CHECK-NEXT: [[INNER_SEL:%.*]] = select i1 [[INNER_CMP_INV]], float 2.550000e+02, float [[X]]
+; CHECK-NEXT: [[OUTER_CMP:%.*]] = fcmp fast ult float [[X]], 1.000000e+00
+; CHECK-NEXT: [[R:%.*]] = select i1 [[OUTER_CMP]], float [[INNER_SEL]], float 1.000000e+00
+; CHECK-NEXT: ret float [[R]]
+;
+ %inner_cmp = fcmp fast ult float %x, 255.0
+ %inner_sel = select i1 %inner_cmp, float %x, float 255.0
+ %outer_cmp = fcmp fast ult float %x, 1.0
+ %r = select i1 %outer_cmp, float %inner_sel, float 1.0
+ ret float %r
+}
+
+
+; And now without fast.
+
+; First, check that we don't do bad things in the presence of signed zeros
+define float @clamp_float_with_zero1(float %x) {
+; CHECK-LABEL: @clamp_float_with_zero1(
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp fast olt float [[X:%.*]], 2.550000e+02
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], float [[X]], float 2.550000e+02
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ole float [[X]], 0.000000e+00
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 0.000000e+00, float [[MIN]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp fast olt float %x, 255.0
+ %min = select i1 %cmp2, float %x, float 255.0
+ %cmp1 = fcmp ole float %x, 0.0
+ %r = select i1 %cmp1, float 0.0, float %min
+ ret float %r
+}
+
+define float @clamp_float_with_zero2(float %x) {
+; CHECK-LABEL: @clamp_float_with_zero2(
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp fast olt float [[X:%.*]], 2.550000e+02
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], float [[X]], float 2.550000e+02
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[X]], 0.000000e+00
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 0.000000e+00, float [[MIN]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp fast olt float %x, 255.0
+ %min = select i1 %cmp2, float %x, float 255.0
+ %cmp1 = fcmp olt float %x, 0.0
+ %r = select i1 %cmp1, float 0.0, float %min
+ ret float %r
+}
+
+; Also, here we care more about the ordering of the inner min/max, so
+; there are twice as many cases.
+; TODO: that is not implemented yet, so these checks are for the
+; future. This means that checks below can just check that
+; "fcmp.*%x" happens twice for each label.
+
+; (X < C1) ? C1 : MIN(X, C2)
+define float @clamp_float_ordered_strict_maxmin1(float %x) {
+;
+; CHECK-LABEL: @clamp_float_ordered_strict_maxmin1(
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp olt float [[X:%.*]], 2.550000e+02
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], float [[X]], float 2.550000e+02
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[X]], 1.000000e+00
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp olt float %x, 255.0 ; X is NaN => false
+ %min = select i1 %cmp2, float %x, float 255.0 ; 255.0
+ %cmp1 = fcmp olt float %x, 1.0 ; false
+ %r = select i1 %cmp1, float 1.0, float %min ; min (255.0)
+ ret float %r
+}
+
+define float @clamp_float_ordered_strict_maxmin2(float %x) {
+;
+; CHECK-LABEL: @clamp_float_ordered_strict_maxmin2(
+; CHECK-NEXT: [[CMP2_INV:%.*]] = fcmp oge float [[X:%.*]], 2.550000e+02
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2_INV]], float 2.550000e+02, float [[X]]
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[X]], 1.000000e+00
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp ult float %x, 255.0 ; X is NaN => true
+ %min = select i1 %cmp2, float %x, float 255.0 ; NaN
+ %cmp1 = fcmp olt float %x, 1.0 ; false
+ %r = select i1 %cmp1, float 1.0, float %min ; min (NaN)
+ ret float %r
+}
+
+; (X <= C1) ? C1 : MIN(X, C2)
+define float @clamp_float_ordered_nonstrict_maxmin1(float %x) {
+;
+; CHECK-LABEL: @clamp_float_ordered_nonstrict_maxmin1(
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp olt float [[X:%.*]], 2.550000e+02
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], float [[X]], float 2.550000e+02
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ole float [[X]], 1.000000e+00
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp olt float %x, 255.0 ; X is NaN => false
+ %min = select i1 %cmp2, float %x, float 255.0 ; 255.0
+ %cmp1 = fcmp ole float %x, 1.0 ; false
+ %r = select i1 %cmp1, float 1.0, float %min ; min (255.0)
+ ret float %r
+}
+
+define float @clamp_float_ordered_nonstrict_maxmin2(float %x) {
+;
+; CHECK-LABEL: @clamp_float_ordered_nonstrict_maxmin2(
+; CHECK-NEXT: [[CMP2_INV:%.*]] = fcmp oge float [[X:%.*]], 2.550000e+02
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2_INV]], float 2.550000e+02, float [[X]]
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ole float [[X]], 1.000000e+00
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp ult float %x, 255.0 ; x is NaN => true
+ %min = select i1 %cmp2, float %x, float 255.0 ; NaN
+ %cmp1 = fcmp ole float %x, 1.0 ; false
+ %r = select i1 %cmp1, float 1.0, float %min ; min (NaN)
+ ret float %r
+}
+
+; (X > C1) ? C1 : MAX(X, C2)
+define float @clamp_float_ordered_strict_minmax1(float %x) {
+;
+; CHECK-LABEL: @clamp_float_ordered_strict_minmax1(
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[X:%.*]], 1.000000e+00
+; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], float [[X]], float 1.000000e+00
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt float [[X]], 2.550000e+02
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp ogt float %x, 1.0 ; x is NaN => false
+ %max = select i1 %cmp2, float %x, float 1.0 ; 1.0
+ %cmp1 = fcmp ogt float %x, 255.0 ; false
+ %r = select i1 %cmp1, float 255.0, float %max ; max (1.0)
+ ret float %r
+}
+
+define float @clamp_float_ordered_strict_minmax2(float %x) {
+;
+; CHECK-LABEL: @clamp_float_ordered_strict_minmax2(
+; CHECK-NEXT: [[CMP2_INV:%.*]] = fcmp ole float [[X:%.*]], 1.000000e+00
+; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2_INV]], float 1.000000e+00, float [[X]]
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt float [[X]], 2.550000e+02
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp ugt float %x, 1.0 ; x is NaN => true
+ %max = select i1 %cmp2, float %x, float 1.0 ; NaN
+ %cmp1 = fcmp ogt float %x, 255.0 ; false
+ %r = select i1 %cmp1, float 255.0, float %max ; max (NaN)
+ ret float %r
+}
+
+; (X >= C1) ? C1 : MAX(X, C2)
+define float @clamp_float_ordered_nonstrict_minmax1(float %x) {
+;
+; CHECK-LABEL: @clamp_float_ordered_nonstrict_minmax1(
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[X:%.*]], 1.000000e+00
+; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], float [[X]], float 1.000000e+00
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp oge float [[X]], 2.550000e+02
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp ogt float %x, 1.0 ; x is NaN => false
+ %max = select i1 %cmp2, float %x, float 1.0 ; 1.0
+ %cmp1 = fcmp oge float %x, 255.0 ; false
+ %r = select i1 %cmp1, float 255.0, float %max ; max (1.0)
+ ret float %r
+}
+
+define float @clamp_float_ordered_nonstrict_minmax2(float %x) {
+;
+; CHECK-LABEL: @clamp_float_ordered_nonstrict_minmax2(
+; CHECK-NEXT: [[CMP2_INV:%.*]] = fcmp ole float [[X:%.*]], 1.000000e+00
+; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2_INV]], float 1.000000e+00, float [[X]]
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp oge float [[X]], 2.550000e+02
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp ugt float %x, 1.0 ; x is NaN => true
+ %max = select i1 %cmp2, float %x, float 1.0 ; NaN
+ %cmp1 = fcmp oge float %x, 255.0 ; false
+ %r = select i1 %cmp1, float 255.0, float %max ; max (NaN)
+ ret float %r
+}
+
+
+; The same for unordered
+
+; (X < C1) ? C1 : MIN(X, C2)
+define float @clamp_float_unordered_strict_maxmin1(float %x) {
+;
+; CHECK-LABEL: @clamp_float_unordered_strict_maxmin1(
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp olt float [[X:%.*]], 2.550000e+02
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], float [[X]], float 2.550000e+02
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ult float [[X]], 1.000000e+00
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp olt float %x, 255.0 ; x is NaN => false
+ %min = select i1 %cmp2, float %x, float 255.0 ; 255.0
+ %cmp1 = fcmp ult float %x, 1.0 ; true
+ %r = select i1 %cmp1, float 1.0, float %min ; 1.0
+ ret float %r
+}
+
+define float @clamp_float_unordered_strict_maxmin2(float %x) {
+;
+; CHECK-LABEL: @clamp_float_unordered_strict_maxmin2(
+; CHECK-NEXT: [[CMP2_INV:%.*]] = fcmp oge float [[X:%.*]], 2.550000e+02
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2_INV]], float 2.550000e+02, float [[X]]
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ult float [[X]], 1.000000e+00
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp ult float %x, 255.0 ; x is NaN => true
+ %min = select i1 %cmp2, float %x, float 255.0 ; NaN
+ %cmp1 = fcmp ult float %x, 1.0 ; true
+ %r = select i1 %cmp1, float 1.0, float %min ; 1.0
+ ret float %r
+}
+
+; (X <= C1) ? C1 : MIN(X, C2)
+define float @clamp_float_unordered_nonstrict_maxmin1(float %x) {
+;
+; CHECK-LABEL: @clamp_float_unordered_nonstrict_maxmin1(
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp olt float [[X:%.*]], 2.550000e+02
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], float [[X]], float 2.550000e+02
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ule float [[X]], 1.000000e+00
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp olt float %x, 255.0 ; x is NaN => false
+ %min = select i1 %cmp2, float %x, float 255.0 ; 255.0
+ %cmp1 = fcmp ule float %x, 1.0 ; true
+ %r = select i1 %cmp1, float 1.0, float %min ; 1.0
+ ret float %r
+}
+
+define float @clamp_float_unordered_nonstrict_maxmin2(float %x) {
+;
+; CHECK-LABEL: @clamp_float_unordered_nonstrict_maxmin2(
+; CHECK-NEXT: [[CMP2_INV:%.*]] = fcmp oge float [[X:%.*]], 2.550000e+02
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2_INV]], float 2.550000e+02, float [[X]]
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ule float [[X]], 1.000000e+00
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp ult float %x, 255.0 ; x is NaN => true
+ %min = select i1 %cmp2, float %x, float 255.0 ; NaN
+ %cmp1 = fcmp ule float %x, 1.0 ; true
+ %r = select i1 %cmp1, float 1.0, float %min ; 1.0
+ ret float %r
+}
+
+; (X > C1) ? C1 : MAX(X, C2)
+define float @clamp_float_unordered_strict_minmax1(float %x) {
+;
+; CHECK-LABEL: @clamp_float_unordered_strict_minmax1(
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[X:%.*]], 1.000000e+00
+; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], float [[X]], float 1.000000e+00
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ugt float [[X]], 2.550000e+02
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp ogt float %x, 1.0 ; x is NaN => false
+ %max = select i1 %cmp2, float %x, float 1.0 ; 1.0
+ %cmp1 = fcmp ugt float %x, 255.0 ; true
+ %r = select i1 %cmp1, float 255.0, float %max ; 255.0
+ ret float %r
+}
+
+define float @clamp_float_unordered_strict_minmax2(float %x) {
+;
+; CHECK-LABEL: @clamp_float_unordered_strict_minmax2(
+; CHECK-NEXT: [[CMP2_INV:%.*]] = fcmp ole float [[X:%.*]], 1.000000e+00
+; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2_INV]], float 1.000000e+00, float [[X]]
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ugt float [[X]], 2.550000e+02
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp ugt float %x, 1.0 ; x is NaN => true
+ %max = select i1 %cmp2, float %x, float 1.0 ; NaN
+ %cmp1 = fcmp ugt float %x, 255.0 ; true
+ %r = select i1 %cmp1, float 255.0, float %max ; 255.0
+ ret float %r
+}
+
+; (X >= C1) ? C1 : MAX(X, C2)
+define float @clamp_float_unordered_nonstrict_minmax1(float %x) {
+;
+; CHECK-LABEL: @clamp_float_unordered_nonstrict_minmax1(
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[X:%.*]], 1.000000e+00
+; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], float [[X]], float 1.000000e+00
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp uge float [[X]], 2.550000e+02
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp ogt float %x, 1.0 ; x is NaN => false
+ %max = select i1 %cmp2, float %x, float 1.0 ; 1.0
+ %cmp1 = fcmp uge float %x, 255.0 ; true
+ %r = select i1 %cmp1, float 255.0, float %max ; 255.0
+ ret float %r
+}
+
+define float @clamp_float_unordered_nonstrict_minmax2(float %x) {
+;
+; CHECK-LABEL: @clamp_float_unordered_nonstrict_minmax2(
+; CHECK-NEXT: [[CMP2_INV:%.*]] = fcmp ole float [[X:%.*]], 1.000000e+00
+; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2_INV]], float 1.000000e+00, float [[X]]
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp uge float [[X]], 2.550000e+02
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp ugt float %x, 1.0 ; x is NaN => true
+ %max = select i1 %cmp2, float %x, float 1.0 ; NaN
+ %cmp1 = fcmp uge float %x, 255.0 ; true
+ %r = select i1 %cmp1, float 255.0, float %max ; 255.0
+ ret float %r
+}
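The ordered/unordered split throughout this file matters because fcmp olt yields false when either operand is NaN while fcmp ult yields true, so a NaN input steers the two selects into different arms depending on the predicates. A C sketch of the all-ordered variant (hypothetical helper; C's < is an ordered comparison, so this mirrors the olt/olt case):

#include <math.h>
#include <stdio.h>

static float clamp_ordered(float x) {
    float min = (x < 255.0f) ? x : 255.0f; /* NaN: olt is false -> 255.0f */
    return (x < 1.0f) ? 1.0f : min;        /* NaN: olt is false -> min    */
}

int main(void) {
    printf("%f\n", clamp_ordered(NAN)); /* prints 255.000000, not nan */
    return 0;
}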
diff --git a/test/Transforms/InstCombine/extractinsert-tbaa.ll b/test/Transforms/InstCombine/extractinsert-tbaa.ll
new file mode 100644
index 000000000000..b2a3a1a1bf9b
--- /dev/null
+++ b/test/Transforms/InstCombine/extractinsert-tbaa.ll
@@ -0,0 +1,45 @@
+; RUN: opt -S -instcombine %s -o - | FileCheck %s
+
+%Complex = type { double, double }
+
+; Check that instcombine preserves TBAA when narrowing loads
+define double @teststructextract(%Complex *%val) {
+; CHECK: load double, {{.*}}, !tbaa
+; CHECK-NOT: load %Complex
+ %loaded = load %Complex, %Complex *%val, !tbaa !1
+ %real = extractvalue %Complex %loaded, 0
+ ret double %real
+}
+
+define double @testarrayextract([2 x double] *%val) {
+; CHECK: load double, {{.*}}, !tbaa
+; CHECK-NOT: load [2 x double]
+ %loaded = load [2 x double], [2 x double] *%val, !tbaa !1
+ %real = extractvalue [2 x double] %loaded, 0
+ ret double %real
+}
+
+; Check that instcombine preserves TBAA when breaking up stores
+define void @teststructinsert(%Complex *%loc, double %a, double %b) {
+; CHECK: store double %a, {{.*}}, !tbaa
+; CHECK: store double %b, {{.*}}, !tbaa
+; CHECK-NOT: store %Complex
+ %inserted = insertvalue %Complex undef, double %a, 0
+ %inserted2 = insertvalue %Complex %inserted, double %b, 1
+ store %Complex %inserted2, %Complex *%loc, !tbaa !1
+ ret void
+}
+
+define void @testarrayinsert([2 x double] *%loc, double %a, double %b) {
+; CHECK: store double %a, {{.*}}, !tbaa
+; CHECK: store double %b, {{.*}}, !tbaa
+; CHECK-NOT: store [2 x double]
+ %inserted = insertvalue [2 x double] undef, double %a, 0
+ %inserted2 = insertvalue [2 x double] %inserted, double %b, 1
+ store [2 x double] %inserted2, [2 x double] *%loc, !tbaa !1
+ ret void
+}
+
+!0 = !{!"tbaa_root"}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"Complex", !0, i64 0}
diff --git a/test/Transforms/InstCombine/ffs-1.ll b/test/Transforms/InstCombine/ffs-1.ll
index af4ee85216ef..5dcdae108445 100644
--- a/test/Transforms/InstCombine/ffs-1.ll
+++ b/test/Transforms/InstCombine/ffs-1.ll
@@ -150,8 +150,8 @@ define i32 @test_simplify13(i32 %x) {
; ALL-LABEL: @test_simplify13(
; ALL-NEXT: [[CTTZ:%.*]] = call i32 @llvm.cttz.i32(i32 %x, i1 true)
; ALL-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[CTTZ]], 1
-; ALL-NEXT: [[TMP2:%.*]] = icmp ne i32 %x, 0
-; ALL-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 0
+; ALL-NEXT: [[TMP2:%.*]] = icmp eq i32 %x, 0
+; ALL-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 0, i32 [[TMP1]]
; ALL-NEXT: ret i32 [[TMP3]]
;
%ret = call i32 @ffs(i32 %x)
@@ -166,8 +166,8 @@ define i32 @test_simplify14(i32 %x) {
; TARGET-LABEL: @test_simplify14(
; TARGET-NEXT: [[CTTZ:%.*]] = call i32 @llvm.cttz.i32(i32 %x, i1 true)
; TARGET-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[CTTZ]], 1
-; TARGET-NEXT: [[TMP2:%.*]] = icmp ne i32 %x, 0
-; TARGET-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 0
+; TARGET-NEXT: [[TMP2:%.*]] = icmp eq i32 %x, 0
+; TARGET-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 0, i32 [[TMP1]]
; TARGET-NEXT: ret i32 [[TMP3]]
;
%ret = call i32 @ffsl(i32 %x)
@@ -183,8 +183,8 @@ define i32 @test_simplify15(i64 %x) {
; TARGET-NEXT: [[CTTZ:%.*]] = call i64 @llvm.cttz.i64(i64 %x, i1 true)
; TARGET-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[CTTZ]], 1
; TARGET-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
-; TARGET-NEXT: [[TMP3:%.*]] = icmp ne i64 %x, 0
-; TARGET-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 0
+; TARGET-NEXT: [[TMP3:%.*]] = icmp eq i64 %x, 0
+; TARGET-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
; TARGET-NEXT: ret i32 [[TMP4]]
;
%ret = call i32 @ffsll(i64 %x)
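All three simplifications rewrite ffs-family calls into the branchless cttz form checked above: x == 0 ? 0 : cttz(x) + 1, with an extra truncation for the 64-bit variant. A C sketch of the same expansion (hypothetical helper name):

#include <stdint.h>

static int ffs_expanded(uint32_t x) {
    /* The zero check guards __builtin_ctz, which is undefined for 0,
       matching the i1 true (zero-is-undef) flag on llvm.cttz above. */
    return x == 0 ? 0 : __builtin_ctz(x) + 1;
}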
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index ed570da73c9e..127fde10e9f7 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -762,6 +762,22 @@ define i1 @test52(i32 %x1) {
ret i1 %A
}
+define i1 @test52b(i128 %x1) {
+; CHECK-LABEL: @test52b(
+; CHECK-NEXT: [[TMP1:%.*]] = and i128 [[X1:%.*]], 16711935
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i128 [[TMP1]], 4980863
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %conv = and i128 %x1, 255
+ %cmp = icmp eq i128 %conv, 127
+ %tmp2 = lshr i128 %x1, 16
+ %tmp3 = trunc i128 %tmp2 to i8
+ %cmp15 = icmp eq i8 %tmp3, 76
+
+ %A = and i1 %cmp, %cmp15
+ ret i1 %A
+}
+
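The constants in @test52b fold the two byte tests into one masked compare: 16711935 is 0x00FF00FF, which keeps byte 0 and byte 2 of %x1, and 4980863 is 0x004C007F, i.e. (76 << 16) | 127. A trivial C check of that arithmetic (illustration only):

#include <assert.h>

int main(void) {
    assert(0x00FF00FFu == 16711935u);         /* mask: bytes 0 and 2 */
    assert(((76u << 16) | 127u) == 4980863u); /* expected masked value */
    return 0;
}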
; PR9838
define i1 @test53(i32 %a, i32 %b) {
; CHECK-LABEL: @test53(
@@ -2423,8 +2439,8 @@ define i32 @f7(i32 %a, i32 %b) {
; CHECK-LABEL: @f7(
; CHECK-NEXT: [[CMP_UNSHIFTED:%.*]] = xor i32 %a, %b
; CHECK-NEXT: [[CMP_MASK:%.*]] = and i32 [[CMP_UNSHIFTED]], 511
-; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[CMP:%.*]].mask, 0
-; CHECK-NEXT: [[S:%.*]] = select i1 [[CMP]], i32 10000, i32 0
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CMP_MASK]], 0
+; CHECK-NEXT: [[S:%.*]] = select i1 [[CMP]], i32 0, i32 10000
; CHECK-NEXT: ret i32 [[S]]
;
%sext = shl i32 %a, 23
@@ -2959,3 +2975,63 @@ define <2 x i1> @eq_mul_constants_with_tz_splat(<2 x i32> %x, <2 x i32> %y) {
ret <2 x i1> %C
}
+declare i32 @llvm.bswap.i32(i32)
+
+define i1 @bswap_ne(i32 %x, i32 %y) {
+; CHECK-LABEL: @bswap_ne(
+; CHECK-NEXT: [[SWAPX:%.*]] = call i32 @llvm.bswap.i32(i32 %x)
+; CHECK-NEXT: [[SWAPY:%.*]] = call i32 @llvm.bswap.i32(i32 %y)
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[SWAPX]], [[SWAPY]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %swapx = call i32 @llvm.bswap.i32(i32 %x)
+ %swapy = call i32 @llvm.bswap.i32(i32 %y)
+ %cmp = icmp ne i32 %swapx, %swapy
+ ret i1 %cmp
+}
+
+declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
+
+define <8 x i1> @bswap_vec_eq(<8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: @bswap_vec_eq(
+; CHECK-NEXT: [[SWAPX:%.*]] = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %x)
+; CHECK-NEXT: [[SWAPY:%.*]] = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %y)
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <8 x i16> [[SWAPX]], [[SWAPY]]
+; CHECK-NEXT: ret <8 x i1> [[CMP]]
+;
+ %swapx = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %x)
+ %swapy = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %y)
+ %cmp = icmp eq <8 x i16> %swapx, %swapy
+ ret <8 x i1> %cmp
+}
+
+declare i64 @llvm.bitreverse.i64(i64)
+
+define i1 @bitreverse_eq(i64 %x, i64 %y) {
+; CHECK-LABEL: @bitreverse_eq(
+; CHECK-NEXT: [[REVX:%.*]] = call i64 @llvm.bitreverse.i64(i64 %x)
+; CHECK-NEXT: [[REVY:%.*]] = call i64 @llvm.bitreverse.i64(i64 %y)
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[REVX]], [[REVY]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %revx = call i64 @llvm.bitreverse.i64(i64 %x)
+ %revy = call i64 @llvm.bitreverse.i64(i64 %y)
+ %cmp = icmp eq i64 %revx, %revy
+ ret i1 %cmp
+}
+
+declare <8 x i16> @llvm.bitreverse.v8i16(<8 x i16>)
+
+define <8 x i1> @bitreverse_vec_ne(<8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: @bitreverse_vec_ne(
+; CHECK-NEXT: [[REVX:%.*]] = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %x)
+; CHECK-NEXT: [[REVY:%.*]] = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %y)
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <8 x i16> [[REVX]], [[REVY]]
+; CHECK-NEXT: ret <8 x i1> [[CMP]]
+;
+ %revx = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %x)
+ %revy = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %y)
+ %cmp = icmp ne <8 x i16> %revx, %revy
+ ret <8 x i1> %cmp
+}
+
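bswap and bitreverse are both bijective permutations of the input bits, so comparing the transformed values for equality is equivalent to comparing the originals; the checks above record that, at this revision, instcombine still emits the compare on the transformed values. A quick C spot check of the underlying equivalence (illustration only):

#include <assert.h>
#include <stdint.h>

int main(void) {
    uint32_t x = 0x12345678u, y = 0x12345678u, z = 0x12345679u;
    assert((__builtin_bswap32(x) == __builtin_bswap32(y)) == (x == y));
    assert((__builtin_bswap32(x) == __builtin_bswap32(z)) == (x == z));
    return 0;
}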
diff --git a/test/Transforms/InstCombine/logical-select.ll b/test/Transforms/InstCombine/logical-select.ll
index 6c00dec60ed6..4c0223aa6dd1 100644
--- a/test/Transforms/InstCombine/logical-select.ll
+++ b/test/Transforms/InstCombine/logical-select.ll
@@ -62,19 +62,15 @@ define i32 @poo(i32 %a, i32 %b, i32 %c, i32 %d) {
ret i32 %t3
}
-; TODO: For the next 4 tests, are there potential canonicalizations and/or folds for these
-; in InstCombine? Independent of that, tests like this that may not show any transforms
-; still have value because they can help identify conflicting canonicalization rules that
-; lead to infinite looping.
-
; PR32791 - https://bugs.llvm.org//show_bug.cgi?id=32791
-; Fold two selects with inverted predicates and zero operands.
+; The 2nd compare/select are canonicalized, so CSE and another round of instcombine or some other pass will fold this.
+
define i32 @fold_inverted_icmp_preds(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @fold_inverted_icmp_preds(
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 %a, %b
; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP1]], i32 %c, i32 0
-; CHECK-NEXT: [[CMP2:%.*]] = icmp sge i32 %a, %b
-; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CMP2]], i32 %d, i32 0
+; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 %a, %b
+; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CMP2]], i32 0, i32 %d
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SEL1]], [[SEL2]]
; CHECK-NEXT: ret i32 [[OR]]
;
@@ -86,12 +82,14 @@ define i32 @fold_inverted_icmp_preds(i32 %a, i32 %b, i32 %c, i32 %d) {
ret i32 %or
}
+; The 2nd compare/select are canonicalized, so CSE and another round of instcombine or some other pass will fold this.
+
define i32 @fold_inverted_icmp_preds_reverse(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @fold_inverted_icmp_preds_reverse(
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 %a, %b
; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP1]], i32 0, i32 %c
-; CHECK-NEXT: [[CMP2:%.*]] = icmp sge i32 %a, %b
-; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CMP2]], i32 0, i32 %d
+; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 %a, %b
+; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CMP2]], i32 %d, i32 0
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SEL1]], [[SEL2]]
; CHECK-NEXT: ret i32 [[OR]]
;
@@ -103,6 +101,8 @@ define i32 @fold_inverted_icmp_preds_reverse(i32 %a, i32 %b, i32 %c, i32 %d) {
ret i32 %or
}
+; TODO: Should fcmp have the same sort of predicate canonicalization as icmp?
+
define i32 @fold_inverted_fcmp_preds(float %a, float %b, i32 %c, i32 %d) {
; CHECK-LABEL: @fold_inverted_fcmp_preds(
; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float %a, %b
@@ -120,10 +120,12 @@ define i32 @fold_inverted_fcmp_preds(float %a, float %b, i32 %c, i32 %d) {
ret i32 %or
}
+; The 2nd compare/select are canonicalized, so CSE and another round of instcombine or some other pass will fold this.
+
define <2 x i32> @fold_inverted_icmp_vector_preds(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32> %d) {
; CHECK-LABEL: @fold_inverted_icmp_vector_preds(
-; CHECK-NEXT: [[CMP1:%.*]] = icmp ne <2 x i32> %a, %b
-; CHECK-NEXT: [[SEL1:%.*]] = select <2 x i1> [[CMP1]], <2 x i32> %c, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq <2 x i32> %a, %b
+; CHECK-NEXT: [[SEL1:%.*]] = select <2 x i1> [[CMP1]], <2 x i32> zeroinitializer, <2 x i32> %c
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq <2 x i32> %a, %b
; CHECK-NEXT: [[SEL2:%.*]] = select <2 x i1> [[CMP2]], <2 x i32> %d, <2 x i32> zeroinitializer
; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[SEL1]], [[SEL2]]
diff --git a/test/Transforms/InstCombine/max-of-nots.ll b/test/Transforms/InstCombine/max-of-nots.ll
index 519f1c6a90b0..0302c9ec6d79 100644
--- a/test/Transforms/InstCombine/max-of-nots.ll
+++ b/test/Transforms/InstCombine/max-of-nots.ll
@@ -93,14 +93,15 @@ define i32 @max_of_nots(i32 %x, i32 %y) {
; negative test case (i.e. can not simplify) : ABS(MIN(NOT x,y))
define i32 @abs_of_min_of_not(i32 %x, i32 %y) {
; CHECK-LABEL: @abs_of_min_of_not(
-; CHECK-NEXT: xor
-; CHECK-NEXT: add
-; CHECK-NEXT: icmp sge
-; CHECK-NEXT: select
-; CHECK-NEXT: icmp sgt
-; CHECK-NEXT: sub
-; CHECK-NEXT: select
-; CHECK-NEXT: ret
+; CHECK-NEXT: [[XORD:%.*]] = xor i32 %x, -1
+; CHECK-NEXT: [[YADD:%.*]] = add i32 %y, 2
+; CHECK-NEXT: [[COND_I:%.*]] = icmp slt i32 [[YADD]], [[XORD]]
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[COND_I]], i32 [[YADD]], i32 [[XORD]]
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[MIN]], -1
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 0, [[MIN]]
+; CHECK-NEXT: [[ABS:%.*]] = select i1 [[CMP2]], i32 [[MIN]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[ABS]]
+;
%xord = xor i32 %x, -1
%yadd = add i32 %y, 2
diff --git a/test/Transforms/InstCombine/memmove.ll b/test/Transforms/InstCombine/memmove.ll
index 96f230eece06..7bc6d9a675d1 100644
--- a/test/Transforms/InstCombine/memmove.ll
+++ b/test/Transforms/InstCombine/memmove.ll
@@ -1,6 +1,6 @@
; This test makes sure that memmove instructions are properly eliminated.
;
-; RUN: opt < %s -instcombine -S | not grep "call void @llvm.memmove"
+; RUN: opt < %s -instcombine -S | FileCheck %s
@S = internal constant [33 x i8] c"panic: restorelist inconsistency\00" ; <[33 x i8]*> [#uses=1]
@h = constant [2 x i8] c"h\00" ; <[2 x i8]*> [#uses=1]
@@ -8,32 +8,46 @@
@hello_u = constant [8 x i8] c"hello_u\00" ; <[8 x i8]*> [#uses=1]
define void @test1(i8* %A, i8* %B, i32 %N) {
- call void @llvm.memmove.p0i8.p0i8.i32(i8* %A, i8* %B, i32 0, i32 1, i1 false)
- ret void
+ ;; CHECK-LABEL: test1
+ ;; CHECK-NEXT: ret void
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* %A, i8* %B, i32 0, i32 1, i1 false)
+ ret void
}
define void @test2(i8* %A, i32 %N) {
- ;; dest can't alias source since we can't write to source!
- call void @llvm.memmove.p0i8.p0i8.i32(i8* %A, i8* getelementptr inbounds ([33 x i8], [33 x i8]* @S, i32 0, i32 0), i32 %N, i32 1, i1 false)
- ret void
+ ;; dest can't alias source since we can't write to source!
+ ;; CHECK-LABEL: test2
+ ;; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %A, i8* getelementptr inbounds ([33 x i8], [33 x i8]* @S, i{{32|64}} 0, i{{32|64}} 0), i32 %N, i32 1, i1 false)
+ ;; CHECK-NEXT: ret void
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* %A, i8* getelementptr inbounds ([33 x i8], [33 x i8]* @S, i32 0, i32 0), i32 %N, i32 1, i1 false)
+ ret void
}
-define i32 @test3() {
- %h_p = getelementptr [2 x i8], [2 x i8]* @h, i32 0, i32 0 ; <i8*> [#uses=1]
- %hel_p = getelementptr [4 x i8], [4 x i8]* @hel, i32 0, i32 0 ; <i8*> [#uses=1]
- %hello_u_p = getelementptr [8 x i8], [8 x i8]* @hello_u, i32 0, i32 0 ; <i8*> [#uses=1]
- %target = alloca [1024 x i8] ; <[1024 x i8]*> [#uses=1]
- %target_p = getelementptr [1024 x i8], [1024 x i8]* %target, i32 0, i32 0 ; <i8*> [#uses=3]
- call void @llvm.memmove.p0i8.p0i8.i32(i8* %target_p, i8* %h_p, i32 2, i32 2, i1 false)
- call void @llvm.memmove.p0i8.p0i8.i32(i8* %target_p, i8* %hel_p, i32 4, i32 4, i1 false)
- call void @llvm.memmove.p0i8.p0i8.i32(i8* %target_p, i8* %hello_u_p, i32 8, i32 8, i1 false)
- ret i32 0
+define i32 @test3([1024 x i8]* %target) { ; arg: <[1024 x i8]*> [#uses=1]
+ ;; CHECK-LABEL: test3
+ ;; CHECK-NEXT: [[P1:%[^\s]+]] = bitcast [1024 x i8]* %target to i16*
+ ;; CHECK-NEXT: store i16 104, i16* [[P1]], align 2
+ ;; CHECK-NEXT: [[P2:%[^\s]+]] = bitcast [1024 x i8]* %target to i32*
+ ;; CHECK-NEXT: store i32 7103848, i32* [[P2]], align 4
+ ;; CHECK-NEXT: [[P3:%[^\s]+]] = bitcast [1024 x i8]* %target to i64*
+ ;; CHECK-NEXT: store i64 33037504440198504, i64* [[P3]], align 8
+ ;; CHECK-NEXT: ret i32 0
+ %h_p = getelementptr [2 x i8], [2 x i8]* @h, i32 0, i32 0 ; <i8*> [#uses=1]
+ %hel_p = getelementptr [4 x i8], [4 x i8]* @hel, i32 0, i32 0 ; <i8*> [#uses=1]
+ %hello_u_p = getelementptr [8 x i8], [8 x i8]* @hello_u, i32 0, i32 0 ; <i8*> [#uses=1]
+ %target_p = getelementptr [1024 x i8], [1024 x i8]* %target, i32 0, i32 0 ; <i8*> [#uses=3]
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* %target_p, i8* %h_p, i32 2, i32 2, i1 false)
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* %target_p, i8* %hel_p, i32 4, i32 4, i1 false)
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* %target_p, i8* %hello_u_p, i32 8, i32 8, i1 false)
+ ret i32 0
}
; PR2370
define void @test4(i8* %a) {
+ ;; CHECK-LABEL: test4
+ ;; CHECK-NEXT: ret void
tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %a, i8* %a, i32 100, i32 1, i1 false)
ret void
}
-declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) argmemonly nounwind
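The store constants expected in @test3 are the little-endian byte images of the source strings: "h\00" is 0x0068 = 104, "hel\00" is 0x006C6568 = 7103848, and "hello_u\00" is 0x00755F6F6C6C6568 = 33037504440198504. A C spot check (assumes a little-endian host; illustration only):

#include <assert.h>
#include <stdint.h>
#include <string.h>

int main(void) {
    /* Each literal includes its NUL terminator, so the copies below
       cover exactly the bytes the stores above encode. */
    uint16_t h; memcpy(&h, "h",       2); assert(h == 104u);
    uint32_t m; memcpy(&m, "hel",     4); assert(m == 7103848u);
    uint64_t l; memcpy(&l, "hello_u", 8); assert(l == 33037504440198504ull);
    return 0;
}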
diff --git a/test/Transforms/InstCombine/memset.ll b/test/Transforms/InstCombine/memset.ll
index dfafcf948d1c..08bbf8ba1ef3 100644
--- a/test/Transforms/InstCombine/memset.ll
+++ b/test/Transforms/InstCombine/memset.ll
@@ -1,7 +1,16 @@
-; RUN: opt < %s -instcombine -S | not grep "call.*llvm.memset"
+; RUN: opt < %s -instcombine -S | FileCheck %s
-define i32 @main() {
- %target = alloca [1024 x i8]
+define i32 @test([1024 x i8]* %target) {
+ ;; CHECK-LABEL: test
+ ;; CHECK-NEXT: [[P1:%[^\s]+]] = getelementptr inbounds [1024 x i8], [1024 x i8]* %target, i64 0, i64 0
+ ;; CHECK-NEXT: store i8 1, i8* [[P1]], align 1
+ ;; CHECK-NEXT: [[P2:%[^\s]+]] = bitcast [1024 x i8]* %target to i16*
+ ;; CHECK-NEXT: store i16 257, i16* [[P2]], align 2
+ ;; CHECK-NEXT: [[P3:%[^\s]+]] = bitcast [1024 x i8]* %target to i32*
+ ;; CHECK-NEXT: store i32 16843009, i32* [[P3]], align 4
+ ;; CHECK-NEXT: [[P4:%[^\s]+]] = bitcast [1024 x i8]* %target to i64*
+ ;; CHECK-NEXT: store i64 72340172838076673, i64* [[P4]], align 8
+ ;; CHECK-NEXT: ret i32 0
%target_p = getelementptr [1024 x i8], [1024 x i8]* %target, i32 0, i32 0
call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 0, i32 1, i1 false)
call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 1, i32 1, i1 false)
@@ -11,4 +20,4 @@ define i32 @main() {
ret i32 0
}
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i32, i1) argmemonly nounwind
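The expected store constants are the memset fill byte 0x01 splatted to each store width: 0x0101 = 257, 0x01010101 = 16843009, and 0x0101010101010101 = 72340172838076673. A trivial C check (illustration only):

#include <assert.h>
#include <stdint.h>

int main(void) {
    assert((uint16_t)0x0101u == 257u);
    assert(0x01010101u == 16843009u);
    assert(0x0101010101010101ull == 72340172838076673ull);
    return 0;
}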
diff --git a/test/Transforms/InstCombine/mul.ll b/test/Transforms/InstCombine/mul.ll
index 6e67c7fa08a0..1d9d0a6a9fa0 100644
--- a/test/Transforms/InstCombine/mul.ll
+++ b/test/Transforms/InstCombine/mul.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; This test makes sure that mul instructions are properly eliminated.
; RUN: opt < %s -instcombine -S | FileCheck %s
@@ -297,6 +298,15 @@ define i32 @test32(i32 %X) {
; CHECK-NEXT: ret i32 %[[shl]]
}
+define <2 x i32> @test32vec(<2 x i32> %X) {
+; CHECK-LABEL: @test32vec(
+; CHECK-NEXT: [[MUL:%.*]] = shl nsw <2 x i32> [[X:%.*]], <i32 31, i32 31>
+; CHECK-NEXT: ret <2 x i32> [[MUL]]
+;
+ %mul = mul nsw <2 x i32> %X, <i32 -2147483648, i32 -2147483648>
+ ret <2 x i32> %mul
+}
+
define i32 @test33(i32 %X) {
; CHECK-LABEL: @test33
%mul = mul nsw i32 %X, 1073741824
@@ -304,3 +314,21 @@ define i32 @test33(i32 %X) {
; CHECK-NEXT: ret i32 %[[shl]]
ret i32 %mul
}
+
+define <2 x i32> @test33vec(<2 x i32> %X) {
+; CHECK-LABEL: @test33vec(
+; CHECK-NEXT: [[MUL:%.*]] = shl nsw <2 x i32> [[X:%.*]], <i32 30, i32 30>
+; CHECK-NEXT: ret <2 x i32> [[MUL]]
+;
+ %mul = mul nsw <2 x i32> %X, <i32 1073741824, i32 1073741824>
+ ret <2 x i32> %mul
+}
+
+define i128 @test34(i128 %X) {
+; CHECK-LABEL: @test34(
+; CHECK-NEXT: [[MUL:%.*]] = shl nsw i128 [[X:%.*]], 1
+; CHECK-NEXT: ret i128 [[MUL]]
+;
+ %mul = mul nsw i128 %X, 2
+ ret i128 %mul
+}
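The new folds above all rely on the multiplier being a power of two: 2 is 1 << 1, 1073741824 is 1 << 30, and -2147483648 is the i32 bit pattern of 1 << 31, so each nsw multiply becomes an nsw shift. A short C sanity check of the 32-bit constants (illustration only):

#include <assert.h>
#include <stdint.h>

int main(void) {
    assert(1073741824 == 1 << 30);
    assert((uint32_t)INT32_MIN == 1u << 31);
    return 0;
}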
diff --git a/test/Transforms/InstCombine/or-xor.ll b/test/Transforms/InstCombine/or-xor.ll
index af62c2dd4ba3..2164f0df8d27 100644
--- a/test/Transforms/InstCombine/or-xor.ll
+++ b/test/Transforms/InstCombine/or-xor.ll
@@ -178,7 +178,7 @@ define i32 @test13(i32 %x, i32 %y) {
; ((x | ~y) ^ (~x | y)) -> x ^ y
define i32 @test14(i32 %x, i32 %y) {
; CHECK-LABEL: @test14(
-; CHECK-NEXT: [[XOR:%.*]] = xor i32 %x, %y
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 %y, %x
; CHECK-NEXT: ret i32 [[XOR]]
;
%noty = xor i32 %y, -1
@@ -191,7 +191,7 @@ define i32 @test14(i32 %x, i32 %y) {
define i32 @test14_commuted(i32 %x, i32 %y) {
; CHECK-LABEL: @test14_commuted(
-; CHECK-NEXT: [[XOR:%.*]] = xor i32 %x, %y
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 %y, %x
; CHECK-NEXT: ret i32 [[XOR]]
;
%noty = xor i32 %y, -1
@@ -205,7 +205,7 @@ define i32 @test14_commuted(i32 %x, i32 %y) {
; ((x & ~y) ^ (~x & y)) -> x ^ y
define i32 @test15(i32 %x, i32 %y) {
; CHECK-LABEL: @test15(
-; CHECK-NEXT: [[XOR:%.*]] = xor i32 %x, %y
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 %y, %x
; CHECK-NEXT: ret i32 [[XOR]]
;
%noty = xor i32 %y, -1
@@ -218,7 +218,7 @@ define i32 @test15(i32 %x, i32 %y) {
define i32 @test15_commuted(i32 %x, i32 %y) {
; CHECK-LABEL: @test15_commuted(
-; CHECK-NEXT: [[XOR:%.*]] = xor i32 %x, %y
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 %y, %x
; CHECK-NEXT: ret i32 [[XOR]]
;
%noty = xor i32 %y, -1
@@ -344,3 +344,71 @@ define i8 @test18(i8 %A, i8 %B) {
%res = mul i8 %or, %xor2 ; to increase the use count for the xor
ret i8 %res
}
+
+; ((x | y) ^ (~x | ~y)) -> ~(x ^ y)
+define i32 @test19(i32 %x, i32 %y) {
+; CHECK-LABEL: @test19(
+; CHECK-NEXT: [[OR1:%.*]] = or i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[OR2_DEMORGAN:%.*]] = and i32 [[X]], [[Y]]
+; CHECK-NEXT: [[OR2:%.*]] = xor i32 [[OR2_DEMORGAN]], -1
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[OR1]], [[OR2]]
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %noty = xor i32 %y, -1
+ %notx = xor i32 %x, -1
+ %or1 = or i32 %x, %y
+ %or2 = or i32 %notx, %noty
+ %xor = xor i32 %or1, %or2
+ ret i32 %xor
+}
+
+; ((x | y) ^ (~y | ~x)) -> ~(x ^ y)
+define i32 @test20(i32 %x, i32 %y) {
+; CHECK-LABEL: @test20(
+; CHECK-NEXT: [[OR1:%.*]] = or i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[OR2_DEMORGAN:%.*]] = and i32 [[Y]], [[X]]
+; CHECK-NEXT: [[OR2:%.*]] = xor i32 [[OR2_DEMORGAN]], -1
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[OR1]], [[OR2]]
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %noty = xor i32 %y, -1
+ %notx = xor i32 %x, -1
+ %or1 = or i32 %x, %y
+ %or2 = or i32 %noty, %notx
+ %xor = xor i32 %or1, %or2
+ ret i32 %xor
+}
+
+; ((~x | ~y) ^ (x | y)) -> ~(x ^ y)
+define i32 @test21(i32 %x, i32 %y) {
+; CHECK-LABEL: @test21(
+; CHECK-NEXT: [[OR1_DEMORGAN:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[OR1:%.*]] = xor i32 [[OR1_DEMORGAN]], -1
+; CHECK-NEXT: [[OR2:%.*]] = or i32 [[X]], [[Y]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[OR2]], [[OR1]]
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %noty = xor i32 %y, -1
+ %notx = xor i32 %x, -1
+ %or1 = or i32 %notx, %noty
+ %or2 = or i32 %x, %y
+ %xor = xor i32 %or1, %or2
+ ret i32 %xor
+}
+
+; ((~x | ~y) ^ (y | x)) -> ~(x ^ y)
+define i32 @test22(i32 %x, i32 %y) {
+; CHECK-LABEL: @test22(
+; CHECK-NEXT: [[OR1_DEMORGAN:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[OR1:%.*]] = xor i32 [[OR1_DEMORGAN]], -1
+; CHECK-NEXT: [[OR2:%.*]] = or i32 [[Y]], [[X]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[OR2]], [[OR1]]
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %noty = xor i32 %y, -1
+ %notx = xor i32 %x, -1
+ %or1 = or i32 %notx, %noty
+ %or2 = or i32 %y, %x
+ %xor = xor i32 %or1, %or2
+ ret i32 %xor
+}
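+
+; Note: as the CHECK lines for test19-test22 show, InstCombine currently keeps
+; the De Morgan'd or/and/xor form; the single-instruction ~(x ^ y) named in the
+; comments above is the intended eventual fold, not today's output.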
diff --git a/test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll b/test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll
index 894bf6db0a42..3ac02795b478 100644
--- a/test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll
+++ b/test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll
@@ -318,10 +318,33 @@ entry:
ret i16 %cond
}
+define i128 @test7(i128 %x) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i128 @llvm.ctlz.i128(i128 [[X:%.*]], i1 false), !range !3
+; CHECK-NEXT: ret i128 [[TMP1]]
+;
+ %1 = tail call i128 @llvm.ctlz.i128(i128 %x, i1 true)
+ %tobool = icmp ne i128 %x, 0
+ %cond = select i1 %tobool, i128 %1, i128 128
+ ret i128 %cond
+}
+
+define i128 @test8(i128 %x) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i128 @llvm.cttz.i128(i128 [[X:%.*]], i1 false), !range !3
+; CHECK-NEXT: ret i128 [[TMP1]]
+;
+ %1 = tail call i128 @llvm.cttz.i128(i128 %x, i1 true)
+ %tobool = icmp ne i128 %x, 0
+ %cond = select i1 %tobool, i128 %1, i128 128
+ ret i128 %cond
+}
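+
+; Note: the selects in test7/test8 are redundant because ctlz/cttz with a false
+; is_zero_undef flag already return the bit width (here 128) for a zero input,
+; so InstCombine flips the flag from true to false and drops the select; the
+; !range metadata records the resulting [0, 128] bound.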
declare i16 @llvm.ctlz.i16(i16, i1)
declare i32 @llvm.ctlz.i32(i32, i1)
declare i64 @llvm.ctlz.i64(i64, i1)
+declare i128 @llvm.ctlz.i128(i128, i1)
declare i16 @llvm.cttz.i16(i16, i1)
declare i32 @llvm.cttz.i32(i32, i1)
declare i64 @llvm.cttz.i64(i64, i1)
+declare i128 @llvm.cttz.i128(i128, i1)
diff --git a/test/Transforms/InstCombine/select-with-bitwise-ops.ll b/test/Transforms/InstCombine/select-with-bitwise-ops.ll
index caec9412a7fd..437f09be2e7c 100644
--- a/test/Transforms/InstCombine/select-with-bitwise-ops.ll
+++ b/test/Transforms/InstCombine/select-with-bitwise-ops.ll
@@ -104,10 +104,10 @@ define i32 @select_icmp_ne_0_and_32_or_4096(i32 %x, i32 %y) {
define i8 @select_icmp_ne_0_and_1073741824_or_8(i32 %x, i8 %y) {
; CHECK-LABEL: @select_icmp_ne_0_and_1073741824_or_8(
-; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 1073741824
-; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
-; CHECK-NEXT: [[OR:%.*]] = or i8 [[Y:%.*]], 8
-; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i8 [[Y]], i8 [[OR]]
+; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 1073741824
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[OR:%.*]] = or i8 %y, 8
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i8 [[OR]], i8 %y
; CHECK-NEXT: ret i8 [[SELECT]]
;
%and = and i32 %x, 1073741824
@@ -119,10 +119,10 @@ define i8 @select_icmp_ne_0_and_1073741824_or_8(i32 %x, i8 %y) {
define i32 @select_icmp_ne_0_and_8_or_1073741824(i8 %x, i32 %y) {
; CHECK-LABEL: @select_icmp_ne_0_and_8_or_1073741824(
-; CHECK-NEXT: [[AND:%.*]] = and i8 [[X:%.*]], 8
-; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 [[AND]], 0
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 1073741824
-; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[OR]]
+; CHECK-NEXT: [[AND:%.*]] = and i8 %x, 8
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[AND]], 0
+; CHECK-NEXT: [[OR:%.*]] = or i32 %y, 1073741824
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[OR]], i32 %y
; CHECK-NEXT: ret i32 [[SELECT]]
;
%and = and i8 %x, 8
@@ -271,8 +271,8 @@ define i32 @test65(i64 %x) {
define i32 @test66(i64 %x) {
; CHECK-LABEL: @test66(
; CHECK-NEXT: [[TMP1:%.*]] = and i64 %x, 4294967296
-; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 40, i32 42
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 42, i32 40
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = and i64 %x, 4294967296
@@ -376,10 +376,10 @@ define i32 @no_shift_xor_multiuse_or(i32 %x, i32 %y) {
define i32 @shift_xor_multiuse_or(i32 %x, i32 %y) {
; CHECK-LABEL: @shift_xor_multiuse_or(
-; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
-; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 2048
-; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[OR]]
+; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[OR:%.*]] = or i32 %y, 2048
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[OR]], i32 %y
; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[OR]]
; CHECK-NEXT: ret i32 [[RES]]
;
@@ -430,11 +430,11 @@ define i32 @no_shift_no_xor_multiuse_cmp(i32 %x, i32 %y, i32 %z, i32 %w) {
define i32 @no_shift_xor_multiuse_cmp(i32 %x, i32 %y, i32 %z, i32 %w) {
; CHECK-LABEL: @no_shift_xor_multiuse_cmp(
-; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
-; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[AND]], 4096
-; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[Y:%.*]]
-; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], %y
+; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 %w, i32 %z
; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP2]], [[SELECT2]]
; CHECK-NEXT: ret i32 [[RES]]
;
diff --git a/test/Transforms/InstCombine/select.ll b/test/Transforms/InstCombine/select.ll
index 0f94235982b9..c8f2a50b72ed 100644
--- a/test/Transforms/InstCombine/select.ll
+++ b/test/Transforms/InstCombine/select.ll
@@ -1220,12 +1220,13 @@ entry:
}
define i32 @test_select_select0(i32 %a, i32 %r0, i32 %r1, i32 %v1, i32 %v2) {
- ; CHECK-LABEL: @test_select_select0(
- ; CHECK: %[[C0:.*]] = icmp sge i32 %a, %v1
- ; CHECK-NEXT: %[[C1:.*]] = icmp slt i32 %a, %v2
- ; CHECK-NEXT: %[[C:.*]] = and i1 %[[C1]], %[[C0]]
- ; CHECK-NEXT: %[[SEL:.*]] = select i1 %[[C]], i32 %r0, i32 %r1
- ; CHECK-NEXT: ret i32 %[[SEL]]
+; CHECK-LABEL: @test_select_select0(
+; CHECK-NEXT: [[C0:%.*]] = icmp slt i32 %a, %v1
+; CHECK-NEXT: [[S0:%.*]] = select i1 [[C0]], i32 %r1, i32 %r0
+; CHECK-NEXT: [[C1:%.*]] = icmp slt i32 %a, %v2
+; CHECK-NEXT: [[S1:%.*]] = select i1 [[C1]], i32 [[S0]], i32 %r1
+; CHECK-NEXT: ret i32 [[S1]]
+;
%c0 = icmp sge i32 %a, %v1
%s0 = select i1 %c0, i32 %r0, i32 %r1
%c1 = icmp slt i32 %a, %v2
@@ -1234,12 +1235,13 @@ define i32 @test_select_select0(i32 %a, i32 %r0, i32 %r1, i32 %v1, i32 %v2) {
}
define i32 @test_select_select1(i32 %a, i32 %r0, i32 %r1, i32 %v1, i32 %v2) {
- ; CHECK-LABEL: @test_select_select1(
- ; CHECK: %[[C0:.*]] = icmp sge i32 %a, %v1
- ; CHECK-NEXT: %[[C1:.*]] = icmp slt i32 %a, %v2
- ; CHECK-NEXT: %[[C:.*]] = or i1 %[[C1]], %[[C0]]
- ; CHECK-NEXT: %[[SEL:.*]] = select i1 %[[C]], i32 %r0, i32 %r1
- ; CHECK-NEXT: ret i32 %[[SEL]]
+; CHECK-LABEL: @test_select_select1(
+; CHECK-NEXT: [[C0:%.*]] = icmp slt i32 %a, %v1
+; CHECK-NEXT: [[S0:%.*]] = select i1 [[C0]], i32 %r1, i32 %r0
+; CHECK-NEXT: [[C1:%.*]] = icmp slt i32 %a, %v2
+; CHECK-NEXT: [[S1:%.*]] = select i1 [[C1]], i32 %r0, i32 [[S0]]
+; CHECK-NEXT: ret i32 [[S1]]
+;
%c0 = icmp sge i32 %a, %v1
%s0 = select i1 %c0, i32 %r0, i32 %r1
%c1 = icmp slt i32 %a, %v2
diff --git a/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll b/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll
index a038fd1a411b..c8efb41ce737 100644
--- a/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll
+++ b/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll
@@ -774,3 +774,28 @@ define void @load_factor2_fp128(<4 x fp128>* %ptr) {
%v1 = shufflevector <4 x fp128> %interleaved.vec, <4 x fp128> undef, <2 x i32> <i32 1, i32 3>
ret void
}
+
+define <4 x i1> @load_large_vector(<12 x i64 *>* %p) {
+; NEON-LABEL: @load_large_vector(
+; NEON: [[LDN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0v2i64(<2 x i64>*
+; NEON-NEXT: [[TMP1:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[LDN]], 1
+; NEON-NEXT: [[TMP2:%.*]] = inttoptr <2 x i64> [[TMP1]] to <2 x i64*>
+; NEON-NEXT: [[TMP3:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[LDN]], 0
+; NEON-NEXT: [[TMP4:%.*]] = inttoptr <2 x i64> [[TMP3]] to <2 x i64*>
+; NEON: [[LDN1:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0v2i64(<2 x i64>*
+; NEON-NEXT: [[TMP5:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[LDN1]], 1
+; NEON-NEXT: [[TMP6:%.*]] = inttoptr <2 x i64> [[TMP5]] to <2 x i64*>
+; NEON-NEXT: [[TMP7:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[LDN1]], 0
+; NEON-NEXT: [[TMP8:%.*]] = inttoptr <2 x i64> [[TMP7]] to <2 x i64*>
+; NEON-NEXT: shufflevector <2 x i64*> [[TMP2]], <2 x i64*> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; NEON-NEXT: shufflevector <2 x i64*> [[TMP4]], <2 x i64*> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; NO_NEON-LABEL: @load_large_vector(
+; NO_NEON-NOT: @llvm.aarch64.neon
+; NO_NEON: ret
+;
+ %l = load <12 x i64 *>, <12 x i64 *>* %p
+ %s1 = shufflevector <12 x i64 *> %l, <12 x i64 *> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+ %s2 = shufflevector <12 x i64 *> %l, <12 x i64 *> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+ %ret = icmp ne <4 x i64 *> %s1, %s2
+ ret <4 x i1> %ret
+}
diff --git a/test/Transforms/JumpThreading/range-compare.ll b/test/Transforms/JumpThreading/range-compare.ll
new file mode 100644
index 000000000000..54e94d06649b
--- /dev/null
+++ b/test/Transforms/JumpThreading/range-compare.ll
@@ -0,0 +1,125 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -jump-threading -S | FileCheck %s
+
+
+declare void @bar(...)
+declare void @baz(...)
+
+; Make sure we thread the end of the bar block to the end of the function.
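+; (x > 9 implies x >= 10, so the range check (x - 3) <u 5, which holds exactly
+; for 3 <= x <= 7, is provably false on that path.)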
+define void @test1(i32 %x) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], 9
+; CHECK-NEXT: br i1 [[CMP]], label [[IF_END_THREAD:%.*]], label [[IF_END:%.*]]
+; CHECK: if.end.thread:
+; CHECK-NEXT: call void (...) @bar()
+; CHECK-NEXT: br label [[IF_END4:%.*]]
+; CHECK: if.end:
+; CHECK-NEXT: [[X_OFF:%.*]] = add i32 [[X]], -3
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X_OFF]], 5
+; CHECK-NEXT: br i1 [[TMP0]], label [[IF_THEN3:%.*]], label [[IF_END4]]
+; CHECK: if.then3:
+; CHECK-NEXT: call void (...) @baz()
+; CHECK-NEXT: br label [[IF_END4]]
+; CHECK: if.end4:
+; CHECK-NEXT: ret void
+;
+entry:
+ %cmp = icmp sgt i32 %x, 9
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ call void (...) @bar()
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ %x.off = add i32 %x, -3
+ %0 = icmp ult i32 %x.off, 5
+ br i1 %0, label %if.then3, label %if.end4
+
+if.then3: ; preds = %if.end
+ call void (...) @baz()
+ br label %if.end4
+
+if.end4: ; preds = %if.then3, %if.end
+ ret void
+}
+
+; Make sure we thread the false side of the first if to the end of the function.
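+; (On the false side x >= 9, so (x - 3) <u 5 cannot hold and that edge is
+; threaded straight to if.end4.)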
+define void @test2(i32 %x) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 9
+; CHECK-NEXT: br i1 [[CMP]], label [[IF_END:%.*]], label [[IF_END4:%.*]]
+; CHECK: if.end:
+; CHECK-NEXT: call void (...) @bar()
+; CHECK-NEXT: [[X_OFF:%.*]] = add i32 [[X]], -3
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X_OFF]], 5
+; CHECK-NEXT: br i1 [[TMP0]], label [[IF_THEN3:%.*]], label [[IF_END4]]
+; CHECK: if.then3:
+; CHECK-NEXT: call void (...) @baz()
+; CHECK-NEXT: br label [[IF_END4]]
+; CHECK: if.end4:
+; CHECK-NEXT: ret void
+;
+entry:
+ %cmp = icmp slt i32 %x, 9
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ call void (...) @bar()
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ %x.off = add i32 %x, -3
+ %0 = icmp ult i32 %x.off, 5
+ br i1 %0, label %if.then3, label %if.end4
+
+if.then3: ; preds = %if.end
+ call void (...) @baz()
+ br label %if.end4
+
+if.end4: ; preds = %if.then3, %if.end
+ ret void
+}
+
+; Negative test to make sure we don't thread when the ranges overlap.
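+; (x > 6 and the range 3 <= x <= 7 overlap at x = 7, so neither compare decides
+; the other and no threading is possible.)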
+define void @test3(i32 %x) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], 6
+; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: call void (...) @bar()
+; CHECK-NEXT: br label [[IF_END]]
+; CHECK: if.end:
+; CHECK-NEXT: [[X_OFF:%.*]] = add i32 [[X]], -3
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X_OFF]], 5
+; CHECK-NEXT: br i1 [[TMP0]], label [[IF_THEN3:%.*]], label [[IF_END4:%.*]]
+; CHECK: if.then3:
+; CHECK-NEXT: call void (...) @baz()
+; CHECK-NEXT: br label [[IF_END4]]
+; CHECK: if.end4:
+; CHECK-NEXT: ret void
+;
+entry:
+ %cmp = icmp sgt i32 %x, 6
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ call void (...) @bar()
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ %x.off = add i32 %x, -3
+ %0 = icmp ult i32 %x.off, 5
+ br i1 %0, label %if.then3, label %if.end4
+
+if.then3: ; preds = %if.end
+ call void (...) @baz()
+ br label %if.end4
+
+if.end4: ; preds = %if.then3, %if.end
+ ret void
+}
+
diff --git a/test/Transforms/LICM/dropped-tbaa.ll b/test/Transforms/LICM/dropped-tbaa.ll
new file mode 100644
index 000000000000..7d37ca55c188
--- /dev/null
+++ b/test/Transforms/LICM/dropped-tbaa.ll
@@ -0,0 +1,90 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -scoped-noalias -tbaa -licm -S | FileCheck %s
+
+; This test case is generated from the following C code with -fstrict-aliasing,
+; after passing it through -inline -mem2reg -loop-rotate -instcombine:
+; void add(double *restrict data, int *restrict addend) {
+; *data += *addend;
+; }
+;
+; void foo(double *data, int *addend) {
+; for (int i = 0; i < 1000; ++i) {
+; *data += *addend;
+; add(data, addend);
+; }
+; }
+; We want to make sure the load of addend gets hoisted, independent of the second
+; load having different noalias metadata.
+
+define void @foo(double* %data, i32* %addend) #0 {
+; CHECK: for.body.lr.ph:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ADDEND:%.*]], align 4, !tbaa !1
+; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ADDEND]], align 4, !tbaa !1, !alias.scope !5, !noalias !8
+; CHECK-NEXT: [[CONV_I:%.*]] = sitofp i32 [[TMP2]] to double
+entry:
+ %i = alloca i32, align 4
+ %0 = bitcast i32* %i to i8*
+ call void @llvm.lifetime.start.p0i8(i64 4, i8* %0) #2
+ store i32 0, i32* %i, align 4, !tbaa !1
+ br i1 true, label %for.body.lr.ph, label %for.cond.cleanup
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.cond.for.cond.cleanup_crit_edge: ; preds = %for.inc
+ br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.for.cond.cleanup_crit_edge, %entry
+ %1 = bitcast i32* %i to i8*
+ call void @llvm.lifetime.end.p0i8(i64 4, i8* %1) #2
+ br label %for.end
+
+for.body: ; preds = %for.body.lr.ph, %for.inc
+ %2 = load i32, i32* %addend, align 4, !tbaa !1
+ %conv = sitofp i32 %2 to double
+ %3 = load i32, i32* %i, align 4, !tbaa !1
+ %idxprom = sext i32 %3 to i64
+ %arrayidx = getelementptr inbounds double, double* %data, i64 %idxprom
+ %4 = load double, double* %arrayidx, align 8, !tbaa !5
+ %add = fadd double %4, %conv
+ store double %add, double* %arrayidx, align 8, !tbaa !5
+ %idxprom1 = sext i32 %3 to i64
+ %arrayidx2 = getelementptr inbounds double, double* %data, i64 %idxprom1
+ %5 = load i32, i32* %addend, align 4, !tbaa !1, !alias.scope !7, !noalias !10
+ %conv.i = sitofp i32 %5 to double
+ %6 = load double, double* %arrayidx2, align 8, !tbaa !5, !alias.scope !10, !noalias !7
+ %add.i = fadd double %6, %conv.i
+ store double %add.i, double* %arrayidx2, align 8, !tbaa !5, !alias.scope !10, !noalias !7
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %7 = load i32, i32* %i, align 4, !tbaa !1
+ %inc = add nsw i32 %7, 1
+ store i32 %inc, i32* %i, align 4, !tbaa !1
+ %cmp = icmp slt i32 %inc, 1000
+ br i1 %cmp, label %for.body, label %for.cond.for.cond.cleanup_crit_edge
+
+for.end: ; preds = %for.cond.cleanup
+ ret void
+}
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #0
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #0
+
+attributes #0 = { argmemonly nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 5.0.0 (llvm/trunk 299971)"}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"double", !3, i64 0}
+!7 = !{!8}
+!8 = distinct !{!8, !9, !"add: %addend"}
+!9 = distinct !{!9, !"add"}
+!10 = !{!11}
+!11 = distinct !{!11, !9, !"add: %data"}
diff --git a/test/Transforms/LoopUnroll/AArch64/falkor-prefetch.ll b/test/Transforms/LoopUnroll/AArch64/falkor-prefetch.ll
new file mode 100644
index 000000000000..b2930dc5f89e
--- /dev/null
+++ b/test/Transforms/LoopUnroll/AArch64/falkor-prefetch.ll
@@ -0,0 +1,169 @@
+; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=falkor | FileCheck %s
+; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=falkor -enable-falkor-hwpf-unroll-fix=0 | FileCheck %s --check-prefix=NOHWPF
+
+; Check that loop unroller doesn't exhaust HW prefetcher resources.
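+; (Falkor's hardware prefetcher can track only a limited number of strided load
+; streams, so the unroll count is capped to keep the unrolled body from creating
+; more streams than the prefetcher can follow.)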
+
+; Partial unroll 2 times for this loop on falkor instead of 4.
+; NOHWPF-LABEL: @unroll1(
+; NOHWPF-LABEL: loop:
+; NOHWPF-NEXT: phi
+; NOHWPF-NEXT: getelementptr
+; NOHWPF-NEXT: load
+; NOHWPF-NEXT: getelementptr
+; NOHWPF-NEXT: load
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: getelementptr
+; NOHWPF-NEXT: load
+; NOHWPF-NEXT: getelementptr
+; NOHWPF-NEXT: load
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: getelementptr
+; NOHWPF-NEXT: load
+; NOHWPF-NEXT: getelementptr
+; NOHWPF-NEXT: load
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: getelementptr
+; NOHWPF-NEXT: load
+; NOHWPF-NEXT: getelementptr
+; NOHWPF-NEXT: load
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: icmp
+; NOHWPF-NEXT: br
+; NOHWPF-NEXT-LABEL: exit:
+;
+; CHECK-LABEL: @unroll1(
+; CHECK-LABEL: loop:
+; CHECK-NEXT: phi
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: load
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: load
+; CHECK-NEXT: add
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: load
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: load
+; CHECK-NEXT: add
+; CHECK-NEXT: icmp
+; CHECK-NEXT: br
+; CHECK-NEXT-LABEL: exit:
+define void @unroll1(i32* %p, i32* %p2) {
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
+
+ %gep = getelementptr inbounds i32, i32* %p, i32 %iv
+ %load = load volatile i32, i32* %gep
+
+ %gep2 = getelementptr inbounds i32, i32* %p2, i32 %iv
+ %load2 = load volatile i32, i32* %gep2
+
+ %inc = add i32 %iv, 1
+ %exitcnd = icmp uge i32 %inc, 1024
+ br i1 %exitcnd, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+; Partial unroll 4 times for this loop on falkor instead of 8.
+; NOHWPF-LABEL: @unroll2(
+; NOHWPF-LABEL: loop2:
+; NOHWPF-NEXT: phi
+; NOHWPF-NEXT: phi
+; NOHWPF-NEXT: getelementptr
+; NOHWPF-NEXT: load
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: getelementptr
+; NOHWPF-NEXT: load
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: getelementptr
+; NOHWPF-NEXT: load
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: getelementptr
+; NOHWPF-NEXT: load
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: getelementptr
+; NOHWPF-NEXT: load
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: getelementptr
+; NOHWPF-NEXT: load
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: getelementptr
+; NOHWPF-NEXT: load
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: getelementptr
+; NOHWPF-NEXT: load
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: icmp
+; NOHWPF-NEXT: br
+; NOHWPF-NEXT-LABEL: exit2:
+;
+; CHECK-LABEL: @unroll2(
+; CHECK-LABEL: loop2:
+; CHECK-NEXT: phi
+; CHECK-NEXT: phi
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: load
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: load
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: load
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: load
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: icmp
+; CHECK-NEXT: br
+; CHECK-NEXT-LABEL: exit2:
+
+define void @unroll2(i32* %p) {
+entry:
+ br label %loop1
+
+loop1:
+ %iv1 = phi i32 [ 0, %entry ], [ %inc1, %loop1.latch ]
+ %outer.sum = phi i32 [ 0, %entry ], [ %sum, %loop1.latch ]
+ br label %loop2.header
+
+loop2.header:
+ br label %loop2
+
+loop2:
+ %iv2 = phi i32 [ 0, %loop2.header ], [ %inc2, %loop2 ]
+ %sum = phi i32 [ %outer.sum, %loop2.header ], [ %sum.inc, %loop2 ]
+ %gep = getelementptr inbounds i32, i32* %p, i32 %iv2
+ %load = load i32, i32* %gep
+ %sum.inc = add i32 %sum, %load
+ %inc2 = add i32 %iv2, 1
+ %exitcnd2 = icmp uge i32 %inc2, 1024
+ br i1 %exitcnd2, label %exit2, label %loop2
+
+exit2:
+ br label %loop1.latch
+
+loop1.latch:
+ %inc1 = add i32 %iv1, 1
+ %exitcnd1 = icmp uge i32 %inc1, 1024
+  br i1 %exitcnd1, label %exit, label %loop1
+
+exit:
+ ret void
+}
+
diff --git a/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll b/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
new file mode 100644
index 000000000000..1f31a133e34d
--- /dev/null
+++ b/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
@@ -0,0 +1,279 @@
+; RUN: opt < %s -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true -unroll-runtime-multi-exit=true -verify-dom-info -verify-loop-info -instcombine -S| FileCheck %s
+; RUN: opt < %s -loop-unroll -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=true -unroll-runtime-multi-exit=true -verify-dom-info -verify-loop-info -instcombine
+
+; The second RUN line generates an epilog remainder block for all the test
+; cases below (it does not generate a remainder loop).
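+; (With the unroll factor of 8 seen in the checks, the epilog executes
+; xtraiter = trip & 7 leftover iterations.)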
+
+; test with three exiting and three exit blocks.
+; none of the exit blocks have successors
+define void @test1(i64 %trip, i1 %cond) {
+; CHECK-LABEL: test1
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[TRIP:%.*]], -1
+; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TRIP]], 7
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7
+; CHECK-NEXT: br i1 [[TMP1]], label %exit2.loopexit.unr-lcssa, label [[ENTRY_NEW:%.*]]
+; CHECK: entry.new:
+; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[TRIP]], [[XTRAITER]]
+; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
+; CHECK-LABEL: loop_latch.epil:
+; CHECK-NEXT: %epil.iter.sub = add i64 %epil.iter, -1
+; CHECK-NEXT: %epil.iter.cmp = icmp eq i64 %epil.iter.sub, 0
+; CHECK-NEXT: br i1 %epil.iter.cmp, label %exit2.loopexit.epilog-lcssa, label %loop_header.epil
+; CHECK-LABEL: loop_latch.7:
+; CHECK-NEXT: %niter.nsub.7 = add i64 %niter, -8
+; CHECK-NEXT: %niter.ncmp.7 = icmp eq i64 %niter.nsub.7, 0
+; CHECK-NEXT: br i1 %niter.ncmp.7, label %exit2.loopexit.unr-lcssa.loopexit, label %loop_header
+entry:
+ br label %loop_header
+
+loop_header:
+ %iv = phi i64 [ 0, %entry ], [ %iv_next, %loop_latch ]
+ br i1 %cond, label %loop_latch, label %loop_exiting_bb1
+
+loop_exiting_bb1:
+ br i1 false, label %loop_exiting_bb2, label %exit1
+
+loop_exiting_bb2:
+ br i1 false, label %loop_latch, label %exit3
+
+exit3:
+ ret void
+
+loop_latch:
+ %iv_next = add i64 %iv, 1
+ %cmp = icmp ne i64 %iv_next, %trip
+ br i1 %cmp, label %loop_header, label %exit2.loopexit
+
+exit1:
+ ret void
+
+exit2.loopexit:
+ ret void
+}
+
+
+; test with three exiting and two exit blocks.
+; The non-latch exit block has 2 unique predecessors.
+; There are 2 values passed to the exit blocks that are calculated at every iteration.
+; %sum.02 and %add. Both of these are incoming values for phi from every exiting
+; unrolled block.
+define i32 @test2(i32* nocapture %a, i64 %n) {
+; CHECK-LABEL: test2
+; CHECK-LABEL: for.exit2.loopexit:
+; CHECK-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %for.body ], [ 42, %for.exiting_block.1 ], [ %add.1, %for.body.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %for.body.2 ], [ 42, %for.exiting_block.3 ],
+; CHECK-NEXT: br label %for.exit2
+; CHECK-LABEL: for.exit2.loopexit2:
+; CHECK-NEXT: %retval.ph3 = phi i32 [ 42, %for.exiting_block.epil ], [ %sum.02.epil, %header.epil ]
+; CHECK-NEXT: br label %for.exit2
+; CHECK-LABEL: for.exit2:
+; CHECK-NEXT: %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph3, %for.exit2.loopexit2 ]
+; CHECK-NEXT: ret i32 %retval
+; CHECK: %niter.nsub.7 = add i64 %niter, -8
+entry:
+ br label %header
+
+header:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ br i1 false, label %for.exit2, label %for.exiting_block
+
+for.exiting_block:
+ %cmp = icmp eq i64 %n, 42
+ br i1 %cmp, label %for.exit2, label %for.body
+
+for.body:
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %add = add nsw i32 %0, %sum.02
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, %n
+ br i1 %exitcond, label %for.end, label %header
+
+for.end: ; preds = %for.body
+ %sum.0.lcssa = phi i32 [ %add, %for.body ]
+ ret i32 %sum.0.lcssa
+
+for.exit2:
+ %retval = phi i32 [ %sum.02, %header ], [ 42, %for.exiting_block ]
+ ret i32 %retval
+}
+
+; test with two exiting and three exit blocks.
+; the non-latch exiting block has a switch.
+define void @test3(i64 %trip, i64 %add) {
+; CHECK-LABEL: test3
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[TRIP:%.*]], -1
+; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TRIP]], 7
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7
+; CHECK-NEXT: br i1 [[TMP1]], label %exit2.loopexit.unr-lcssa, label [[ENTRY_NEW:%.*]]
+; CHECK: entry.new:
+; CHECK-NEXT: %unroll_iter = sub i64 [[TRIP]], [[XTRAITER]]
+; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
+; CHECK-LABEL: loop_header:
+; CHECK-NEXT: %sum = phi i64 [ 0, %entry.new ], [ %sum.next.7, %loop_latch.7 ]
+; CHECK-NEXT: %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.7, %loop_latch.7 ]
+; CHECK-LABEL: loop_exiting_bb1.7:
+; CHECK-NEXT: switch i64 %sum.next.6, label %loop_latch.7
+; CHECK-LABEL: loop_latch.7:
+; CHECK-NEXT: %sum.next.7 = add i64 %sum.next.6, %add
+; CHECK-NEXT: %niter.nsub.7 = add i64 %niter, -8
+; CHECK-NEXT: %niter.ncmp.7 = icmp eq i64 %niter.nsub.7, 0
+; CHECK-NEXT: br i1 %niter.ncmp.7, label %exit2.loopexit.unr-lcssa.loopexit, label %loop_header
+entry:
+ br label %loop_header
+
+loop_header:
+ %iv = phi i64 [ 0, %entry ], [ %iv_next, %loop_latch ]
+ %sum = phi i64 [ 0, %entry ], [ %sum.next, %loop_latch ]
+ br i1 undef, label %loop_latch, label %loop_exiting_bb1
+
+loop_exiting_bb1:
+ switch i64 %sum, label %loop_latch [
+ i64 24, label %exit1
+ i64 42, label %exit3
+ ]
+
+exit3:
+ ret void
+
+loop_latch:
+ %iv_next = add nuw nsw i64 %iv, 1
+ %sum.next = add i64 %sum, %add
+ %cmp = icmp ne i64 %iv_next, %trip
+ br i1 %cmp, label %loop_header, label %exit2.loopexit
+
+exit1:
+ ret void
+
+exit2.loopexit:
+ ret void
+}
+
+; FIXME: Support multiple exiting blocks to the same latch exit block.
+define i32 @test4(i32* nocapture %a, i64 %n, i1 %cond) {
+; CHECK-LABEL: test4
+; CHECK-NOT: .unr
+; CHECK-NOT: .epil
+entry:
+ br label %header
+
+header:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ br i1 %cond, label %for.end, label %for.exiting_block
+
+for.exiting_block:
+ %cmp = icmp eq i64 %n, 42
+ br i1 %cmp, label %for.exit2, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %add = add nsw i32 %0, %sum.02
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, %n
+ br i1 %exitcond, label %for.end, label %header
+
+for.end: ; preds = %for.body, %entry
+ %sum.0.lcssa = phi i32 [ 0, %header ], [ %add, %for.body ]
+ ret i32 %sum.0.lcssa
+
+for.exit2:
+ ret i32 42
+}
+
+; two exiting and two exit blocks.
+; the non-latch exiting block has duplicate edges to the non-latch exit block.
+define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
+; CHECK-LABEL: test5
+; CHECK-LABEL: exit1.loopexit:
+; CHECK-NEXT: %result.ph = phi i64 [ %ivy, %loop_exiting ], [ %ivy, %loop_exiting ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.2, %loop_exiting.2 ],
+; CHECK-NEXT: br label %exit1
+; CHECK-LABEL: exit1.loopexit2:
+; CHECK-NEXT: %ivy.epil = add i64 %iv.epil, %add
+; CHECK-NEXT: br label %exit1
+; CHECK-LABEL: exit1:
+; CHECK-NEXT: %result = phi i64 [ %result.ph, %exit1.loopexit ], [ %ivy.epil, %exit1.loopexit2 ]
+; CHECK-NEXT: ret i64 %result
+; CHECK-LABEL: loop_latch.7:
+; CHECK: %niter.nsub.7 = add i64 %niter, -8
+entry:
+ br label %loop_header
+
+loop_header:
+ %iv = phi i64 [ 0, %entry ], [ %iv_next, %loop_latch ]
+ %sum = phi i64 [ 0, %entry ], [ %sum.next, %loop_latch ]
+ br i1 %cond, label %loop_latch, label %loop_exiting
+
+loop_exiting:
+ %ivy = add i64 %iv, %add
+ switch i64 %sum, label %loop_latch [
+ i64 24, label %exit1
+ i64 42, label %exit1
+ ]
+
+loop_latch:
+ %iv_next = add nuw nsw i64 %iv, 1
+ %sum.next = add i64 %sum, %add
+ %cmp = icmp ne i64 %iv_next, %trip
+ br i1 %cmp, label %loop_header, label %latchexit
+
+exit1:
+ %result = phi i64 [ %ivy, %loop_exiting ], [ %ivy, %loop_exiting ]
+ ret i64 %result
+
+latchexit:
+ ret i64 %sum.next
+}
+
+; test when exit blocks have successors.
+define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) {
+; CHECK-LABEL: test6
+; CHECK-LABEL: for.exit2.loopexit:
+; CHECK-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %latch ], [ 42, %for.exiting_block.1 ], [ %add.1, %latch.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %latch.2 ],
+; CHECK-NEXT: br label %for.exit2
+; CHECK-LABEL: for.exit2.loopexit2:
+; CHECK-NEXT: %retval.ph3 = phi i32 [ 42, %for.exiting_block.epil ], [ %sum.02.epil, %header.epil ]
+; CHECK-NEXT: br label %for.exit2
+; CHECK-LABEL: for.exit2:
+; CHECK-NEXT: %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph3, %for.exit2.loopexit2 ]
+; CHECK-NEXT: br i1 %cond, label %exit_true, label %exit_false
+; CHECK-LABEL: latch.7:
+; CHECK: %niter.nsub.7 = add i64 %niter, -8
+entry:
+ br label %header
+
+header:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %latch ], [ 0, %entry ]
+ %sum.02 = phi i32 [ %add, %latch ], [ 0, %entry ]
+ br i1 false, label %for.exit2, label %for.exiting_block
+
+for.exiting_block:
+ %cmp = icmp eq i64 %n, 42
+ br i1 %cmp, label %for.exit2, label %latch
+
+latch:
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ %load = load i32, i32* %arrayidx, align 4
+ %add = add nsw i32 %load, %sum.02
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, %n
+ br i1 %exitcond, label %latch_exit, label %header
+
+latch_exit:
+ %sum.0.lcssa = phi i32 [ %add, %latch ]
+ ret i32 %sum.0.lcssa
+
+for.exit2:
+ %retval = phi i32 [ %sum.02, %header ], [ 42, %for.exiting_block ]
+ %addx = add i32 %retval, %x
+ br i1 %cond, label %exit_true, label %exit_false
+
+exit_true:
+ ret i32 %retval
+
+exit_false:
+ ret i32 %addx
+}
diff --git a/test/Transforms/LoopUnroll/unroll-maxcount.ll b/test/Transforms/LoopUnroll/unroll-maxcount.ll
new file mode 100644
index 000000000000..4cbd757aec22
--- /dev/null
+++ b/test/Transforms/LoopUnroll/unroll-maxcount.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -S -loop-unroll -unroll-allow-partial -unroll-max-count=1 | FileCheck %s
+; Checks that unroll MaxCount is honored.
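+; (With -unroll-max-count=1 the body may appear only once, which the
+; CHECK-NEXT chain below pins to a single phi/load/add/store iteration.)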
+;
+; CHECK-LABEL: @foo(
+; CHECK-LABEL: for.body:
+; CHECK-NEXT: phi
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: load
+; CHECK-NEXT: add
+; CHECK-NEXT: store
+; CHECK-NEXT: add
+; CHECK-NEXT: icmp
+; CHECK-NEXT: br
+define void @foo(i32* nocapture %a) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1024
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll b/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
index 8d139ac7e5af..46fd022af665 100644
--- a/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
+++ b/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
@@ -3,10 +3,11 @@
; CHECK: LV: Loop hints: force=enabled
; CHECK: LV: Loop hints: force=?
+; CHECK: LV: Loop hints: force=?
; No more loops in the module
; CHECK-NOT: LV: Loop hints: force=
-; CHECK: 2 loop-vectorize - Number of loops analyzed for vectorization
-; CHECK: 1 loop-vectorize - Number of loops vectorized
+; CHECK: 3 loop-vectorize - Number of loops analyzed for vectorization
+; CHECK: 2 loop-vectorize - Number of loops vectorized
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
@@ -71,3 +72,29 @@ for.end:
!3 = !{!3}
+;
+; This loop will be vectorized even though the trip count is below the
+; threshold, because no scalar (remainder) iterations are needed.
+;
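+; (16 iterations divide evenly by any power-of-two vectorization factor up to
+; 16, so the vector loop covers every iteration.)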
+define void @vectorized2(float* noalias nocapture %A, float* noalias nocapture readonly %B) {
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds float, float* %B, i64 %indvars.iv
+ %0 = load float, float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3
+ %arrayidx2 = getelementptr inbounds float, float* %A, i64 %indvars.iv
+ %1 = load float, float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3
+ %add = fadd fast float %0, %1
+ store float %add, float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 16
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4
+
+for.end:
+ ret void
+}
+
+!4 = !{!4}
+
diff --git a/test/Transforms/LoopVectorize/first-order-recurrence.ll b/test/Transforms/LoopVectorize/first-order-recurrence.ll
index 3d1c78038e32..0ff94c1450ac 100644
--- a/test/Transforms/LoopVectorize/first-order-recurrence.ll
+++ b/test/Transforms/LoopVectorize/first-order-recurrence.ll
@@ -2,6 +2,8 @@
; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -dce -instcombine -S | FileCheck %s --check-prefix=UNROLL
; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -S | FileCheck %s --check-prefix=UNROLL-NO-IC
; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-interleave=2 -S | FileCheck %s --check-prefix=UNROLL-NO-VF
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S | FileCheck %s --check-prefix=SINK-AFTER
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S | FileCheck %s --check-prefix=NO-SINK-AFTER
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
@@ -295,14 +297,14 @@ for.cond.cleanup3:
; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = load i32, i32* {{.*}}
; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = load i32, i32* {{.*}}
; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = load i32, i32* {{.*}}
-; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = load i32, i32* {{.*}}
-; UNROLL-NO-IC-NEXT: [[TMP32:%.*]] = load i32, i32* {{.*}}
-; UNROLL-NO-IC-NEXT: [[TMP33:%.*]] = load i32, i32* {{.*}}
-; UNROLL-NO-IC-NEXT: [[TMP34:%.*]] = load i32, i32* {{.*}}
; UNROLL-NO-IC-NEXT: [[TMP35:%.*]] = insertelement <4 x i32> undef, i32 [[TMP27]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP36:%.*]] = insertelement <4 x i32> [[TMP35]], i32 [[TMP28]], i32 1
; UNROLL-NO-IC-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP36]], i32 [[TMP29]], i32 2
; UNROLL-NO-IC-NEXT: [[TMP38:%.*]] = insertelement <4 x i32> [[TMP37]], i32 [[TMP30]], i32 3
+; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = load i32, i32* {{.*}}
+; UNROLL-NO-IC-NEXT: [[TMP32:%.*]] = load i32, i32* {{.*}}
+; UNROLL-NO-IC-NEXT: [[TMP33:%.*]] = load i32, i32* {{.*}}
+; UNROLL-NO-IC-NEXT: [[TMP34:%.*]] = load i32, i32* {{.*}}
; UNROLL-NO-IC-NEXT: [[TMP39:%.*]] = insertelement <4 x i32> undef, i32 [[TMP31]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP40:%.*]] = insertelement <4 x i32> [[TMP39]], i32 [[TMP32]], i32 1
; UNROLL-NO-IC-NEXT: [[TMP41:%.*]] = insertelement <4 x i32> [[TMP40]], i32 [[TMP33]], i32 2
@@ -396,3 +398,132 @@ for.body:
for.end:
ret i32 %val.phi
}
+
+; We vectorize this first order recurrence, with a set of insertelements for
+; each unrolled part. Make sure these insertelements are generated in-order,
+; because the shuffle of the first order recurrence will be added after the
+; insertelement of the last part UF - 1, assuming the latter appears after the
+; insertelements of all other parts.
+;
+; int PR33613(double *b, double j, int d) {
+; int a = 0;
+; for(int i = 0; i < 10240; i++, b+=25) {
+; double f = b[d]; // Scalarize to form insertelements
+; if (j * f)
+; a++;
+; j = f;
+; }
+; return a;
+; }
+;
+; UNROLL-NO-IC-LABEL: @PR33613(
+; UNROLL-NO-IC: vector.body:
+; UNROLL-NO-IC: [[VECTOR_RECUR:%.*]] = phi <4 x double>
+; UNROLL-NO-IC: shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> {{.*}}, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; UNROLL-NO-IC-NEXT: shufflevector <4 x double> {{.*}}, <4 x double> {{.*}}, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; UNROLL-NO-IC-NOT: insertelement <4 x double>
+; UNROLL-NO-IC: middle.block:
+;
+define i32 @PR33613(double* %b, double %j, i32 %d) {
+entry:
+ %idxprom = sext i32 %d to i64
+ br label %for.body
+
+for.cond.cleanup:
+ %a.1.lcssa = phi i32 [ %a.1, %for.body ]
+ ret i32 %a.1.lcssa
+
+for.body:
+ %b.addr.012 = phi double* [ %b, %entry ], [ %add.ptr, %for.body ]
+ %i.011 = phi i32 [ 0, %entry ], [ %inc1, %for.body ]
+ %a.010 = phi i32 [ 0, %entry ], [ %a.1, %for.body ]
+ %j.addr.09 = phi double [ %j, %entry ], [ %0, %for.body ]
+ %arrayidx = getelementptr inbounds double, double* %b.addr.012, i64 %idxprom
+ %0 = load double, double* %arrayidx, align 8
+ %mul = fmul double %j.addr.09, %0
+ %tobool = fcmp une double %mul, 0.000000e+00
+ %inc = zext i1 %tobool to i32
+ %a.1 = add nsw i32 %a.010, %inc
+ %inc1 = add nuw nsw i32 %i.011, 1
+ %add.ptr = getelementptr inbounds double, double* %b.addr.012, i64 25
+ %exitcond = icmp eq i32 %inc1, 10240
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; void sink_after(short *a, int n, int *b) {
+; for(int i = 0; i < n; i++)
+; b[i] = (a[i] * a[i + 1]);
+; }
+;
+; SINK-AFTER-LABEL: sink_after
+; Check that the sext sank after the load in the vector loop.
+; SINK-AFTER: vector.body
+; SINK-AFTER: %vector.recur = phi <4 x i16> [ %vector.recur.init, %vector.ph ], [ %wide.load, %vector.body ]
+; SINK-AFTER: %wide.load = load <4 x i16>
+; SINK-AFTER: %[[VSHUF:.+]] = shufflevector <4 x i16> %vector.recur, <4 x i16> %wide.load, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; SINK-AFTER: %[[VCONV:.+]] = sext <4 x i16> %[[VSHUF]] to <4 x i32>
+; SINK-AFTER: %[[VCONV3:.+]] = sext <4 x i16> %wide.load to <4 x i32>
+; SINK-AFTER: mul nsw <4 x i32> %[[VCONV3]], %[[VCONV]]
+; Check also that the sext sank after the load in the scalar loop.
+; SINK-AFTER: for.body
+; SINK-AFTER: %scalar.recur = phi i16 [ %scalar.recur.init, %scalar.ph ], [ %[[LOAD:.+]], %for.body ]
+; SINK-AFTER: %[[LOAD]] = load i16, i16* %arrayidx2
+; SINK-AFTER: %[[CONV:.+]] = sext i16 %scalar.recur to i32
+; SINK-AFTER: %[[CONV3:.+]] = sext i16 %[[LOAD]] to i32
+; SINK-AFTER: %mul = mul nsw i32 %[[CONV3]], %[[CONV]]
+;
+define void @sink_after(i16* %a, i32* %b, i64 %n) {
+entry:
+ %.pre = load i16, i16* %a
+ br label %for.body
+
+for.body:
+ %0 = phi i16 [ %.pre, %entry ], [ %1, %for.body ]
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %conv = sext i16 %0 to i32
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %arrayidx2 = getelementptr inbounds i16, i16* %a, i64 %indvars.iv.next
+ %1 = load i16, i16* %arrayidx2
+ %conv3 = sext i16 %1 to i32
+ %mul = mul nsw i32 %conv3, %conv
+ %arrayidx5 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+ store i32 %mul, i32* %arrayidx5
+ %exitcond = icmp eq i64 %indvars.iv.next, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+; void no_sink_after(short *a, int n, int *b) {
+; for(int i = 0; i < n; i++)
+; b[i] = ((a[i] + 2) * a[i + 1]);
+; }
+;
+; NO-SINK-AFTER-LABEL: no_sink_after
+; NO-SINK-AFTER-NOT: vector.ph:
+; NO-SINK-AFTER: }
+;
+define void @no_sink_after(i16* %a, i32* %b, i64 %n) {
+entry:
+ %.pre = load i16, i16* %a
+ br label %for.body
+
+for.body:
+ %0 = phi i16 [ %.pre, %entry ], [ %1, %for.body ]
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %conv = sext i16 %0 to i32
+ %add = add nsw i32 %conv, 2
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %arrayidx2 = getelementptr inbounds i16, i16* %a, i64 %indvars.iv.next
+ %1 = load i16, i16* %arrayidx2
+ %conv3 = sext i16 %1 to i32
+ %mul = mul nsw i32 %add, %conv3
+ %arrayidx5 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+ store i32 %mul, i32* %arrayidx5
+ %exitcond = icmp eq i64 %indvars.iv.next, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/if-conversion.ll b/test/Transforms/LoopVectorize/if-conversion.ll
index d3a16e2075d1..ad50e0b00fc6 100644
--- a/test/Transforms/LoopVectorize/if-conversion.ll
+++ b/test/Transforms/LoopVectorize/if-conversion.ll
@@ -18,7 +18,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
;CHECK-LABEL: @function0(
;CHECK: load <4 x i32>
-;CHECK: icmp sle <4 x i32>
+;CHECK: icmp sgt <4 x i32>
;CHECK: mul <4 x i32>
;CHECK: add <4 x i32>
;CHECK: select <4 x i1>
diff --git a/test/Transforms/LoopVectorize/minmax_reduction.ll b/test/Transforms/LoopVectorize/minmax_reduction.ll
index 19a401213fd5..fd5ad7c38b09 100644
--- a/test/Transforms/LoopVectorize/minmax_reduction.ll
+++ b/test/Transforms/LoopVectorize/minmax_reduction.ll
@@ -244,7 +244,7 @@ for.end:
; SGE -> SLT
; Turn this into a min reduction (select inputs are reversed).
; CHECK-LABEL: @sge_min_red(
-; CHECK: icmp sge <2 x i32>
+; CHECK: icmp slt <2 x i32>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: icmp slt <2 x i32>
@@ -273,7 +273,7 @@ for.end:
; SLE -> SGT
; Turn this into a max reduction (select inputs are reversed).
; CHECK-LABEL: @sle_min_red(
-; CHECK: icmp sle <2 x i32>
+; CHECK: icmp sgt <2 x i32>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: icmp sgt <2 x i32>
@@ -302,7 +302,7 @@ for.end:
; UGE -> ULT
; Turn this into a min reduction (select inputs are reversed).
; CHECK-LABEL: @uge_min_red(
-; CHECK: icmp uge <2 x i32>
+; CHECK: icmp ult <2 x i32>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: icmp ult <2 x i32>
@@ -331,7 +331,7 @@ for.end:
; ULE -> UGT
; Turn this into a max reduction (select inputs are reversed).
; CHECK-LABEL: @ule_min_red(
-; CHECK: icmp ule <2 x i32>
+; CHECK: icmp ugt <2 x i32>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: icmp ugt <2 x i32>
diff --git a/test/Transforms/LoopVectorize/small-loop.ll b/test/Transforms/LoopVectorize/small-loop.ll
index 9a5dc4aa1b74..378283b464b9 100644
--- a/test/Transforms/LoopVectorize/small-loop.ll
+++ b/test/Transforms/LoopVectorize/small-loop.ll
@@ -7,7 +7,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
@c = common global [2048 x i32] zeroinitializer, align 16
;CHECK-LABEL: @example1(
-;CHECK-NOT: load <4 x i32>
+;CHECK: load <4 x i32>
;CHECK: ret void
define void @example1() nounwind uwtable ssp {
br label %1
@@ -23,8 +23,8 @@ define void @example1() nounwind uwtable ssp {
store i32 %6, i32* %7, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
- %exitcond = icmp eq i32 %lftr.wideiv, 8 ; <----- A really small trip count.
- br i1 %exitcond, label %8, label %1
+ %exitcond = icmp eq i32 %lftr.wideiv, 8 ; <----- A really small trip count
+ br i1 %exitcond, label %8, label %1 ; w/o scalar iteration overhead.
; <label>:8 ; preds = %1
ret void
diff --git a/test/Transforms/LowerTypeTests/export-icall.ll b/test/Transforms/LowerTypeTests/export-icall.ll
index f53b63af496c..ad3604899306 100644
--- a/test/Transforms/LowerTypeTests/export-icall.ll
+++ b/test/Transforms/LowerTypeTests/export-icall.ll
@@ -60,6 +60,11 @@ declare !type !8 void @f(i32 %x)
; SUMMARY-NEXT: SizeM1BitWidth: 0
; SUMMARY-NEXT: WPDRes:
-; SUMMARY: CfiFunctionDefs: [ f, g, h ]
-; SUMMARY-NEXT: CfiFunctionDecls: [ external, external_weak ]
+; SUMMARY: CfiFunctionDefs:
+; SUMMARY-NEXT: - f
+; SUMMARY-NEXT: - g
+; SUMMARY-NEXT: - h
+; SUMMARY-NEXT: CfiFunctionDecls:
+; SUMMARY-NEXT: - external
+; SUMMARY-NEXT: - external_weak
; SUMMARY-NEXT: ...
diff --git a/test/Transforms/Reassociate/erase_inst_made_change.ll b/test/Transforms/Reassociate/erase_inst_made_change.ll
new file mode 100644
index 000000000000..febb9447e2b4
--- /dev/null
+++ b/test/Transforms/Reassociate/erase_inst_made_change.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -inline -reassociate -S | FileCheck %s
+
+; This test case exposed a bug in reassociate where EraseInst's
+; removal of a dead call wasn't recognized as changing the IR.
+; So when runOnFunction propagated its "made changes" status upwards
+; to the CallGraphSCCPass, it signalled that no changes had been
+; made, and the CallGraphSCCPass assumed that the old CallGraph,
+; as known by that pass manager, was still up-to-date.
+;
+; This was detected as an assert when trying to remove the
+; no longer used function 'bar' (due to incorrect reference
+; count in the CallGraph).
+
+define void @foo() {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret void
+entry:
+ call void @bar()
+ ret void
+}
+
+define internal void @bar() noinline nounwind readnone {
+; CHECK-NOT: bar
+entry:
+ ret void
+}
+
+
diff --git a/test/Transforms/SLPVectorizer/X86/limit.ll b/test/Transforms/SLPVectorizer/X86/limit.ll
new file mode 100644
index 000000000000..41db490a754f
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/limit.ll
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s --instcombine -slp-vectorizer -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@b = common global [4 x i32] zeroinitializer, align 16
+@c = common global [4 x i32] zeroinitializer, align 16
+@d = common global [4 x i32] zeroinitializer, align 16
+@e = common global [4 x i32] zeroinitializer, align 16
+@a = common global [4 x i32] zeroinitializer, align 16
+@fb = common global [4 x float] zeroinitializer, align 16
+@fc = common global [4 x float] zeroinitializer, align 16
+@fa = common global [4 x float] zeroinitializer, align 16
+@fd = common global [4 x float] zeroinitializer, align 16
+
+define void @addsub() {
+; CHECK-LABEL: @addsub(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[BB1:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([4 x i32]* @b to <4 x i32>*), align 16
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([4 x i32]* @c to <4 x i32>*), align 16
+; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([4 x i32]* @d to <4 x i32>*), align 16
+; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([4 x i32]* @e to <4 x i32>*), align 16
+; CHECK-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP2]], [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = sub nsw <4 x i32> [[TMP2]], [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT: store <4 x i32> [[TMP8]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 16
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 0), align 4
+ %1 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 0), align 4
+ %add = add nsw i32 %0, %1
+ br label %bb1
+bb1: ; preds = %entry
+ %2 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 0), align 4
+ %3 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 0), align 4
+ %add1 = add nsw i32 %2, %3
+ %add2 = add nsw i32 %add, %add1
+ store i32 %add2, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 0), align 4
+ %4 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 1), align 4
+ %5 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 1), align 4
+ %add3 = add nsw i32 %4, %5
+ %6 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 1), align 4
+ %7 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 1), align 4
+ %add4 = add nsw i32 %6, %7
+ %sub = sub nsw i32 %add3, %add4
+ store i32 %sub, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 1), align 4
+ %8 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 2), align 4
+ %9 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 2), align 4
+ %add5 = add nsw i32 %8, %9
+ %10 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 2), align 4
+ %11 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 2), align 4
+ %add6 = add nsw i32 %10, %11
+ %add7 = add nsw i32 %add5, %add6
+ store i32 %add7, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 2), align 4
+ %12 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 3), align 4
+ %13 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 3), align 4
+ %add8 = add nsw i32 %12, %13
+ %14 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 3), align 4
+ %15 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 3), align 4
+ %add9 = add nsw i32 %14, %15
+ %sub10 = sub nsw i32 %add8, %add9
+ store i32 %sub10, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 3), align 4
+ ret void
+}
+
diff --git a/test/Transforms/SROA/alloca-address-space.ll b/test/Transforms/SROA/alloca-address-space.ll
index 6b3b3abbff5f..9d9f78f07ca1 100644
--- a/test/Transforms/SROA/alloca-address-space.ll
+++ b/test/Transforms/SROA/alloca-address-space.ll
@@ -1,5 +1,5 @@
; RUN: opt < %s -sroa -S | FileCheck %s
-target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64-A2"
+target datalayout = "e-p:64:64:64-p1:16:16:16-p2:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64-A2"
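+; (The added p2:32:32 entry defines 32-bit pointers for address space 2, the
+; alloca address space ("A2") used by the new test at the end of this file.)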
declare void @llvm.memcpy.p2i8.p2i8.i32(i8 addrspace(2)* nocapture, i8 addrspace(2)* nocapture readonly, i32, i32, i1)
declare void @llvm.memcpy.p1i8.p2i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(2)* nocapture readonly, i32, i32, i1)
@@ -82,3 +82,32 @@ define void @pr27557() {
store i32 addrspace(3)* @l, i32 addrspace(3)* addrspace(2)* %3, align 8
ret void
}
+
+; Test load from and store to non-zero address space.
+define void @test_load_store_diff_addr_space([2 x float] addrspace(1)* %complex1, [2 x float] addrspace(1)* %complex2) {
+; CHECK-LABEL: @test_load_store_diff_addr_space
+; CHECK-NOT: alloca
+; CHECK: load i32, i32 addrspace(1)*
+; CHECK: load i32, i32 addrspace(1)*
+; CHECK: store i32 %{{.*}}, i32 addrspace(1)*
+; CHECK: store i32 %{{.*}}, i32 addrspace(1)*
+ %a0 = alloca [2 x i64], align 8, addrspace(2)
+ %a = getelementptr [2 x i64], [2 x i64] addrspace(2)* %a0, i32 0, i32 0
+ %a.cast = bitcast i64 addrspace(2)* %a to [2 x float] addrspace(2)*
+ %a.gep1 = getelementptr [2 x float], [2 x float] addrspace(2)* %a.cast, i32 0, i32 0
+ %a.gep2 = getelementptr [2 x float], [2 x float] addrspace(2)* %a.cast, i32 0, i32 1
+ %complex1.gep = getelementptr [2 x float], [2 x float] addrspace(1)* %complex1, i32 0, i32 0
+ %p1 = bitcast float addrspace(1)* %complex1.gep to i64 addrspace(1)*
+ %v1 = load i64, i64 addrspace(1)* %p1
+ store i64 %v1, i64 addrspace(2)* %a
+ %f1 = load float, float addrspace(2)* %a.gep1
+ %f2 = load float, float addrspace(2)* %a.gep2
+ %sum = fadd float %f1, %f2
+ store float %sum, float addrspace(2)* %a.gep1
+ store float %sum, float addrspace(2)* %a.gep2
+ %v2 = load i64, i64 addrspace(2)* %a
+ %complex2.gep = getelementptr [2 x float], [2 x float] addrspace(1)* %complex2, i32 0, i32 0
+ %p2 = bitcast float addrspace(1)* %complex2.gep to i64 addrspace(1)*
+ store i64 %v2, i64 addrspace(1)* %p2
+ ret void
+}
diff --git a/test/Transforms/SROA/preserve-nonnull.ll b/test/Transforms/SROA/preserve-nonnull.ll
index fc5ce6a445fa..a29da6dc2c37 100644
--- a/test/Transforms/SROA/preserve-nonnull.ll
+++ b/test/Transforms/SROA/preserve-nonnull.ll
@@ -3,24 +3,90 @@
; Make sure that SROA doesn't lose nonnull metadata
; on loads from allocas that get optimized out.
-; CHECK-LABEL: define float* @yummy_nonnull
-; CHECK: [[RETURN:%(.*)]] = load float*, float** %arg, align 8
-; CHECK: [[ASSUME:%(.*)]] = icmp ne float* {{.*}}[[RETURN]], null
-; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
-; CHECK: ret float* {{.*}}[[RETURN]]
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
-define float* @yummy_nonnull(float** %arg) {
-entry-block:
- %buf = alloca float*
+; Check that we do basic propagation of nonnull when rewriting.
+define i8* @propagate_nonnull(i32* %v) {
+; CHECK-LABEL: define i8* @propagate_nonnull(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %[[A:.*]] = alloca i8*
+; CHECK-NEXT: %[[V_CAST:.*]] = bitcast i32* %v to i8*
+; CHECK-NEXT: store i8* %[[V_CAST]], i8** %[[A]]
+; CHECK-NEXT: %[[LOAD:.*]] = load volatile i8*, i8** %[[A]], !nonnull !0
+; CHECK-NEXT: ret i8* %[[LOAD]]
+entry:
+ %a = alloca [2 x i8*]
+ %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0
+ %a.gep1 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 1
+ %a.gep0.cast = bitcast i8** %a.gep0 to i32**
+ %a.gep1.cast = bitcast i8** %a.gep1 to i32**
+ store i32* %v, i32** %a.gep1.cast
+ store i32* null, i32** %a.gep0.cast
+ %load = load volatile i8*, i8** %a.gep1, !nonnull !0
+ ret i8* %load
+}
- %_arg_i8 = bitcast float** %arg to i8*
- %_buf_i8 = bitcast float** %buf to i8*
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* %_buf_i8, i8* %_arg_i8, i64 8, i32 8, i1 false)
+define float* @turn_nonnull_into_assume(float** %arg) {
+; CHECK-LABEL: define float* @turn_nonnull_into_assume(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %[[RETURN:.*]] = load float*, float** %arg, align 8
+; CHECK-NEXT: %[[ASSUME:.*]] = icmp ne float* %[[RETURN]], null
+; CHECK-NEXT: call void @llvm.assume(i1 %[[ASSUME]])
+; CHECK-NEXT: ret float* %[[RETURN]]
+entry:
+ %buf = alloca float*
+ %_arg_i8 = bitcast float** %arg to i8*
+ %_buf_i8 = bitcast float** %buf to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %_buf_i8, i8* %_arg_i8, i64 8, i32 8, i1 false)
+ %ret = load float*, float** %buf, align 8, !nonnull !0
+ ret float* %ret
+}
- %ret = load float*, float** %buf, align 8, !nonnull !0
- ret float* %ret
+; Make sure we properly handle the !nonnull attribute when we convert
+; a pointer load to an integer load.
+; FIXME: While this doesn't do anything actively harmful today, it really
+; should propagate the !nonnull metadata to range metadata. The irony is, it
+; *does* initially, but then we lose that !range metadata before we finish
+; SROA.
+define i8* @propagate_nonnull_to_int() {
+; CHECK-LABEL: define i8* @propagate_nonnull_to_int(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %[[A:.*]] = alloca i64
+; CHECK-NEXT: store i64 42, i64* %[[A]]
+; CHECK-NEXT: %[[LOAD:.*]] = load volatile i64, i64* %[[A]]
+; CHECK-NEXT: %[[CAST:.*]] = inttoptr i64 %[[LOAD]] to i8*
+; CHECK-NEXT: ret i8* %[[CAST]]
+entry:
+ %a = alloca [2 x i8*]
+ %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0
+ %a.gep1 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 1
+ %a.gep0.cast = bitcast i8** %a.gep0 to i64*
+ %a.gep1.cast = bitcast i8** %a.gep1 to i64*
+ store i64 42, i64* %a.gep1.cast
+ store i64 0, i64* %a.gep0.cast
+ %load = load volatile i8*, i8** %a.gep1, !nonnull !0
+ ret i8* %load
}
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
+; Make sure we properly handle the !nonnull attribute when we convert
+; a pointer load to an integer load and immediately promote it to an SSA
+; register. This can fail in interesting ways due to the rewrite iteration of
+; SROA, resulting in PR32902.
+define i8* @propagate_nonnull_to_int_and_promote() {
+; CHECK-LABEL: define i8* @propagate_nonnull_to_int_and_promote(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %[[PROMOTED_VALUE:.*]] = inttoptr i64 42 to i8*
+; CHECK-NEXT: ret i8* %[[PROMOTED_VALUE]]
+entry:
+ %a = alloca [2 x i8*], align 8
+ %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0
+ %a.gep1 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 1
+ %a.gep0.cast = bitcast i8** %a.gep0 to i64*
+ %a.gep1.cast = bitcast i8** %a.gep1 to i64*
+ store i64 42, i64* %a.gep1.cast
+ store i64 0, i64* %a.gep0.cast
+ %load = load i8*, i8** %a.gep1, align 8, !nonnull !0
+ ret i8* %load
+}
!0 = !{}
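For context on the metadata these tests guard: clang attaches !nonnull to loads of C++ references, and SROA must not drop that fact when it rewrites or promotes the backing alloca. A hedged sketch of source code that produces such a load (assuming typical clang IRGen; the function name is illustrative):

    // A load through a reference is emitted as 'load ..., !nonnull';
    // after SROA the fact must survive as metadata, as a range fact on
    // an integer load, or as an llvm.assume, per the tests above.
    float *copy_slot(float *&slot) {
      float *p = slot;  // annotated !nonnull by the frontend
      return p;
    }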
diff --git a/test/Transforms/SimplifyCFG/Hexagon/lit.local.cfg b/test/Transforms/SimplifyCFG/Hexagon/lit.local.cfg
new file mode 100644
index 000000000000..a1f0ecbf6792
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/Hexagon/lit.local.cfg
@@ -0,0 +1,5 @@
+config.suffixes = ['.ll']
+
+targets = set(config.root.targets_to_build.split())
+if 'Hexagon' not in targets:
+ config.unsupported = True
diff --git a/test/Transforms/SimplifyCFG/Hexagon/switch-to-lookup-table.ll b/test/Transforms/SimplifyCFG/Hexagon/switch-to-lookup-table.ll
new file mode 100644
index 000000000000..4bc1251572aa
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/Hexagon/switch-to-lookup-table.ll
@@ -0,0 +1,62 @@
+; RUN: opt -S -O2 < %s | FileCheck %s -check-prefix=ENABLE
+; RUN: opt -S -hexagon-emit-lookup-tables=true -O2 < %s | FileCheck %s -check-prefix=ENABLE
+; RUN: opt -S -hexagon-emit-lookup-tables=false -O2 < %s | FileCheck %s -check-prefix=DISABLE
+
+
+; ENABLE: @{{.*}} = private unnamed_addr constant [6 x i32] [i32 9, i32 20, i32 14, i32 22, i32 12, i32 5]
+; DISABLE-NOT: @{{.*}} = private unnamed_addr constant [6 x i32] [i32 9, i32 20, i32 14, i32 22, i32 12, i32 5]
+; DISABLE: = phi i32 [ 19, %{{.*}} ], [ 5, %{{.*}} ], [ 12, %{{.*}} ], [ 22, %{{.*}} ], [ 14, %{{.*}} ], [ 20, %{{.*}} ], [ 9, %{{.*}} ]
+
+target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+target triple = "hexagon-unknown--elf"
+
+; Function Attrs: noinline nounwind
+define i32 @foo(i32 %x) #0 section ".tcm_text" {
+entry:
+ %retval = alloca i32, align 4
+ %x.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ %0 = load i32, i32* %x.addr, align 4
+ switch i32 %0, label %sw.default [
+ i32 0, label %sw.bb
+ i32 1, label %sw.bb1
+ i32 2, label %sw.bb2
+ i32 3, label %sw.bb3
+ i32 4, label %sw.bb4
+ i32 5, label %sw.bb5
+ ]
+
+sw.bb: ; preds = %entry
+ store i32 9, i32* %retval, align 4
+ br label %return
+
+sw.bb1: ; preds = %entry
+ store i32 20, i32* %retval, align 4
+ br label %return
+
+sw.bb2: ; preds = %entry
+ store i32 14, i32* %retval, align 4
+ br label %return
+
+sw.bb3: ; preds = %entry
+ store i32 22, i32* %retval, align 4
+ br label %return
+
+sw.bb4: ; preds = %entry
+ store i32 12, i32* %retval, align 4
+ br label %return
+
+sw.bb5: ; preds = %entry
+ store i32 5, i32* %retval, align 4
+ br label %return
+
+sw.default: ; preds = %entry
+ store i32 19, i32* %retval, align 4
+ br label %return
+
+return: ; preds = %sw.default, %sw.bb5, %sw.bb4, %sw.bb3, %sw.bb2, %sw.bb1, %sw.bb
+ %1 = load i32, i32* %retval, align 4
+ ret i32 %1
+}
+
+attributes #0 = { noinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="-hvx-double,-long-calls" "unsafe-fp-math"="false" "use-soft-float"="false" }
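The IR above is the unoptimized form of a dense switch; a reconstruction of the C source for @foo, with return values taken from the stores in each block (approximate, for illustration only):

    // With -hexagon-emit-lookup-tables=true, SimplifyCFG replaces this
    // switch with one load from the constant table the ENABLE line checks.
    int foo(int x) {
      switch (x) {
      case 0: return 9;
      case 1: return 20;
      case 2: return 14;
      case 3: return 22;
      case 4: return 12;
      case 5: return 5;
      default: return 19;
      }
    }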
diff --git a/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll b/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll
index ae6ff6d10bcf..e335c4078651 100644
--- a/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll
+++ b/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll
@@ -9,7 +9,7 @@ target triple = "x86_64-apple-darwin12.0.0"
; CHECK: entry:
; CHECK-NEXT: sub i3 %arg, -4
; CHECK-NEXT: zext i3 %switch.tableidx to i4
-; CHECK-NEXT: getelementptr inbounds [8 x i64], [8 x i64]* @switch.table, i32 0, i4 %switch.tableidx.zext
+; CHECK-NEXT: getelementptr inbounds [8 x i64], [8 x i64]* @switch.table.test, i32 0, i4 %switch.tableidx.zext
; CHECK-NEXT: load i64, i64* %switch.gep
; CHECK-NEXT: add i64
; CHECK-NEXT: ret i64
diff --git a/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll b/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll
index 734312bc7285..bd4e03cf9182 100644
--- a/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll
+++ b/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll
@@ -8,7 +8,7 @@ target triple = "x86_64-apple-darwin12.0.0"
; CHECK: entry:
; CHECK-NEXT: sub i2 %0, -2
; CHECK-NEXT: zext i2 %switch.tableidx to i3
-; CHECK-NEXT: getelementptr inbounds [4 x i64], [4 x i64]* @switch.table, i32 0, i3 %switch.tableidx.zext
+; CHECK-NEXT: getelementptr inbounds [4 x i64], [4 x i64]* @switch.table._TFO6reduce1E5toRawfS0_FT_Si, i32 0, i3 %switch.tableidx.zext
; CHECK-NEXT: load i64, i64* %switch.gep
; CHECK-NEXT: ret i64 %switch.load
define i64 @_TFO6reduce1E5toRawfS0_FT_Si(i2) {
diff --git a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
index 4b9227b029ec..656a276969f3 100644
--- a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
+++ b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
@@ -4,25 +4,25 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-unknown-linux-gnu"
; The table for @f
-; CHECK: @switch.table = private unnamed_addr constant [7 x i32] [i32 55, i32 123, i32 0, i32 -1, i32 27, i32 62, i32 1]
+; CHECK: @switch.table.f = private unnamed_addr constant [7 x i32] [i32 55, i32 123, i32 0, i32 -1, i32 27, i32 62, i32 1]
; The float table for @h
-; CHECK: @switch.table.1 = private unnamed_addr constant [4 x float] [float 0x40091EB860000000, float 0x3FF3BE76C0000000, float 0x4012449BA0000000, float 0x4001AE1480000000]
+; CHECK: @switch.table.h = private unnamed_addr constant [4 x float] [float 0x40091EB860000000, float 0x3FF3BE76C0000000, float 0x4012449BA0000000, float 0x4001AE1480000000]
; The table for @foostring
-; CHECK: @switch.table.2 = private unnamed_addr constant [4 x i8*] [i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str1, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str2, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str3, i64 0, i64 0)]
+; CHECK: @switch.table.foostring = private unnamed_addr constant [4 x i8*] [i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str1, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str2, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str3, i64 0, i64 0)]
; The table for @earlyreturncrash
-; CHECK: @switch.table.3 = private unnamed_addr constant [4 x i32] [i32 42, i32 9, i32 88, i32 5]
+; CHECK: @switch.table.earlyreturncrash = private unnamed_addr constant [4 x i32] [i32 42, i32 9, i32 88, i32 5]
-; The table for @large.
-; CHECK: @switch.table.4 = private unnamed_addr constant [199 x i32] [i32 1, i32 4, i32 9,
+; The table for @large
+; CHECK: @switch.table.large = private unnamed_addr constant [199 x i32] [i32 1, i32 4, i32 9,
; The table for @cprop
-; CHECK: @switch.table.5 = private unnamed_addr constant [7 x i32] [i32 5, i32 42, i32 126, i32 -452, i32 128, i32 6, i32 7]
+; CHECK: @switch.table.cprop = private unnamed_addr constant [7 x i32] [i32 5, i32 42, i32 126, i32 -452, i32 128, i32 6, i32 7]
; The table for @unreachable_case
-; CHECK: @switch.table.6 = private unnamed_addr constant [9 x i32] [i32 0, i32 0, i32 0, i32 2, i32 -1, i32 1, i32 1, i32 1, i32 1]
+; CHECK: @switch.table.unreachable_case = private unnamed_addr constant [9 x i32] [i32 0, i32 0, i32 0, i32 2, i32 -1, i32 1, i32 1, i32 1, i32 1]
; A simple int-to-int selection switch.
; It is dense enough to be replaced by table lookup.
@@ -58,7 +58,7 @@ return:
; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 7
; CHECK-NEXT: br i1 %0, label %switch.lookup, label %return
; CHECK: switch.lookup:
-; CHECK-NEXT: %switch.gep = getelementptr inbounds [7 x i32], [7 x i32]* @switch.table, i32 0, i32 %switch.tableidx
+; CHECK-NEXT: %switch.gep = getelementptr inbounds [7 x i32], [7 x i32]* @switch.table.f, i32 0, i32 %switch.tableidx
; CHECK-NEXT: %switch.load = load i32, i32* %switch.gep
; CHECK-NEXT: ret i32 %switch.load
; CHECK: return:
@@ -97,7 +97,7 @@ sw.epilog:
; CHECK-NEXT: %switch.shiftamt = mul i32 %switch.tableidx, 8
; CHECK-NEXT: %switch.downshift = lshr i32 89655594, %switch.shiftamt
; CHECK-NEXT: %switch.masked = trunc i32 %switch.downshift to i8
-; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x float], [4 x float]* @switch.table.1, i32 0, i32 %switch.tableidx
+; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x float], [4 x float]* @switch.table.h, i32 0, i32 %switch.tableidx
; CHECK-NEXT: %switch.load = load float, float* %switch.gep
; CHECK-NEXT: br label %sw.epilog
; CHECK: sw.epilog:
@@ -144,7 +144,7 @@ return:
; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 4
; CHECK-NEXT: br i1 %0, label %switch.lookup, label %return
; CHECK: switch.lookup:
-; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i8*], [4 x i8*]* @switch.table.2, i32 0, i32 %switch.tableidx
+; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i8*], [4 x i8*]* @switch.table.foostring, i32 0, i32 %switch.tableidx
; CHECK-NEXT: %switch.load = load i8*, i8** %switch.gep
; CHECK-NEXT: ret i8* %switch.load
}
@@ -173,7 +173,7 @@ sw.epilog:
; CHECK-LABEL: @earlyreturncrash(
; CHECK: switch.lookup:
-; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i32], [4 x i32]* @switch.table.3, i32 0, i32 %switch.tableidx
+; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i32], [4 x i32]* @switch.table.earlyreturncrash, i32 0, i32 %switch.tableidx
; CHECK-NEXT: %switch.load = load i32, i32* %switch.gep
; CHECK-NEXT: ret i32 %switch.load
; CHECK: sw.epilog:
@@ -749,7 +749,7 @@ return:
; CHECK-LABEL: @cprop(
; CHECK: switch.lookup:
-; CHECK: %switch.gep = getelementptr inbounds [7 x i32], [7 x i32]* @switch.table.5, i32 0, i32 %switch.tableidx
+; CHECK: %switch.gep = getelementptr inbounds [7 x i32], [7 x i32]* @switch.table.cprop, i32 0, i32 %switch.tableidx
}
define i32 @unreachable_case(i32 %x) {
@@ -778,7 +778,7 @@ return:
; CHECK-LABEL: @unreachable_case(
; CHECK: switch.lookup:
-; CHECK: getelementptr inbounds [9 x i32], [9 x i32]* @switch.table.6, i32 0, i32 %switch.tableidx
+; CHECK: getelementptr inbounds [9 x i32], [9 x i32]* @switch.table.unreachable_case, i32 0, i32 %switch.tableidx
}
define i32 @unreachable_default(i32 %x) {
@@ -805,7 +805,7 @@ return:
; CHECK-NEXT: %switch.tableidx = sub i32 %x, 0
; CHECK-NOT: icmp
; CHECK-NOT: br i1
-; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i32], [4 x i32]* @switch.table.7, i32 0, i32 %switch.tableidx
+; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i32], [4 x i32]* @switch.table.unreachable_default, i32 0, i32 %switch.tableidx
; CHECK-NEXT: %switch.load = load i32, i32* %switch.gep
; CHECK-NEXT: ret i32 %switch.load
}
@@ -919,7 +919,7 @@ define i32 @threecases(i32 %c) {
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[SWITCH_TABLEIDX]], 3
; CHECK-NEXT: br i1 [[TMP0]], label %switch.lookup, label %return
; CHECK: switch.lookup:
-; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* @switch.table.10, i32 0, i32 [[SWITCH_TABLEIDX]]
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* @switch.table.threecases, i32 0, i32 [[SWITCH_TABLEIDX]]
; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]]
; CHECK-NEXT: ret i32 [[SWITCH_LOAD]]
; CHECK: return:
diff --git a/test/tools/llvm-cvtres/symbols.test b/test/tools/llvm-cvtres/symbols.test
new file mode 100644
index 000000000000..2ca3a193ac40
--- /dev/null
+++ b/test/tools/llvm-cvtres/symbols.test
@@ -0,0 +1,33 @@
+// Check COFF emission of cvtres
+// The input was generated with the following command, using the original Windows
+// rc.exe:
+// > rc /fo test_resource.res /nologo test_resource.rc
+// The object file we are comparing against was generated with this command using
+// the original Windows cvtres.exe.
+// > cvtres /machine:X86 /readonly /nologo /out:test_resource.obj.coff \
+// test_resource.res
+
+RUN: llvm-cvtres /verbose /out:%t %p/Inputs/test_resource.res
+RUN: llvm-readobj -symbols %t | FileCheck %s
+
+CHECK: Name: $R000000
+CHECK-NEXT: Value: 0
+CHECK-NEXT: Section: .rsrc$02
+CHECK: Name: $R000018
+CHECK-NEXT: Value: 24
+CHECK-NEXT: Section: .rsrc$02
+CHECK: Name: $R000340
+CHECK-NEXT: Value: 832
+CHECK-NEXT: Section: .rsrc$02
+CHECK: Name: $R000668
+CHECK-NEXT: Value: 1640
+CHECK-NEXT: Section: .rsrc$02
+CHECK: Name: $R000698
+CHECK-NEXT: Value: 1688
+CHECK-NEXT: Section: .rsrc$02
+CHECK: Name: $R000720
+CHECK-NEXT: Value: 1824
+CHECK-NEXT: Section: .rsrc$02
+CHECK: Name: $R000750
+CHECK-NEXT: Value: 1872
+CHECK-NEXT: Section: .rsrc$02
diff --git a/test/tools/llvm-dwarfdump/X86/apple_names_verify_buckets.s b/test/tools/llvm-dwarfdump/X86/apple_names_verify_buckets.s
deleted file mode 100644
index 7b61a946281b..000000000000
--- a/test/tools/llvm-dwarfdump/X86/apple_names_verify_buckets.s
+++ /dev/null
@@ -1,192 +0,0 @@
-# RUN: llvm-mc %s -filetype obj -triple x86_64-apple-darwin -o - \
-# RUN: | not llvm-dwarfdump -verify - \
-# RUN: | FileCheck %s
-
-# CHECK: Verifying .apple_names...
-# CHECK-NEXT: error: Bucket[0] has invalid hash index: [-2]
-
-# This test is meant to verify that the -verify option
-# in llvm-dwarfdump, correctly identifies
-# an invalid hash index for bucket[0] in the .apple_names section.
-
- .section __TEXT,__text,regular,pure_instructions
- .file 1 "basic.c"
- .comm _i,4,2 ## @i
- .section __DWARF,__debug_str,regular,debug
-Linfo_string:
- .asciz "basic.c" ## string offset=42
- .asciz "i" ## string offset=84
- .asciz "int" ## string offset=86
- .section __DWARF,__debug_loc,regular,debug
-Lsection_debug_loc:
- .section __DWARF,__debug_abbrev,regular,debug
-Lsection_abbrev:
- .byte 1 ## Abbreviation Code
- .byte 17 ## DW_TAG_compile_unit
- .byte 1 ## DW_CHILDREN_yes
- .byte 37 ## DW_AT_producer
- .byte 14 ## DW_FORM_strp
- .byte 19 ## DW_AT_language
- .byte 5 ## DW_FORM_data2
- .byte 3 ## DW_AT_name
- .byte 14 ## DW_FORM_strp
- .byte 16 ## DW_AT_stmt_list
- .byte 23 ## DW_FORM_sec_offset
- .byte 27 ## DW_AT_comp_dir
- .byte 14 ## DW_FORM_strp
- .byte 0 ## EOM(1)
- .byte 0 ## EOM(2)
- .byte 2 ## Abbreviation Code
- .byte 52 ## DW_TAG_variable
- .byte 0 ## DW_CHILDREN_no
- .byte 3 ## DW_AT_name
- .byte 14 ## DW_FORM_strp
- .byte 73 ## DW_AT_type
- .byte 19 ## DW_FORM_ref4
- .byte 63 ## DW_AT_external
- .byte 25 ## DW_FORM_flag_present
- .byte 58 ## DW_AT_decl_file
- .byte 11 ## DW_FORM_data1
- .byte 59 ## DW_AT_decl_line
- .byte 11 ## DW_FORM_data1
- .byte 2 ## DW_AT_location
- .byte 24 ## DW_FORM_exprloc
- .byte 0 ## EOM(1)
- .byte 0 ## EOM(2)
- .byte 3 ## Abbreviation Code
- .byte 36 ## DW_TAG_base_type
- .byte 0 ## DW_CHILDREN_no
- .byte 3 ## DW_AT_name
- .byte 14 ## DW_FORM_strp
- .byte 62 ## DW_AT_encoding
- .byte 11 ## DW_FORM_data1
- .byte 11 ## DW_AT_byte_size
- .byte 11 ## DW_FORM_data1
- .byte 0 ## EOM(1)
- .byte 0 ## EOM(2)
- .byte 0 ## EOM(3)
- .section __DWARF,__debug_info,regular,debug
-Lsection_info:
-Lcu_begin0:
- .long 55 ## Length of Unit
- .short 4 ## DWARF version number
-Lset0 = Lsection_abbrev-Lsection_abbrev ## Offset Into Abbrev. Section
- .long Lset0
- .byte 8 ## Address Size (in bytes)
- .byte 1 ## Abbrev [1] 0xb:0x30 DW_TAG_compile_unit
- .long 0 ## DW_AT_producer
- .short 12 ## DW_AT_language
- .long 42 ## DW_AT_name
-Lset1 = Lline_table_start0-Lsection_line ## DW_AT_stmt_list
- .long Lset1
- .long 50 ## DW_AT_comp_dir
- .byte 2 ## Abbrev [2] 0x1e:0x15 DW_TAG_variable
- .long 84 ## DW_AT_name
- .long 51 ## DW_AT_type
- ## DW_AT_external
- .byte 1 ## DW_AT_decl_file
- .byte 1 ## DW_AT_decl_line
- .byte 9 ## DW_AT_location
- .byte 3
- .quad _i
- .byte 3 ## Abbrev [3] 0x33:0x7 DW_TAG_base_type
- .long 86 ## DW_AT_name
- .byte 5 ## DW_AT_encoding
- .byte 4 ## DW_AT_byte_size
- .byte 0 ## End Of Children Mark
- .section __DWARF,__debug_ranges,regular,debug
-Ldebug_range:
- .section __DWARF,__debug_macinfo,regular,debug
-Ldebug_macinfo:
-Lcu_macro_begin0:
- .byte 0 ## End Of Macro List Mark
- .section __DWARF,__apple_names,regular,debug
-Lnames_begin:
- .long 1212240712 ## Header Magic
- .short 1 ## Header Version
- .short 0 ## Header Hash Function
- .long 1 ## Header Bucket Count
- .long 1 ## Header Hash Count
- .long 12 ## Header Data Length
- .long 0 ## HeaderData Die Offset Base
- .long 1 ## HeaderData Atom Count
- .short 1 ## DW_ATOM_die_offset
- .short 6 ## DW_FORM_data4
- .long -2 ## Bucket 0 -- error: Bucket[0] has invalid hash index: [-2]
- .long 177678 ## Hash in Bucket 0
- .long LNames0-Lnames_begin ## Offset in Bucket 0
-LNames0:
- .long 84 ## i
- .long 1 ## Num DIEs
- .long 30
- .long 0
- .section __DWARF,__apple_objc,regular,debug
-Lobjc_begin:
- .long 1212240712 ## Header Magic
- .short 1 ## Header Version
- .short 0 ## Header Hash Function
- .long 1 ## Header Bucket Count
- .long 0 ## Header Hash Count
- .long 12 ## Header Data Length
- .long 0 ## HeaderData Die Offset Base
- .long 1 ## HeaderData Atom Count
- .short 1 ## DW_ATOM_die_offset
- .short 6 ## DW_FORM_data4
- .long -1 ## Bucket 0
- .section __DWARF,__apple_namespac,regular,debug
-Lnamespac_begin:
- .long 1212240712 ## Header Magic
- .short 1 ## Header Version
- .short 0 ## Header Hash Function
- .long 1 ## Header Bucket Count
- .long 0 ## Header Hash Count
- .long 12 ## Header Data Length
- .long 0 ## HeaderData Die Offset Base
- .long 1 ## HeaderData Atom Count
- .short 1 ## DW_ATOM_die_offset
- .short 6 ## DW_FORM_data4
- .long -1 ## Bucket 0
- .section __DWARF,__apple_types,regular,debug
-Ltypes_begin:
- .long 1212240712 ## Header Magic
- .short 1 ## Header Version
- .short 0 ## Header Hash Function
- .long 1 ## Header Bucket Count
- .long 1 ## Header Hash Count
- .long 20 ## Header Data Length
- .long 0 ## HeaderData Die Offset Base
- .long 3 ## HeaderData Atom Count
- .short 1 ## DW_ATOM_die_offset
- .short 6 ## DW_FORM_data4
- .short 3 ## DW_ATOM_die_tag
- .short 5 ## DW_FORM_data2
- .short 4 ## DW_ATOM_type_flags
- .short 11 ## DW_FORM_data1
- .long 0 ## Bucket 0
- .long 193495088 ## Hash in Bucket 0
- .long Ltypes0-Ltypes_begin ## Offset in Bucket 0
-Ltypes0:
- .long 86 ## int
- .long 1 ## Num DIEs
- .long 51
- .short 36
- .byte 0
- .long 0
- .section __DWARF,__apple_exttypes,regular,debug
-Lexttypes_begin:
- .long 1212240712 ## Header Magic
- .short 1 ## Header Version
- .short 0 ## Header Hash Function
- .long 1 ## Header Bucket Count
- .long 0 ## Header Hash Count
- .long 12 ## Header Data Length
- .long 0 ## HeaderData Die Offset Base
- .long 1 ## HeaderData Atom Count
- .short 7 ## DW_ATOM_ext_types
- .short 6 ## DW_FORM_data4
- .long -1 ## Bucket 0
-
-.subsections_via_symbols
- .section __DWARF,__debug_line,regular,debug
-Lsection_line:
-Lline_table_start0:
diff --git a/test/tools/llvm-dwarfdump/X86/apple_names_verify_data.s b/test/tools/llvm-dwarfdump/X86/apple_names_verify_data.s
new file mode 100644
index 000000000000..6d548543e4b9
--- /dev/null
+++ b/test/tools/llvm-dwarfdump/X86/apple_names_verify_data.s
@@ -0,0 +1,64 @@
+# RUN: llvm-mc %s -filetype obj -triple x86_64-apple-darwin -o - \
+# RUN: | not llvm-dwarfdump -verify - \
+# RUN: | FileCheck %s
+
+# CHECK: Verifying .apple_names...
+# CHECK-NEXT: error: Bucket[0] has invalid hash index: 4294967294
+# CHECK-NEXT: error: Hash[0] has invalid HashData offset: 0x000000b4
+# CHECK-NEXT: error: .apple_names Bucket[1] Hash[1] = 0x0002b60f Str[0] = 0x0000005a DIE[0] = 0x00000001 is not a valid DIE offset for "j".
+
+# This test is meant to verify that the -verify option
+# in llvm-dwarfdump correctly identifies
+# an invalid hash index for bucket[0] in the .apple_names section,
+# an invalid HashData offset for Hash[0], as well as
+# an invalid DIE offset in the .debug_info section.
+# We're reading an invalid DIE due to the incorrect interpretation of DW_FORM for the DIE.
+# Instead of DW_FORM_data4, Atom[0].form is DW_FORM_flag_present.
+
+ .section __TEXT,__text,regular,pure_instructions
+ .file 1 "basic.c"
+ .comm _i,4,2 ## @i
+ .comm _j,4,2 ## @j
+ .section __DWARF,__debug_str,regular,debug
+Linfo_string:
+ .asciz "Apple LLVM version 8.1.0 (clang-802.0.35)" ## string offset=0
+ .asciz "basic.c" ## string offset=42
+ .asciz "/Users/sgravani/Development/tests" ## string offset=50
+ .asciz "i" ## string offset=84
+ .asciz "int" ## string offset=86
+ .asciz "j" ## string offset=90
+ .section __DWARF,__debug_info,regular,debug
+Lsection_info:
+ .section __DWARF,__apple_names,regular,debug
+Lnames_begin:
+ .long 1212240712 ## Header Magic
+ .short 1 ## Header Version
+ .short 0 ## Header Hash Function
+ .long 2 ## Header Bucket Count
+ .long 2 ## Header Hash Count
+ .long 12 ## Header Data Length
+ .long 0 ## HeaderData Die Offset Base
+ .long 1 ## HeaderData Atom Count
+ .short 1 ## DW_ATOM_die_offset
+ .short 25 ## DW_FORM_data4 -- error: .apple_names Bucket[1] Hash[1] = 0x0002b60f Str[0] = 0x0000005a DIE[0] = 0x00000001 is not a valid DIE offset for "j".
+ .long -2 ## Bucket 0 -- error: Bucket[0] has invalid hash index: 4294967294
+ .long 1 ## Bucket 1
+ .long 177678 ## Hash in Bucket 0
+ .long 177679 ## Hash in Bucket 1
+ .long Lsection_line ## Offset in Bucket 0 -- error: Hash[0] has invalid HashData offset: 0x000000b4
+ .long LNames1-Lnames_begin ## Offset in Bucket 1
+LNames0:
+ .long 84 ## i
+ .long 1 ## Num DIEs
+ .long 30
+ .long 0
+LNames1:
+ .long 90 ## j
+ .long 1 ## Num DIEs
+ .long 58
+ .long 0
+
+.subsections_via_symbols
+ .section __DWARF,__debug_line,regular,debug
+Lsection_line:
+Lline_table_start0:
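All of the hand-assembled .apple_names tables in these tests follow the same fixed header layout; a sketch of it as a C++ struct, with field names paraphrased from the Apple accelerator-table format rather than copied from an LLVM header:

    #include <cstdint>

    struct AppleAcceleratorHeader {
      uint32_t Magic;            // 1212240712 == 0x48415348 ('HASH')
      uint16_t Version;          // 1
      uint16_t HashFunction;     // 0 == DJB hash
      uint32_t BucketCount;
      uint32_t HashCount;
      uint32_t HeaderDataLength;
      // HeaderData follows: a DIE offset base, an atom count, then one
      // (DW_ATOM_*, DW_FORM_*) pair per atom; the bucket, hash, and
      // hash-data offset arrays come after the header data.
    };

The errors exercised above are malformed fields in this layout (a bucket index past HashCount, a hash-data offset outside the section), and the tests that follow corrupt the atom count and form against the same structure.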
diff --git a/test/tools/llvm-dwarfdump/X86/apple_names_verify_form.s b/test/tools/llvm-dwarfdump/X86/apple_names_verify_form.s
new file mode 100644
index 000000000000..ed4bf57069ce
--- /dev/null
+++ b/test/tools/llvm-dwarfdump/X86/apple_names_verify_form.s
@@ -0,0 +1,58 @@
+# RUN: llvm-mc %s -filetype obj -triple x86_64-apple-darwin -o - \
+# RUN: | not llvm-dwarfdump -verify - \
+# RUN: | FileCheck %s
+
+# CHECK: Verifying .apple_names...
+# CHECK-NEXT: error: unsupported form; failed to read HashData
+
+# This test is meant to verify that the -verify option
+# in llvm-dwarfdump correctly identifies that Atom[0].form is unsupported.
+# As a result, the HashData cannot be read.
+
+ .section __TEXT,__text,regular,pure_instructions
+ .file 1 "basic.c"
+ .comm _i,4,2 ## @i
+ .comm _j,4,2 ## @j
+ .section __DWARF,__debug_str,regular,debug
+Linfo_string:
+ .asciz "Apple LLVM version 8.1.0 (clang-802.0.35)" ## string offset=0
+ .asciz "basic.c" ## string offset=42
+ .asciz "/Users/sgravani/Development/tests" ## string offset=50
+ .asciz "i" ## string offset=84
+ .asciz "int" ## string offset=86
+ .asciz "j" ## string offset=90
+ .section __DWARF,__debug_info,regular,debug
+Lsection_info:
+ .section __DWARF,__apple_names,regular,debug
+Lnames_begin:
+ .long 1212240712 ## Header Magic
+ .short 1 ## Header Version
+ .short 0 ## Header Hash Function
+ .long 2 ## Header Bucket Count
+ .long 2 ## Header Hash Count
+ .long 12 ## Header Data Length
+ .long 0 ## HeaderData Die Offset Base
+ .long 1 ## HeaderData Atom Count
+ .short 1 ## DW_ATOM_die_offset
+ .short 400 ## DW_FORM_data4 -- error: unsupported form; failed to read HashData
+ .long 0 ## Bucket 0
+ .long 1 ## Bucket 1
+ .long 177678 ## Hash in Bucket 0
+ .long 177679 ## Hash in Bucket 1
+ .long LNames0-Lnames_begin ## Offset in Bucket 0
+ .long LNames1-Lnames_begin ## Offset in Bucket 1
+LNames0:
+ .long 84 ## i
+ .long 1 ## Num DIEs
+ .long 30
+ .long 0
+LNames1:
+ .long 90 ## j
+ .long 1 ## Num DIEs
+ .long 58
+ .long 0
+
+.subsections_via_symbols
+ .section __DWARF,__debug_line,regular,debug
+Lsection_line:
+Lline_table_start0:
diff --git a/test/tools/llvm-dwarfdump/X86/apple_names_verify_num_atoms.s b/test/tools/llvm-dwarfdump/X86/apple_names_verify_num_atoms.s
new file mode 100644
index 000000000000..dffb39c20f08
--- /dev/null
+++ b/test/tools/llvm-dwarfdump/X86/apple_names_verify_num_atoms.s
@@ -0,0 +1,59 @@
+# RUN: llvm-mc %s -filetype obj -triple x86_64-apple-darwin -o - \
+# RUN: | not llvm-dwarfdump -verify - \
+# RUN: | FileCheck %s
+
+# CHECK: Verifying .apple_names...
+# CHECK-NEXT: error: no atoms; failed to read HashData
+
+# This test is meant to verify that the -verify option
+# in llvm-dwarfdump correctly identifies that there are no atoms.
+# As a result, the HashData cannot be read.
+
+ .section __TEXT,__text,regular,pure_instructions
+ .file 1 "basic.c"
+ .comm _i,4,2 ## @i
+ .comm _j,4,2 ## @j
+ .section __DWARF,__debug_str,regular,debug
+Linfo_string:
+ .asciz "Apple LLVM version 8.1.0 (clang-802.0.35)" ## string offset=0
+ .asciz "basic.c" ## string offset=42
+ .asciz "/Users/sgravani/Development/tests" ## string offset=50
+ .asciz "i" ## string offset=84
+ .asciz "int" ## string offset=86
+ .asciz "j" ## string offset=90
+
+ .section __DWARF,__debug_info,regular,debug
+Lsection_info:
+ .section __DWARF,__apple_names,regular,debug
+Lnames_begin:
+ .long 1212240712 ## Header Magic
+ .short 1 ## Header Version
+ .short 0 ## Header Hash Function
+ .long 2 ## Header Bucket Count
+ .long 2 ## Header Hash Count
+ .long 12 ## Header Data Length
+ .long 0 ## HeaderData Die Offset Base
+ .long 0 ## HeaderData Atom Count -- error: no atoms; failed to read HashData
+ .short 1 ## DW_ATOM_die_offset
+ .short 6 ## DW_FORM_data4
+ .long 0 ## Bucket 0
+ .long 1 ## Bucket 1
+ .long 177678 ## Hash in Bucket 0
+ .long 177679 ## Hash in Bucket 1
+ .long LNames0-Lnames_begin ## Offset in Bucket 0
+ .long LNames1-Lnames_begin ## Offset in Bucket 1
+LNames0:
+ .long 84 ## i
+ .long 1 ## Num DIEs
+ .long 30
+ .long 0
+LNames1:
+ .long 90 ## j
+ .long 1 ## Num DIEs
+ .long 58
+ .long 0
+
+.subsections_via_symbols
+ .section __DWARF,__debug_line,regular,debug
+Lsection_line:
+Lline_table_start0:
diff --git a/test/tools/llvm-dwarfdump/X86/no_apple_names_verify.s b/test/tools/llvm-dwarfdump/X86/no_apple_names_verify.s
new file mode 100644
index 000000000000..76606bd40add
--- /dev/null
+++ b/test/tools/llvm-dwarfdump/X86/no_apple_names_verify.s
@@ -0,0 +1,33 @@
+# RUN: llvm-mc %s -filetype obj -triple x86_64-apple-darwin -o - \
+# RUN: | llvm-dwarfdump -verify - \
+# RUN: | FileCheck %s
+
+# CHECK-NOT: Verifying .apple_names...
+
+# This test is meant to verify that the -verify option
+# in llvm-dwarfdump doesn't produce any .apple_names-related
+# output when there's no such section in the object.
+# The test was manually modified to exclude the
+# .apple_names section from the apple_names_verify_num_atoms.s
+# test file in the same directory.
+
+ .section __TEXT,__text,regular,pure_instructions
+ .file 1 "basic.c"
+ .comm _i,4,2 ## @i
+ .comm _j,4,2 ## @j
+ .section __DWARF,__debug_str,regular,debug
+Linfo_string:
+ .asciz "Apple LLVM version 8.1.0 (clang-802.0.35)" ## string offset=0
+ .asciz "basic.c" ## string offset=42
+ .asciz "/Users/sgravani/Development/tests" ## string offset=50
+ .asciz "i" ## string offset=84
+ .asciz "int" ## string offset=86
+ .asciz "j" ## string offset=90
+
+ .section __DWARF,__debug_info,regular,debug
+Lsection_info:
+
+.subsections_via_symbols
+ .section __DWARF,__debug_line,regular,debug
+Lsection_line:
+Lline_table_start0:
diff --git a/test/tools/llvm-dwarfdump/X86/no_apple_names_verify_buckets.s b/test/tools/llvm-dwarfdump/X86/no_apple_names_verify_buckets.s
deleted file mode 100644
index 472ff71794c6..000000000000
--- a/test/tools/llvm-dwarfdump/X86/no_apple_names_verify_buckets.s
+++ /dev/null
@@ -1,174 +0,0 @@
-# RUN: llvm-mc %s -filetype obj -triple x86_64-apple-darwin -o - \
-# RUN: | not llvm-dwarfdump -verify - \
-# RUN: | FileCheck %s
-
-# CHECK-NOT: Verifying .apple_names...
-
-# This test is meant to verify that the -verify option
-# in llvm-dwarfdump doesn't produce any .apple_names related
-# output when there's no such section int he object.
-# The test was manually modified to exclude the
-# .apple_names section from the apple_names_verify_buckets.s
-# test file in the same directory.
-
- .section __TEXT,__text,regular,pure_instructions
- .file 1 "basic.c"
- .comm _i,4,2 ## @i
- .section __DWARF,__debug_str,regular,debug
-Linfo_string:
- .asciz "basic.c" ## string offset=42
- .asciz "i" ## string offset=84
- .asciz "int" ## string offset=86
- .section __DWARF,__debug_loc,regular,debug
-Lsection_debug_loc:
- .section __DWARF,__debug_abbrev,regular,debug
-Lsection_abbrev:
- .byte 1 ## Abbreviation Code
- .byte 17 ## DW_TAG_compile_unit
- .byte 1 ## DW_CHILDREN_yes
- .byte 37 ## DW_AT_producer
- .byte 14 ## DW_FORM_strp
- .byte 19 ## DW_AT_language
- .byte 5 ## DW_FORM_data2
- .byte 3 ## DW_AT_name
- .byte 14 ## DW_FORM_strp
- .byte 16 ## DW_AT_stmt_list
- .byte 23 ## DW_FORM_sec_offset
- .byte 27 ## DW_AT_comp_dir
- .byte 14 ## DW_FORM_strp
- .byte 0 ## EOM(1)
- .byte 0 ## EOM(2)
- .byte 2 ## Abbreviation Code
- .byte 52 ## DW_TAG_variable
- .byte 0 ## DW_CHILDREN_no
- .byte 3 ## DW_AT_name
- .byte 14 ## DW_FORM_strp
- .byte 73 ## DW_AT_type
- .byte 19 ## DW_FORM_ref4
- .byte 63 ## DW_AT_external
- .byte 25 ## DW_FORM_flag_present
- .byte 58 ## DW_AT_decl_file
- .byte 11 ## DW_FORM_data1
- .byte 59 ## DW_AT_decl_line
- .byte 11 ## DW_FORM_data1
- .byte 2 ## DW_AT_location
- .byte 24 ## DW_FORM_exprloc
- .byte 0 ## EOM(1)
- .byte 0 ## EOM(2)
- .byte 3 ## Abbreviation Code
- .byte 36 ## DW_TAG_base_type
- .byte 0 ## DW_CHILDREN_no
- .byte 3 ## DW_AT_name
- .byte 14 ## DW_FORM_strp
- .byte 62 ## DW_AT_encoding
- .byte 11 ## DW_FORM_data1
- .byte 11 ## DW_AT_byte_size
- .byte 11 ## DW_FORM_data1
- .byte 0 ## EOM(1)
- .byte 0 ## EOM(2)
- .byte 0 ## EOM(3)
- .section __DWARF,__debug_info,regular,debug
-Lsection_info:
-Lcu_begin0:
- .long 55 ## Length of Unit
- .short 4 ## DWARF version number
-Lset0 = Lsection_abbrev-Lsection_abbrev ## Offset Into Abbrev. Section
- .long Lset0
- .byte 8 ## Address Size (in bytes)
- .byte 1 ## Abbrev [1] 0xb:0x30 DW_TAG_compile_unit
- .long 0 ## DW_AT_producer
- .short 12 ## DW_AT_language
- .long 42 ## DW_AT_name
-Lset1 = Lline_table_start0-Lsection_line ## DW_AT_stmt_list
- .long Lset1
- .long 50 ## DW_AT_comp_dir
- .byte 2 ## Abbrev [2] 0x1e:0x15 DW_TAG_variable
- .long 84 ## DW_AT_name
- .long 51 ## DW_AT_type
- ## DW_AT_external
- .byte 1 ## DW_AT_decl_file
- .byte 1 ## DW_AT_decl_line
- .byte 9 ## DW_AT_location
- .byte 3
- .quad _i
- .byte 3 ## Abbrev [3] 0x33:0x7 DW_TAG_base_type
- .long 86 ## DW_AT_name
- .byte 5 ## DW_AT_encoding
- .byte 4 ## DW_AT_byte_size
- .byte 0 ## End Of Children Mark
- .section __DWARF,__debug_ranges,regular,debug
-Ldebug_range:
- .section __DWARF,__debug_macinfo,regular,debug
-Ldebug_macinfo:
-Lcu_macro_begin0:
- .byte 0 ## End Of Macro List Mark
- .section __DWARF,__apple_objc,regular,debug
-Lobjc_begin:
- .long 1212240712 ## Header Magic
- .short 1 ## Header Version
- .short 0 ## Header Hash Function
- .long 1 ## Header Bucket Count
- .long 0 ## Header Hash Count
- .long 12 ## Header Data Length
- .long 0 ## HeaderData Die Offset Base
- .long 1 ## HeaderData Atom Count
- .short 1 ## DW_ATOM_die_offset
- .short 6 ## DW_FORM_data4
- .long -1 ## Bucket 0
- .section __DWARF,__apple_namespac,regular,debug
-Lnamespac_begin:
- .long 1212240712 ## Header Magic
- .short 1 ## Header Version
- .short 0 ## Header Hash Function
- .long 1 ## Header Bucket Count
- .long 0 ## Header Hash Count
- .long 12 ## Header Data Length
- .long 0 ## HeaderData Die Offset Base
- .long 1 ## HeaderData Atom Count
- .short 1 ## DW_ATOM_die_offset
- .short 6 ## DW_FORM_data4
- .long -1 ## Bucket 0
- .section __DWARF,__apple_types,regular,debug
-Ltypes_begin:
- .long 1212240712 ## Header Magic
- .short 1 ## Header Version
- .short 0 ## Header Hash Function
- .long 1 ## Header Bucket Count
- .long 1 ## Header Hash Count
- .long 20 ## Header Data Length
- .long 0 ## HeaderData Die Offset Base
- .long 3 ## HeaderData Atom Count
- .short 1 ## DW_ATOM_die_offset
- .short 6 ## DW_FORM_data4
- .short 3 ## DW_ATOM_die_tag
- .short 5 ## DW_FORM_data2
- .short 4 ## DW_ATOM_type_flags
- .short 11 ## DW_FORM_data1
- .long 0 ## Bucket 0
- .long 193495088 ## Hash in Bucket 0
- .long Ltypes0-Ltypes_begin ## Offset in Bucket 0
-Ltypes0:
- .long 86 ## int
- .long 1 ## Num DIEs
- .long 51
- .short 36
- .byte 0
- .long 0
- .section __DWARF,__apple_exttypes,regular,debug
-Lexttypes_begin:
- .long 1212240712 ## Header Magic
- .short 1 ## Header Version
- .short 0 ## Header Hash Function
- .long 1 ## Header Bucket Count
- .long 0 ## Header Hash Count
- .long 12 ## Header Data Length
- .long 0 ## HeaderData Die Offset Base
- .long 1 ## HeaderData Atom Count
- .short 7 ## DW_ATOM_ext_types
- .short 6 ## DW_FORM_data4
- .long -1 ## Bucket 0
-
-.subsections_via_symbols
- .section __DWARF,__debug_line,regular,debug
-Lsection_line:
-Lline_table_start0:
diff --git a/test/tools/llvm-nm/X86/demangle.ll b/test/tools/llvm-nm/X86/demangle.ll
new file mode 100644
index 000000000000..283e604046a8
--- /dev/null
+++ b/test/tools/llvm-nm/X86/demangle.ll
@@ -0,0 +1,37 @@
+; RUN: llc -filetype=obj -mtriple=x86_64-pc-linux -o %t.o %s
+; RUN: llvm-nm %t.o | FileCheck --check-prefix="MANGLED" %s
+; RUN: llvm-nm -C %t.o | FileCheck --check-prefix="DEMANGLED" %s
+; RUN: llvm-nm --demangle %t.o | FileCheck --check-prefix="DEMANGLED" %s
+
+; RUN: llc -filetype=obj -mtriple=x86_64-apple-darwin9 -o %t.macho %s
+; RUN: llvm-nm %t.macho | FileCheck --check-prefix="MACHO-MANGLED" %s
+; RUN: llvm-nm -C %t.macho | FileCheck --check-prefix="DEMANGLED" %s
+
+; RUN: llc -filetype=obj -mtriple=x86_64-pc-win32 -o %t.coff %s
+; RUN: llvm-nm %t.coff | FileCheck --check-prefix="COFF-MANGLED" %s
+; RUN: llvm-nm -C %t.coff | FileCheck --check-prefix="COFF-DEMANGLED" %s
+
+define i32 @_Z3fooi(i32) #0 {
+entry:
+ ret i32 1
+}
+
+define float @_Z3barf(float) #0 {
+entry:
+ ret float 0.000000e+00
+}
+
+; MANGLED: 0000000000000010 T _Z3barf
+; MANGLED: 0000000000000000 T _Z3fooi
+
+; MACHO-MANGLED: 0000000000000010 T __Z3barf
+; MACHO-MANGLED: 0000000000000000 T __Z3fooi
+
+; COFF-MANGLED: 00000010 T _Z3barf
+; COFF-MANGLED: 00000000 T _Z3fooi
+
+; DEMANGLED: 0000000000000010 T bar(float)
+; DEMANGLED: 0000000000000000 T foo(int)
+
+; COFF-DEMANGLED: 00000010 T bar(float)
+; COFF-DEMANGLED: 00000000 T foo(int)
diff --git a/test/tools/llvm-nm/wasm/weak-symbols.yaml b/test/tools/llvm-nm/wasm/weak-symbols.yaml
index 682a874ea590..d46ca1afe8ea 100644
--- a/test/tools/llvm-nm/wasm/weak-symbols.yaml
+++ b/test/tools/llvm-nm/wasm/weak-symbols.yaml
@@ -33,6 +33,8 @@ Sections:
Index: 0x00000002
- Type: CUSTOM
Name: linking
+ DataSize: 0
+ DataAlignment: 2
SymbolInfo:
- Name: weak_global_func
Flags: 1
diff --git a/test/tools/llvm-objdump/ARM/invalid-instruction.s b/test/tools/llvm-objdump/ARM/invalid-instruction.s
new file mode 100644
index 000000000000..a63300cadffe
--- /dev/null
+++ b/test/tools/llvm-objdump/ARM/invalid-instruction.s
@@ -0,0 +1,9 @@
+@RUN: llvm-mc -triple arm-unknown-linux -filetype=obj %s | llvm-objdump -d - | FileCheck %s
+
+.text
+ b l0
+ .inst 0xffffffff
+l0:
+
+@CHECK: 0: 00 00 00 ea b #0 <l0>
+@CHECK-NEXT: 4: ff ff ff ff <unknown>
diff --git a/test/tools/llvm-objdump/WebAssembly/lit.local.cfg b/test/tools/llvm-objdump/WebAssembly/lit.local.cfg
new file mode 100644
index 000000000000..0dd8c920ff1e
--- /dev/null
+++ b/test/tools/llvm-objdump/WebAssembly/lit.local.cfg
@@ -0,0 +1,2 @@
+if 'WebAssembly' not in config.root.targets:
+ config.unsupported = True
diff --git a/test/tools/llvm-objdump/WebAssembly/relocations.test b/test/tools/llvm-objdump/WebAssembly/relocations.test
new file mode 100644
index 000000000000..07a167c550f9
--- /dev/null
+++ b/test/tools/llvm-objdump/WebAssembly/relocations.test
@@ -0,0 +1,8 @@
+; RUN: llc -mtriple=wasm32-unknown-unknown-wasm -filetype=obj %s -o - | llvm-objdump -r - | FileCheck %s
+
+@foo1 = hidden global i32 1, align 4
+@foo2 = hidden global i32 1, align 4
+@bar = hidden global i32* @foo2, align 4
+
+; CHECK: RELOCATION RECORDS FOR [DATA]:
+; CHECK-NEXT: 0000000e R_WEBASSEMBLY_GLOBAL_ADDR_I32 1+0
diff --git a/test/tools/llvm-pdbdump/partial-type-stream.test b/test/tools/llvm-pdbdump/partial-type-stream.test
new file mode 100644
index 000000000000..3a853c391450
--- /dev/null
+++ b/test/tools/llvm-pdbdump/partial-type-stream.test
@@ -0,0 +1,30 @@
+; RUN: llvm-pdbutil dump -type-index=0x1019 %p/Inputs/ClassLayoutTest.pdb \
+; RUN: | FileCheck --check-prefix=NODEPS %s
+; RUN: llvm-pdbutil dump -type-index=0x1019 -dependents %p/Inputs/ClassLayoutTest.pdb \
+; RUN: | FileCheck --check-prefix=DEPS %s
+
+
+NODEPS: Types (TPI Stream)
+NODEPS-NEXT: ============================================================
+NODEPS-NEXT: Showing 1 records.
+NODEPS-NEXT: 0x1019 | LF_MFUNCTION [size = 28]
+NODEPS-NEXT: return type = 0x0003 (void), # args = 0, param list = 0x100E
+NODEPS-NEXT: class type = 0x1017, this type = 0x1018, this adjust = 0
+NODEPS-NEXT: calling conv = thiscall, options = None
+
+
+DEPS: Types (TPI Stream)
+DEPS-NEXT: ============================================================
+DEPS-NEXT: Showing 1 records and their dependents (4 records total)
+DEPS-NEXT: 0x100E | LF_ARGLIST [size = 8]
+DEPS-NEXT: 0x1017 | LF_CLASS [size = 60]
+DEPS-NEXT: class name: `MembersTest::A`
+DEPS-NEXT: unique name: `.?AVA@MembersTest@@`
+DEPS-NEXT: vtable: <no type>, base list: <no type>, field list: <no type>
+DEPS-NEXT: options: forward ref | has unique name
+DEPS-NEXT: 0x1018 | LF_POINTER [size = 12]
+DEPS-NEXT: referent = 0x1017, mode = pointer, opts = const, kind = ptr32
+DEPS-NEXT: 0x1019 | LF_MFUNCTION [size = 28]
+DEPS-NEXT: return type = 0x0003 (void), # args = 0, param list = 0x100E
+DEPS-NEXT: class type = 0x1017, this type = 0x1018, this adjust = 0
+DEPS-NEXT: calling conv = thiscall, options = None
diff --git a/test/tools/llvm-readobj/Inputs/trivial.obj.coff-arm64 b/test/tools/llvm-readobj/Inputs/trivial.obj.coff-arm64
new file mode 100644
index 000000000000..b494f6ade24c
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/trivial.obj.coff-arm64
Binary files differ
diff --git a/test/tools/llvm-readobj/file-headers.test b/test/tools/llvm-readobj/file-headers.test
index 47fb24de1b60..6bc9714f2037 100644
--- a/test/tools/llvm-readobj/file-headers.test
+++ b/test/tools/llvm-readobj/file-headers.test
@@ -1,5 +1,7 @@
RUN: llvm-readobj -h %p/Inputs/trivial.obj.coff-arm \
RUN: | FileCheck %s -check-prefix COFF-ARM
+RUN: llvm-readobj -h %p/Inputs/trivial.obj.coff-arm64 \
+RUN: | FileCheck %s -check-prefix COFF-ARM64
RUN: llvm-readobj -h %p/Inputs/trivial.obj.coff-i386 \
RUN: | FileCheck %s -check-prefix COFF32
RUN: llvm-readobj -h %p/Inputs/trivial.obj.coff-x86-64 \
@@ -47,6 +49,21 @@ COFF-ARM-NEXT: Characteristics [ (0x0)
COFF-ARM-NEXT: ]
COFF-ARM-NEXT: }
+COFF-ARM64: File: {{(.*[/\\])?}}trivial.obj.coff-arm64
+COFF-ARM64-NEXT: Format: COFF-ARM64
+COFF-ARM64-NEXT: Arch: aarch64
+COFF-ARM64-NEXT: AddressSize: 64bit
+COFF-ARM64-NEXT: ImageFileHeader {
+COFF-ARM64-NEXT: Machine: IMAGE_FILE_MACHINE_ARM64 (0xAA64)
+COFF-ARM64-NEXT: SectionCount: 1
+COFF-ARM64-NEXT: TimeDateStamp: 1970-01-01 00:00:00 (0x0)
+COFF-ARM64-NEXT: PointerToSymbolTable: 0x44
+COFF-ARM64-NEXT: SymbolCount: 3
+COFF-ARM64-NEXT: OptionalHeaderSize: 0
+COFF-ARM64-NEXT: Characteristics [ (0x0)
+COFF-ARM64-NEXT: ]
+COFF-ARM64-NEXT: }
+
COFF32: File: {{(.*[/\\])?}}trivial.obj.coff-i386
COFF32-NEXT: Format: COFF-i386
COFF32-NEXT: Arch: i386
@@ -238,6 +255,7 @@ PE32-NEXT: IMAGE_FILE_EXECUTABLE_IMAGE (0x2)
PE32-NEXT: ]
PE32-NEXT: }
PE32-NEXT: ImageOptionalHeader {
+PE32-NEXT: Magic: 0x10B
PE32-NEXT: MajorLinkerVersion: 11
PE32-NEXT: MinorLinkerVersion: 0
PE32-NEXT: SizeOfCode: 512
diff --git a/test/tools/llvm-readobj/peplus.test b/test/tools/llvm-readobj/peplus.test
index 4d8d25db894c..3619cd792d54 100644
--- a/test/tools/llvm-readobj/peplus.test
+++ b/test/tools/llvm-readobj/peplus.test
@@ -16,6 +16,7 @@ CHECK: IMAGE_FILE_LARGE_ADDRESS_AWARE (0x20)
CHECK: ]
CHECK: }
CHECK: ImageOptionalHeader {
+CHECK: Magic: 0x20B
CHECK: MajorLinkerVersion: 11
CHECK: MinorLinkerVersion: 0
CHECK: SizeOfCode: 512
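The Magic value the two new checks pin down distinguishes the two optional-header flavors defined by the PE/COFF specification; as constants:

    #include <cstdint>

    // PE/COFF optional-header magic values (from the spec):
    enum : uint16_t {
      PE32Magic     = 0x10B, // 32-bit optional header (file-headers.test)
      PE32PlusMagic = 0x20B, // PE32+, used by 64-bit images (peplus.test)
    };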
diff --git a/test/tools/llvm-readobj/symbols.test b/test/tools/llvm-readobj/symbols.test
index 1037c2866023..da8a70b031ab 100644
--- a/test/tools/llvm-readobj/symbols.test
+++ b/test/tools/llvm-readobj/symbols.test
@@ -75,17 +75,21 @@ WASM: Symbols [
WASM-NEXT: Symbol {
WASM-NEXT: Name: bar
WASM-NEXT: Type: GLOBAL_IMPORT (0x2)
+WASM-NEXT: Flags: 0x0
WASM-NEXT: }
WASM-NEXT: Symbol {
WASM-NEXT: Name: baz
WASM-NEXT: Type: GLOBAL_IMPORT (0x2)
+WASM-NEXT: Flags: 0x0
WASM-NEXT: }
WASM-NEXT: Symbol {
WASM-NEXT: Name: foo
WASM-NEXT: Type: FUNCTION_EXPORT (0x1)
+WASM-NEXT: Flags: 0x0
WASM-NEXT: }
WASM-NEXT: Symbol {
WASM-NEXT: Name: foo
WASM-NEXT: Type: DEBUG_FUNCTION_NAME (0x4)
+WASM-NEXT: Flags: 0x0
WASM-NEXT: }
WASM-NEXT: ]
diff --git a/tools/bugpoint/ToolRunner.cpp b/tools/bugpoint/ToolRunner.cpp
index 10532ef8395b..70b18e3dbbf9 100644
--- a/tools/bugpoint/ToolRunner.cpp
+++ b/tools/bugpoint/ToolRunner.cpp
@@ -861,7 +861,7 @@ Error CC::MakeSharedObject(const std::string &InputFile, FileType fileType,
errs() << "\n";);
if (RunProgramWithTimeout(CCPath, &CCArgs[0], "", "", ""))
return ProcessFailure(CCPath, &CCArgs[0]);
- return Error::success();;
+ return Error::success();
}
/// create - Try to find the CC executable
diff --git a/tools/dsymutil/DwarfLinker.cpp b/tools/dsymutil/DwarfLinker.cpp
index ba5c63846b50..86621e3260f0 100644
--- a/tools/dsymutil/DwarfLinker.cpp
+++ b/tools/dsymutil/DwarfLinker.cpp
@@ -2212,7 +2212,7 @@ void DwarfLinker::keepDIEAndDependencies(RelocationManager &RelocMgr,
// Then we need to mark all the DIEs referenced by this DIE's
// attributes as kept.
- DataExtractor Data = Unit.getDebugInfoExtractor();
+ DWARFDataExtractor Data = Unit.getDebugInfoExtractor();
const auto *Abbrev = Die.getAbbreviationDeclarationPtr();
uint32_t Offset = Die.getOffset() + getULEB128Size(Abbrev->getCode());
@@ -2729,7 +2729,7 @@ DIE *DwarfLinker::DIECloner::cloneDIE(
}
// Extract and clone every attribute.
- DataExtractor Data = U.getDebugInfoExtractor();
+ DWARFDataExtractor Data = U.getDebugInfoExtractor();
// Point to the next DIE (generally there is always at least a NULL
// entry after the current one). If this is a lone
// DW_TAG_compile_unit without any children, point to the next unit.
@@ -2743,7 +2743,8 @@ DIE *DwarfLinker::DIECloner::cloneDIE(
// it. After testing, it seems there is no performance downside to
// doing the copy unconditionally, and it makes the code simpler.
SmallString<40> DIECopy(Data.getData().substr(Offset, NextOffset - Offset));
- Data = DataExtractor(DIECopy, Data.isLittleEndian(), Data.getAddressSize());
+ Data =
+ DWARFDataExtractor(DIECopy, Data.isLittleEndian(), Data.getAddressSize());
// Modify the copy with relocated addresses.
if (RelocMgr.applyValidRelocs(DIECopy, Offset, Data.isLittleEndian())) {
// If we applied relocations, we store the value of high_pc that was
@@ -2872,8 +2873,8 @@ void DwarfLinker::patchRangesForUnit(const CompileUnit &Unit,
DWARFDebugRangeList RangeList;
const auto &FunctionRanges = Unit.getFunctionRanges();
unsigned AddressSize = Unit.getOrigUnit().getAddressByteSize();
- DataExtractor RangeExtractor(OrigDwarf.getRangeSection().Data,
- OrigDwarf.isLittleEndian(), AddressSize);
+ DWARFDataExtractor RangeExtractor(OrigDwarf.getRangeSection(),
+ OrigDwarf.isLittleEndian(), AddressSize);
auto InvalidRange = FunctionRanges.end(), CurrRange = InvalidRange;
DWARFUnit &OrigUnit = Unit.getOrigUnit();
auto OrigUnitDie = OrigUnit.getUnitDIE(false);
@@ -2887,7 +2888,7 @@ void DwarfLinker::patchRangesForUnit(const CompileUnit &Unit,
for (const auto &RangeAttribute : Unit.getRangesAttributes()) {
uint32_t Offset = RangeAttribute.get();
RangeAttribute.set(Streamer->getRangesSectionSize());
- RangeList.extract(RangeExtractor, &Offset, OrigDwarf.getRangeSection().Relocs);
+ RangeList.extract(RangeExtractor, &Offset);
const auto &Entries = RangeList.getEntries();
if (!Entries.empty()) {
const DWARFDebugRangeList::RangeListEntry &First = Entries.front();
@@ -2983,11 +2984,10 @@ void DwarfLinker::patchLineTableForUnit(CompileUnit &Unit,
// Parse the original line info for the unit.
DWARFDebugLine::LineTable LineTable;
uint32_t StmtOffset = *StmtList;
- StringRef LineData = OrigDwarf.getLineSection().Data;
- DataExtractor LineExtractor(LineData, OrigDwarf.isLittleEndian(),
- Unit.getOrigUnit().getAddressByteSize());
- LineTable.parse(LineExtractor, &OrigDwarf.getLineSection().Relocs,
- &StmtOffset);
+ DWARFDataExtractor LineExtractor(OrigDwarf.getLineSection(),
+ OrigDwarf.isLittleEndian(),
+ Unit.getOrigUnit().getAddressByteSize());
+ LineTable.parse(LineExtractor, &StmtOffset);
// This vector is the output line table.
std::vector<DWARFDebugLine::Row> NewRows;
@@ -3086,6 +3086,7 @@ void DwarfLinker::patchLineTableForUnit(CompileUnit &Unit,
LineTable.Prologue.OpcodeBase > 13)
reportWarning("line table parameters mismatch. Cannot emit.");
else {
+ StringRef LineData = OrigDwarf.getLineSection().Data;
MCDwarfLineTableParams Params;
Params.DWARF2LineOpcodeBase = LineTable.Prologue.OpcodeBase;
Params.DWARF2LineBase = LineTable.Prologue.LineBase;
diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp
index e71c3c5bb705..f13a19213c69 100644
--- a/tools/llc/llc.cpp
+++ b/tools/llc/llc.cpp
@@ -149,6 +149,11 @@ static cl::opt<bool> PassRemarksWithHotness(
cl::desc("With PGO, include profile count in optimization remarks"),
cl::Hidden);
+static cl::opt<unsigned> PassRemarksHotnessThreshold(
+ "pass-remarks-hotness-threshold",
+ cl::desc("Minimum profile count required for an optimization remark to be output"),
+ cl::Hidden);
+
static cl::opt<std::string>
RemarksFilename("pass-remarks-output",
cl::desc("YAML output filename for pass remarks"),
@@ -323,7 +328,10 @@ int main(int argc, char **argv) {
Context.setInlineAsmDiagnosticHandler(InlineAsmDiagHandler, &HasError);
if (PassRemarksWithHotness)
- Context.setDiagnosticHotnessRequested(true);
+ Context.setDiagnosticsHotnessRequested(true);
+
+ if (PassRemarksHotnessThreshold)
+ Context.setDiagnosticsHotnessThreshold(PassRemarksHotnessThreshold);
std::unique_ptr<tool_output_file> YamlFile;
if (RemarksFilename != "") {
diff --git a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
index 22bc6f7043ee..528247c2dbc3 100644
--- a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
+++ b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
@@ -125,6 +125,7 @@ static const char *GetBlockName(unsigned BlockID,
return "FULL_LTO_GLOBALVAL_SUMMARY_BLOCK";
case bitc::MODULE_STRTAB_BLOCK_ID: return "MODULE_STRTAB_BLOCK";
case bitc::STRTAB_BLOCK_ID: return "STRTAB_BLOCK";
+ case bitc::SYMTAB_BLOCK_ID: return "SYMTAB_BLOCK";
}
}
@@ -393,6 +394,11 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
default: return nullptr;
case bitc::STRTAB_BLOB: return "BLOB";
}
+ case bitc::SYMTAB_BLOCK_ID:
+ switch(CodeID) {
+ default: return nullptr;
+ case bitc::SYMTAB_BLOB: return "BLOB";
+ }
}
#undef STRINGIFY_CODE
}
diff --git a/tools/llvm-lto2/CMakeLists.txt b/tools/llvm-lto2/CMakeLists.txt
index cdd97c9fef5c..9be12d71ed79 100644
--- a/tools/llvm-lto2/CMakeLists.txt
+++ b/tools/llvm-lto2/CMakeLists.txt
@@ -1,5 +1,6 @@
set(LLVM_LINK_COMPONENTS
${LLVM_TARGETS_TO_BUILD}
+ BitReader
Core
Linker
LTO
diff --git a/tools/llvm-lto2/LLVMBuild.txt b/tools/llvm-lto2/LLVMBuild.txt
index 42b0b9e26cca..d51aa2290675 100644
--- a/tools/llvm-lto2/LLVMBuild.txt
+++ b/tools/llvm-lto2/LLVMBuild.txt
@@ -19,4 +19,4 @@
type = Tool
name = llvm-lto2
parent = Tools
-required_libraries = Core Linker LTO MC Object Support all-targets
+required_libraries = BitReader Core Linker LTO MC Object Support all-targets
diff --git a/tools/llvm-lto2/llvm-lto2.cpp b/tools/llvm-lto2/llvm-lto2.cpp
index bbfece517c80..5426e040cd7c 100644
--- a/tools/llvm-lto2/llvm-lto2.cpp
+++ b/tools/llvm-lto2/llvm-lto2.cpp
@@ -16,9 +16,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/LTO/Caching.h"
+#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/LTO/Caching.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
@@ -298,6 +299,17 @@ static int run(int argc, char **argv) {
static int dumpSymtab(int argc, char **argv) {
for (StringRef F : make_range(argv + 1, argv + argc)) {
std::unique_ptr<MemoryBuffer> MB = check(MemoryBuffer::getFile(F), F);
+ BitcodeFileContents BFC = check(getBitcodeFileContents(*MB), F);
+
+ if (BFC.Symtab.size() >= sizeof(irsymtab::storage::Header)) {
+ auto *Hdr = reinterpret_cast<const irsymtab::storage::Header *>(
+ BFC.Symtab.data());
+ outs() << "version: " << Hdr->Version << '\n';
+ if (Hdr->Version == irsymtab::storage::Header::kCurrentVersion)
+ outs() << "producer: " << Hdr->Producer.get(BFC.StrtabForSymtab)
+ << '\n';
+ }
+
std::unique_ptr<InputFile> Input =
check(InputFile::create(MB->getMemBufferRef()), F);
diff --git a/tools/llvm-nm/CMakeLists.txt b/tools/llvm-nm/CMakeLists.txt
index 10019562565d..08bcd5f30898 100644
--- a/tools/llvm-nm/CMakeLists.txt
+++ b/tools/llvm-nm/CMakeLists.txt
@@ -3,6 +3,7 @@ set(LLVM_LINK_COMPONENTS
AllTargetsDescs
AllTargetsInfos
Core
+ Demangle
Object
Support
)
diff --git a/tools/llvm-nm/llvm-nm.cpp b/tools/llvm-nm/llvm-nm.cpp
index 513e1b87558f..ea47891250f7 100644
--- a/tools/llvm-nm/llvm-nm.cpp
+++ b/tools/llvm-nm/llvm-nm.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/Demangle/Demangle.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
@@ -124,6 +125,10 @@ cl::opt<bool> NoSort("no-sort", cl::desc("Show symbols in order encountered"));
cl::alias NoSortp("p", cl::desc("Alias for --no-sort"), cl::aliasopt(NoSort),
cl::Grouping);
+cl::opt<bool> Demangle("demangle", cl::desc("Demangle C++ symbol names"));
+cl::alias DemangleC("C", cl::desc("Alias for --demangle"), cl::aliasopt(Demangle),
+ cl::Grouping);
+
cl::opt<bool> ReverseSort("reverse-sort", cl::desc("Sort in reverse order"));
cl::alias ReverseSortr("r", cl::desc("Alias for --reverse-sort"),
cl::aliasopt(ReverseSort), cl::Grouping);
@@ -659,6 +664,24 @@ static void darwinPrintStab(MachOObjectFile *MachO, SymbolListT::iterator I) {
outs() << Str;
}
+static Optional<std::string> demangle(StringRef Name, bool StripUnderscore) {
+ if (StripUnderscore && Name.size() > 0 && Name[0] == '_')
+ Name = Name.substr(1);
+
+ if (!Name.startswith("_Z"))
+ return None;
+
+ int Status;
+ char *Undecorated =
+ itaniumDemangle(Name.str().c_str(), nullptr, nullptr, &Status);
+ if (Status != 0)
+ return None;
+
+ std::string S(Undecorated);
+ free(Undecorated);
+ return S;
+}
+
static bool symbolIsDefined(const NMSymbol &Sym) {
return Sym.TypeChar != 'U' && Sym.TypeChar != 'w' && Sym.TypeChar != 'v';
}
@@ -724,6 +747,12 @@ static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName,
for (SymbolListT::iterator I = SymbolList.begin(), E = SymbolList.end();
I != E; ++I) {
uint32_t SymFlags;
+ std::string Name = I->Name.str();
+ MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(&Obj);
+ if (Demangle) {
+ if (Optional<std::string> Opt = demangle(I->Name, MachO))
+ Name = *Opt;
+ }
if (I->Sym.getRawDataRefImpl().p)
SymFlags = I->Sym.getFlags();
else
@@ -745,9 +774,10 @@ static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName,
outs() << CurrentFilename << ": ";
}
}
- if ((JustSymbolName || (UndefinedOnly && isa<MachOObjectFile>(Obj) &&
- OutputFormat != darwin)) && OutputFormat != posix) {
- outs() << I->Name << "\n";
+ if ((JustSymbolName ||
+ (UndefinedOnly && MachO && OutputFormat != darwin)) &&
+ OutputFormat != posix) {
+ outs() << Name << "\n";
continue;
}
@@ -767,7 +797,6 @@ static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName,
}
}
- MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(&Obj);
// Otherwise, print the symbol address and size.
if (symbolIsDefined(*I)) {
if (Obj.isIR())
@@ -789,7 +818,7 @@ static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName,
darwinPrintSymbol(Obj, I, SymbolAddrStr, printBlanks, printDashes,
printFormat);
} else if (OutputFormat == posix) {
- outs() << I->Name << " " << I->TypeChar << " ";
+ outs() << Name << " " << I->TypeChar << " ";
if (MachO)
outs() << SymbolAddrStr << " " << "0" /* SymbolSizeStr */ << "\n";
else
@@ -804,7 +833,7 @@ static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName,
outs() << I->TypeChar;
if (I->TypeChar == '-' && MachO)
darwinPrintStab(MachO, I);
- outs() << " " << I->Name;
+ outs() << " " << Name;
if (I->TypeChar == 'I' && MachO) {
outs() << " (indirect for ";
if (I->Sym.getRawDataRefImpl().p) {
@@ -818,7 +847,7 @@ static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName,
}
outs() << "\n";
} else if (OutputFormat == sysv) {
- std::string PaddedName(I->Name);
+ std::string PaddedName(Name);
while (PaddedName.length() < 20)
PaddedName += " ";
outs() << PaddedName << "|" << SymbolAddrStr << "| " << I->TypeChar
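The demangle() helper added above wraps LLVM's Itanium demangler. A minimal standalone use of the same entry point, as a sketch with error handling trimmed:

    #include "llvm/Demangle/Demangle.h"
    #include <cstdio>
    #include <cstdlib>

    int main() {
      int Status = 0;
      // itaniumDemangle mallocs the result; the caller must free it.
      char *Out = llvm::itaniumDemangle("_Z3fooi", nullptr, nullptr, &Status);
      if (Status == 0 && Out) {
        std::printf("%s\n", Out); // prints "foo(int)", as in demangle.ll
        std::free(Out);
      }
      return Status;
    }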
diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp
index 6a9151570908..be5635a3d4c6 100644
--- a/tools/llvm-objdump/llvm-objdump.cpp
+++ b/tools/llvm-objdump/llvm-objdump.cpp
@@ -41,6 +41,7 @@
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/Wasm.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -886,6 +887,18 @@ static void printRelocationTargetName(const MachOObjectFile *O,
fmt << S;
}
+static std::error_code getRelocationValueString(const WasmObjectFile *Obj,
+ const RelocationRef &RelRef,
+ SmallVectorImpl<char> &Result) {
+ const wasm::WasmRelocation& Rel = Obj->getWasmRelocation(RelRef);
+ std::string fmtbuf;
+ raw_string_ostream fmt(fmtbuf);
+ fmt << Rel.Index << (Rel.Addend < 0 ? "" : "+") << Rel.Addend;
+ fmt.flush();
+ Result.append(fmtbuf.begin(), fmtbuf.end());
+ return std::error_code();
+}
+
static std::error_code getRelocationValueString(const MachOObjectFile *Obj,
const RelocationRef &RelRef,
SmallVectorImpl<char> &Result) {
@@ -1071,8 +1084,11 @@ static std::error_code getRelocationValueString(const RelocationRef &Rel,
return getRelocationValueString(ELF, Rel, Result);
if (auto *COFF = dyn_cast<COFFObjectFile>(Obj))
return getRelocationValueString(COFF, Rel, Result);
- auto *MachO = cast<MachOObjectFile>(Obj);
- return getRelocationValueString(MachO, Rel, Result);
+ if (auto *Wasm = dyn_cast<WasmObjectFile>(Obj))
+ return getRelocationValueString(Wasm, Rel, Result);
+ if (auto *MachO = dyn_cast<MachOObjectFile>(Obj))
+ return getRelocationValueString(MachO, Rel, Result);
+ llvm_unreachable("unknown object file format");
}
/// @brief Indicates whether this relocation should be hidden when listing
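The new wasm overload above prints a relocation target as its index plus addend; emitting '+' only when the addend is non-negative works because a negative value prints its own sign. The same formatting in isolation (the function name is illustrative):

#include "llvm/Support/raw_ostream.h"
#include <cstdint>
#include <string>

// Render "Index+Addend" (or "Index-N" via the addend's own sign).
static std::string formatWasmReloc(uint32_t Index, int64_t Addend) {
  std::string Buf;
  llvm::raw_string_ostream OS(Buf);
  OS << Index << (Addend < 0 ? "" : "+") << Addend;
  return OS.str(); // str() flushes the stream into Buf
}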
diff --git a/tools/llvm-pdbutil/DumpOutputStyle.cpp b/tools/llvm-pdbutil/DumpOutputStyle.cpp
index f76635f9e511..a1f919b4dd06 100644
--- a/tools/llvm-pdbutil/DumpOutputStyle.cpp
+++ b/tools/llvm-pdbutil/DumpOutputStyle.cpp
@@ -37,6 +37,7 @@
#include "llvm/DebugInfo/CodeView/SymbolVisitorCallbackPipeline.h"
#include "llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h"
#include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h"
+#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
@@ -116,12 +117,14 @@ Error DumpOutputStyle::dump() {
return EC;
}
- if (opts::dump::DumpTypes || opts::dump::DumpTypeExtras) {
+ if (opts::dump::DumpTypes || !opts::dump::DumpTypeIndex.empty() ||
+ opts::dump::DumpTypeExtras) {
if (auto EC = dumpTpiStream(StreamTPI))
return EC;
}
- if (opts::dump::DumpIds || opts::dump::DumpIdExtras) {
+ if (opts::dump::DumpIds || !opts::dump::DumpIdIndex.empty() ||
+ opts::dump::DumpIdExtras) {
if (auto EC = dumpTpiStream(StreamIPI))
return EC;
}
@@ -620,6 +623,76 @@ Error DumpOutputStyle::dumpStringTable() {
return Error::success();
}
+static void buildDepSet(LazyRandomTypeCollection &Types,
+ ArrayRef<TypeIndex> Indices,
+ std::map<TypeIndex, CVType> &DepSet) {
+ SmallVector<TypeIndex, 4> DepList;
+ for (const auto &I : Indices) {
+ TypeIndex TI(I);
+ if (DepSet.find(TI) != DepSet.end() || TI.isSimple() || TI.isNoneType())
+ continue;
+
+ CVType Type = Types.getType(TI);
+ DepSet[TI] = Type;
+ codeview::discoverTypeIndices(Type, DepList);
+ buildDepSet(Types, DepList, DepSet);
+ }
+}
+
+static void dumpFullTypeStream(LinePrinter &Printer,
+ LazyRandomTypeCollection &Types,
+ TpiStream &Stream, bool Bytes, bool Extras) {
+ Printer.formatLine("Showing {0:N} records", Stream.getNumTypeRecords());
+ uint32_t Width =
+ NumDigits(TypeIndex::FirstNonSimpleIndex + Stream.getNumTypeRecords());
+
+ MinimalTypeDumpVisitor V(Printer, Width + 2, Bytes, Extras, Types,
+ Stream.getHashValues());
+
+ if (auto EC = codeview::visitTypeStream(Types, V)) {
+ Printer.formatLine("An error occurred dumping type records: {0}",
+ toString(std::move(EC)));
+ }
+}
+
+static void dumpPartialTypeStream(LinePrinter &Printer,
+ LazyRandomTypeCollection &Types,
+ TpiStream &Stream, ArrayRef<TypeIndex> TiList,
+ bool Bytes, bool Extras, bool Deps) {
+ uint32_t Width =
+ NumDigits(TypeIndex::FirstNonSimpleIndex + Stream.getNumTypeRecords());
+
+ MinimalTypeDumpVisitor V(Printer, Width + 2, Bytes, Extras, Types,
+ Stream.getHashValues());
+
+ if (opts::dump::DumpTypeDependents) {
+ // If we need to dump all dependents, then iterate each index and find
+ // all dependents, adding them to a map ordered by TypeIndex.
+ std::map<TypeIndex, CVType> DepSet;
+ buildDepSet(Types, TiList, DepSet);
+
+ Printer.formatLine(
+ "Showing {0:N} records and their dependents ({1:N} records total)",
+ TiList.size(), DepSet.size());
+
+ for (auto &Dep : DepSet) {
+ if (auto EC = codeview::visitTypeRecord(Dep.second, Dep.first, V))
+ Printer.formatLine("An error occurred dumping type record {0}: {1}",
+ Dep.first, toString(std::move(EC)));
+ }
+ } else {
+ Printer.formatLine("Showing {0:N} records.", TiList.size());
+
+ for (const auto &I : TiList) {
+ TypeIndex TI(I);
+ CVType Type = Types.getType(TI);
+ if (auto EC = codeview::visitTypeRecord(Type, TI, V))
+ Printer.formatLine("An error occurred dumping type record {0}: {1}", TI,
+ toString(std::move(EC)));
+ }
+ }
+}
+
Error DumpOutputStyle::dumpTpiStream(uint32_t StreamIdx) {
assert(StreamIdx == StreamTPI || StreamIdx == StreamIPI);
@@ -659,27 +732,13 @@ Error DumpOutputStyle::dumpTpiStream(uint32_t StreamIdx) {
auto &Types = Err(initializeTypes(StreamIdx));
- if (DumpTypes) {
- P.formatLine("Showing {0:N} records", Stream.getNumTypeRecords());
- uint32_t Width =
- NumDigits(TypeIndex::FirstNonSimpleIndex + Stream.getNumTypeRecords());
-
- MinimalTypeDumpVisitor V(P, Width + 2, DumpBytes, DumpExtras, Types,
- Stream.getHashValues());
-
- if (Indices.empty()) {
- if (auto EC = codeview::visitTypeStream(Types, V)) {
- P.formatLine("An error occurred dumping type records: {0}",
- toString(std::move(EC)));
- }
- } else {
- for (const auto &I : Indices) {
- TypeIndex TI(I);
- CVType Type = Types.getType(TI);
- if (auto EC = codeview::visitTypeRecord(Type, TI, V))
- P.formatLine("An error occurred dumping type record {0}: {1}", TI,
- toString(std::move(EC)));
- }
+ if (DumpTypes || !Indices.empty()) {
+ if (Indices.empty())
+ dumpFullTypeStream(P, Types, Stream, DumpBytes, DumpExtras);
+ else {
+ std::vector<TypeIndex> TiList(Indices.begin(), Indices.end());
+ dumpPartialTypeStream(P, Types, Stream, TiList, DumpBytes, DumpExtras,
+ opts::dump::DumpTypeDependents);
}
}
@@ -775,7 +834,8 @@ Error DumpOutputStyle::dumpModuleSyms() {
Pipeline.addCallbackToPipeline(Deserializer);
Pipeline.addCallbackToPipeline(Dumper);
CVSymbolVisitor Visitor(Pipeline);
- if (auto EC = Visitor.visitSymbolStream(ModS.getSymbolArray())) {
+ auto SS = ModS.getSymbolsSubstream();
+ if (auto EC = Visitor.visitSymbolStream(ModS.getSymbolArray(), SS.Offset)) {
P.formatLine("Error while processing symbol records. {0}",
toString(std::move(EC)));
continue;
@@ -804,13 +864,14 @@ Error DumpOutputStyle::dumpPublics() {
Pipeline.addCallbackToPipeline(Deserializer);
Pipeline.addCallbackToPipeline(Dumper);
CVSymbolVisitor Visitor(Pipeline);
+
auto ExpectedSymbols = Publics.getSymbolArray();
if (!ExpectedSymbols) {
P.formatLine("Could not read public symbol record stream");
return Error::success();
}
- if (auto EC = Visitor.visitSymbolStream(*ExpectedSymbols))
+ if (auto EC = Visitor.visitSymbolStream(*ExpectedSymbols, 0))
P.formatLine("Error while processing public symbol records. {0}",
toString(std::move(EC)));
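The -dependents mode above rests on buildDepSet, which walks the transitive closure of a set of type indices; the std::map doubles as the visited set and keeps the eventual dump ordered by TypeIndex. The same pattern in generic form (Record and its Deps edge list are hypothetical stand-ins for CVType and codeview::discoverTypeIndices):

#include <map>
#include <vector>

// Hypothetical record with outgoing dependency edges.
struct Record {
  std::vector<int> Deps;
};

// Depth-first closure: Out serves both as the visited set and as the
// index-ordered result.
static void buildClosure(const std::map<int, Record> &Graph, int Start,
                         std::map<int, Record> &Out) {
  if (Out.count(Start))
    return;
  auto It = Graph.find(Start);
  if (It == Graph.end())
    return;
  Out[Start] = It->second;
  for (int Dep : It->second.Deps)
    buildClosure(Graph, Dep, Out);
}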
diff --git a/tools/llvm-pdbutil/DumpOutputStyle.h b/tools/llvm-pdbutil/DumpOutputStyle.h
index 296a6c14942e..4c52289f052e 100644
--- a/tools/llvm-pdbutil/DumpOutputStyle.h
+++ b/tools/llvm-pdbutil/DumpOutputStyle.h
@@ -37,8 +37,6 @@ private:
Error dumpFileSummary();
Error dumpStreamSummary();
- Error dumpBlockRanges();
- Error dumpStreamBytes();
Error dumpStringTable();
Error dumpLines();
Error dumpInlineeLines();
diff --git a/tools/llvm-pdbutil/MinimalSymbolDumper.cpp b/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
index 7f5412d59885..ab7045ca4492 100644
--- a/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
+++ b/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
@@ -367,11 +367,17 @@ static std::string formatGaps(uint32_t IndentLevel,
}
Error MinimalSymbolDumper::visitSymbolBegin(codeview::CVSymbol &Record) {
+ return visitSymbolBegin(Record, 0);
+}
+
+Error MinimalSymbolDumper::visitSymbolBegin(codeview::CVSymbol &Record,
+ uint32_t Offset) {
// formatLine puts the newline at the beginning, so we use formatLine here
// to start a new line, and then individual visit methods use format to
// append to the existing line.
- P.formatLine("- {0} [size = {1}]", getSymbolKindName(Record.Type),
- Record.length());
+ P.formatLine("{0} | {1} [size = {2}]",
+ fmt_align(Offset, AlignStyle::Right, 6),
+ getSymbolKindName(Record.Type), Record.length());
P.Indent();
return Error::success();
}
@@ -394,28 +400,28 @@ std::string MinimalSymbolDumper::typeIndex(TypeIndex TI) const {
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, BlockSym &Block) {
P.format(" `{0}`", Block.Name);
- AutoIndent Indent(P);
- P.formatLine("parent = {0}, addr = {1}", Block.Parent,
+ AutoIndent Indent(P, 7);
+ P.formatLine("parent = {0}, end = {1}", Block.Parent, Block.End);
+ P.formatLine("code size = {0}, addr = {1}", Block.CodeSize,
formatSegmentOffset(Block.Segment, Block.CodeOffset));
- P.formatLine("code size = {0}, end = {1}", Block.CodeSize, Block.End);
return Error::success();
}
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, Thunk32Sym &Thunk) {
P.format(" `{0}`", Thunk.Name);
- AutoIndent Indent(P);
- P.formatLine("parent = {0}, addr = {1}", Thunk.Parent,
- formatSegmentOffset(Thunk.Segment, Thunk.Offset));
- P.formatLine("kind = {0}, size = {1}, end = {2}, next = {3}",
- formatThunkOrdinal(Thunk.Thunk), Thunk.Length, Thunk.End,
+ AutoIndent Indent(P, 7);
+ P.formatLine("parent = {0}, end = {1}, next = {2}", Thunk.Parent, Thunk.End,
Thunk.Next);
+ P.formatLine("kind = {0}, size = {1}, addr = {2}",
+ formatThunkOrdinal(Thunk.Thunk), Thunk.Length,
+ formatSegmentOffset(Thunk.Segment, Thunk.Offset));
return Error::success();
}
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
TrampolineSym &Tramp) {
- AutoIndent Indent(P);
+ AutoIndent Indent(P, 7);
P.formatLine("type = {0}, size = {1}, source = {2}, target = {3}",
formatTrampolineType(Tramp.Type), Tramp.Size,
formatSegmentOffset(Tramp.ThunkSection, Tramp.ThunkOffset),
@@ -427,7 +433,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
SectionSym &Section) {
P.format(" `{0}`", Section.Name);
- AutoIndent Indent(P);
+ AutoIndent Indent(P, 7);
P.formatLine("length = {0}, alignment = {1}, rva = {2}, section # = {3}, "
"characteristics = {4}",
Section.Length, Section.Alignment, Section.Rva,
@@ -437,7 +443,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, CoffGroupSym &CG) {
P.format(" `{0}`", CG.Name);
- AutoIndent Indent(P);
+ AutoIndent Indent(P, 7);
P.formatLine("length = {0}, addr = {1}, characteristics = {2}", CG.Size,
formatSegmentOffset(CG.Segment, CG.Offset), CG.Characteristics);
return Error::success();
@@ -446,7 +452,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, CoffGroupSym &CG) {
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
BPRelativeSym &BPRel) {
P.format(" `{0}`", BPRel.Name);
- AutoIndent Indent(P);
+ AutoIndent Indent(P, 7);
P.formatLine("type = {0}, offset = {1}", typeIndex(BPRel.Type), BPRel.Offset);
return Error::success();
}
@@ -459,7 +465,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
CallSiteInfoSym &CSI) {
- AutoIndent Indent(P);
+ AutoIndent Indent(P, 7);
P.formatLine("type = {0}, addr = {1}", typeIndex(CSI.Type),
formatSegmentOffset(CSI.Segment, CSI.CodeOffset));
return Error::success();
@@ -467,6 +473,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
EnvBlockSym &EnvBlock) {
+ AutoIndent Indent(P, 7);
for (const auto &Entry : EnvBlock.Fields) {
P.formatLine("- {0}", Entry);
}
@@ -475,7 +482,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, FileStaticSym &FS) {
P.format(" `{0}`", FS.Name);
- AutoIndent Indent(P);
+ AutoIndent Indent(P, 7);
P.formatLine("type = {0}, file name offset = {1}, flags = {2}",
typeIndex(FS.Index), FS.ModFilenameOffset,
formatLocalSymFlags(P.getIndentLevel() + 9, FS.Flags));
@@ -484,7 +491,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, FileStaticSym &FS) {
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, ExportSym &Export) {
P.format(" `{0}`", Export.Name);
- AutoIndent Indent(P);
+ AutoIndent Indent(P, 7);
P.formatLine("ordinal = {0}, flags = {1}", Export.Ordinal,
formatExportFlags(P.getIndentLevel() + 9, Export.Flags));
return Error::success();
@@ -492,7 +499,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, ExportSym &Export) {
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
Compile2Sym &Compile2) {
- AutoIndent Indent(P);
+ AutoIndent Indent(P, 7);
SourceLanguage Lang = static_cast<SourceLanguage>(
Compile2.Flags & CompileSym2Flags::SourceLanguageMask);
P.formatLine("machine = {0}, ver = {1}, language = {2}",
@@ -512,7 +519,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
Compile3Sym &Compile3) {
- AutoIndent Indent(P);
+ AutoIndent Indent(P, 7);
SourceLanguage Lang = static_cast<SourceLanguage>(
Compile3.Flags & CompileSym3Flags::SourceLanguageMask);
P.formatLine("machine = {0}, Ver = {1}, language = {2}",
@@ -531,7 +538,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
ConstantSym &Constant) {
P.format(" `{0}`", Constant.Name);
- AutoIndent Indent(P);
+ AutoIndent Indent(P, 7);
P.formatLine("type = {0}, value = {1}", typeIndex(Constant.Type),
Constant.Value.toString(10));
return Error::success();
@@ -539,7 +546,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, DataSym &Data) {
P.format(" `{0}`", Data.Name);
- AutoIndent Indent(P);
+ AutoIndent Indent(P, 7);
P.formatLine("type = {0}, addr = {1}", typeIndex(Data.Type),
formatSegmentOffset(Data.Segment, Data.DataOffset));
return Error::success();
@@ -553,7 +560,7 @@ Error MinimalSymbolDumper::visitKnownRecord(
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
DefRangeFramePointerRelSym &Def) {
- AutoIndent Indent(P);
+ AutoIndent Indent(P, 7);
P.formatLine("offset = {0}, range = {1}", Def.Offset, formatRange(Def.Range));
P.formatLine("gaps = {2}", Def.Offset,
formatGaps(P.getIndentLevel() + 9, Def.Gaps));
@@ -562,7 +569,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
DefRangeRegisterRelSym &Def) {
- AutoIndent Indent(P);
+ AutoIndent Indent(P, 7);
P.formatLine("register = {0}, base ptr = {1}, offset in parent = {2}, has "
"spilled udt = {3}",
uint16_t(Def.Hdr.Register), int32_t(Def.Hdr.BasePointerOffset),
@@ -574,7 +581,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
Error MinimalSymbolDumper::visitKnownRecord(
CVSymbol &CVR, DefRangeRegisterSym &DefRangeRegister) {
- AutoIndent Indent(P);
+ AutoIndent Indent(P, 7);
P.formatLine("register = {0}, may have no name = {1}, range start = "
"{2}, length = {3}",
uint16_t(DefRangeRegister.Hdr.Register),
@@ -589,7 +596,7 @@ Error MinimalSymbolDumper::visitKnownRecord(
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
DefRangeSubfieldRegisterSym &Def) {
- AutoIndent Indent(P);
+ AutoIndent Indent(P, 7);
bool NoName = !!(Def.Hdr.MayHaveNoName == 0);
P.formatLine("register = {0}, may have no name = {1}, offset in parent = {2}",
uint16_t(Def.Hdr.Register), NoName,
@@ -601,7 +608,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
DefRangeSubfieldSym &Def) {
- AutoIndent Indent(P);
+ AutoIndent Indent(P, 7);
P.formatLine("program = {0}, offset in parent = {1}, range = {2}",
Def.Program, Def.OffsetInParent, formatRange(Def.Range));
P.formatLine("gaps = {0}", formatGaps(P.getIndentLevel() + 9, Def.Gaps));
@@ -609,7 +616,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
}
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, DefRangeSym &Def) {
- AutoIndent Indent(P);
+ AutoIndent Indent(P, 7);
P.formatLine("program = {0}, range = {1}", Def.Program,
formatRange(Def.Range));
P.formatLine("gaps = {0}", formatGaps(P.getIndentLevel() + 9, Def.Gaps));
@@ -617,7 +624,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, DefRangeSym &Def) {
}
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, FrameCookieSym &FC) {
- AutoIndent Indent(P);
+ AutoIndent Indent(P, 7);
P.formatLine("code offset = {0}, Register = {1}, kind = {2}, flags = {3}",
FC.CodeOffset, FC.Register, formatCookieKind(FC.CookieKind),
FC.Flags);
@@ -625,7 +632,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, FrameCookieSym &FC) {
}
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, FrameProcSym &FP) {
- AutoIndent Indent(P);
+ AutoIndent Indent(P, 7);
P.formatLine("size = {0}, padding size = {1}, offset to padding = {2}",
FP.TotalFrameBytes, FP.PaddingFrameBytes, FP.OffsetToPadding);
P.formatLine("bytes of callee saved registers = {0}, exception handler addr "
@@ -640,7 +647,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, FrameProcSym &FP) {
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
HeapAllocationSiteSym &HAS) {
- AutoIndent Indent(P);
+ AutoIndent Indent(P, 7);
P.formatLine("type = {0}, addr = {1} call size = {2}", typeIndex(HAS.Type),
formatSegmentOffset(HAS.Segment, HAS.CodeOffset),
HAS.CallInstructionSize);
@@ -648,7 +655,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
}
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, InlineSiteSym &IS) {
- AutoIndent Indent(P);
+ AutoIndent Indent(P, 7);
auto Bytes = makeArrayRef(IS.AnnotationData);
StringRef Annotations(reinterpret_cast<const char *>(Bytes.begin()),
Bytes.size());
@@ -662,7 +669,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, InlineSiteSym &IS) {
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
RegisterSym &Register) {
P.format(" `{0}`", Register.Name);
- AutoIndent Indent(P);
+ AutoIndent Indent(P, 7);
P.formatLine("register = {0}, type = {1}",
formatRegisterId(Register.Register), typeIndex(Register.Index));
return Error::success();
@@ -671,7 +678,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
PublicSym32 &Public) {
P.format(" `{0}`", Public.Name);
- AutoIndent Indent(P);
+ AutoIndent Indent(P, 7);
P.formatLine("flags = {0}, addr = {1}",
formatPublicSymFlags(P.getIndentLevel() + 9, Public.Flags),
formatSegmentOffset(Public.Segment, Public.Offset));
@@ -680,7 +687,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, ProcRefSym &PR) {
P.format(" `{0}`", PR.Name);
- AutoIndent Indent(P);
+ AutoIndent Indent(P, 7);
P.formatLine("module = {0}, sum name = {1}, offset = {2}", PR.Module,
PR.SumName, PR.SymOffset);
return Error::success();
@@ -689,7 +696,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, ProcRefSym &PR) {
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, LabelSym &Label) {
P.format(" `{0}` (addr = {1})", Label.Name,
formatSegmentOffset(Label.Segment, Label.CodeOffset));
- AutoIndent Indent(P);
+ AutoIndent Indent(P, 7);
P.formatLine("flags = {0}",
formatProcSymFlags(P.getIndentLevel() + 9, Label.Flags));
return Error::success();
@@ -697,7 +704,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, LabelSym &Label) {
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, LocalSym &Local) {
P.format(" `{0}`", Local.Name);
- AutoIndent Indent(P);
+ AutoIndent Indent(P, 7);
std::string FlagStr =
formatLocalSymFlags(P.getIndentLevel() + 9, Local.Flags);
@@ -713,10 +720,11 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, ProcSym &Proc) {
P.format(" `{0}`", Proc.Name);
- AutoIndent Indent(P);
- P.formatLine("parent = {0}, addr = {1}, code size = {2}, end = {3}",
- Proc.Parent, formatSegmentOffset(Proc.Segment, Proc.CodeOffset),
- Proc.CodeSize, Proc.End);
+ AutoIndent Indent(P, 7);
+ P.formatLine("parent = {0}, end = {1}, addr = {2}, code size = {3}",
+ Proc.Parent, Proc.End,
+ formatSegmentOffset(Proc.Segment, Proc.CodeOffset),
+ Proc.CodeSize);
P.formatLine("debug start = {0}, debug end = {1}, flags = {2}", Proc.DbgStart,
Proc.DbgEnd,
formatProcSymFlags(P.getIndentLevel() + 9, Proc.Flags));
@@ -729,7 +737,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
}
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, CallerSym &Caller) {
- AutoIndent Indent(P);
+ AutoIndent Indent(P, 7);
for (const auto &I : Caller.Indices) {
P.formatLine("callee: {0}", typeIndex(I));
}
@@ -739,7 +747,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, CallerSym &Caller) {
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
RegRelativeSym &RegRel) {
P.format(" `{0}`", RegRel.Name);
- AutoIndent Indent(P);
+ AutoIndent Indent(P, 7);
P.formatLine("type = {0}, register = {1}, offset = {2}",
typeIndex(RegRel.Type), formatRegisterId(RegRel.Register),
RegRel.Offset);
@@ -749,7 +757,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
ThreadLocalDataSym &Data) {
P.format(" `{0}`", Data.Name);
- AutoIndent Indent(P);
+ AutoIndent Indent(P, 7);
P.formatLine("type = {0}, addr = {1}", typeIndex(Data.Type),
formatSegmentOffset(Data.Segment, Data.DataOffset));
return Error::success();
@@ -757,7 +765,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, UDTSym &UDT) {
P.format(" `{0}`", UDT.Name);
- AutoIndent Indent(P);
+ AutoIndent Indent(P, 7);
P.formatLine("original type = {0}", UDT.Type);
return Error::success();
}
diff --git a/tools/llvm-pdbutil/MinimalSymbolDumper.h b/tools/llvm-pdbutil/MinimalSymbolDumper.h
index 451f2da6fd1d..5e30959ea9c0 100644
--- a/tools/llvm-pdbutil/MinimalSymbolDumper.h
+++ b/tools/llvm-pdbutil/MinimalSymbolDumper.h
@@ -27,6 +27,7 @@ public:
: P(P), Types(Types) {}
Error visitSymbolBegin(codeview::CVSymbol &Record) override;
+ Error visitSymbolBegin(codeview::CVSymbol &Record, uint32_t Offset) override;
Error visitSymbolEnd(codeview::CVSymbol &Record) override;
#define SYMBOL_RECORD(EnumName, EnumVal, Name) \
diff --git a/tools/llvm-pdbutil/MinimalTypeDumper.cpp b/tools/llvm-pdbutil/MinimalTypeDumper.cpp
index 22d3a4557c52..1af53e35ed11 100644
--- a/tools/llvm-pdbutil/MinimalTypeDumper.cpp
+++ b/tools/llvm-pdbutil/MinimalTypeDumper.cpp
@@ -377,7 +377,7 @@ Error MinimalTypeDumpVisitor::visitKnownRecord(CVType &CVR,
Error MinimalTypeDumpVisitor::visitKnownRecord(CVType &CVR,
MemberFunctionRecord &MF) {
P.formatLine("return type = {0}, # args = {1}, param list = {2}",
- MF.ParameterCount, MF.ArgumentList, MF.ReturnType);
+ MF.ReturnType, MF.ParameterCount, MF.ArgumentList);
P.formatLine("class type = {0}, this type = {1}, this adjust = {2}",
MF.ClassType, MF.ThisType, MF.ThisPointerAdjustment);
P.formatLine("calling conv = {0}, options = {1}",
diff --git a/tools/llvm-pdbutil/PdbYaml.cpp b/tools/llvm-pdbutil/PdbYaml.cpp
index b4a41fbfdb8f..315ae2e6711f 100644
--- a/tools/llvm-pdbutil/PdbYaml.cpp
+++ b/tools/llvm-pdbutil/PdbYaml.cpp
@@ -30,8 +30,6 @@ using namespace llvm::pdb;
using namespace llvm::pdb::yaml;
using namespace llvm::yaml;
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t)
-LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::StringRef)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::pdb::yaml::NamedStreamMapping)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::pdb::yaml::PdbDbiModuleInfo)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::pdb::yaml::StreamBlockList)
diff --git a/tools/llvm-pdbutil/llvm-pdbutil.cpp b/tools/llvm-pdbutil/llvm-pdbutil.cpp
index 4a176fb13590..ad11ad498000 100644
--- a/tools/llvm-pdbutil/llvm-pdbutil.cpp
+++ b/tools/llvm-pdbutil/llvm-pdbutil.cpp
@@ -419,6 +419,13 @@ cl::list<uint32_t> DumpIdIndex(
cl::desc("only dump ids with the specified hexadecimal type index"),
cl::cat(TypeOptions), cl::sub(DumpSubcommand));
+cl::opt<bool> DumpTypeDependents(
+ "dependents",
+ cl::desc("In conjunection with -type-index and -id-index, dumps the entire "
+ "dependency graph for the specified index instead of "
+ "just the single record with the specified index"),
+ cl::cat(TypeOptions), cl::sub(DumpSubcommand));
+
// SYMBOL OPTIONS
cl::opt<bool> DumpPublics("publics", cl::desc("dump Publics stream data"),
cl::cat(SymbolOptions), cl::sub(DumpSubcommand));
diff --git a/tools/llvm-pdbutil/llvm-pdbutil.h b/tools/llvm-pdbutil/llvm-pdbutil.h
index 837d8ebbaf9e..9ee5866bbeff 100644
--- a/tools/llvm-pdbutil/llvm-pdbutil.h
+++ b/tools/llvm-pdbutil/llvm-pdbutil.h
@@ -135,6 +135,7 @@ extern llvm::cl::opt<bool> DumpTypes;
extern llvm::cl::opt<bool> DumpTypeData;
extern llvm::cl::opt<bool> DumpTypeExtras;
extern llvm::cl::list<uint32_t> DumpTypeIndex;
+extern llvm::cl::opt<bool> DumpTypeDependents;
extern llvm::cl::opt<bool> DumpIds;
extern llvm::cl::opt<bool> DumpIdData;
diff --git a/tools/llvm-readobj/COFFDumper.cpp b/tools/llvm-readobj/COFFDumper.cpp
index daa7a643a72f..e5ff3e4186de 100644
--- a/tools/llvm-readobj/COFFDumper.cpp
+++ b/tools/llvm-readobj/COFFDumper.cpp
@@ -335,6 +335,7 @@ static const EnumEntry<COFF::MachineTypes> ImageFileMachineType[] = {
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_AM33 ),
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_AMD64 ),
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_ARM ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_ARM64 ),
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_ARMNT ),
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_EBC ),
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_I386 ),
@@ -678,6 +679,7 @@ void COFFDumper::printDOSHeader(const dos_header *DH) {
template <class PEHeader>
void COFFDumper::printPEHeader(const PEHeader *Hdr) {
DictScope D(W, "ImageOptionalHeader");
+ W.printHex ("Magic", Hdr->Magic);
W.printNumber("MajorLinkerVersion", Hdr->MajorLinkerVersion);
W.printNumber("MinorLinkerVersion", Hdr->MinorLinkerVersion);
W.printNumber("SizeOfCode", Hdr->SizeOfCode);
diff --git a/tools/llvm-readobj/WasmDumper.cpp b/tools/llvm-readobj/WasmDumper.cpp
index 21614297e467..14603f8a2b09 100644
--- a/tools/llvm-readobj/WasmDumper.cpp
+++ b/tools/llvm-readobj/WasmDumper.cpp
@@ -183,6 +183,7 @@ void WasmDumper::printSymbol(const SymbolRef &Sym) {
WasmSymbol Symbol = Obj->getWasmSymbol(Sym.getRawDataRefImpl());
W.printString("Name", Symbol.Name);
W.printEnum("Type", static_cast<unsigned>(Symbol.Type), makeArrayRef(WasmSymbolTypes));
+ W.printHex("Flags", Symbol.Flags);
}
}
diff --git a/tools/obj2yaml/wasm2yaml.cpp b/tools/obj2yaml/wasm2yaml.cpp
index ad0075caac1d..1df6afcf3c46 100644
--- a/tools/obj2yaml/wasm2yaml.cpp
+++ b/tools/obj2yaml/wasm2yaml.cpp
@@ -77,6 +77,8 @@ std::unique_ptr<WasmYAML::CustomSection> WasmDumper::dumpCustomSection(const Was
LinkingSec->SymbolInfos.push_back(Info);
}
}
+ LinkingSec->DataSize = Obj.linkingData().DataSize;
+ LinkingSec->DataAlignment = Obj.linkingData().DataAlignment;
CustomSec = std::move(LinkingSec);
} else {
CustomSec = make_unique<WasmYAML::CustomSection>(WasmSec.Name);
diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp
index 9d489ab5a2d2..24cce58047f1 100644
--- a/tools/opt/opt.cpp
+++ b/tools/opt/opt.cpp
@@ -242,6 +242,11 @@ static cl::opt<bool> PassRemarksWithHotness(
cl::desc("With PGO, include profile count in optimization remarks"),
cl::Hidden);
+static cl::opt<unsigned> PassRemarksHotnessThreshold(
+ "pass-remarks-hotness-threshold",
+ cl::desc("Minimum profile count required for an optimization remark to be output"),
+ cl::Hidden);
+
static cl::opt<std::string>
RemarksFilename("pass-remarks-output",
cl::desc("YAML output filename for pass remarks"),
@@ -420,7 +425,10 @@ int main(int argc, char **argv) {
Context.enableDebugTypeODRUniquing();
if (PassRemarksWithHotness)
- Context.setDiagnosticHotnessRequested(true);
+ Context.setDiagnosticsHotnessRequested(true);
+
+ if (PassRemarksHotnessThreshold)
+ Context.setDiagnosticsHotnessThreshold(PassRemarksHotnessThreshold);
std::unique_ptr<tool_output_file> YamlFile;
if (RemarksFilename != "") {
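Together the two options above let opt attach profile counts to optimization remarks and drop remarks for code colder than a threshold. A sketch of the context configuration they drive (the setter names match the calls in the patch; the threshold value of 100 is illustrative):

#include "llvm/IR/LLVMContext.h"

// Request hotness info in remarks and suppress remarks whose profile
// count falls below 100.
static void configureRemarks(llvm::LLVMContext &Ctx) {
  Ctx.setDiagnosticsHotnessRequested(true);
  Ctx.setDiagnosticsHotnessThreshold(100);
}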
diff --git a/tools/yaml2obj/yaml2wasm.cpp b/tools/yaml2obj/yaml2wasm.cpp
index cf591e16317c..110700d40c32 100644
--- a/tools/yaml2obj/yaml2wasm.cpp
+++ b/tools/yaml2obj/yaml2wasm.cpp
@@ -110,22 +110,52 @@ static int writeInitExpr(const wasm::WasmInitExpr &InitExpr, raw_ostream &OS) {
return 0;
}
+class SubSectionWriter {
+ raw_ostream &OS;
+ std::string OutString;
+ raw_string_ostream StringStream;
+
+public:
+ SubSectionWriter(raw_ostream &OS) : OS(OS), StringStream(OutString) {}
+
+ void Done() {
+ StringStream.flush();
+ encodeULEB128(OutString.size(), OS);
+ OS << OutString;
+ OutString.clear();
+ }
+
+ raw_ostream& GetStream() {
+ return StringStream;
+ }
+};
+
int WasmWriter::writeSectionContent(raw_ostream &OS, WasmYAML::LinkingSection &Section) {
writeStringRef(Section.Name, OS);
+
+ SubSectionWriter SubSection(OS);
+
+ // DATA_SIZE subsection
+ encodeULEB128(wasm::WASM_DATA_SIZE, OS);
+ encodeULEB128(Section.DataSize, SubSection.GetStream());
+ SubSection.Done();
+
+ // DATA_ALIGNMENT subsection
+ encodeULEB128(wasm::WASM_DATA_ALIGNMENT, OS);
+ encodeULEB128(Section.DataAlignment, SubSection.GetStream());
+ SubSection.Done();
+
+ // SYMBOL_INFO subsection
if (Section.SymbolInfos.size()) {
encodeULEB128(wasm::WASM_SYMBOL_INFO, OS);
- std::string OutString;
- raw_string_ostream StringStream(OutString);
- encodeULEB128(Section.SymbolInfos.size(), StringStream);
+ encodeULEB128(Section.SymbolInfos.size(), SubSection.GetStream());
for (const WasmYAML::SymbolInfo &Info : Section.SymbolInfos) {
- writeStringRef(Info.Name, StringStream);
- encodeULEB128(Info.Flags, StringStream);
+ writeStringRef(Info.Name, SubSection.GetStream());
+ encodeULEB128(Info.Flags, SubSection.GetStream());
}
- StringStream.flush();
- encodeULEB128(OutString.size(), OS);
- OS << OutString;
+ SubSection.Done();
}
return 0;
}
@@ -134,18 +164,16 @@ int WasmWriter::writeSectionContent(raw_ostream &OS, WasmYAML::NameSection &Sect
writeStringRef(Section.Name, OS);
if (Section.FunctionNames.size()) {
encodeULEB128(wasm::WASM_NAMES_FUNCTION, OS);
- std::string OutString;
- raw_string_ostream StringStream(OutString);
- encodeULEB128(Section.FunctionNames.size(), StringStream);
+ SubSectionWriter SubSection(OS);
+
+ encodeULEB128(Section.FunctionNames.size(), SubSection.GetStream());
for (const WasmYAML::NameEntry &NameEntry : Section.FunctionNames) {
- encodeULEB128(NameEntry.Index, StringStream);
- writeStringRef(NameEntry.Name, StringStream);
+ encodeULEB128(NameEntry.Index, SubSection.GetStream());
+ writeStringRef(NameEntry.Name, SubSection.GetStream());
}
- StringStream.flush();
- encodeULEB128(OutString.size(), OS);
- OS << OutString;
+ SubSection.Done();
}
return 0;
}
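SubSectionWriter above factors out the recurring wasm encoding step: serialize a subsection payload into a buffer, then emit its ULEB128-encoded size followed by the raw bytes. Buffering first is unavoidable because the size field precedes a payload whose length is only known once it has been written. A minimal sketch of that framing (the function name is illustrative):

#include "llvm/Support/LEB128.h"
#include "llvm/Support/raw_ostream.h"
#include <string>

// Emit Payload as a length-prefixed blob: ULEB128 size, then the bytes.
static void writeSizePrefixed(llvm::raw_ostream &OS,
                              const std::string &Payload) {
  llvm::encodeULEB128(Payload.size(), OS);
  OS << Payload;
}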
diff --git a/unittests/CodeGen/GlobalISel/LegalizerInfoTest.cpp b/unittests/CodeGen/GlobalISel/LegalizerInfoTest.cpp
index 882df5f25216..0e881759656d 100644
--- a/unittests/CodeGen/GlobalISel/LegalizerInfoTest.cpp
+++ b/unittests/CodeGen/GlobalISel/LegalizerInfoTest.cpp
@@ -117,4 +117,23 @@ TEST(LegalizerInfoTest, MultipleTypes) {
ASSERT_EQ(L.getAction({G_PTRTOINT, 1, p0}),
std::make_pair(LegalizerInfo::Legal, p0));
}
+
+TEST(LegalizerInfoTest, MultipleSteps) {
+ using namespace TargetOpcode;
+ LegalizerInfo L;
+ LLT s16 = LLT::scalar(16);
+ LLT s32 = LLT::scalar(32);
+ LLT s64 = LLT::scalar(64);
+
+ L.setAction({G_UREM, 0, s16}, LegalizerInfo::WidenScalar);
+ L.setAction({G_UREM, 0, s32}, LegalizerInfo::Lower);
+ L.setAction({G_UREM, 0, s64}, LegalizerInfo::Lower);
+
+ L.computeTables();
+
+ ASSERT_EQ(L.getAction({G_UREM, LLT::scalar(16)}),
+ std::make_pair(LegalizerInfo::WidenScalar, LLT::scalar(32)));
+ ASSERT_EQ(L.getAction({G_UREM, LLT::scalar(32)}),
+ std::make_pair(LegalizerInfo::Lower, LLT::scalar(32)));
+}
}
diff --git a/unittests/DebugInfo/DWARF/CMakeLists.txt b/unittests/DebugInfo/DWARF/CMakeLists.txt
index ed512a92ef18..1966472a9467 100644
--- a/unittests/DebugInfo/DWARF/CMakeLists.txt
+++ b/unittests/DebugInfo/DWARF/CMakeLists.txt
@@ -17,3 +17,5 @@ set(DebugInfoSources
add_llvm_unittest(DebugInfoDWARFTests
${DebugInfoSources}
)
+
+target_link_libraries(DebugInfoDWARFTests LLVMTestingSupport)
diff --git a/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp b/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp
index bf004d6ffc23..6ffb710d2549 100644
--- a/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp
+++ b/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp
@@ -15,16 +15,22 @@
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDie.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/ObjectYAML/DWARFEmitter.h"
#include "llvm/ObjectYAML/DWARFYAML.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Testing/Support/Error.h"
#include "gtest/gtest.h"
#include <climits>
#include <cstdint>
@@ -57,28 +63,19 @@ Triple getHostTripleForAddrSize(uint8_t AddrSize) {
return PT;
}
-/// Take any llvm::Expected and check and handle any errors.
-///
-/// \param Expected a llvm::Excepted instance to check.
-/// \returns true if there were errors, false otherwise.
-template <typename T>
-static bool HandleExpectedError(T &Expected) {
- std::string ErrorMsg;
- handleAllErrors(Expected.takeError(), [&](const ErrorInfoBase &EI) {
- ErrorMsg = EI.message();
- });
- if (!ErrorMsg.empty()) {
- ::testing::AssertionFailure() << "error: " << ErrorMsg;
- return true;
- }
- return false;
+static bool isConfigurationSupported(Triple &T) {
+ initLLVMIfNeeded();
+ std::string Err;
+ return TargetRegistry::lookupTarget(T.getTriple(), Err);
}
template <uint16_t Version, class AddrType, class RefAddrType>
void TestAllForms() {
- // Test that we can decode all DW_FORM values correctly.
+ Triple Triple = getHostTripleForAddrSize(sizeof(AddrType));
+ if (!isConfigurationSupported(Triple))
+ return;
- const uint8_t AddrSize = sizeof(AddrType);
+ // Test that we can decode all DW_FORM values correctly.
const AddrType AddrValue = (AddrType)0x0123456789abcdefULL;
const uint8_t BlockData[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 0};
const uint32_t BlockSize = sizeof(BlockData);
@@ -97,11 +94,9 @@ void TestAllForms() {
const uint32_t Dwarf32Values[] = {1, 2, 3, 4, 5, 6, 7, 8};
const char *StringValue = "Hello";
const char *StrpValue = "World";
- initLLVMIfNeeded();
- Triple Triple = getHostTripleForAddrSize(AddrSize);
+
auto ExpectedDG = dwarfgen::Generator::create(Triple, Version);
- if (HandleExpectedError(ExpectedDG))
- return;
+ ASSERT_THAT_EXPECTED(ExpectedDG, Succeeded());
dwarfgen::Generator *DG = ExpectedDG.get().get();
dwarfgen::CompileUnit &CU = DG->addCompileUnit();
dwarfgen::DIE CUDie = CU.getUnitDIE();
@@ -427,16 +422,16 @@ TEST(DWARFDebugInfo, TestDWARF32Version5Addr8AllForms) {
}
template <uint16_t Version, class AddrType> void TestChildren() {
+ Triple Triple = getHostTripleForAddrSize(sizeof(AddrType));
+ if (!isConfigurationSupported(Triple))
+ return;
+
// Test that we can decode DW_FORM_ref_addr values correctly in DWARF 2 with
// 4 byte addresses. DW_FORM_ref_addr values should be 4 bytes when using
// 8 byte addresses.
- const uint8_t AddrSize = sizeof(AddrType);
- initLLVMIfNeeded();
- Triple Triple = getHostTripleForAddrSize(AddrSize);
auto ExpectedDG = dwarfgen::Generator::create(Triple, Version);
- if (HandleExpectedError(ExpectedDG))
- return;
+ ASSERT_THAT_EXPECTED(ExpectedDG, Succeeded());
dwarfgen::Generator *DG = ExpectedDG.get().get();
dwarfgen::CompileUnit &CU = DG->addCompileUnit();
dwarfgen::DIE CUDie = CU.getUnitDIE();
@@ -551,14 +546,13 @@ TEST(DWARFDebugInfo, TestDWARF32Version4Addr8Children) {
}
template <uint16_t Version, class AddrType> void TestReferences() {
- // Test that we can decode DW_FORM_refXXX values correctly in DWARF.
+ Triple Triple = getHostTripleForAddrSize(sizeof(AddrType));
+ if (!isConfigurationSupported(Triple))
+ return;
- const uint8_t AddrSize = sizeof(AddrType);
- initLLVMIfNeeded();
- Triple Triple = getHostTripleForAddrSize(AddrSize);
+ // Test that we can decode DW_FORM_refXXX values correctly in DWARF.
auto ExpectedDG = dwarfgen::Generator::create(Triple, Version);
- if (HandleExpectedError(ExpectedDG))
- return;
+ ASSERT_THAT_EXPECTED(ExpectedDG, Succeeded());
dwarfgen::Generator *DG = ExpectedDG.get().get();
dwarfgen::CompileUnit &CU1 = DG->addCompileUnit();
dwarfgen::CompileUnit &CU2 = DG->addCompileUnit();
@@ -800,22 +794,22 @@ TEST(DWARFDebugInfo, TestDWARF32Version4Addr8References) {
}
template <uint16_t Version, class AddrType> void TestAddresses() {
+ Triple Triple = getHostTripleForAddrSize(sizeof(AddrType));
+ if (!isConfigurationSupported(Triple))
+ return;
+
// Test the DWARF APIs related to accessing the DW_AT_low_pc and
// DW_AT_high_pc.
- const uint8_t AddrSize = sizeof(AddrType);
const bool SupportsHighPCAsOffset = Version >= 4;
- initLLVMIfNeeded();
- Triple Triple = getHostTripleForAddrSize(AddrSize);
auto ExpectedDG = dwarfgen::Generator::create(Triple, Version);
- if (HandleExpectedError(ExpectedDG))
- return;
+ ASSERT_THAT_EXPECTED(ExpectedDG, Succeeded());
dwarfgen::Generator *DG = ExpectedDG.get().get();
dwarfgen::CompileUnit &CU = DG->addCompileUnit();
dwarfgen::DIE CUDie = CU.getUnitDIE();
-
+
CUDie.addAttribute(DW_AT_name, DW_FORM_strp, "/tmp/main.c");
CUDie.addAttribute(DW_AT_language, DW_FORM_data2, DW_LANG_C);
-
+
// Create a subprogram DIE with no low or high PC.
dwarfgen::DIE SubprogramNoPC = CUDie.addChild(DW_TAG_subprogram);
SubprogramNoPC.addAttribute(DW_AT_name, DW_FORM_strp, "no_pc");
@@ -838,18 +832,18 @@ template <uint16_t Version, class AddrType> void TestAddresses() {
ActualHighPCOffset);
else
SubprogramLowHighPC.addAttribute(DW_AT_high_pc, DW_FORM_addr, ActualHighPC);
-
+
StringRef FileBytes = DG->generate();
MemoryBufferRef FileBuffer(FileBytes, "dwarf");
auto Obj = object::ObjectFile::createObjectFile(FileBuffer);
EXPECT_TRUE((bool)Obj);
DWARFContextInMemory DwarfContext(*Obj.get());
-
+
// Verify the number of compile units is correct.
uint32_t NumCUs = DwarfContext.getNumCompileUnits();
EXPECT_EQ(NumCUs, 1u);
DWARFCompileUnit *U = DwarfContext.getCompileUnitAtIndex(0);
-
+
// Get the compile unit DIE and check that it is valid.
auto DieDG = U->getUnitDIE(false);
EXPECT_TRUE(DieDG.isValid());
@@ -873,7 +867,7 @@ template <uint16_t Version, class AddrType> void TestAddresses() {
OptU64 = SubprogramDieNoPC.getHighPC(ActualLowPC);
EXPECT_FALSE((bool)OptU64);
EXPECT_FALSE(SubprogramDieNoPC.getLowAndHighPC(LowPC, HighPC, SectionIndex));
-
+
// Verify that our subprogram with only a low PC value succeeds when
// we ask for the Low PC, but fails appropriately when asked for the high PC
// or both low and high PC values.
@@ -971,19 +965,18 @@ TEST(DWARFDebugInfo, TestDWARF32Version4Addr8Addresses) {
}
TEST(DWARFDebugInfo, TestRelations) {
+ Triple Triple = getHostTripleForAddrSize(sizeof(void *));
+ if (!isConfigurationSupported(Triple))
+ return;
+
// Test the DWARF APIs for navigating parent, child and sibling DIE
// relations.
uint16_t Version = 4;
-
- const uint8_t AddrSize = sizeof(void *);
- initLLVMIfNeeded();
- Triple Triple = getHostTripleForAddrSize(AddrSize);
auto ExpectedDG = dwarfgen::Generator::create(Triple, Version);
- if (HandleExpectedError(ExpectedDG))
- return;
+ ASSERT_THAT_EXPECTED(ExpectedDG, Succeeded());
dwarfgen::Generator *DG = ExpectedDG.get().get();
dwarfgen::CompileUnit &CU = DG->addCompileUnit();
-
+
enum class Tag: uint16_t {
A = dwarf::DW_TAG_lo_user,
B,
@@ -1020,34 +1013,34 @@ TEST(DWARFDebugInfo, TestRelations) {
auto Obj = object::ObjectFile::createObjectFile(FileBuffer);
EXPECT_TRUE((bool)Obj);
DWARFContextInMemory DwarfContext(*Obj.get());
-
+
// Verify the number of compile units is correct.
uint32_t NumCUs = DwarfContext.getNumCompileUnits();
EXPECT_EQ(NumCUs, 1u);
DWARFCompileUnit *U = DwarfContext.getCompileUnitAtIndex(0);
-
+
// Get the compile unit DIE and check that it is valid.
auto CUDie = U->getUnitDIE(false);
EXPECT_TRUE(CUDie.isValid());
-
+
// The compile unit doesn't have a parent or a sibling.
auto ParentDie = CUDie.getParent();
EXPECT_FALSE(ParentDie.isValid());
auto SiblingDie = CUDie.getSibling();
EXPECT_FALSE(SiblingDie.isValid());
-
+
// Get the children of the compile unit
auto A = CUDie.getFirstChild();
auto B = A.getFirstChild();
auto C = B.getSibling();
auto D = C.getSibling();
auto Null = D.getSibling();
-
+
// Verify NULL Die is NULL and has no children or siblings
EXPECT_TRUE(Null.isNULL());
EXPECT_FALSE(Null.getSibling().isValid());
EXPECT_FALSE(Null.getFirstChild().isValid());
-
+
// Verify all children of the compile unit DIE are correct.
EXPECT_EQ(A.getTag(), (dwarf::Tag)Tag::A);
EXPECT_EQ(B.getTag(), (dwarf::Tag)Tag::B);
@@ -1063,7 +1056,7 @@ TEST(DWARFDebugInfo, TestRelations) {
// Make sure the parent of all the children of the compile unit are the
// compile unit.
EXPECT_EQ(A.getParent(), CUDie);
-
+
// Make sure the parent of all the children of A are the A.
// B is the first child in A, so we need to verify we can get the previous
// DIE as the parent.
@@ -1082,7 +1075,7 @@ TEST(DWARFDebugInfo, TestRelations) {
auto C1 = C.getFirstChild();
auto C2 = C1.getSibling();
EXPECT_TRUE(C2.getSibling().isNULL());
-
+
// Verify all children of the B DIE correctly valid or invalid.
EXPECT_EQ(C1.getTag(), (dwarf::Tag)Tag::C1);
EXPECT_EQ(C2.getTag(), (dwarf::Tag)Tag::C2);
@@ -1102,24 +1095,23 @@ TEST(DWARFDebugInfo, TestDWARFDie) {
}
TEST(DWARFDebugInfo, TestChildIterators) {
+ Triple Triple = getHostTripleForAddrSize(sizeof(void *));
+ if (!isConfigurationSupported(Triple))
+ return;
+
// Test the DWARF APIs related to iterating across the children of a DIE using
// the DWARFDie::iterator class.
uint16_t Version = 4;
-
- const uint8_t AddrSize = sizeof(void *);
- initLLVMIfNeeded();
- Triple Triple = getHostTripleForAddrSize(AddrSize);
auto ExpectedDG = dwarfgen::Generator::create(Triple, Version);
- if (HandleExpectedError(ExpectedDG))
- return;
+ ASSERT_THAT_EXPECTED(ExpectedDG, Succeeded());
dwarfgen::Generator *DG = ExpectedDG.get().get();
dwarfgen::CompileUnit &CU = DG->addCompileUnit();
-
+
enum class Tag: uint16_t {
A = dwarf::DW_TAG_lo_user,
B,
};
-
+
// Scope to allow us to re-use the same DIE names
{
// Create DWARF tree that looks like:
@@ -1131,24 +1123,24 @@ TEST(DWARFDebugInfo, TestChildIterators) {
CUDie.addChild((dwarf::Tag)Tag::A);
CUDie.addChild((dwarf::Tag)Tag::B);
}
-
+
MemoryBufferRef FileBuffer(DG->generate(), "dwarf");
auto Obj = object::ObjectFile::createObjectFile(FileBuffer);
EXPECT_TRUE((bool)Obj);
DWARFContextInMemory DwarfContext(*Obj.get());
-
+
// Verify the number of compile units is correct.
uint32_t NumCUs = DwarfContext.getNumCompileUnits();
EXPECT_EQ(NumCUs, 1u);
DWARFCompileUnit *U = DwarfContext.getCompileUnitAtIndex(0);
-
+
// Get the compile unit DIE and check that it is valid.
auto CUDie = U->getUnitDIE(false);
EXPECT_TRUE(CUDie.isValid());
uint32_t Index;
DWARFDie A;
DWARFDie B;
-
+
// Verify the compile unit DIE's children.
Index = 0;
for (auto Die : CUDie.children()) {
@@ -1157,7 +1149,7 @@ TEST(DWARFDebugInfo, TestChildIterators) {
case 1: B = Die; break;
}
}
-
+
EXPECT_EQ(A.getTag(), (dwarf::Tag)Tag::A);
EXPECT_EQ(B.getTag(), (dwarf::Tag)Tag::B);
@@ -1206,7 +1198,7 @@ TEST(DWARFDebugInfo, TestEmptyChildren) {
// Get the compile unit DIE and check that it is valid.
auto CUDie = U->getUnitDIE(false);
EXPECT_TRUE(CUDie.isValid());
-
+
// Verify that the CU Die that says it has children, but doesn't, actually
// has begin and end iterators that are equal. We want to make sure we don't
// see the Null DIEs during iteration.
@@ -1214,21 +1206,20 @@ TEST(DWARFDebugInfo, TestEmptyChildren) {
}
TEST(DWARFDebugInfo, TestAttributeIterators) {
+ Triple Triple = getHostTripleForAddrSize(sizeof(void *));
+ if (!isConfigurationSupported(Triple))
+ return;
+
// Test the DWARF APIs related to iterating across all attribute values in a
// DWARFDie.
uint16_t Version = 4;
-
- const uint8_t AddrSize = sizeof(void *);
- initLLVMIfNeeded();
- Triple Triple = getHostTripleForAddrSize(AddrSize);
auto ExpectedDG = dwarfgen::Generator::create(Triple, Version);
- if (HandleExpectedError(ExpectedDG))
- return;
+ ASSERT_THAT_EXPECTED(ExpectedDG, Succeeded());
dwarfgen::Generator *DG = ExpectedDG.get().get();
dwarfgen::CompileUnit &CU = DG->addCompileUnit();
const uint64_t CULowPC = 0x1000;
StringRef CUPath("/tmp/main.c");
-
+
// Scope to allow us to re-use the same DIE names
{
auto CUDie = CU.getUnitDIE();
@@ -1240,53 +1231,52 @@ TEST(DWARFDebugInfo, TestAttributeIterators) {
// Encode an attribute value after an attribute with no data.
CUDie.addAttribute(DW_AT_low_pc, DW_FORM_addr, CULowPC);
}
-
+
MemoryBufferRef FileBuffer(DG->generate(), "dwarf");
auto Obj = object::ObjectFile::createObjectFile(FileBuffer);
EXPECT_TRUE((bool)Obj);
DWARFContextInMemory DwarfContext(*Obj.get());
-
+
// Verify the number of compile units is correct.
uint32_t NumCUs = DwarfContext.getNumCompileUnits();
EXPECT_EQ(NumCUs, 1u);
DWARFCompileUnit *U = DwarfContext.getCompileUnitAtIndex(0);
-
+
// Get the compile unit DIE and check that it is valid.
auto CUDie = U->getUnitDIE(false);
EXPECT_TRUE(CUDie.isValid());
-
+
auto R = CUDie.attributes();
auto I = R.begin();
auto E = R.end();
-
+
ASSERT_NE(E, I);
EXPECT_EQ(I->Attr, DW_AT_name);
auto ActualCUPath = I->Value.getAsCString();
EXPECT_EQ(CUPath, *ActualCUPath);
-
+
ASSERT_NE(E, ++I);
EXPECT_EQ(I->Attr, DW_AT_declaration);
EXPECT_EQ(1ull, *I->Value.getAsUnsignedConstant());
-
+
ASSERT_NE(E, ++I);
EXPECT_EQ(I->Attr, DW_AT_low_pc);
EXPECT_EQ(CULowPC, *I->Value.getAsAddress());
-
+
EXPECT_EQ(E, ++I);
}
TEST(DWARFDebugInfo, TestFindRecurse) {
+ Triple Triple = getHostTripleForAddrSize(sizeof(void *));
+ if (!isConfigurationSupported(Triple))
+ return;
+
uint16_t Version = 4;
-
- const uint8_t AddrSize = sizeof(void *);
- initLLVMIfNeeded();
- Triple Triple = getHostTripleForAddrSize(AddrSize);
auto ExpectedDG = dwarfgen::Generator::create(Triple, Version);
- if (HandleExpectedError(ExpectedDG))
- return;
+ ASSERT_THAT_EXPECTED(ExpectedDG, Succeeded());
dwarfgen::Generator *DG = ExpectedDG.get().get();
dwarfgen::CompileUnit &CU = DG->addCompileUnit();
-
+
StringRef SpecDieName = "spec";
StringRef SpecLinkageName = "spec_linkage";
StringRef AbsDieName = "abs";
@@ -1305,21 +1295,21 @@ TEST(DWARFDebugInfo, TestFindRecurse) {
VarAbsDie.addAttribute(DW_AT_name, DW_FORM_strp, AbsDieName);
VarDie.addAttribute(DW_AT_abstract_origin, DW_FORM_ref4, VarAbsDie);
}
-
+
MemoryBufferRef FileBuffer(DG->generate(), "dwarf");
auto Obj = object::ObjectFile::createObjectFile(FileBuffer);
EXPECT_TRUE((bool)Obj);
DWARFContextInMemory DwarfContext(*Obj.get());
-
+
// Verify the number of compile units is correct.
uint32_t NumCUs = DwarfContext.getNumCompileUnits();
EXPECT_EQ(NumCUs, 1u);
DWARFCompileUnit *U = DwarfContext.getCompileUnitAtIndex(0);
-
+
// Get the compile unit DIE and check that it is valid.
auto CUDie = U->getUnitDIE(false);
EXPECT_TRUE(CUDie.isValid());
-
+
auto FuncSpecDie = CUDie.getFirstChild();
auto FuncAbsDie = FuncSpecDie.getSibling();
auto FuncDie = FuncAbsDie.getSibling();
@@ -1344,11 +1334,11 @@ TEST(DWARFDebugInfo, TestFindRecurse) {
auto LinkageNameOpt = FuncDie.findRecursively(DW_AT_linkage_name);
EXPECT_EQ(SpecLinkageName, toString(LinkageNameOpt).getValueOr(nullptr));
-
+
// Make sure we can't extract the name from the abstract origin die when using
// DWARFDie::find() since it won't check the DW_AT_abstract_origin DIE.
EXPECT_FALSE(VarDie.find(DW_AT_name));
-
+
// Make sure we can extract the name from the abstract origin die when using
// DWARFDie::findRecursively() since it should recurse through the
// DW_AT_abstract_origin DIE.
@@ -1408,7 +1398,7 @@ TEST(DWARFDebugInfo, TestDwarfToFunctions) {
FormVal.setForm(DW_FORM_udata);
FormVal.setUValue(UData8);
FormValOpt = FormVal;
-
+
EXPECT_FALSE(toString(FormValOpt).hasValue());
EXPECT_TRUE(toUnsigned(FormValOpt).hasValue());
EXPECT_FALSE(toReference(FormValOpt).hasValue());
@@ -1428,7 +1418,7 @@ TEST(DWARFDebugInfo, TestDwarfToFunctions) {
FormVal.setForm(DW_FORM_ref_addr);
FormVal.setUValue(RefData);
FormValOpt = FormVal;
-
+
EXPECT_FALSE(toString(FormValOpt).hasValue());
EXPECT_FALSE(toUnsigned(FormValOpt).hasValue());
EXPECT_TRUE(toReference(FormValOpt).hasValue());
@@ -1448,7 +1438,7 @@ TEST(DWARFDebugInfo, TestDwarfToFunctions) {
FormVal.setForm(DW_FORM_udata);
FormVal.setSValue(SData8);
FormValOpt = FormVal;
-
+
EXPECT_FALSE(toString(FormValOpt).hasValue());
EXPECT_TRUE(toUnsigned(FormValOpt).hasValue());
EXPECT_FALSE(toReference(FormValOpt).hasValue());
@@ -1469,7 +1459,7 @@ TEST(DWARFDebugInfo, TestDwarfToFunctions) {
FormVal.setForm(DW_FORM_block1);
FormVal.setBlockValue(Array);
FormValOpt = FormVal;
-
+
EXPECT_FALSE(toString(FormValOpt).hasValue());
EXPECT_FALSE(toUnsigned(FormValOpt).hasValue());
EXPECT_FALSE(toReference(FormValOpt).hasValue());
@@ -1490,19 +1480,18 @@ TEST(DWARFDebugInfo, TestDwarfToFunctions) {
}
TEST(DWARFDebugInfo, TestFindAttrs) {
+ Triple Triple = getHostTripleForAddrSize(sizeof(void *));
+ if (!isConfigurationSupported(Triple))
+ return;
+
// Test the DWARFDie::find() and DWARFDie::findRecursively() that take an
// ArrayRef<dwarf::Attribute> value to make sure they work correctly.
uint16_t Version = 4;
-
- const uint8_t AddrSize = sizeof(void *);
- initLLVMIfNeeded();
- Triple Triple = getHostTripleForAddrSize(AddrSize);
auto ExpectedDG = dwarfgen::Generator::create(Triple, Version);
- if (HandleExpectedError(ExpectedDG))
- return;
+ ASSERT_THAT_EXPECTED(ExpectedDG, Succeeded());
dwarfgen::Generator *DG = ExpectedDG.get().get();
dwarfgen::CompileUnit &CU = DG->addCompileUnit();
-
+
StringRef DieMangled("_Z3fooi");
// Scope to allow us to re-use the same DIE names
{
@@ -1512,21 +1501,21 @@ TEST(DWARFDebugInfo, TestFindAttrs) {
FuncSpecDie.addAttribute(DW_AT_MIPS_linkage_name, DW_FORM_strp, DieMangled);
FuncDie.addAttribute(DW_AT_specification, DW_FORM_ref4, FuncSpecDie);
}
-
+
MemoryBufferRef FileBuffer(DG->generate(), "dwarf");
auto Obj = object::ObjectFile::createObjectFile(FileBuffer);
EXPECT_TRUE((bool)Obj);
DWARFContextInMemory DwarfContext(*Obj.get());
-
+
// Verify the number of compile units is correct.
uint32_t NumCUs = DwarfContext.getNumCompileUnits();
EXPECT_EQ(NumCUs, 1u);
DWARFCompileUnit *U = DwarfContext.getCompileUnitAtIndex(0);
-
+
// Get the compile unit DIE and check that it is valid.
auto CUDie = U->getUnitDIE(false);
EXPECT_TRUE(CUDie.isValid());
-
+
auto FuncSpecDie = CUDie.getFirstChild();
auto FuncDie = FuncSpecDie.getSibling();
@@ -1543,7 +1532,7 @@ TEST(DWARFDebugInfo, TestFindAttrs) {
// Make sure we can't extract the linkage name attributes when using
// DWARFDie::find() since it won't check the DW_AT_specification DIE.
EXPECT_FALSE(FuncDie.find(Attrs).hasValue());
-
+
// Make sure we can extract the name from the specification die when using
// DWARFDie::findRecursively() since it should recurse through the
// DW_AT_specification DIE.
@@ -1553,14 +1542,13 @@ TEST(DWARFDebugInfo, TestFindAttrs) {
}
TEST(DWARFDebugInfo, TestImplicitConstAbbrevs) {
- uint16_t Version = 5;
+ Triple Triple = getHostTripleForAddrSize(sizeof(void *));
+ if (!isConfigurationSupported(Triple))
+ return;
- const uint8_t AddrSize = sizeof(void *);
- initLLVMIfNeeded();
- Triple Triple = getHostTripleForAddrSize(AddrSize);
+ uint16_t Version = 5;
auto ExpectedDG = dwarfgen::Generator::create(Triple, Version);
- if (HandleExpectedError(ExpectedDG))
- return;
+ ASSERT_THAT_EXPECTED(ExpectedDG, Succeeded());
dwarfgen::Generator *DG = ExpectedDG.get().get();
dwarfgen::CompileUnit &CU = DG->addCompileUnit();
dwarfgen::DIE CUDie = CU.getUnitDIE();
@@ -2073,38 +2061,38 @@ TEST(DWARFDebugInfo, TestDwarfVerifyCUDontShareLineTable) {
- ''
- /tmp/main.c
- /tmp/foo.c
- debug_abbrev:
+ debug_abbrev:
- Code: 0x00000001
Tag: DW_TAG_compile_unit
Children: DW_CHILDREN_no
- Attributes:
+ Attributes:
- Attribute: DW_AT_name
Form: DW_FORM_strp
- Attribute: DW_AT_stmt_list
Form: DW_FORM_sec_offset
- debug_info:
- - Length:
+ debug_info:
+ - Length:
TotalLength: 16
Version: 4
AbbrOffset: 0
AddrSize: 8
- Entries:
+ Entries:
- AbbrCode: 0x00000001
- Values:
+ Values:
- Value: 0x0000000000000001
- Value: 0x0000000000000000
- - Length:
+ - Length:
TotalLength: 16
Version: 4
AbbrOffset: 0
AddrSize: 8
- Entries:
+ Entries:
- AbbrCode: 0x00000001
- Values:
+ Values:
- Value: 0x000000000000000D
- Value: 0x0000000000000000
- debug_line:
- - Length:
+ debug_line:
+ - Length:
TotalLength: 60
Version: 2
PrologueLength: 34
@@ -2114,14 +2102,14 @@ TEST(DWARFDebugInfo, TestDwarfVerifyCUDontShareLineTable) {
LineRange: 14
OpcodeBase: 13
StandardOpcodeLengths: [ 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 ]
- IncludeDirs:
+ IncludeDirs:
- /tmp
- Files:
+ Files:
- Name: main.c
DirIdx: 1
ModTime: 0
Length: 0
- Opcodes:
+ Opcodes:
- Opcode: DW_LNS_extended_op
ExtLen: 9
SubOpcode: DW_LNE_set_address
@@ -2146,4 +2134,48 @@ TEST(DWARFDebugInfo, TestDwarfVerifyCUDontShareLineTable) {
"offset:");
}
+TEST(DWARFDebugInfo, TestErrorReportingPolicy) {
+ Triple Triple("x86_64-pc-linux");
+ if (!isConfigurationSupported(Triple))
+ return;
+
+ auto ExpectedDG = dwarfgen::Generator::create(Triple, 4 /*DwarfVersion*/);
+ ASSERT_THAT_EXPECTED(ExpectedDG, Succeeded());
+ dwarfgen::Generator *DG = ExpectedDG.get().get();
+ AsmPrinter *AP = DG->getAsmPrinter();
+ MCContext *MC = DG->getMCContext();
+
+ // Emit two compressed sections with broken headers.
+ AP->OutStreamer->SwitchSection(
+ MC->getELFSection(".zdebug_foo", 0 /*Type*/, 0 /*Flags*/));
+ AP->OutStreamer->EmitBytes("0");
+ AP->OutStreamer->SwitchSection(
+ MC->getELFSection(".zdebug_bar", 0 /*Type*/, 0 /*Flags*/));
+ AP->OutStreamer->EmitBytes("0");
+
+ MemoryBufferRef FileBuffer(DG->generate(), "dwarf");
+ auto Obj = object::ObjectFile::createObjectFile(FileBuffer);
+ EXPECT_TRUE((bool)Obj);
+
+ // Case 1: the error handler handles all errors, which allows
+ // DWARFContextInMemory to parse the whole file and find both errors we
+ // know about.
+ int Errors = 0;
+ DWARFContextInMemory Ctx1(*Obj.get(), nullptr, [&](Error E) {
+ ++Errors;
+ consumeError(std::move(E));
+ return ErrorPolicy::Continue;
+ });
+ EXPECT_TRUE(Errors == 2);
+
+ // Case 2: error handler stops parsing of object after first error.
+ Errors = 0;
+ DWARFContextInMemory Ctx2(*Obj.get(), nullptr, [&](Error E) {
+ ++Errors;
+ consumeError(std::move(E));
+ return ErrorPolicy::Halt;
+ });
+ EXPECT_TRUE(Errors == 1);
+}
+
} // end anonymous namespace
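TestErrorReportingPolicy above exercises the new error-handler hook on DWARFContextInMemory: the callback observes each parse error and returns Continue or Halt. Reduced to its general shape (Section and parseSection are hypothetical stand-ins for the real parsing internals):

#include "llvm/Support/Error.h"
#include <functional>
#include <vector>

enum class ErrorPolicy { Halt, Continue };

struct Section {};

// Stand-in parser; real code would read and validate section bytes.
static llvm::Error parseSection(const Section &) {
  return llvm::Error::success();
}

// Report every failure to the handler, which decides whether to go on.
static void parseAll(const std::vector<Section> &Sections,
                     std::function<ErrorPolicy(llvm::Error)> Handler) {
  for (const Section &S : Sections)
    if (llvm::Error E = parseSection(S))
      if (Handler(std::move(E)) == ErrorPolicy::Halt)
        return;
}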
diff --git a/unittests/DebugInfo/DWARF/DWARFFormValueTest.cpp b/unittests/DebugInfo/DWARF/DWARFFormValueTest.cpp
index 621796b684d6..f283ac0961cc 100644
--- a/unittests/DebugInfo/DWARF/DWARFFormValueTest.cpp
+++ b/unittests/DebugInfo/DWARF/DWARFFormValueTest.cpp
@@ -97,8 +97,8 @@ DWARFFormValue createDataXFormValue(dwarf::Form Form, RawTypeT Value) {
memcpy(Raw, &Value, sizeof(RawTypeT));
uint32_t Offset = 0;
DWARFFormValue Result(Form);
- DataExtractor Data(StringRef(Raw, sizeof(RawTypeT)),
- sys::IsLittleEndianHost, sizeof(void*));
+ DWARFDataExtractor Data(StringRef(Raw, sizeof(RawTypeT)),
+ sys::IsLittleEndianHost, sizeof(void *));
Result.extractValue(Data, &Offset, nullptr);
return Result;
}
@@ -109,7 +109,7 @@ DWARFFormValue createULEBFormValue(uint64_t Value) {
encodeULEB128(Value, OS);
uint32_t Offset = 0;
DWARFFormValue Result(DW_FORM_udata);
- DataExtractor Data(OS.str(), sys::IsLittleEndianHost, sizeof(void*));
+ DWARFDataExtractor Data(OS.str(), sys::IsLittleEndianHost, sizeof(void *));
Result.extractValue(Data, &Offset, nullptr);
return Result;
}
@@ -120,7 +120,7 @@ DWARFFormValue createSLEBFormValue(int64_t Value) {
encodeSLEB128(Value, OS);
uint32_t Offset = 0;
DWARFFormValue Result(DW_FORM_sdata);
- DataExtractor Data(OS.str(), sys::IsLittleEndianHost, sizeof(void*));
+ DWARFDataExtractor Data(OS.str(), sys::IsLittleEndianHost, sizeof(void *));
Result.extractValue(Data, &Offset, nullptr);
return Result;
}
diff --git a/unittests/DebugInfo/DWARF/DwarfGenerator.h b/unittests/DebugInfo/DWARF/DwarfGenerator.h
index 76665e5193e8..dd7e8709638d 100644
--- a/unittests/DebugInfo/DWARF/DwarfGenerator.h
+++ b/unittests/DebugInfo/DWARF/DwarfGenerator.h
@@ -215,6 +215,7 @@ public:
BumpPtrAllocator &getAllocator() { return Allocator; }
AsmPrinter *getAsmPrinter() const { return Asm.get(); }
+ MCContext *getMCContext() const { return MC.get(); }
DIEAbbrevSet &getAbbrevSet() { return Abbreviations; }
DwarfStringPool &getStringPool() { return *StringPool; }
diff --git a/unittests/IR/DominatorTreeTest.cpp b/unittests/IR/DominatorTreeTest.cpp
index 232f0cbd4ed9..fa3dad8a2ab1 100644
--- a/unittests/IR/DominatorTreeTest.cpp
+++ b/unittests/IR/DominatorTreeTest.cpp
@@ -220,6 +220,7 @@ TEST(DominatorTree, Unreachable) {
EXPECT_EQ(PostDominatedBBs.size(), 0UL);
// Check DFS Numbers before
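+  // The DFS numbers are maintained lazily; updateDFSNumbers() forces them
+  // to be recomputed before they are inspected.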
+ DT->updateDFSNumbers();
EXPECT_EQ(DT->getNode(BB0)->getDFSNumIn(), 0UL);
EXPECT_EQ(DT->getNode(BB0)->getDFSNumOut(), 7UL);
EXPECT_EQ(DT->getNode(BB1)->getDFSNumIn(), 1UL);
@@ -229,12 +230,19 @@ TEST(DominatorTree, Unreachable) {
EXPECT_EQ(DT->getNode(BB4)->getDFSNumIn(), 3UL);
EXPECT_EQ(DT->getNode(BB4)->getDFSNumOut(), 4UL);
+ // Check levels before
+ EXPECT_EQ(DT->getNode(BB0)->getLevel(), 0U);
+ EXPECT_EQ(DT->getNode(BB1)->getLevel(), 1U);
+ EXPECT_EQ(DT->getNode(BB2)->getLevel(), 1U);
+ EXPECT_EQ(DT->getNode(BB4)->getLevel(), 1U);
+
// Reattach block 3 to block 1 and recalculate
BB1->getTerminator()->eraseFromParent();
BranchInst::Create(BB4, BB3, ConstantInt::getTrue(F.getContext()), BB1);
DT->recalculate(F);
// Check DFS Numbers after
+ DT->updateDFSNumbers();
EXPECT_EQ(DT->getNode(BB0)->getDFSNumIn(), 0UL);
EXPECT_EQ(DT->getNode(BB0)->getDFSNumOut(), 9UL);
EXPECT_EQ(DT->getNode(BB1)->getDFSNumIn(), 1UL);
@@ -246,6 +254,13 @@ TEST(DominatorTree, Unreachable) {
EXPECT_EQ(DT->getNode(BB4)->getDFSNumIn(), 5UL);
EXPECT_EQ(DT->getNode(BB4)->getDFSNumOut(), 6UL);
+ // Check levels after
+ EXPECT_EQ(DT->getNode(BB0)->getLevel(), 0U);
+ EXPECT_EQ(DT->getNode(BB1)->getLevel(), 1U);
+ EXPECT_EQ(DT->getNode(BB2)->getLevel(), 1U);
+ EXPECT_EQ(DT->getNode(BB3)->getLevel(), 2U);
+ EXPECT_EQ(DT->getNode(BB4)->getLevel(), 1U);
+
// Change root node
DT->verifyDomTree();
BasicBlock *NewEntry =
diff --git a/unittests/ProfileData/CoverageMappingTest.cpp b/unittests/ProfileData/CoverageMappingTest.cpp
index ea51f717a1db..1d621f4060ca 100644
--- a/unittests/ProfileData/CoverageMappingTest.cpp
+++ b/unittests/ProfileData/CoverageMappingTest.cpp
@@ -232,15 +232,17 @@ struct CoverageMappingTest : ::testing::TestWithParam<std::pair<bool, bool>> {
}
Expected<std::unique_ptr<CoverageMapping>> readOutputFunctions() {
- if (!UseMultipleReaders) {
- CoverageMappingReaderMock CovReader(OutputFunctions);
- return CoverageMapping::load(CovReader, *ProfileReader);
- }
-
std::vector<std::unique_ptr<CoverageMappingReader>> CoverageReaders;
- for (const auto &OF : OutputFunctions) {
- ArrayRef<OutputFunctionCoverageData> Funcs(OF);
- CoverageReaders.push_back(make_unique<CoverageMappingReaderMock>(Funcs));
+ if (UseMultipleReaders) {
+ for (const auto &OF : OutputFunctions) {
+ ArrayRef<OutputFunctionCoverageData> Funcs(OF);
+ CoverageReaders.push_back(
+ make_unique<CoverageMappingReaderMock>(Funcs));
+ }
+ } else {
+ ArrayRef<OutputFunctionCoverageData> Funcs(OutputFunctions);
+ CoverageReaders.push_back(
+ make_unique<CoverageMappingReaderMock>(Funcs));
}
return CoverageMapping::load(CoverageReaders, *ProfileReader);
}
diff --git a/unittests/Support/CMakeLists.txt b/unittests/Support/CMakeLists.txt
index e2a6561089bc..641163e39ed3 100644
--- a/unittests/Support/CMakeLists.txt
+++ b/unittests/Support/CMakeLists.txt
@@ -21,6 +21,7 @@ add_llvm_unittest(SupportTests
DebugTest.cpp
EndianStreamTest.cpp
EndianTest.cpp
+ ErrnoTest.cpp
ErrorOrTest.cpp
ErrorTest.cpp
FileOutputBufferTest.cpp
diff --git a/unittests/Support/ErrnoTest.cpp b/unittests/Support/ErrnoTest.cpp
new file mode 100644
index 000000000000..67f834a938da
--- /dev/null
+++ b/unittests/Support/ErrnoTest.cpp
@@ -0,0 +1,36 @@
+//===- ErrnoTest.cpp - Error handling unit tests --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Errno.h"
+#include "gtest/gtest.h"
+
+using namespace llvm::sys;
+
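+// RetryAfterSignal(FailValue, F, Args...) is expected to re-invoke F for as
+// long as it returns FailValue with errno == EINTR, and to pass any other
+// result (and errno) through unchanged; the cases below exercise exactly
+// that contract.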
+TEST(ErrnoTest, RetryAfterSignal) {
+ EXPECT_EQ(1, RetryAfterSignal(-1, [] { return 1; }));
+
+ EXPECT_EQ(-1, RetryAfterSignal(-1, [] {
+ errno = EAGAIN;
+ return -1;
+ }));
+ EXPECT_EQ(EAGAIN, errno);
+
+ unsigned calls = 0;
+ EXPECT_EQ(1, RetryAfterSignal(-1, [&calls] {
+ errno = EINTR;
+ ++calls;
+ return calls == 1 ? -1 : 1;
+ }));
+ EXPECT_EQ(2u, calls);
+
+ EXPECT_EQ(1, RetryAfterSignal(-1, [](int x) { return x; }, 1));
+
+ std::unique_ptr<int> P(RetryAfterSignal(nullptr, [] { return new int(47); }));
+ EXPECT_EQ(47, *P);
+}
diff --git a/unittests/Support/YAMLIOTest.cpp b/unittests/Support/YAMLIOTest.cpp
index 21c8430f9588..5cf0e9d0f5b3 100644
--- a/unittests/Support/YAMLIOTest.cpp
+++ b/unittests/Support/YAMLIOTest.cpp
@@ -1029,7 +1029,8 @@ TEST(YAMLIO, TestReadWriteBlockScalarValue) {
LLVM_YAML_STRONG_TYPEDEF(int, MyNumber)
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(MyNumber)
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::StringRef)
+LLVM_YAML_STRONG_TYPEDEF(llvm::StringRef, MyString)
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(MyString)
namespace llvm {
namespace yaml {
@@ -1049,12 +1050,23 @@ namespace yaml {
static bool mustQuote(StringRef) { return false; }
};
+
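+  // ScalarTraits<MyString> simply delegates to the StringRef implementation,
+  // so the strong typedef reads and writes exactly like the underlying
+  // string.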
+ template <> struct ScalarTraits<MyString> {
+ using Impl = ScalarTraits<StringRef>;
+ static void output(const MyString &V, void *Ctx, raw_ostream &OS) {
+ Impl::output(V, Ctx, OS);
+ }
+ static StringRef input(StringRef S, void *Ctx, MyString &V) {
+ return Impl::input(S, Ctx, V.value);
+ }
+ static bool mustQuote(StringRef S) { return Impl::mustQuote(S); }
+ };
}
}
struct NameAndNumbers {
llvm::StringRef name;
- std::vector<llvm::StringRef> strings;
+ std::vector<MyString> strings;
std::vector<MyNumber> single;
std::vector<MyNumber> numbers;
};
@@ -1128,8 +1140,8 @@ TEST(YAMLIO, TestReadWriteMyFlowSequence) {
EXPECT_FALSE(yin.error());
EXPECT_TRUE(map2.name.equals("hello"));
EXPECT_EQ(map2.strings.size(), 2UL);
- EXPECT_TRUE(map2.strings[0].equals("one"));
- EXPECT_TRUE(map2.strings[1].equals("two"));
+ EXPECT_TRUE(map2.strings[0].value.equals("one"));
+ EXPECT_TRUE(map2.strings[1].value.equals("two"));
EXPECT_EQ(map2.single.size(), 1UL);
EXPECT_EQ(1, map2.single[0]);
EXPECT_EQ(map2.numbers.size(), 3UL);
@@ -1739,7 +1751,6 @@ TEST(YAMLIO, TestFlagsReadError) {
//
// Test error handling reading built-in uint8_t type
//
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint8_t)
TEST(YAMLIO, TestReadBuiltInTypesUint8Error) {
std::vector<uint8_t> seq;
Input yin("---\n"
@@ -1758,7 +1769,6 @@ TEST(YAMLIO, TestReadBuiltInTypesUint8Error) {
//
// Test error handling reading built-in uint16_t type
//
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint16_t)
TEST(YAMLIO, TestReadBuiltInTypesUint16Error) {
std::vector<uint16_t> seq;
Input yin("---\n"
@@ -1777,7 +1787,6 @@ TEST(YAMLIO, TestReadBuiltInTypesUint16Error) {
//
// Test error handling reading built-in uint32_t type
//
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t)
TEST(YAMLIO, TestReadBuiltInTypesUint32Error) {
std::vector<uint32_t> seq;
Input yin("---\n"
@@ -1796,7 +1805,6 @@ TEST(YAMLIO, TestReadBuiltInTypesUint32Error) {
//
// Test error handling reading built-in uint64_t type
//
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint64_t)
TEST(YAMLIO, TestReadBuiltInTypesUint64Error) {
std::vector<uint64_t> seq;
Input yin("---\n"
@@ -1815,7 +1823,6 @@ TEST(YAMLIO, TestReadBuiltInTypesUint64Error) {
//
// Test error handling reading built-in int8_t type
//
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(int8_t)
TEST(YAMLIO, TestReadBuiltInTypesint8OverError) {
std::vector<int8_t> seq;
Input yin("---\n"
@@ -1853,7 +1860,6 @@ TEST(YAMLIO, TestReadBuiltInTypesint8UnderError) {
//
// Test error handling reading built-in int16_t type
//
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(int16_t)
TEST(YAMLIO, TestReadBuiltInTypesint16UnderError) {
std::vector<int16_t> seq;
Input yin("---\n"
@@ -1892,7 +1898,6 @@ TEST(YAMLIO, TestReadBuiltInTypesint16OverError) {
//
// Test error handling reading built-in int32_t type
//
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(int32_t)
TEST(YAMLIO, TestReadBuiltInTypesint32UnderError) {
std::vector<int32_t> seq;
Input yin("---\n"
@@ -1930,7 +1935,6 @@ TEST(YAMLIO, TestReadBuiltInTypesint32OverError) {
//
// Test error handling reading built-in int64_t type
//
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(int64_t)
TEST(YAMLIO, TestReadBuiltInTypesint64UnderError) {
std::vector<int64_t> seq;
Input yin("---\n"
@@ -1967,7 +1971,6 @@ TEST(YAMLIO, TestReadBuiltInTypesint64OverError) {
//
// Test error handling reading built-in float type
//
-LLVM_YAML_IS_SEQUENCE_VECTOR(float)
TEST(YAMLIO, TestReadBuiltInTypesFloatError) {
std::vector<float> seq;
Input yin("---\n"
@@ -1986,7 +1989,6 @@ TEST(YAMLIO, TestReadBuiltInTypesFloatError) {
//
// Test error handling reading built-in float type
//
-LLVM_YAML_IS_SEQUENCE_VECTOR(double)
TEST(YAMLIO, TestReadBuiltInTypesDoubleError) {
std::vector<double> seq;
Input yin("---\n"
diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp
index 03914ef98952..e48ba3845326 100644
--- a/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -3837,11 +3837,11 @@ void CodeGenDAGPatterns::GenerateVariants() {
if (AlreadyExists) continue;
// Otherwise, add it to the list of patterns we have.
- PatternsToMatch.emplace_back(
+ PatternsToMatch.push_back(PatternToMatch(
PatternsToMatch[i].getSrcRecord(), PatternsToMatch[i].getPredicates(),
Variant, PatternsToMatch[i].getDstPattern(),
PatternsToMatch[i].getDstRegs(),
- PatternsToMatch[i].getAddedComplexity(), Record::getNewUID());
+ PatternsToMatch[i].getAddedComplexity(), Record::getNewUID()));
}
DEBUG(errs() << "\n");
diff --git a/utils/TableGen/CodeGenRegisters.cpp b/utils/TableGen/CodeGenRegisters.cpp
index 3907336221a4..d4a21a986c58 100644
--- a/utils/TableGen/CodeGenRegisters.cpp
+++ b/utils/TableGen/CodeGenRegisters.cpp
@@ -915,6 +915,84 @@ void CodeGenRegisterClass::computeSubClasses(CodeGenRegBank &RegBank) {
RC.inheritProperties(RegBank);
}
+Optional<std::pair<CodeGenRegisterClass *, CodeGenRegisterClass *>>
+CodeGenRegisterClass::getMatchingSubClassWithSubRegs(
+ CodeGenRegBank &RegBank, const CodeGenSubRegIndex *SubIdx) const {
+ auto SizeOrder = [](const CodeGenRegisterClass *A,
+ const CodeGenRegisterClass *B) {
+ return A->getMembers().size() > B->getMembers().size();
+ };
+
+ auto &RegClasses = RegBank.getRegClasses();
+
+ // Find all the subclasses of this one that fully support the sub-register
+ // index and order them by size. BiggestSuperRC should always be first.
+ CodeGenRegisterClass *BiggestSuperRegRC = getSubClassWithSubReg(SubIdx);
+ if (!BiggestSuperRegRC)
+ return None;
+ BitVector SuperRegRCsBV = BiggestSuperRegRC->getSubClasses();
+ std::vector<CodeGenRegisterClass *> SuperRegRCs;
+ for (auto &RC : RegClasses)
+ if (SuperRegRCsBV[RC.EnumValue])
+ SuperRegRCs.emplace_back(&RC);
+ std::sort(SuperRegRCs.begin(), SuperRegRCs.end(), SizeOrder);
+  assert(SuperRegRCs.front() == BiggestSuperRegRC &&
+         "Biggest class wasn't first");
+
+ // Find all the subreg classes and order them by size too.
+ std::vector<std::pair<CodeGenRegisterClass *, BitVector>> SuperRegClasses;
+ for (auto &RC: RegClasses) {
+ BitVector SuperRegClassesBV(RegClasses.size());
+ RC.getSuperRegClasses(SubIdx, SuperRegClassesBV);
+ if (SuperRegClassesBV.any())
+ SuperRegClasses.push_back(std::make_pair(&RC, SuperRegClassesBV));
+ }
+ std::sort(SuperRegClasses.begin(), SuperRegClasses.end(),
+ [&](const std::pair<CodeGenRegisterClass *, BitVector> &A,
+ const std::pair<CodeGenRegisterClass *, BitVector> &B) {
+ return SizeOrder(A.first, B.first);
+ });
+
+ // Find the biggest subclass and subreg class such that R:subidx is in the
+ // subreg class for all R in subclass.
+ //
+ // For example:
+ // All registers in X86's GR64 have a sub_32bit subregister but no class
+ // exists that contains all the 32-bit subregisters because GR64 contains RIP
+ // but GR32 does not contain EIP. Instead, we constrain SuperRegRC to
+  // GR64_with_sub_8bit (which is identical to GR64_with_sub_32bit) and then,
+ // having excluded RIP, we are able to find a SubRegRC (GR32).
+ CodeGenRegisterClass *ChosenSuperRegClass = nullptr;
+ CodeGenRegisterClass *SubRegRC = nullptr;
+ for (auto *SuperRegRC : SuperRegRCs) {
+ for (const auto &SuperRegClassPair : SuperRegClasses) {
+ const BitVector &SuperRegClassBV = SuperRegClassPair.second;
+ if (SuperRegClassBV[SuperRegRC->EnumValue]) {
+ SubRegRC = SuperRegClassPair.first;
+ ChosenSuperRegClass = SuperRegRC;
+
+ // If SubRegRC is bigger than SuperRegRC then there are members of
+ // SubRegRC that don't have super registers via SubIdx. Keep looking to
+ // find a better fit and fall back on this one if there isn't one.
+ //
+ // This is intended to prevent X86 from making odd choices such as
+ // picking LOW32_ADDR_ACCESS_RBP instead of GR32 in the example above.
+ // LOW32_ADDR_ACCESS_RBP is a valid choice but contains registers that
+ // aren't subregisters of SuperRegRC whereas GR32 has a direct 1:1
+ // mapping.
+ if (SuperRegRC->getMembers().size() >= SubRegRC->getMembers().size())
+ return std::make_pair(ChosenSuperRegClass, SubRegRC);
+ }
+ }
+
+ // If we found a fit but it wasn't quite ideal because SubRegRC had excess
+ // registers, then we're done.
+ if (ChosenSuperRegClass)
+ return std::make_pair(ChosenSuperRegClass, SubRegRC);
+ }
+
+ return None;
+}
+
void CodeGenRegisterClass::getSuperRegClasses(const CodeGenSubRegIndex *SubIdx,
BitVector &Out) const {
auto FindI = SuperRegClasses.find(SubIdx);
diff --git a/utils/TableGen/CodeGenRegisters.h b/utils/TableGen/CodeGenRegisters.h
index 1fcba8a135d1..d0f96a035ea1 100644
--- a/utils/TableGen/CodeGenRegisters.h
+++ b/utils/TableGen/CodeGenRegisters.h
@@ -329,6 +329,9 @@ namespace llvm {
const std::string &getName() const { return Name; }
std::string getQualifiedName() const;
ArrayRef<MVT::SimpleValueType> getValueTypes() const {return VTs;}
+ bool hasValueType(MVT::SimpleValueType VT) const {
+ return std::find(VTs.begin(), VTs.end(), VT) != VTs.end();
+ }
unsigned getNumValueTypes() const { return VTs.size(); }
MVT::SimpleValueType getValueTypeNum(unsigned VTNum) const {
@@ -360,6 +363,18 @@ namespace llvm {
return SubClassWithSubReg.lookup(SubIdx);
}
+    /// Find the largest subclass where all registers have SubIdx
+    /// subregisters in SubRegClass and the largest subregister class that
+    /// contains those subregisters without (as far as possible) also
+    /// containing additional registers.
+    ///
+    /// This can be used to find a suitable pair of classes for subregister
+    /// copies.
+    /// \return std::pair<SubClass, SubRegClass> where SubClass is a class
+    /// where every register has SubIdx and SubRegClass is a class where
+    /// every register is covered by the SubIdx subregister of SubClass.
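+    /// For example (see the implementation comment), requesting sub_32bit of
+    /// X86's GR64 would yield the pair (GR64_with_sub_8bit, GR32).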
+ Optional<std::pair<CodeGenRegisterClass *, CodeGenRegisterClass *>>
+ getMatchingSubClassWithSubRegs(CodeGenRegBank &RegBank,
+ const CodeGenSubRegIndex *SubIdx) const;
+
void setSubClassWithSubReg(const CodeGenSubRegIndex *SubIdx,
CodeGenRegisterClass *SubRC) {
SubClassWithSubReg[SubIdx] = SubRC;
@@ -370,7 +385,7 @@ namespace llvm {
void getSuperRegClasses(const CodeGenSubRegIndex *SubIdx,
BitVector &Out) const;
- // addSuperRegClass - Add a class containing only SudIdx super-registers.
+ // addSuperRegClass - Add a class containing only SubIdx super-registers.
void addSuperRegClass(CodeGenSubRegIndex *SubIdx,
CodeGenRegisterClass *SuperRC) {
SuperRegClasses[SubIdx].insert(SuperRC);
diff --git a/utils/TableGen/CodeGenSchedule.cpp b/utils/TableGen/CodeGenSchedule.cpp
index 20f6047052ff..50569b2ad989 100644
--- a/utils/TableGen/CodeGenSchedule.cpp
+++ b/utils/TableGen/CodeGenSchedule.cpp
@@ -140,6 +140,7 @@ CodeGenSchedModels::CodeGenSchedModels(RecordKeeper &RK,
// Populate each CodeGenProcModel's WriteResDefs, ReadAdvanceDefs, and
// ProcResourceDefs.
+ DEBUG(dbgs() << "\n+++ RESOURCE DEFINITIONS (collectProcResources) +++\n");
collectProcResources();
checkCompleteness();
@@ -160,6 +161,7 @@ void CodeGenSchedModels::collectProcModels() {
ProcModelMap[NoModelDef] = 0;
// For each processor, find a unique machine model.
+ DEBUG(dbgs() << "+++ PROCESSOR MODELs (addProcModel) +++\n");
for (unsigned i = 0, N = ProcRecords.size(); i < N; ++i)
addProcModel(ProcRecords[i]);
}
@@ -315,6 +317,7 @@ void CodeGenSchedModels::collectSchedRW() {
RW.Aliases.push_back(*AI);
}
DEBUG(
+ dbgs() << "\n+++ SCHED READS and WRITES (collectSchedRW) +++\n";
for (unsigned WIdx = 0, WEnd = SchedWrites.size(); WIdx != WEnd; ++WIdx) {
dbgs() << WIdx << ": ";
SchedWrites[WIdx].dump();
@@ -531,6 +534,7 @@ void CodeGenSchedModels::collectSchedClasses() {
// Create classes for InstRW defs.
RecVec InstRWDefs = Records.getAllDerivedDefinitions("InstRW");
std::sort(InstRWDefs.begin(), InstRWDefs.end(), LessRecord());
+ DEBUG(dbgs() << "\n+++ SCHED CLASSES (createInstRWClass) +++\n");
for (RecIter OI = InstRWDefs.begin(), OE = InstRWDefs.end(); OI != OE; ++OI)
createInstRWClass(*OI);
@@ -541,6 +545,7 @@ void CodeGenSchedModels::collectSchedClasses() {
if (!EnableDump)
return;
+ dbgs() << "\n+++ ITINERARIES and/or MACHINE MODELS (collectSchedClasses) +++\n";
for (const CodeGenInstruction *Inst : Target.getInstructionsByEnumValue()) {
StringRef InstName = Inst->TheDef->getName();
unsigned SCIdx = InstrClassMap.lookup(Inst->TheDef);
@@ -790,6 +795,7 @@ bool CodeGenSchedModels::hasItineraries() const {
// Gather the processor itineraries.
void CodeGenSchedModels::collectProcItins() {
+ DEBUG(dbgs() << "\n+++ PROBLEM ITINERARIES (collectProcItins) +++\n");
for (CodeGenProcModel &ProcModel : ProcModels) {
if (!ProcModel.hasItineraries())
continue;
@@ -860,6 +866,7 @@ void CodeGenSchedModels::collectProcUnsupportedFeatures() {
/// Infer new classes from existing classes. In the process, this may create new
/// SchedWrites from sequences of existing SchedWrites.
void CodeGenSchedModels::inferSchedClasses() {
+ DEBUG(dbgs() << "\n+++ INFERRING SCHED CLASSES (inferSchedClasses) +++\n");
DEBUG(dbgs() << NumInstrSchedClasses << " instr sched classes.\n");
// Visit all existing classes and newly created classes.
diff --git a/utils/TableGen/DAGISelMatcher.h b/utils/TableGen/DAGISelMatcher.h
index 6bda9ca5f96f..c672b0acac9f 100644
--- a/utils/TableGen/DAGISelMatcher.h
+++ b/utils/TableGen/DAGISelMatcher.h
@@ -208,7 +208,7 @@ public:
Children.resize(NC);
}
- static inline bool classof(const Matcher *N) {
+ static bool classof(const Matcher *N) {
return N->getKind() == Scope;
}
@@ -233,7 +233,7 @@ public:
const std::string &getWhatFor() const { return WhatFor; }
unsigned getResultNo() const { return ResultNo; }
- static inline bool classof(const Matcher *N) {
+ static bool classof(const Matcher *N) {
return N->getKind() == RecordNode;
}
@@ -265,7 +265,7 @@ public:
const std::string &getWhatFor() const { return WhatFor; }
unsigned getResultNo() const { return ResultNo; }
- static inline bool classof(const Matcher *N) {
+ static bool classof(const Matcher *N) {
return N->getKind() == RecordChild;
}
@@ -281,7 +281,7 @@ class RecordMemRefMatcher : public Matcher {
public:
RecordMemRefMatcher() : Matcher(RecordMemRef) {}
- static inline bool classof(const Matcher *N) {
+ static bool classof(const Matcher *N) {
return N->getKind() == RecordMemRef;
}
@@ -297,7 +297,7 @@ class CaptureGlueInputMatcher : public Matcher {
public:
CaptureGlueInputMatcher() : Matcher(CaptureGlueInput) {}
- static inline bool classof(const Matcher *N) {
+ static bool classof(const Matcher *N) {
return N->getKind() == CaptureGlueInput;
}
@@ -315,7 +315,7 @@ public:
unsigned getChildNo() const { return ChildNo; }
- static inline bool classof(const Matcher *N) {
+ static bool classof(const Matcher *N) {
return N->getKind() == MoveChild;
}
@@ -332,7 +332,7 @@ class MoveParentMatcher : public Matcher {
public:
MoveParentMatcher() : Matcher(MoveParent) {}
- static inline bool classof(const Matcher *N) {
+ static bool classof(const Matcher *N) {
return N->getKind() == MoveParent;
}
@@ -352,7 +352,7 @@ public:
unsigned getMatchNumber() const { return MatchNumber; }
- static inline bool classof(const Matcher *N) {
+ static bool classof(const Matcher *N) {
return N->getKind() == CheckSame;
}
@@ -376,7 +376,7 @@ public:
unsigned getChildNo() const { return ChildNo; }
unsigned getMatchNumber() const { return MatchNumber; }
- static inline bool classof(const Matcher *N) {
+ static bool classof(const Matcher *N) {
return N->getKind() == CheckChildSame;
}
@@ -399,7 +399,7 @@ public:
StringRef getPredicate() const { return Predicate; }
- static inline bool classof(const Matcher *N) {
+ static bool classof(const Matcher *N) {
return N->getKind() == CheckPatternPredicate;
}
@@ -419,7 +419,7 @@ public:
TreePredicateFn getPredicate() const;
- static inline bool classof(const Matcher *N) {
+ static bool classof(const Matcher *N) {
return N->getKind() == CheckPredicate;
}
@@ -441,7 +441,7 @@ public:
const SDNodeInfo &getOpcode() const { return Opcode; }
- static inline bool classof(const Matcher *N) {
+ static bool classof(const Matcher *N) {
return N->getKind() == CheckOpcode;
}
@@ -462,7 +462,7 @@ public:
: Matcher(SwitchOpcode), Cases(cases.begin(), cases.end()) {}
~SwitchOpcodeMatcher() override;
- static inline bool classof(const Matcher *N) {
+ static bool classof(const Matcher *N) {
return N->getKind() == SwitchOpcode;
}
@@ -489,7 +489,7 @@ public:
MVT::SimpleValueType getType() const { return Type; }
unsigned getResNo() const { return ResNo; }
- static inline bool classof(const Matcher *N) {
+ static bool classof(const Matcher *N) {
return N->getKind() == CheckType;
}
@@ -512,7 +512,7 @@ public:
: Matcher(SwitchType), Cases(cases.begin(), cases.end()) {}
~SwitchTypeMatcher() override;
- static inline bool classof(const Matcher *N) {
+ static bool classof(const Matcher *N) {
return N->getKind() == SwitchType;
}
@@ -540,7 +540,7 @@ public:
unsigned getChildNo() const { return ChildNo; }
MVT::SimpleValueType getType() const { return Type; }
- static inline bool classof(const Matcher *N) {
+ static bool classof(const Matcher *N) {
return N->getKind() == CheckChildType;
}
@@ -564,7 +564,7 @@ public:
int64_t getValue() const { return Value; }
- static inline bool classof(const Matcher *N) {
+ static bool classof(const Matcher *N) {
return N->getKind() == CheckInteger;
}
@@ -588,7 +588,7 @@ public:
unsigned getChildNo() const { return ChildNo; }
int64_t getValue() const { return Value; }
- static inline bool classof(const Matcher *N) {
+ static bool classof(const Matcher *N) {
return N->getKind() == CheckChildInteger;
}
@@ -611,7 +611,7 @@ public:
StringRef getCondCodeName() const { return CondCodeName; }
- static inline bool classof(const Matcher *N) {
+ static bool classof(const Matcher *N) {
return N->getKind() == CheckCondCode;
}
@@ -632,7 +632,7 @@ public:
StringRef getTypeName() const { return TypeName; }
- static inline bool classof(const Matcher *N) {
+ static bool classof(const Matcher *N) {
return N->getKind() == CheckValueType;
}
@@ -673,7 +673,7 @@ public:
const std::string getName() const { return Name; }
unsigned getFirstResult() const { return FirstResult; }
- static inline bool classof(const Matcher *N) {
+ static bool classof(const Matcher *N) {
return N->getKind() == CheckComplexPat;
}
@@ -695,7 +695,7 @@ public:
int64_t getValue() const { return Value; }
- static inline bool classof(const Matcher *N) {
+ static bool classof(const Matcher *N) {
return N->getKind() == CheckAndImm;
}
@@ -716,7 +716,7 @@ public:
int64_t getValue() const { return Value; }
- static inline bool classof(const Matcher *N) {
+ static bool classof(const Matcher *N) {
return N->getKind() == CheckOrImm;
}
@@ -734,7 +734,7 @@ public:
CheckFoldableChainNodeMatcher()
: Matcher(CheckFoldableChainNode) {}
- static inline bool classof(const Matcher *N) {
+ static bool classof(const Matcher *N) {
return N->getKind() == CheckFoldableChainNode;
}
@@ -754,7 +754,7 @@ public:
int64_t getValue() const { return Val; }
MVT::SimpleValueType getVT() const { return VT; }
- static inline bool classof(const Matcher *N) {
+ static bool classof(const Matcher *N) {
return N->getKind() == EmitInteger;
}
@@ -778,7 +778,7 @@ public:
const std::string &getValue() const { return Val; }
MVT::SimpleValueType getVT() const { return VT; }
- static inline bool classof(const Matcher *N) {
+ static bool classof(const Matcher *N) {
return N->getKind() == EmitStringInteger;
}
@@ -803,7 +803,7 @@ public:
const CodeGenRegister *getReg() const { return Reg; }
MVT::SimpleValueType getVT() const { return VT; }
- static inline bool classof(const Matcher *N) {
+ static bool classof(const Matcher *N) {
return N->getKind() == EmitRegister;
}
@@ -826,7 +826,7 @@ public:
unsigned getSlot() const { return Slot; }
- static inline bool classof(const Matcher *N) {
+ static bool classof(const Matcher *N) {
return N->getKind() == EmitConvertToTarget;
}
@@ -854,7 +854,7 @@ public:
return ChainNodes[i];
}
- static inline bool classof(const Matcher *N) {
+ static bool classof(const Matcher *N) {
return N->getKind() == EmitMergeInputChains;
}
@@ -878,7 +878,7 @@ public:
unsigned getSrcSlot() const { return SrcSlot; }
Record *getDestPhysReg() const { return DestPhysReg; }
- static inline bool classof(const Matcher *N) {
+ static bool classof(const Matcher *N) {
return N->getKind() == EmitCopyToReg;
}
@@ -904,7 +904,7 @@ public:
unsigned getSlot() const { return Slot; }
Record *getNodeXForm() const { return NodeXForm; }
- static inline bool classof(const Matcher *N) {
+ static bool classof(const Matcher *N) {
return N->getKind() == EmitNodeXForm;
}
@@ -964,7 +964,7 @@ public:
bool hasMemRefs() const { return HasMemRefs; }
int getNumFixedArityOperands() const { return NumFixedArityOperands; }
- static inline bool classof(const Matcher *N) {
+ static bool classof(const Matcher *N) {
return N->getKind() == EmitNode || N->getKind() == MorphNodeTo;
}
@@ -991,7 +991,7 @@ public:
unsigned getFirstResultSlot() const { return FirstResultSlot; }
- static inline bool classof(const Matcher *N) {
+ static bool classof(const Matcher *N) {
return N->getKind() == EmitNode;
}
@@ -1015,7 +1015,7 @@ public:
const PatternToMatch &getPattern() const { return Pattern; }
- static inline bool classof(const Matcher *N) {
+ static bool classof(const Matcher *N) {
return N->getKind() == MorphNodeTo;
}
};
@@ -1036,7 +1036,7 @@ public:
unsigned getResult(unsigned R) const { return Results[R]; }
const PatternToMatch &getPattern() const { return Pattern; }
- static inline bool classof(const Matcher *N) {
+ static bool classof(const Matcher *N) {
return N->getKind() == CompleteMatch;
}
diff --git a/utils/TableGen/GlobalISelEmitter.cpp b/utils/TableGen/GlobalISelEmitter.cpp
index 03d231a153dc..924ed8f65c2c 100644
--- a/utils/TableGen/GlobalISelEmitter.cpp
+++ b/utils/TableGen/GlobalISelEmitter.cpp
@@ -80,8 +80,8 @@ public:
return;
}
if (Ty.isVector()) {
- OS << "LLT::vector(" << Ty.getNumElements() << ", " << Ty.getScalarSizeInBits()
- << ")";
+ OS << "LLT::vector(" << Ty.getNumElements() << ", "
+ << Ty.getScalarSizeInBits() << ")";
return;
}
llvm_unreachable("Unhandled LLT");
@@ -96,7 +96,8 @@ class InstructionMatcher;
static Optional<LLTCodeGen> MVTToLLT(MVT::SimpleValueType SVT) {
MVT VT(SVT);
if (VT.isVector() && VT.getVectorNumElements() != 1)
- return LLTCodeGen(LLT::vector(VT.getVectorNumElements(), VT.getScalarSizeInBits()));
+ return LLTCodeGen(
+ LLT::vector(VT.getVectorNumElements(), VT.getScalarSizeInBits()));
if (VT.isInteger() || VT.isFloatingPoint())
return LLTCodeGen(LLT::scalar(VT.getSizeInBits()));
return None;
@@ -241,12 +242,18 @@ public:
return *static_cast<Kind *>(Predicates.back().get());
}
- typename PredicateVec::const_iterator predicates_begin() const { return Predicates.begin(); }
- typename PredicateVec::const_iterator predicates_end() const { return Predicates.end(); }
+ typename PredicateVec::const_iterator predicates_begin() const {
+ return Predicates.begin();
+ }
+ typename PredicateVec::const_iterator predicates_end() const {
+ return Predicates.end();
+ }
iterator_range<typename PredicateVec::const_iterator> predicates() const {
return make_range(predicates_begin(), predicates_end());
}
- typename PredicateVec::size_type predicates_size() const { return Predicates.size(); }
+ typename PredicateVec::size_type predicates_size() const {
+ return Predicates.size();
+ }
/// Emit a C++ expression that tests whether all the predicates are met.
template <class... Args>
@@ -600,7 +607,8 @@ public:
/// Compare the priority of this object and B.
///
/// Returns true if this object is more important than B.
- virtual bool isHigherPriorityThan(const InstructionPredicateMatcher &B) const {
+ virtual bool
+ isHigherPriorityThan(const InstructionPredicateMatcher &B) const {
return Kind < B.Kind;
};
@@ -631,7 +639,8 @@ public:
/// Compare the priority of this object and B.
///
/// Returns true if this object is more important than B.
- bool isHigherPriorityThan(const InstructionPredicateMatcher &B) const override {
+ bool
+ isHigherPriorityThan(const InstructionPredicateMatcher &B) const override {
if (InstructionPredicateMatcher::isHigherPriorityThan(B))
return true;
if (B.InstructionPredicateMatcher::isHigherPriorityThan(*this))
@@ -832,7 +841,13 @@ public:
//===- Actions ------------------------------------------------------------===//
class OperandRenderer {
public:
- enum RendererKind { OR_Copy, OR_Imm, OR_Register, OR_ComplexPattern };
+ enum RendererKind {
+ OR_Copy,
+ OR_CopySubReg,
+ OR_Imm,
+ OR_Register,
+ OR_ComplexPattern
+ };
protected:
RendererKind Kind;
@@ -877,6 +892,42 @@ public:
}
};
+/// A CopySubRegRenderer emits code to copy a single register operand from an
+/// existing instruction to the one being built and indicate that only a
+/// subregister should be copied.
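+///
+/// The rendered code is expected to take the form
+/// MIB.addReg(SrcOp.getReg(), 0, SubRegIdx) so that only the named
+/// subregister of the source operand is read (see emitCxxRenderStmts).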
+class CopySubRegRenderer : public OperandRenderer {
+protected:
+ /// The matcher for the instruction that this operand is copied from.
+  /// This provides the facility for looking up an operand by its name so
+ /// that it can be used as a source for the instruction being built.
+ const InstructionMatcher &Matched;
+ /// The name of the operand.
+ const StringRef SymbolicName;
+ /// The subregister to extract.
+ const CodeGenSubRegIndex *SubReg;
+
+public:
+ CopySubRegRenderer(const InstructionMatcher &Matched, StringRef SymbolicName,
+ const CodeGenSubRegIndex *SubReg)
+ : OperandRenderer(OR_CopySubReg), Matched(Matched),
+ SymbolicName(SymbolicName), SubReg(SubReg) {}
+
+ static bool classof(const OperandRenderer *R) {
+ return R->getKind() == OR_CopySubReg;
+ }
+
+ const StringRef getSymbolicName() const { return SymbolicName; }
+
+ void emitCxxRenderStmts(raw_ostream &OS, RuleMatcher &Rule) const override {
+ const OperandMatcher &Operand = Matched.getOperand(SymbolicName);
+ StringRef InsnVarName =
+ Rule.getInsnVarName(Operand.getInstructionMatcher());
+ std::string OperandExpr = Operand.getOperandExpr(InsnVarName);
+ OS << " MIB.addReg(" << OperandExpr << ".getReg() /*" << SymbolicName
+ << "*/, 0, " << SubReg->EnumValue << ");\n";
+ }
+};
+
/// Adds a specific physical register to the instruction being built.
/// This is typically useful for WZR/XZR on AArch64.
class AddRegisterRenderer : public OperandRenderer {
@@ -1076,7 +1127,8 @@ public:
void emitCxxActionStmts(raw_ostream &OS, RuleMatcher &Rule,
StringRef RecycleVarName) const override {
- OS << " constrainSelectedInstRegOperands(" << Name << ", TII, TRI, RBI);\n";
+ OS << " constrainSelectedInstRegOperands(" << Name
+ << ", TII, TRI, RBI);\n";
}
};
@@ -1123,14 +1175,16 @@ std::string RuleMatcher::defineInsnVar(raw_ostream &OS,
return InsnVarName;
}
-StringRef RuleMatcher::getInsnVarName(const InstructionMatcher &InsnMatcher) const {
+StringRef
+RuleMatcher::getInsnVarName(const InstructionMatcher &InsnMatcher) const {
const auto &I = InsnVariableNames.find(&InsnMatcher);
if (I != InsnVariableNames.end())
return I->second;
llvm_unreachable("Matched Insn was not captured in a local variable");
}
-/// Emit a C++ initializer_list containing references to every matched instruction.
+/// Emit a C++ initializer_list containing references to every matched
+/// instruction.
void RuleMatcher::emitCxxCapturedInsnList(raw_ostream &OS) {
SmallVector<StringRef, 2> Names;
for (const auto &Pair : InsnVariableNames)
@@ -1292,6 +1346,7 @@ private:
const RecordKeeper &RK;
const CodeGenDAGPatterns CGP;
const CodeGenTarget &Target;
+ CodeGenRegBank CGRegs;
/// Keep track of the equivalence between SDNodes and Instruction.
/// This is defined using 'GINodeEquiv' in the target description.
@@ -1315,9 +1370,9 @@ private:
Error importChildMatcher(InstructionMatcher &InsnMatcher,
const TreePatternNode *SrcChild, unsigned OpIdx,
unsigned &TempOpIdx) const;
- Expected<BuildMIAction &> createAndImportInstructionRenderer(
- RuleMatcher &M, const TreePatternNode *Dst,
- const InstructionMatcher &InsnMatcher) const;
+ Expected<BuildMIAction &>
+ createAndImportInstructionRenderer(RuleMatcher &M, const TreePatternNode *Dst,
+ const InstructionMatcher &InsnMatcher);
Error importExplicitUseRenderer(BuildMIAction &DstMIBuilder,
TreePatternNode *DstChild,
const InstructionMatcher &InsnMatcher) const;
@@ -1354,7 +1409,7 @@ const CodeGenInstruction *GlobalISelEmitter::findNodeEquiv(Record *N) const {
}
GlobalISelEmitter::GlobalISelEmitter(RecordKeeper &RK)
- : RK(RK), CGP(RK), Target(CGP.getTargetInfo()) {}
+ : RK(RK), CGP(RK), Target(CGP.getTargetInfo()), CGRegs(RK) {}
//===- Emitter ------------------------------------------------------------===//
@@ -1382,7 +1437,8 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
InsnMatcher.addPredicate<InstructionOpcodeMatcher>(
&Target.getInstruction(RK.getDef("G_CONSTANT")));
} else
- return failedImport("Unable to deduce gMIR opcode to handle Src (which is a leaf)");
+ return failedImport(
+ "Unable to deduce gMIR opcode to handle Src (which is a leaf)");
} else {
auto SrcGIOrNull = findNodeEquiv(Src->getOperator());
if (!SrcGIOrNull)
@@ -1415,7 +1471,8 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
OperandMatcher &OM = InsnMatcher.addOperand(OpIdx++, "", TempOpIdx);
OM.addPredicate<LiteralIntOperandMatcher>(SrcIntInit->getValue());
} else
- return failedImport("Unable to deduce gMIR opcode to handle Src (which is a leaf)");
+ return failedImport(
+ "Unable to deduce gMIR opcode to handle Src (which is a leaf)");
} else {
// Match the used operands (i.e. the children of the operator).
for (unsigned i = 0, e = Src->getNumChildren(); i != e; ++i) {
@@ -1585,7 +1642,7 @@ Error GlobalISelEmitter::importExplicitUseRenderer(
Expected<BuildMIAction &> GlobalISelEmitter::createAndImportInstructionRenderer(
RuleMatcher &M, const TreePatternNode *Dst,
- const InstructionMatcher &InsnMatcher) const {
+ const InstructionMatcher &InsnMatcher) {
Record *DstOp = Dst->getOperator();
if (!DstOp->isSubClassOf("Instruction")) {
if (DstOp->isSubClassOf("ValueType"))
@@ -1597,13 +1654,17 @@ Expected<BuildMIAction &> GlobalISelEmitter::createAndImportInstructionRenderer(
unsigned DstINumUses = DstI->Operands.size() - DstI->Operands.NumDefs;
unsigned ExpectedDstINumUses = Dst->getNumChildren();
+ bool IsExtractSubReg = false;
// COPY_TO_REGCLASS is just a copy with a ConstrainOperandToRegClassAction
- // attached.
+ // attached. Similarly for EXTRACT_SUBREG except that's a subregister copy.
if (DstI->TheDef->getName() == "COPY_TO_REGCLASS") {
DstI = &Target.getInstruction(RK.getDef("COPY"));
DstINumUses--; // Ignore the class constraint.
ExpectedDstINumUses--;
+ } else if (DstI->TheDef->getName() == "EXTRACT_SUBREG") {
+ DstI = &Target.getInstruction(RK.getDef("COPY"));
+ IsExtractSubReg = true;
}
auto &DstMIBuilder = M.addAction<BuildMIAction>("NewI", DstI, InsnMatcher);
@@ -1614,6 +1675,33 @@ Expected<BuildMIAction &> GlobalISelEmitter::createAndImportInstructionRenderer(
DstMIBuilder.addRenderer<CopyRenderer>(InsnMatcher, DstIOperand.Name);
}
+ // EXTRACT_SUBREG needs to use a subregister COPY.
+ if (IsExtractSubReg) {
+ if (!Dst->getChild(0)->isLeaf())
+ return failedImport("EXTRACT_SUBREG child #1 is not a leaf");
+
+ if (DefInit *SubRegInit =
+ dyn_cast<DefInit>(Dst->getChild(1)->getLeafValue())) {
+ CodeGenRegisterClass *RC = CGRegs.getRegClass(
+ getInitValueAsRegClass(Dst->getChild(0)->getLeafValue()));
+ CodeGenSubRegIndex *SubIdx = CGRegs.getSubRegIdx(SubRegInit->getDef());
+
+ const auto &SrcRCDstRCPair =
+ RC->getMatchingSubClassWithSubRegs(CGRegs, SubIdx);
+ if (SrcRCDstRCPair.hasValue()) {
+ assert(SrcRCDstRCPair->second && "Couldn't find a matching subclass");
+ if (SrcRCDstRCPair->first != RC)
+ return failedImport("EXTRACT_SUBREG requires an additional COPY");
+ }
+
+ DstMIBuilder.addRenderer<CopySubRegRenderer>(
+ InsnMatcher, Dst->getChild(0)->getName(), SubIdx);
+ return DstMIBuilder;
+ }
+
+ return failedImport("EXTRACT_SUBREG child #1 is not a subreg index");
+ }
+
// Render the explicit uses.
unsigned Child = 0;
unsigned NumDefaultOps = 0;
@@ -1740,10 +1828,22 @@ Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) {
if (DstIOpRec == nullptr)
return failedImport(
"COPY_TO_REGCLASS operand #1 isn't a register class");
+ } else if (DstI.TheDef->getName() == "EXTRACT_SUBREG") {
+ if (!Dst->getChild(0)->isLeaf())
+ return failedImport("EXTRACT_SUBREG operand #0 isn't a leaf");
+
+      // We can assume that a subregister is in the same bank as its
+      // super-register.
+ DstIOpRec = getInitValueAsRegClass(Dst->getChild(0)->getLeafValue());
+
+ if (DstIOpRec == nullptr)
+ return failedImport(
+ "EXTRACT_SUBREG operand #0 isn't a register class");
} else if (DstIOpRec->isSubClassOf("RegisterOperand"))
DstIOpRec = DstIOpRec->getValueAsDef("RegClass");
else if (!DstIOpRec->isSubClassOf("RegisterClass"))
- return failedImport("Dst MI def isn't a register class" + to_string(*Dst));
+ return failedImport("Dst MI def isn't a register class" +
+ to_string(*Dst));
OperandMatcher &OM = InsnMatcher.getOperand(OpIdx);
OM.setSymbolicName(DstIOperand.Name);
@@ -1776,6 +1876,52 @@ Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) {
M.addAction<ConstrainOperandToRegClassAction>(
"NewI", 0, Target.getRegisterClass(DstIOpRec));
+
+ // We're done with this pattern! It's eligible for GISel emission; return
+ // it.
+ ++NumPatternImported;
+ return std::move(M);
+ }
+
+ if (DstI.TheDef->getName() == "EXTRACT_SUBREG") {
+ // EXTRACT_SUBREG selects into a subregister COPY but unlike most
+ // instructions, the result register class is controlled by the
+ // subregisters of the operand. As a result, we must constrain the result
+ // class rather than check that it's already the right one.
+ if (!Dst->getChild(0)->isLeaf())
+ return failedImport("EXTRACT_SUBREG child #1 is not a leaf");
+
+ DefInit *SubRegInit = dyn_cast<DefInit>(Dst->getChild(1)->getLeafValue());
+ if (!SubRegInit)
+ return failedImport("EXTRACT_SUBREG child #1 is not a subreg index");
+
+ // Constrain the result to the same register bank as the operand.
+ Record *DstIOpRec =
+ getInitValueAsRegClass(Dst->getChild(0)->getLeafValue());
+
+ if (DstIOpRec == nullptr)
+ return failedImport("EXTRACT_SUBREG operand #1 isn't a register class");
+
+ CodeGenSubRegIndex *SubIdx = CGRegs.getSubRegIdx(SubRegInit->getDef());
+ CodeGenRegisterClass *SrcRC = CGRegs.getRegClass(
+ getInitValueAsRegClass(Dst->getChild(0)->getLeafValue()));
+
+      // It would be nice to leave this constraint implicit, but we're
+      // required to pick a register class, so constrain the result to a
+      // register class that can hold the correct MVT.
+ //
+ // FIXME: This may introduce an extra copy if the chosen class doesn't
+ // actually contain the subregisters.
+ assert(Src->getExtTypes().size() == 1 &&
+ "Expected Src of EXTRACT_SUBREG to have one result type");
+
+ const auto &SrcRCDstRCPair =
+ SrcRC->getMatchingSubClassWithSubRegs(CGRegs, SubIdx);
+ assert(SrcRCDstRCPair->second && "Couldn't find a matching subclass");
+ M.addAction<ConstrainOperandToRegClassAction>("NewI", 0,
+ *SrcRCDstRCPair->second);
+ M.addAction<ConstrainOperandToRegClassAction>("NewI", 1,
+ *SrcRCDstRCPair->first);
} else
M.addAction<ConstrainOperandsToDefinitionAction>("NewI");
@@ -1874,8 +2020,10 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
<< "InstructionSelector::selectImpl(MachineInstr &I) const {\n"
<< " MachineFunction &MF = *I.getParent()->getParent();\n"
<< " const MachineRegisterInfo &MRI = MF.getRegInfo();\n"
- << " // FIXME: This should be computed on a per-function basis rather than per-insn.\n"
- << " AvailableFunctionFeatures = computeAvailableFunctionFeatures(&STI, &MF);\n"
+ << " // FIXME: This should be computed on a per-function basis rather "
+ "than per-insn.\n"
+ << " AvailableFunctionFeatures = computeAvailableFunctionFeatures(&STI, "
+ "&MF);\n"
<< " const PredicateBitset AvailableFeatures = getAvailableFeatures();\n";
for (auto &Rule : Rules) {
diff --git a/utils/TableGen/RegisterInfoEmitter.cpp b/utils/TableGen/RegisterInfoEmitter.cpp
index 12cfb93a0c4f..bebb1a183fc7 100644
--- a/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/utils/TableGen/RegisterInfoEmitter.cpp
@@ -1195,7 +1195,8 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
OS << "\" };\n\n";
// Emit SubRegIndex lane masks, including 0.
- OS << "\nstatic const LaneBitmask SubRegIndexLaneMaskTable[] = {\n LaneBitmask::getAll(),\n";
+ OS << "\nstatic const LaneBitmask SubRegIndexLaneMaskTable[] = {\n "
+ "LaneBitmask::getAll(),\n";
for (const auto &Idx : SubRegIndices) {
printMask(OS << " ", Idx.LaneMask);
OS << ", // " << Idx.getName() << '\n';
@@ -1234,7 +1235,8 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
BitVector MaskBV(RegisterClasses.size());
for (const auto &RC : RegisterClasses) {
- OS << "static const uint32_t " << RC.getName() << "SubClassMask[] = {\n ";
+ OS << "static const uint32_t " << RC.getName()
+ << "SubClassMask[] = {\n ";
printBitVectorAsHex(OS, RC.getSubClasses(), 32);
// Emit super-reg class masks for any relevant SubRegIndices that can
diff --git a/utils/TableGen/SubtargetEmitter.cpp b/utils/TableGen/SubtargetEmitter.cpp
index 7e9f552eccc0..16d5740b79a3 100644
--- a/utils/TableGen/SubtargetEmitter.cpp
+++ b/utils/TableGen/SubtargetEmitter.cpp
@@ -805,6 +805,7 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
return;
std::vector<MCSchedClassDesc> &SCTab = SchedTables.ProcSchedClasses.back();
+ DEBUG(dbgs() << "\n+++ SCHED CLASSES (GenSchedClassTables) +++\n");
for (const CodeGenSchedClass &SC : SchedModels.schedClasses()) {
DEBUG(SC.dump(&SchedModels));
diff --git a/utils/docker/README b/utils/docker/README
new file mode 100644
index 000000000000..be08dfa4c505
--- /dev/null
+++ b/utils/docker/README
@@ -0,0 +1 @@
+See llvm/docs/Docker.rst for details
diff --git a/utils/docker/build_docker_image.sh b/utils/docker/build_docker_image.sh
new file mode 100755
index 000000000000..2ec07ab6da4b
--- /dev/null
+++ b/utils/docker/build_docker_image.sh
@@ -0,0 +1,121 @@
+#!/bin/bash
+#===- llvm/utils/docker/build_docker_image.sh ----------------------------===//
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===----------------------------------------------------------------------===//
+set -e
+
+IMAGE_SOURCE=""
+DOCKER_REPOSITORY=""
+DOCKER_TAG=""
+BUILDSCRIPT_ARGS=""
+
+function show_usage() {
+ usage=$(cat << EOF
+Usage: build_docker_image.sh [options] [-- [buildscript_args]...]
+
+Available options:
+  -s|--source       image source dir (e.g. debian8, nvidia-cuda, etc.)
+ -d|--docker-repository docker repository for the image
+ -t|--docker-tag docker tag for the image
+Required options: --source and --docker-repository.
+
+All options after '--' are passed to buildscript (see
+scripts/build_install_llvm.sh).
+
+For example, running:
+$ build_docker_image.sh -s debian8 -d mydocker/debian8-clang -t latest \
+ -- -p clang -i install-clang -i install-clang-headers
+will produce two docker images:
+ mydocker/debian8-clang-build:latest - an intermediate image used to compile
+ clang.
+  mydocker/debian8-clang:latest - a small image with preinstalled clang.
+Please note that this example produces an installation of limited use, since
+it doesn't override the CMake defaults, which results in a Debug,
+non-bootstrapped build of clang.
+For an example of a somewhat more useful build, see build_clang_image.sh.
+EOF
+)
+ echo "$usage"
+}
+
+while [[ $# -gt 0 ]]; do
+ case "$1" in
+ -h|--help)
+ show_usage
+ exit 0
+ ;;
+ -s|--source)
+ shift
+ IMAGE_SOURCE="$1"
+ shift
+ ;;
+ -d|--docker-repository)
+ shift
+ DOCKER_REPOSITORY="$1"
+ shift
+ ;;
+ -t|--docker-tag)
+ shift
+ DOCKER_TAG="$1"
+ shift
+ ;;
+ --)
+ shift
+ BUILDSCRIPT_ARGS="$*"
+ shift $#
+ ;;
+ *)
+ echo "Unknown argument $1"
+ exit 1
+ ;;
+ esac
+done
+
+command -v docker >/dev/null ||
+ {
+ echo "Docker binary cannot be found. Please install Docker to use this script."
+ exit 1
+ }
+
+if [ "$IMAGE_SOURCE" == "" ]; then
+ echo "Required argument missing: --source"
+ exit 1
+fi
+
+if [ "$DOCKER_REPOSITORY" == "" ]; then
+ echo "Required argument missing: --docker-repository"
+ exit 1
+fi
+
+cd "$(dirname "$0")"
+if [ ! -d "$IMAGE_SOURCE" ]; then
+ echo "No sources for '$IMAGE_SOURCE' were found in $PWD"
+ exit 1
+fi
+
+echo "Building from $IMAGE_SOURCE"
+
+if [ "$DOCKER_TAG" != "" ]; then
+ DOCKER_TAG=":$DOCKER_TAG"
+fi
+
+echo "Building $DOCKER_REPOSITORY-build$DOCKER_TAG"
+docker build -t "$DOCKER_REPOSITORY-build$DOCKER_TAG" \
+ --build-arg "buildscript_args=$BUILDSCRIPT_ARGS" \
+ -f "$IMAGE_SOURCE/build/Dockerfile" .
+
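+# The build image archives its installation to /tmp/clang.tar.gz; copy the
+# archive out through a bind-mounted directory so that the release Dockerfile
+# can ADD it.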
+echo "Copying clang installation to release image sources"
+docker run -v "$PWD/$IMAGE_SOURCE:/workspace" "$DOCKER_REPOSITORY-build$DOCKER_TAG" \
+ cp /tmp/clang.tar.gz /workspace/release
+trap "rm -f $PWD/$IMAGE_SOURCE/release/clang.tar.gz" EXIT
+
+echo "Building release image"
+docker build -t "${DOCKER_REPOSITORY}${DOCKER_TAG}" \
+ "$IMAGE_SOURCE/release"
+
+echo "Done"
diff --git a/utils/docker/debian8/build/Dockerfile b/utils/docker/debian8/build/Dockerfile
new file mode 100644
index 000000000000..13a11a73be6c
--- /dev/null
+++ b/utils/docker/debian8/build/Dockerfile
@@ -0,0 +1,35 @@
+#===- llvm/utils/docker/debian8/build/Dockerfile -------------------------===//
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===----------------------------------------------------------------------===//
+# Produces an image that compiles and archives clang, based on debian8.
+FROM launcher.gcr.io/google/debian8:latest
+
+LABEL maintainer "LLVM Developers"
+
+# Install build dependencies of llvm.
+# First, update apt's source list and include the sources of the packages.
+RUN grep deb /etc/apt/sources.list | \
+ sed 's/^deb/deb-src /g' >> /etc/apt/sources.list
+
+# Install compiler, python and subversion.
+RUN apt-get update && \
+ apt-get install -y --no-install-recommends build-essential python2.7 wget \
+ subversion ninja-build && \
+ rm -rf /var/lib/apt/lists/*
+
+# Install, into /usr/local, a cmake version that can compile clang.
+# (The version in the debian8 repos is too old.)
+RUN wget -O - "https://cmake.org/files/v3.7/cmake-3.7.2-Linux-x86_64.tar.gz" | \
+ tar xzf - -C /usr/local --strip-components=1
+
+# Arguments passed to build_install_llvm.sh.
+ARG buildscript_args
+
+# Run the build. Results of the build will be available as /tmp/clang.tar.gz.
+ADD scripts/build_install_llvm.sh /tmp
+RUN /tmp/build_install_llvm.sh ${buildscript_args}
diff --git a/utils/docker/debian8/release/Dockerfile b/utils/docker/debian8/release/Dockerfile
new file mode 100644
index 000000000000..d0214b9c67af
--- /dev/null
+++ b/utils/docker/debian8/release/Dockerfile
@@ -0,0 +1,21 @@
+#===- llvm/utils/docker/debian8/release/Dockerfile -----------------------===//
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===----------------------------------------------------------------------===//
+# A release image, containing the clang installation produced by the 'build/'
+# image plus libstdc++ and binutils.
+FROM launcher.gcr.io/google/debian8:latest
+
+LABEL maintainer "LLVM Developers"
+
+# Install packages for a minimally useful image.
+RUN apt-get update && \
+ apt-get install -y --no-install-recommends libstdc++-4.9-dev binutils && \
+ rm -rf /var/lib/apt/lists/*
+
+# Unpack clang installation into this image.
+ADD clang.tar.gz /
diff --git a/utils/docker/example/build/Dockerfile b/utils/docker/example/build/Dockerfile
new file mode 100644
index 000000000000..597ccfeb4f23
--- /dev/null
+++ b/utils/docker/example/build/Dockerfile
@@ -0,0 +1,26 @@
+#===- llvm/utils/docker/example/build/Dockerfile -------------------------===//
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===----------------------------------------------------------------------===//
+# This is an example Dockerfile to build an image that compiles clang.
+# Replace FIXMEs to prepare your own image.
+
+# FIXME: Replace 'ubuntu' with your base image
+FROM ubuntu
+
+# FIXME: Change maintainer name
+LABEL maintainer "Maintainer <maintainer@email>"
+
+# FIXME: Install llvm/clang build dependencies, including a compiler to
+# build stage1, cmake, subversion, ninja, etc.
+
+# Arguments to pass to build_install_llvm.sh.
+ARG buildscript_args
+
+# Run the build. Results of the build will be available as /tmp/clang.tar.gz.
+ADD scripts/build_install_llvm.sh /tmp
+RUN /tmp/build_install_llvm.sh ${buildscript_args}
diff --git a/utils/docker/example/release/Dockerfile b/utils/docker/example/release/Dockerfile
new file mode 100644
index 000000000000..953d81fc9951
--- /dev/null
+++ b/utils/docker/example/release/Dockerfile
@@ -0,0 +1,24 @@
+#===- llvm/utils/docker/example/release/Dockerfile -----------------------===//
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===----------------------------------------------------------------------===//
+# An image that unpacks a clang installation, compiled by the 'build/'
+# container.
+# Replace FIXMEs to prepare your own image.
+
+# FIXME: Replace 'ubuntu' with your base image.
+FROM ubuntu
+
+# FIXME: Change maintainer name.
+LABEL maintainer "Maintainer <maintainer@email>"
+
+# FIXME: Install all packages you want to have in your release container.
+# A minimally useful installation must include libstdc++ and binutils.
+
+# Unpack clang installation into this container.
+# It is copied to this directory by build_docker_image.sh script.
+ADD clang.tar.gz /
diff --git a/utils/docker/nvidia-cuda/build/Dockerfile b/utils/docker/nvidia-cuda/build/Dockerfile
new file mode 100644
index 000000000000..619b80cbb61a
--- /dev/null
+++ b/utils/docker/nvidia-cuda/build/Dockerfile
@@ -0,0 +1,25 @@
+#===- llvm/utils/docker/nvidia-cuda/build/Dockerfile ---------------------===//
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===----------------------------------------------------------------------===//
+# Produces an image that compiles and archives clang, based on nvidia/cuda
+# image.
+FROM nvidia/cuda:8.0-devel
+
+LABEL maintainer "LLVM Developers"
+
+# Arguments to pass to build_install_llvm.sh.
+ARG buildscript_args
+
+# Install llvm build dependencies.
+RUN apt-get update && \
+ apt-get install -y --no-install-recommends cmake python2.7 subversion ninja-build && \
+ rm -rf /var/lib/apt/lists/*
+
+# Run the build. Results of the build will be available as /tmp/clang.tar.gz.
+ADD scripts/build_install_llvm.sh /tmp
+RUN /tmp/build_install_llvm.sh ${buildscript_args}
diff --git a/utils/docker/nvidia-cuda/release/Dockerfile b/utils/docker/nvidia-cuda/release/Dockerfile
new file mode 100644
index 000000000000..b9bcae159780
--- /dev/null
+++ b/utils/docker/nvidia-cuda/release/Dockerfile
@@ -0,0 +1,23 @@
+#===- llvm/utils/docker/nvidia-cuda/release/Dockerfile -------------------===//
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===----------------------------------------------------------------------===//
+# This is an example Dockerfile that copies a clang installation, compiled
+# by the 'build/' container, into a fresh docker image to get a container of
+# minimal size.
+# Replace FIXMEs to prepare a new Dockerfile.
+
+# FIXME: Replace 'nvidia/cuda' with your base image if needed.
+FROM nvidia/cuda:8.0-devel
+
+# FIXME: Change maintainer name.
+LABEL maintainer "LLVM Developers"
+
+# Unpack clang installation into this container.
+ADD clang.tar.gz /
+
+# C++ standard library and binutils are already included in the base image.
diff --git a/utils/docker/scripts/build_install_llvm.sh b/utils/docker/scripts/build_install_llvm.sh
new file mode 100755
index 000000000000..7e0e90657416
--- /dev/null
+++ b/utils/docker/scripts/build_install_llvm.sh
@@ -0,0 +1,169 @@
+#!/usr/bin/env bash
+#===- llvm/utils/docker/scripts/build_install_llvm.sh ---------------------===//
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===-----------------------------------------------------------------------===//
+
+set -e
+
+function show_usage() {
+ usage=$(cat << EOF
+Usage: build_install_llvm.sh [options] -- [cmake-args]
+
+Check out svn sources and run cmake with the specified arguments. Used
+inside a docker container.
+Passes an additional -DCMAKE_INSTALL_PREFIX and archives the contents of
+the install directory to /tmp/clang.tar.gz.
+
+Available options:
+ -h|--help show this help message
+  -b|--branch         svn branch to checkout, e.g. 'trunk',
+ 'branches/release_40'
+ (default: 'trunk')
+ -r|--revision svn revision to checkout
+ -p|--llvm-project name of an svn project to checkout. Will also add the
+                      project to the LLVM_ENABLE_PROJECTS list passed to
+                      CMake.
+ For clang, please use 'clang', not 'cfe'.
+                      Project 'llvm' is always included and is ignored if
+                      specified.
+ Can be specified multiple times.
+ -i|--install-target name of a cmake install target to build and include in
+ the resulting archive. Can be specified multiple times.
+Required options: At least one --install-target.
+
+All options after '--' are passed to CMake invocation.
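+
+For example, running:
+$ build_install_llvm.sh -p clang -i install-clang -i install-clang-headers \
+    -- -DCMAKE_BUILD_TYPE=Release
+checks out llvm and clang from trunk, builds the listed install targets in
+Release mode and archives them to /tmp/clang.tar.gz. (An illustrative
+invocation; any documented option can be combined the same way.)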
+EOF
+)
+ echo "$usage"
+}
+
+LLVM_SVN_REV=""
+LLVM_BRANCH=""
+CMAKE_ARGS=""
+CMAKE_INSTALL_TARGETS=""
+# We always check out llvm.
+LLVM_PROJECTS="llvm"
+CMAKE_LLVM_ENABLE_PROJECTS=""
+
+function contains_project() {
+ local TARGET_PROJ="$1"
+ local PROJ
+ for PROJ in $LLVM_PROJECTS; do
+ if [ "$PROJ" == "$TARGET_PROJ" ]; then
+ return 0
+ fi
+ done
+ return 1
+}
+
+while [[ $# -gt 0 ]]; do
+ case "$1" in
+    -r|--revision)
+      shift
+      LLVM_SVN_REV="$1"
+      shift
+      ;;
+ -b|--branch)
+ shift
+ LLVM_BRANCH="$1"
+ shift
+ ;;
+ -p|--llvm-project)
+ shift
+ PROJ="$1"
+ if [ "$PROJ" == "cfe" ]; then
+ PROJ="clang"
+ fi
+ if ! contains_project "$PROJ" ; then
+ LLVM_PROJECTS="$LLVM_PROJECTS $PROJ"
+        if [ "$CMAKE_LLVM_ENABLE_PROJECTS" == "" ]; then
+          CMAKE_LLVM_ENABLE_PROJECTS="$PROJ"
+        else
+          CMAKE_LLVM_ENABLE_PROJECTS="$CMAKE_LLVM_ENABLE_PROJECTS;$PROJ"
+        fi
+      else
+        echo "Project '$PROJ' is already enabled, ignoring extra occurrences."
+ fi
+ shift
+ ;;
+ -i|--install-target)
+ shift
+ CMAKE_INSTALL_TARGETS="$CMAKE_INSTALL_TARGETS $1"
+ shift
+ ;;
+ --)
+ shift
+ CMAKE_ARGS="$*"
+ shift $#
+ ;;
+ -h|--help)
+ show_usage
+ exit 0
+ ;;
+ *)
+ echo "Unknown option: $1"
+ exit 1
+ esac
+done
+
+if [ "$CMAKE_INSTALL_TARGETS" == "" ]; then
+ echo "No install targets. Please pass one or more --install-target."
+ exit 1
+fi
+
+if [ "$LLVM_BRANCH" == "" ]; then
+ LLVM_BRANCH="trunk"
+fi
+
+if [ "$LLVM_SVN_REVISION" != "" ]; then
+ SVN_REV_ARG="-r$LLVM_SVN_REVISION"
+else
+ SVN_REV_ARG=""
+fi
+
+CLANG_BUILD_DIR=/tmp/clang-build
+CLANG_INSTALL_DIR=/tmp/clang-install
+
+mkdir "$CLANG_BUILD_DIR"
+
+# Get the sources from svn.
+echo "Checking out sources from svn"
+mkdir "$CLANG_BUILD_DIR/src"
+for LLVM_PROJECT in $LLVM_PROJECTS; do
+ if [ "$LLVM_PROJECT" == "clang" ]; then
+ SVN_PROJECT="cfe"
+ else
+ SVN_PROJECT="$LLVM_PROJECT"
+ fi
+
+ echo "Checking out http://llvm.org/svn/llvm-project/$SVN_PROJECT to $CLANG_BUILD_DIR/src/$LLVM_PROJECT"
+ # FIXME: --trust-server-cert is required to workaround 'SSL issuer is not
+ # trusted' error. Using https seems preferable to http either way,
+ # albeit this is not secure.
+ svn co -q $SVN_REV_ARG --trust-server-cert \
+ "https://llvm.org/svn/llvm-project/$SVN_PROJECT/$LLVM_BRANCH" \
+ "$CLANG_BUILD_DIR/src/$LLVM_PROJECT"
+done
+
+mkdir "$CLANG_BUILD_DIR/build"
+pushd "$CLANG_BUILD_DIR/build"
+
+# Run the build as specified in the build arguments.
+echo "Running build"
+cmake -GNinja \
+ -DCMAKE_INSTALL_PREFIX="$CLANG_INSTALL_DIR" \
+ -DLLVM_ENABLE_PROJECTS="$CMAKE_LLVM_ENABLE_PROJECTS" \
+ $CMAKE_ARGS \
+ "$CLANG_BUILD_DIR/src/llvm"
+ninja $CMAKE_INSTALL_TARGETS
+
+popd
+
+# Pack the installed clang into an archive.
+echo "Archiving clang installation to /tmp/clang.tar.gz"
+cd "$CLANG_INSTALL_DIR"
+tar -czf /tmp/clang.tar.gz *
+
+# Cleanup.
+rm -rf "$CLANG_BUILD_DIR" "$CLANG_INSTALL_DIR"
+
+echo "Done"
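
As a usage illustration of the script above (the flag values here are hypothetical), a Release build of clang inside the container could be requested with 'build_install_llvm.sh -b trunk -p clang -i install-clang -i install-clang-headers -- -DCMAKE_BUILD_TYPE=Release'; everything after '--' is forwarded to the CMake invocation, and the named install targets end up in /tmp/clang.tar.gz.
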
diff --git a/utils/lit/lit/formats/__init__.py b/utils/lit/lit/formats/__init__.py
index 3ff46e93ead2..7d14ca4b535a 100644
--- a/utils/lit/lit/formats/__init__.py
+++ b/utils/lit/lit/formats/__init__.py
@@ -1,8 +1,3 @@
-from lit.formats.base import ( # noqa: F401
- TestFormat,
- FileBasedTest,
- OneCommandPerFileTest
-)
-
+from lit.formats.base import TestFormat # noqa: F401
from lit.formats.googletest import GoogleTest # noqa: F401
from lit.formats.shtest import ShTest # noqa: F401
diff --git a/utils/lit/lit/formats/base.py b/utils/lit/lit/formats/base.py
index 6721d17e334e..baa9ff1d3b7d 100644
--- a/utils/lit/lit/formats/base.py
+++ b/utils/lit/lit/formats/base.py
@@ -1,117 +1,50 @@
-from __future__ import absolute_import
-import os
-
-import lit.Test
-import lit.util
+import abc
class TestFormat(object):
- pass
-
-###
-
-class FileBasedTest(TestFormat):
- def getTestsInDirectory(self, testSuite, path_in_suite,
- litConfig, localConfig):
- source_path = testSuite.getSourcePath(path_in_suite)
- for filename in os.listdir(source_path):
- # Ignore dot files and excluded tests.
- if (filename.startswith('.') or
- filename in localConfig.excludes):
- continue
-
- filepath = os.path.join(source_path, filename)
- if not os.path.isdir(filepath):
- base,ext = os.path.splitext(filename)
- if ext in localConfig.suffixes:
- yield lit.Test.Test(testSuite, path_in_suite + (filename,),
- localConfig)
-
-###
-
-import re
-import tempfile
-
-class OneCommandPerFileTest(TestFormat):
- # FIXME: Refactor into generic test for running some command on a directory
- # of inputs.
-
- def __init__(self, command, dir, recursive=False,
- pattern=".*", useTempInput=False):
- if isinstance(command, str):
- self.command = [command]
- else:
- self.command = list(command)
- if dir is not None:
- dir = str(dir)
- self.dir = dir
- self.recursive = bool(recursive)
- self.pattern = re.compile(pattern)
- self.useTempInput = useTempInput
-
- def getTestsInDirectory(self, testSuite, path_in_suite,
- litConfig, localConfig):
- dir = self.dir
- if dir is None:
- dir = testSuite.getSourcePath(path_in_suite)
-
- for dirname,subdirs,filenames in os.walk(dir):
- if not self.recursive:
- subdirs[:] = []
-
- subdirs[:] = [d for d in subdirs
- if (d != '.svn' and
- d not in localConfig.excludes)]
-
- for filename in filenames:
- if (filename.startswith('.') or
- not self.pattern.match(filename) or
- filename in localConfig.excludes):
- continue
-
- path = os.path.join(dirname,filename)
- suffix = path[len(dir):]
- if suffix.startswith(os.sep):
- suffix = suffix[1:]
- test = lit.Test.Test(
- testSuite, path_in_suite + tuple(suffix.split(os.sep)),
- localConfig)
- # FIXME: Hack?
- test.source_path = path
- yield test
-
- def createTempInput(self, tmp, test):
- raise NotImplementedError('This is an abstract method.')
-
+ """Base class for test formats.
+
+ A TestFormat encapsulates logic for finding and executing a certain type of
+ test. For example, a subclass FooTestFormat would contain the logic for
+ finding tests written in the 'Foo' format, and the logic for running a
+ single one.
+
+ TestFormat is an Abstract Base Class (ABC). It uses the Python abc.ABCMeta
+ type and associated @abc.abstractmethod decorator. Together, these provide
+ subclass behaviour which is notionally similar to C++ pure virtual classes:
+ only subclasses which implement all abstract methods can be instantiated
+ (the implementation may come from an intermediate base).
+
+ For details on ABCs, see: https://docs.python.org/2/library/abc.html. Note
+ that Python ABCs have extensive abilities beyond what is used here. For
+ TestFormat, we only care about enforcing that abstract methods are
+ implemented.
+ """
+
+ __metaclass__ = abc.ABCMeta
+
+ @abc.abstractmethod
+ def getTestsInDirectory(self, testSuite, path_in_suite, litConfig,
+ localConfig):
+ """Finds tests of this format in the given directory.
+
+ Args:
+ testSuite: a Test.TestSuite object.
+ path_in_suite: the subpath under testSuite to look for tests.
+ litConfig: the LitConfig for the test suite.
+ localConfig: a LitConfig with local specializations.
+
+ Returns:
+ An iterable of Test.Test objects.
+ """
+
+ @abc.abstractmethod
def execute(self, test, litConfig):
- if test.config.unsupported:
- return (lit.Test.UNSUPPORTED, 'Test is unsupported')
-
- cmd = list(self.command)
-
- # If using temp input, create a temporary file and hand it to the
- # subclass.
- if self.useTempInput:
- tmp = tempfile.NamedTemporaryFile(suffix='.cpp')
- self.createTempInput(tmp, test)
- tmp.flush()
- cmd.append(tmp.name)
- elif hasattr(test, 'source_path'):
- cmd.append(test.source_path)
- else:
- cmd.append(test.getSourcePath())
-
- out, err, exitCode = lit.util.executeCommand(cmd)
-
- diags = out + err
- if not exitCode and not diags.strip():
- return lit.Test.PASS,''
+ """Runs the given 'test', which is of this format.
- # Try to include some useful information.
- report = """Command: %s\n""" % ' '.join(["'%s'" % a
- for a in cmd])
- if self.useTempInput:
- report += """Temporary File: %s\n""" % tmp.name
- report += "--\n%s--\n""" % open(tmp.name).read()
- report += """Output:\n--\n%s--""" % diags
+ Args:
+ test: a Test.Test object describing the test to run.
+ litConfig: the LitConfig for the test suite.
- return lit.Test.FAIL, report
+ Returns:
+ A tuple of (status:Test.ResultCode, message:str)
+ """
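
For readers unfamiliar with Python's abc module, here is a minimal self-contained sketch of the enforcement the docstring above describes; the class names are illustrative and not part of lit:

    import abc

    class Fmt(object):
        # Python 2 spelling, as in the patch above; under Python 3 this
        # would be written 'class Fmt(metaclass=abc.ABCMeta)'.
        __metaclass__ = abc.ABCMeta

        @abc.abstractmethod
        def execute(self, test, litConfig):
            """Must be implemented by any concrete subclass."""

    class Complete(Fmt):
        def execute(self, test, litConfig):
            return ('PASS', '')

    class Incomplete(Fmt):
        pass

    Complete()      # fine: all abstract methods are implemented
    # Incomplete()  # Python 2 raises TypeError: can't instantiate abstract
    #               # class Incomplete with abstract methods execute
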
diff --git a/utils/lit/lit/formats/googletest.py b/utils/lit/lit/formats/googletest.py
index 29a92c4e960b..b683f7c7db8e 100644
--- a/utils/lit/lit/formats/googletest.py
+++ b/utils/lit/lit/formats/googletest.py
@@ -11,8 +11,8 @@ from .base import TestFormat
kIsWindows = sys.platform in ['win32', 'cygwin']
class GoogleTest(TestFormat):
- def __init__(self, test_sub_dir, test_suffix):
- self.test_sub_dir = os.path.normcase(str(test_sub_dir)).split(';')
+ def __init__(self, test_sub_dirs, test_suffix):
+ self.test_sub_dirs = os.path.normcase(str(test_sub_dirs)).split(';')
self.test_suffix = str(test_suffix)
# On Windows, assume tests will also end in '.exe'.
@@ -30,19 +30,24 @@ class GoogleTest(TestFormat):
localConfig: TestingConfig instance"""
try:
- lines = lit.util.capture([path, '--gtest_list_tests'],
- env=localConfig.environment)
- if kIsWindows:
- lines = lines.replace('\r', '')
- lines = lines.split('\n')
- except Exception as exc:
- out = exc.output if isinstance(exc, subprocess.CalledProcessError) else ''
- litConfig.warning("unable to discover google-tests in %r: %s. Process output: %s"
- % (path, sys.exc_info()[1], out))
+ output = subprocess.check_output([path, '--gtest_list_tests'],
+ env=localConfig.environment)
+ except subprocess.CalledProcessError as exc:
+ litConfig.warning(
+ "unable to discover google-tests in %r: %s. Process output: %s"
+ % (path, sys.exc_info()[1], exc.output))
raise StopIteration
nested_tests = []
- for ln in lines:
+ for ln in output.splitlines(False): # Don't keep newlines.
+ ln = lit.util.to_string(ln)
+
+ if 'Running main() from gtest_main.cc' in ln:
+ # Upstream googletest prints this to stdout prior to running
+ # tests. LLVM removed that print statement in r61540, but we
+ # handle it here in case upstream googletest is being used.
+ continue
+
# The test name list includes trailing comments beginning with
# a '#' on some lines, so skip those. We don't support test names
# that use escaping to embed '#' into their name as the names come
@@ -52,12 +57,6 @@ class GoogleTest(TestFormat):
if not ln.lstrip():
continue
- if 'Running main() from gtest_main.cc' in ln:
- # Upstream googletest prints this to stdout prior to running
- # tests. LLVM removed that print statement in r61540, but we
- # handle it here in case upstream googletest is being used.
- continue
-
index = 0
while ln[index*2:index*2+2] == ' ':
index += 1
@@ -75,38 +74,19 @@ class GoogleTest(TestFormat):
else:
yield ''.join(nested_tests) + ln
- # Note: path_in_suite should not include the executable name.
- def getTestsInExecutable(self, testSuite, path_in_suite, execpath,
- litConfig, localConfig):
- if not execpath.endswith(self.test_suffix):
- return
- (dirname, basename) = os.path.split(execpath)
- # Discover the tests in this executable.
- for testname in self.getGTestTests(execpath, litConfig, localConfig):
- testPath = path_in_suite + (basename, testname)
- yield lit.Test.Test(testSuite, testPath, localConfig, file_path=execpath)
-
def getTestsInDirectory(self, testSuite, path_in_suite,
litConfig, localConfig):
source_path = testSuite.getSourcePath(path_in_suite)
- for filename in os.listdir(source_path):
- filepath = os.path.join(source_path, filename)
- if os.path.isdir(filepath):
- # Iterate over executables in a directory.
- if not os.path.normcase(filename) in self.test_sub_dir:
- continue
- dirpath_in_suite = path_in_suite + (filename, )
- for subfilename in os.listdir(filepath):
- execpath = os.path.join(filepath, subfilename)
- for test in self.getTestsInExecutable(
- testSuite, dirpath_in_suite, execpath,
- litConfig, localConfig):
- yield test
- elif ('.' in self.test_sub_dir):
- for test in self.getTestsInExecutable(
- testSuite, path_in_suite, filepath,
- litConfig, localConfig):
- yield test
+ for subdir in self.test_sub_dirs:
+ for fn in lit.util.listdir_files(os.path.join(source_path, subdir),
+ suffixes={self.test_suffix}):
+ # Discover the tests in this executable.
+ execpath = os.path.join(source_path, subdir, fn)
+ testnames = self.getGTestTests(execpath, litConfig, localConfig)
+ for testname in testnames:
+ testPath = path_in_suite + (subdir, fn, testname)
+ yield lit.Test.Test(testSuite, testPath, localConfig,
+ file_path=execpath)
def execute(self, test, litConfig):
testPath,testName = os.path.split(test.getSourcePath())
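
The discovery code above rebuilds qualified test names from the indentation of the '--gtest_list_tests' output. A standalone sketch of that reconstruction, run on a made-up listing (the listing text is illustrative):

    sample = ('FooTest.\n'
              '  DoesBar\n'
              '  DoesBaz\n'
              'TypedTest/0.  # TypeParam = int\n'
              '  Works\n')

    nested = []
    for ln in sample.splitlines():
        ln = ln.split('#', 1)[0].rstrip()   # drop trailing '#' comments
        if not ln.lstrip():
            continue
        depth = 0
        while ln[depth*2:depth*2+2] == '  ':
            depth += 1
        nested = nested[:depth]             # pop suites we have dedented past
        ln = ln[depth*2:]
        if ln.endswith('.'):
            nested.append(ln)               # a (possibly nested) suite name
        else:
            print(''.join(nested) + ln)
    # Prints: FooTest.DoesBar, FooTest.DoesBaz, TypedTest/0.Works
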
diff --git a/utils/lit/lit/formats/shtest.py b/utils/lit/lit/formats/shtest.py
index 30a6a3310b01..01ecd192092e 100644
--- a/utils/lit/lit/formats/shtest.py
+++ b/utils/lit/lit/formats/shtest.py
@@ -1,12 +1,48 @@
from __future__ import absolute_import
+import os
+
+import lit.Test
import lit.TestRunner
-from .base import FileBasedTest
+import lit.util
+from .base import TestFormat
+
+class ShTest(TestFormat):
+ """ShTest is a format with one file per test.
+
+ This is the primary format for regression tests as described in the LLVM
+ testing guide:
+
+ http://llvm.org/docs/TestingGuide.html
+
+ The ShTest files contain some number of shell-like command pipelines, along
+ with assertions about what should be in the output.
+ """
-class ShTest(FileBasedTest):
def __init__(self, execute_external = False):
+ """Initializer.
+
+ The 'execute_external' argument controls whether lit uses its internal
+ logic for command pipelines, or passes the command to a shell
+ subprocess.
+
+ Args:
+ execute_external: (optional) If true, use shell subprocesses instead
+ of lit's internal pipeline logic.
+ """
self.execute_external = execute_external
+ def getTestsInDirectory(self, testSuite, path_in_suite,
+ litConfig, localConfig):
+ """Yields test files matching 'suffixes' from the localConfig."""
+ file_matches = lit.util.listdir_files(
+ testSuite.getSourcePath(path_in_suite),
+ localConfig.suffixes, localConfig.excludes)
+ for filename in file_matches:
+ yield lit.Test.Test(testSuite, path_in_suite + (filename,),
+ localConfig)
+
def execute(self, test, litConfig):
+ """Interprets and runs the given test file, and returns the result."""
return lit.TestRunner.executeShTest(test, litConfig,
self.execute_external)
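
For context, a test suite selects this format from its lit.cfg. A typical sketch, following lit's documented configuration conventions ('config' is the object lit injects when executing the file):

    import lit.formats

    config.name = 'MyTests'
    config.test_format = lit.formats.ShTest(execute_external=False)
    config.suffixes = ['.ll', '.test']
    config.excludes = ['Inputs']
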
diff --git a/utils/lit/lit/run.py b/utils/lit/lit/run.py
index aa4fdc18b877..1290c142c834 100644
--- a/utils/lit/lit/run.py
+++ b/utils/lit/lit/run.py
@@ -24,140 +24,6 @@ def abort_now():
else:
os.kill(0, 9)
-###
-# Test Execution Implementation
-
-class LockedValue(object):
- def __init__(self, value):
- self.lock = threading.Lock()
- self._value = value
-
- def _get_value(self):
- self.lock.acquire()
- try:
- return self._value
- finally:
- self.lock.release()
-
- def _set_value(self, value):
- self.lock.acquire()
- try:
- self._value = value
- finally:
- self.lock.release()
-
- value = property(_get_value, _set_value)
-
-class TestProvider(object):
- def __init__(self, queue_impl, canceled_flag):
- self.canceled_flag = canceled_flag
-
- # Create a shared queue to provide the test indices.
- self.queue = queue_impl()
-
- def queue_tests(self, tests, num_jobs):
- for i in range(len(tests)):
- self.queue.put(i)
- for i in range(num_jobs):
- self.queue.put(None)
-
- def cancel(self):
- self.canceled_flag.value = 1
-
- def get(self):
- # Check if we are canceled.
- if self.canceled_flag.value:
- return None
-
- # Otherwise take the next test.
- return self.queue.get()
-
-class Tester(object):
- def __init__(self, run_instance, provider, consumer):
- self.run_instance = run_instance
- self.provider = provider
- self.consumer = consumer
-
- def run(self):
- while True:
- item = self.provider.get()
- if item is None:
- break
- self.run_test(item)
- self.consumer.task_finished()
-
- def run_test(self, test_index):
- test = self.run_instance.tests[test_index]
- try:
- execute_test(test, self.run_instance.lit_config,
- self.run_instance.parallelism_semaphores)
- except KeyboardInterrupt:
- # This is a sad hack. Unfortunately subprocess goes
- # bonkers with ctrl-c and we start forking merrily.
- print('\nCtrl-C detected, goodbye.')
- abort_now()
- self.consumer.update(test_index, test)
-
-class ThreadResultsConsumer(object):
- def __init__(self, display):
- self.display = display
- self.lock = threading.Lock()
-
- def update(self, test_index, test):
- self.lock.acquire()
- try:
- self.display.update(test)
- finally:
- self.lock.release()
-
- def task_finished(self):
- pass
-
- def handle_results(self):
- pass
-
-class MultiprocessResultsConsumer(object):
- def __init__(self, run, display, num_jobs):
- self.run = run
- self.display = display
- self.num_jobs = num_jobs
- self.queue = multiprocessing.Queue()
-
- def update(self, test_index, test):
- # This method is called in the child processes, and communicates the
- # results to the actual display implementation via an output queue.
- self.queue.put((test_index, test.result))
-
- def task_finished(self):
- # This method is called in the child processes, and communicates that
- # individual tasks are complete.
- self.queue.put(None)
-
- def handle_results(self):
- # This method is called in the parent, and consumes the results from the
- # output queue and dispatches to the actual display. The method will
- # complete after each of num_jobs tasks has signalled completion.
- completed = 0
- while completed != self.num_jobs:
- # Wait for a result item.
- item = self.queue.get()
- if item is None:
- completed += 1
- continue
-
- # Update the test result in the parent process.
- index,result = item
- test = self.run.tests[index]
- test.result = result
-
- self.display.update(test)
-
-def run_one_tester(run, provider, display):
- tester = Tester(run, provider, display)
- tester.run()
-
-###
-
class _Display(object):
def __init__(self, display, provider, maxFailures):
self.display = display
@@ -170,47 +36,6 @@ class _Display(object):
if self.failedCount == self.maxFailures:
self.provider.cancel()
-def handleFailures(provider, consumer, maxFailures):
- consumer.display = _Display(consumer.display, provider, maxFailures)
-
-def execute_test(test, lit_config, parallelism_semaphores):
- """Execute one test"""
- pg = test.config.parallelism_group
- if callable(pg):
- pg = pg(test)
-
- result = None
- semaphore = None
- try:
- if pg:
- semaphore = parallelism_semaphores[pg]
- if semaphore:
- semaphore.acquire()
- start_time = time.time()
- result = test.config.test_format.execute(test, lit_config)
- # Support deprecated result from execute() which returned the result
- # code and additional output as a tuple.
- if isinstance(result, tuple):
- code, output = result
- result = lit.Test.Result(code, output)
- elif not isinstance(result, lit.Test.Result):
- raise ValueError("unexpected result from test execution")
- result.elapsed = time.time() - start_time
- except KeyboardInterrupt:
- raise
- except:
- if lit_config.debug:
- raise
- output = 'Exception during script execution:\n'
- output += traceback.format_exc()
- output += '\n'
- result = lit.Test.Result(lit.Test.UNRESOLVED, output)
- finally:
- if semaphore:
- semaphore.release()
-
- test.setResult(result)
-
class Run(object):
"""
This class represents a concrete, configured testing run.
@@ -221,7 +46,8 @@ class Run(object):
self.tests = tests
def execute_test(self, test):
- return execute_test(test, self.lit_config, self.parallelism_semaphores)
+ return _execute_test_impl(test, self.lit_config,
+ self.parallelism_semaphores)
def execute_tests(self, display, jobs, max_time=None):
"""
@@ -350,6 +176,44 @@ class Run(object):
self.failure_count == self.lit_config.maxFailures:
self.hit_max_failures = True
+def _execute_test_impl(test, lit_config, parallelism_semaphores):
+ """Execute one test"""
+ pg = test.config.parallelism_group
+ if callable(pg):
+ pg = pg(test)
+
+ result = None
+ semaphore = None
+ try:
+ if pg:
+ semaphore = parallelism_semaphores[pg]
+ if semaphore:
+ semaphore.acquire()
+ start_time = time.time()
+ result = test.config.test_format.execute(test, lit_config)
+ # Support deprecated result from execute() which returned the result
+ # code and additional output as a tuple.
+ if isinstance(result, tuple):
+ code, output = result
+ result = lit.Test.Result(code, output)
+ elif not isinstance(result, lit.Test.Result):
+ raise ValueError("unexpected result from test execution")
+ result.elapsed = time.time() - start_time
+ except KeyboardInterrupt:
+ raise
+ except:
+ if lit_config.debug:
+ raise
+ output = 'Exception during script execution:\n'
+ output += traceback.format_exc()
+ output += '\n'
+ result = lit.Test.Result(lit.Test.UNRESOLVED, output)
+ finally:
+ if semaphore:
+ semaphore.release()
+
+ test.setResult(result)
+
child_lit_config = None
child_parallelism_semaphores = None
@@ -375,7 +239,7 @@ def worker_run_one_test(test_index, test):
the display.
"""
try:
- execute_test(test, child_lit_config, child_parallelism_semaphores)
+ _execute_test_impl(test, child_lit_config, child_parallelism_semaphores)
return (test_index, test)
except KeyboardInterrupt as e:
# If a worker process gets an interrupt, abort it immediately.
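
_execute_test_impl above gates each test on an optional per-group semaphore. A sketch of how a configuration might throttle expensive tests this way (the group name is illustrative; parallelism_groups/parallelism_group follow lit's configuration conventions):

    # In the top-level lit site configuration:
    lit_config.parallelism_groups['expensive'] = 4   # at most 4 at once

    # In the lit.cfg of the affected suite:
    config.parallelism_group = 'expensive'

    # A callable is also accepted and evaluated per test:
    config.parallelism_group = (
        lambda test: 'expensive' if test.getSourcePath().endswith('.big')
        else None)
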
diff --git a/utils/lit/lit/util.py b/utils/lit/lit/util.py
index 8991588a868d..1819d4d1c34f 100644
--- a/utils/lit/lit/util.py
+++ b/utils/lit/lit/util.py
@@ -8,24 +8,57 @@ import subprocess
import sys
import threading
-def to_bytes(str):
- # Encode to UTF-8 to get binary data.
- if isinstance(str, bytes):
- return str
- return str.encode('utf-8')
-
-def to_string(bytes):
- if isinstance(bytes, str):
- return bytes
- return to_bytes(bytes)
-
-def convert_string(bytes):
+def to_bytes(s):
+ """Return the parameter as type 'bytes', possibly encoding it.
+
+ In Python2, the 'bytes' type is the same as 'str'. In Python3, they are
+ distinct.
+ """
+ if isinstance(s, bytes):
+ # In Python2, this branch is taken for both 'str' and 'bytes'.
+ # In Python3, this branch is taken only for 'bytes'.
+ return s
+ # In Python2, 's' is a 'unicode' object.
+ # In Python3, 's' is a 'str' object.
+ # Encode to UTF-8 to get 'bytes' data.
+ return s.encode('utf-8')
+
+def to_string(b):
+ """Return the parameter as type 'str', possibly encoding it.
+
+ In Python2, the 'str' type is the same as 'bytes'. In Python3, the
+ 'str' type is (essentially) Python2's 'unicode' type, and 'bytes' is
+ distinct.
+ """
+ if isinstance(b, str):
+ # In Python2, this branch is taken for types 'str' and 'bytes'.
+ # In Python3, this branch is taken only for 'str'.
+ return b
+ if isinstance(b, bytes):
+ # In Python2, this branch is never taken ('bytes' is handled as 'str').
+ # In Python3, this is true only for 'bytes'.
+ try:
+ return b.decode('utf-8')
+ except UnicodeDecodeError:
+ # If the value is not valid Unicode, return the default
+            # If the value is not valid Unicode, return the default
+            # repr-like encoding instead.
+
+ # By this point, here's what we *don't* have:
+ #
+ # - In Python2:
+ # - 'str' or 'bytes' (1st branch above)
+ # - In Python3:
+ # - 'str' (1st branch above)
+ # - 'bytes' (2nd branch above)
+ #
+ # The last type we might expect is the Python2 'unicode' type. There is no
+ # 'unicode' type in Python3 (all the Python3 cases were already handled). In
+ # order to get a 'str' object, we need to encode the 'unicode' object.
try:
- return to_string(bytes.decode('utf-8'))
- except AttributeError: # 'str' object has no attribute 'decode'.
- return str(bytes)
- except UnicodeError:
- return str(bytes)
+        return b.encode('utf-8')
+ except AttributeError:
+ raise TypeError('not sure how to convert %s to %s' % (type(b), str))
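
A quick illustration of the conversions above, using Python 3 semantics (the byte values are examples only):

    to_bytes(u'caf\xe9')        # -> b'caf\xc3\xa9' (encoded to UTF-8)
    to_bytes(b'caf\xc3\xa9')    # -> returned unchanged, already bytes
    to_string(b'caf\xc3\xa9')   # -> 'caf\xe9', decoded from UTF-8
    to_string(b'\xff\xfe')      # invalid UTF-8 -> falls back to str(...)
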
def detectCPUs():
"""
@@ -39,7 +72,8 @@ def detectCPUs():
if isinstance(ncpus, int) and ncpus > 0:
return ncpus
else: # OSX:
- return int(capture(['sysctl', '-n', 'hw.ncpu']))
+ return int(subprocess.check_output(['sysctl', '-n', 'hw.ncpu'],
+ stderr=subprocess.STDOUT))
# Windows:
if "NUMBER_OF_PROCESSORS" in os.environ:
ncpus = int(os.environ["NUMBER_OF_PROCESSORS"])
@@ -67,20 +101,44 @@ def mkdir_p(path):
if e.errno != errno.EEXIST:
raise
-def capture(args, env=None):
- """capture(command) - Run the given command (or argv list) in a shell and
- return the standard output. Raises a CalledProcessError if the command
- exits with a non-zero status."""
- p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
- env=env)
- out, err = p.communicate()
- out = convert_string(out)
- err = convert_string(err)
- if p.returncode != 0:
- raise subprocess.CalledProcessError(cmd=args,
- returncode=p.returncode,
- output="{}\n{}".format(out, err))
- return out
+def listdir_files(dirname, suffixes=None, exclude_filenames=None):
+ """Yields files in a directory.
+
+ Filenames that are not excluded by rules below are yielded one at a time, as
+ basenames (i.e., without dirname).
+
+ Files starting with '.' are always skipped.
+
+ If 'suffixes' is not None, then only filenames ending with one of its
+ members will be yielded. These can be extensions, like '.exe', or strings,
+    like 'Test'. (It is a plain string-suffix check; so an empty sequence will
+    yield nothing, but a single empty string will yield all filenames.)
+
+ If 'exclude_filenames' is not None, then none of the file basenames in it
+ will be yielded.
+
+ If specified, the containers for 'suffixes' and 'exclude_filenames' must
+ support membership checking for strs.
+
+ Args:
+ dirname: a directory path.
+ suffixes: (optional) a sequence of strings (set, list, etc.).
+ exclude_filenames: (optional) a sequence of strings.
+
+ Yields:
+ Filenames as returned by os.listdir (generally, str).
+ """
+ if exclude_filenames is None:
+ exclude_filenames = set()
+ if suffixes is None:
+ suffixes = {''}
+ for filename in os.listdir(dirname):
+ if (os.path.isdir(os.path.join(dirname, filename)) or
+ filename.startswith('.') or
+ filename in exclude_filenames or
+ not any(filename.endswith(sfx) for sfx in suffixes)):
+ continue
+ yield filename
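
A usage sketch for the helper above; the directory contents are hypothetical:

    # Suppose /suite contains 'a.ll', 'b.ll', 'c.txt', '.hidden' and a
    # subdirectory 'Inputs'. Directories, dotfiles, excluded names and
    # non-matching suffixes are all skipped:
    list(listdir_files('/suite', suffixes={'.ll'},
                       exclude_filenames={'b.ll'}))
    # -> ['a.ll']   (order follows os.listdir)
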
def which(command, paths = None):
"""which(command, [paths]) - Look up the given command in the paths string
@@ -233,8 +291,8 @@ def executeCommand(command, cwd=None, env=None, input=None, timeout=0):
timerObject.cancel()
# Ensure the resulting output is always of string type.
- out = convert_string(out)
- err = convert_string(err)
+ out = to_string(out)
+ err = to_string(err)
if hitTimeOut[0]:
raise ExecuteCommandTimeoutException(
diff --git a/utils/opt-viewer/opt-diff.py b/utils/opt-viewer/opt-diff.py
index 2b5d1bbfabbe..9e921f8488d3 100755
--- a/utils/opt-viewer/opt-diff.py
+++ b/utils/opt-viewer/opt-diff.py
@@ -44,20 +44,21 @@ if __name__ == '__main__':
default=cpu_count(),
type=int,
help='Max job count (defaults to %(default)s, the current CPU count)')
+ parser.add_argument(
+ '--no-progress-indicator',
+ '-n',
+ action='store_true',
+ default=False,
+ help='Do not display any indicator of how many YAML files were read.')
parser.add_argument('--output', '-o', default='diff.opt.yaml')
args = parser.parse_args()
- if args.jobs == 1:
- pmap = map
- else:
- pool = Pool(processes=args.jobs)
- pmap = pool.map
-
files1 = find_files(args.yaml_dir_or_file_1)
files2 = find_files(args.yaml_dir_or_file_2)
- all_remarks1, _, _ = optrecord.gather_results(pmap, files1)
- all_remarks2, _, _ = optrecord.gather_results(pmap, files2)
+ print_progress = not args.no_progress_indicator
+ all_remarks1, _, _ = optrecord.gather_results(files1, args.jobs, print_progress)
+ all_remarks2, _, _ = optrecord.gather_results(files2, args.jobs, print_progress)
added = set(all_remarks2.values()) - set(all_remarks1.values())
removed = set(all_remarks1.values()) - set(all_remarks2.values())
@@ -66,5 +67,5 @@ if __name__ == '__main__':
r.Added = True
for r in removed:
r.Added = False
- stream = file(args.output, 'w')
- yaml.dump_all(added | removed, stream)
+ with open(args.output, 'w') as stream:
+ yaml.dump_all(added | removed, stream)
diff --git a/utils/opt-viewer/opt-stats.py b/utils/opt-viewer/opt-stats.py
index 79e5c03eca9f..a7e598fdfd02 100755
--- a/utils/opt-viewer/opt-stats.py
+++ b/utils/opt-viewer/opt-stats.py
@@ -22,15 +22,19 @@ if __name__ == '__main__':
default=cpu_count(),
type=int,
help='Max job count (defaults to %(default)s, the current CPU count)')
+ parser.add_argument(
+ '--no-progress-indicator',
+ '-n',
+ action='store_true',
+ default=False,
+ help='Do not display any indicator of how many YAML files were read.')
args = parser.parse_args()
- if args.jobs == 1:
- pmap = map
- else:
- pool = Pool(processes=args.jobs)
- pmap = pool.map
-
- all_remarks, file_remarks, _ = optrecord.gather_results(pmap, args.yaml_files)
+ print_progress = not args.no_progress_indicator
+ all_remarks, file_remarks, _ = optrecord.gather_results(
+ args.yaml_files, args.jobs, print_progress)
+ if print_progress:
+ print('\n')
bypass = defaultdict(int)
byname = defaultdict(int)
diff --git a/utils/opt-viewer/opt-viewer.py b/utils/opt-viewer/opt-viewer.py
index 3f5503f26b1f..5e5daf7feb0d 100755
--- a/utils/opt-viewer/opt-viewer.py
+++ b/utils/opt-viewer/opt-viewer.py
@@ -2,24 +2,28 @@
from __future__ import print_function
-desc = '''Generate HTML output to visualize optimization records from the YAML files
-generated with -fsave-optimization-record and -fdiagnostics-show-hotness.
-
-The tools requires PyYAML and Pygments Python packages.'''
-
-import optrecord
-import functools
-from multiprocessing import Pool
-from multiprocessing import Lock, cpu_count
-import errno
import argparse
+import cgi
+import errno
+import functools
+from multiprocessing import cpu_count
import os.path
import re
import shutil
+
from pygments import highlight
from pygments.lexers.c_cpp import CppLexer
from pygments.formatters import HtmlFormatter
-import cgi
+
+import optpmap
+import optrecord
+
+
+desc = '''Generate HTML output to visualize optimization records from the YAML files
+generated with -fsave-optimization-record and -fdiagnostics-show-hotness.
+
+The tool requires the PyYAML and Pygments Python packages.'''
+
# This allows passing the global context to the child processes.
class Context:
@@ -169,7 +173,7 @@ def _render_file(source_dir, output_dir, ctx, entry):
def map_remarks(all_remarks):
# Set up a map between function names and their source location for
# function where inlining happened
- for remark in all_remarks.itervalues():
+ for remark in optrecord.itervalues(all_remarks):
if isinstance(remark, optrecord.Passed) and remark.Pass == "inline" and remark.Name == "Inlined":
for arg in remark.Args:
caller = arg.get('Caller')
@@ -177,7 +181,13 @@ def map_remarks(all_remarks):
context.caller_loc[caller] = arg['DebugLoc']
-def generate_report(pmap, all_remarks, file_remarks, source_dir, output_dir, should_display_hotness):
+def generate_report(all_remarks,
+ file_remarks,
+ source_dir,
+ output_dir,
+ should_display_hotness,
+ num_jobs,
+ should_print_progress):
try:
os.makedirs(output_dir)
except OSError as e:
@@ -187,12 +197,17 @@ def generate_report(pmap, all_remarks, file_remarks, source_dir, output_dir, sho
raise
_render_file_bound = functools.partial(_render_file, source_dir, output_dir, context)
- pmap(_render_file_bound, file_remarks.items())
+ if should_print_progress:
+ print('Rendering HTML files...')
+ optpmap.pmap(_render_file_bound,
+ file_remarks.items(),
+ num_jobs,
+ should_print_progress)
if should_display_hotness:
- sorted_remarks = sorted(all_remarks.itervalues(), key=lambda r: (r.Hotness, r.File, r.Line, r.Column, r.__dict__), reverse=True)
+ sorted_remarks = sorted(optrecord.itervalues(all_remarks), key=lambda r: (r.Hotness, r.File, r.Line, r.Column, r.PassWithDiffPrefix, r.yaml_tag, r.Function), reverse=True)
else:
- sorted_remarks = sorted(all_remarks.itervalues(), key=lambda r: (r.File, r.Line, r.Column, r.__dict__))
+ sorted_remarks = sorted(optrecord.itervalues(all_remarks), key=lambda r: (r.File, r.Line, r.Column, r.PassWithDiffPrefix, r.yaml_tag, r.Function))
IndexRenderer(args.output_dir).render(sorted_remarks)
shutil.copy(os.path.join(os.path.dirname(os.path.realpath(__file__)),
@@ -202,7 +217,13 @@ def generate_report(pmap, all_remarks, file_remarks, source_dir, output_dir, sho
if __name__ == '__main__':
parser = argparse.ArgumentParser(description=desc)
parser.add_argument('yaml_files', nargs='+')
- parser.add_argument('output_dir')
+ parser.add_argument(
+ '--output-dir',
+ '-o',
+ default='html',
+ help='Path to a directory where generated HTML files will be output. '
+ 'If the directory does not already exist, it will be created. '
+ '"%(default)s" by default.')
parser.add_argument(
'--jobs',
'-j',
@@ -214,16 +235,25 @@ if __name__ == '__main__':
'-s',
default='',
help='set source directory')
+ parser.add_argument(
+ '--no-progress-indicator',
+ '-n',
+ action='store_true',
+ default=False,
+ help='Do not display any indicator of how many YAML files were read '
+ 'or rendered into HTML.')
args = parser.parse_args()
- if args.jobs == 1:
- pmap = map
- else:
- pool = Pool(processes=args.jobs)
- pmap = pool.map
-
- all_remarks, file_remarks, should_display_hotness = optrecord.gather_results(pmap, args.yaml_files)
+ print_progress = not args.no_progress_indicator
+ all_remarks, file_remarks, should_display_hotness = \
+ optrecord.gather_results(args.yaml_files, args.jobs, print_progress)
map_remarks(all_remarks)
- generate_report(pmap, all_remarks, file_remarks, args.source_dir, args.output_dir, should_display_hotness)
+ generate_report(all_remarks,
+ file_remarks,
+ args.source_dir,
+ args.output_dir,
+ should_display_hotness,
+ args.jobs,
+ print_progress)
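
With the argument changes above, a typical invocation becomes something like 'opt-viewer.py -j 8 -o report_dir foo.opt.yaml' (a hypothetical example): the output directory, previously a required positional argument, is now the --output-dir/-o option and defaults to 'html'.
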
diff --git a/utils/opt-viewer/optpmap.py b/utils/opt-viewer/optpmap.py
new file mode 100644
index 000000000000..01e848e03976
--- /dev/null
+++ b/utils/opt-viewer/optpmap.py
@@ -0,0 +1,53 @@
+import sys
+import multiprocessing
+
+
+_current = None
+_total = None
+
+
+def _init(current, total):
+ global _current
+ global _total
+ _current = current
+ _total = total
+
+
+def _wrapped_func(func_and_args):
+ func, argument, should_print_progress = func_and_args
+
+ if should_print_progress:
+ with _current.get_lock():
+ _current.value += 1
+ sys.stdout.write('\r\t{} of {}'.format(_current.value, _total.value))
+
+ return func(argument)
+
+
+def pmap(func, iterable, processes, should_print_progress, *args, **kwargs):
+ """
+ A parallel map function that reports on its progress.
+
+    Applies `func` to every item of `iterable` and returns a list of the
+    results. If `processes` is greater than one, a process pool is used to run
+    the functions in parallel. `should_print_progress` is a boolean that
+    indicates whether a progress line 'N of M' should be printed as the
+    calls complete.
+ """
+ global _current
+ global _total
+ _current = multiprocessing.Value('i', 0)
+ _total = multiprocessing.Value('i', len(iterable))
+
+ func_and_args = [(func, arg, should_print_progress,) for arg in iterable]
+ if processes <= 1:
+ result = map(_wrapped_func, func_and_args, *args, **kwargs)
+ else:
+ pool = multiprocessing.Pool(initializer=_init,
+ initargs=(_current, _total,),
+ processes=processes)
+ result = pool.map(_wrapped_func, func_and_args, *args, **kwargs)
+
+ if should_print_progress:
+ sys.stdout.write('\r')
+ return result
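
A small usage sketch for the helper above; the worker function and file names are made up. Note that when processes > 1 the function is dispatched via multiprocessing and must be picklable, i.e. defined at module top level:

    import optpmap

    def count_lines(path):
        with open(path) as f:
            return sum(1 for _ in f)

    results = optpmap.pmap(count_lines,
                           ['a.opt.yaml', 'b.opt.yaml', 'c.opt.yaml'],
                           processes=2, should_print_progress=True)
    # Writes '\r\t1 of 3' ... '\r\t3 of 3' to stdout as items finish,
    # then returns the three line counts.
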
diff --git a/utils/opt-viewer/optrecord.py b/utils/opt-viewer/optrecord.py
index 6dc1a32e536a..61ed9626cffa 100644
--- a/utils/opt-viewer/optrecord.py
+++ b/utils/opt-viewer/optrecord.py
@@ -10,15 +10,14 @@ except ImportError:
print("For faster parsing, you may want to install libYAML for PyYAML")
from yaml import Loader
-import functools
-from collections import defaultdict
-import itertools
-from multiprocessing import Pool
-from multiprocessing import Lock, cpu_count
import cgi
+from collections import defaultdict
+import functools
+from multiprocessing import Lock
import subprocess
-import traceback
+import optpmap
+
p = subprocess.Popen(['c++filt', '-n'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
p_lock = Lock()
@@ -42,8 +41,9 @@ else:
def demangle(name):
with p_lock:
- p.stdin.write(name + '\n')
- return p.stdout.readline().rstrip()
+ p.stdin.write((name + '\n').encode('utf-8'))
+ p.stdin.flush()
+ return p.stdout.readline().rstrip().decode('utf-8')
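
The byte-level round trip that demangle() now performs can be reproduced in isolation (the mangled name is just an example):

    import subprocess

    p = subprocess.Popen(['c++filt', '-n'], stdin=subprocess.PIPE,
                         stdout=subprocess.PIPE)
    p.stdin.write(('_Z3foov' + '\n').encode('utf-8'))
    p.stdin.flush()
    print(p.stdout.readline().rstrip().decode('utf-8'))   # prints: foo()
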
def html_file_name(filename):
@@ -209,8 +209,11 @@ def get_remarks(input_file):
return max_hotness, all_remarks, file_remarks
-def gather_results(pmap, filenames):
- remarks = pmap(get_remarks, filenames)
+def gather_results(filenames, num_jobs, should_print_progress):
+ if should_print_progress:
+ print('Reading YAML files...')
+ remarks = optpmap.pmap(
+ get_remarks, filenames, num_jobs, should_print_progress)
max_hotness = max(entry[0] for entry in remarks)
def merge_file_remarks(file_remarks_job, all_remarks, merged):