author     Dimitry Andric <dim@FreeBSD.org>  2017-01-06 20:13:21 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2017-01-06 20:13:21 +0000
commit     7e7b6700743285c0af506ac6299ddf82ebd434b9 (patch)
tree       578d2ea1868b77f3dff145df7f8f3fe73272c09e
parent     4b570baa7e867c652fa7d690585098278082fae9 (diff)
download   src-7e7b6700743285c0af506ac6299ddf82ebd434b9.tar.gz
           src-7e7b6700743285c0af506ac6299ddf82ebd434b9.zip
Vendor import of llvm trunk r291274 (tag: vendor/llvm/llvm-trunk-r291274)
Notes:
    svn path=/vendor/llvm/dist/; revision=311532
    svn path=/vendor/llvm/llvm-trunk-r291274/; revision=311533; tag=vendor/llvm/llvm-trunk-r291274
-rwxr-xr-xcmake/config-ix.cmake7
-rwxr-xr-xcmake/modules/AddLLVM.cmake4
-rw-r--r--docs/CompileCudaWithLLVM.rst4
-rw-r--r--docs/Phabricator.rst2
-rw-r--r--include/llvm/Analysis/CGSCCPassManager.h2
-rw-r--r--include/llvm/Analysis/TargetTransformInfo.h19
-rw-r--r--include/llvm/Analysis/TargetTransformInfoImpl.h30
-rw-r--r--include/llvm/Bitcode/BitCodes.h6
-rw-r--r--include/llvm/Bitcode/BitstreamReader.h11
-rw-r--r--include/llvm/Bitcode/BitstreamWriter.h26
-rw-r--r--include/llvm/CodeGen/AsmPrinter.h7
-rw-r--r--include/llvm/CodeGen/BasicTTIImpl.h5
-rw-r--r--include/llvm/CodeGen/DIE.h9
-rw-r--r--include/llvm/CodeGen/GlobalISel/IRTranslator.h6
-rw-r--r--include/llvm/CodeGen/MachineBasicBlock.h2
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFDie.h58
-rw-r--r--include/llvm/ExecutionEngine/Orc/RawByteChannel.h2
-rw-r--r--include/llvm/IR/ModuleSummaryIndex.h112
-rw-r--r--include/llvm/IR/ModuleSummaryIndexYAML.h111
-rw-r--r--include/llvm/IR/PassManager.h250
-rw-r--r--include/llvm/LTO/LTO.h7
-rw-r--r--include/llvm/MC/MCTargetOptions.h10
-rw-r--r--include/llvm/Support/FileSystem.h19
-rw-r--r--include/llvm/Support/TarWriter.h32
-rw-r--r--include/llvm/Transforms/IPO/FunctionImport.h13
-rw-r--r--include/llvm/Transforms/IPO/LowerTypeTests.h4
-rw-r--r--include/llvm/Transforms/Utils/FunctionImportUtils.h18
-rw-r--r--include/llvm/module.modulemap1
-rw-r--r--lib/Analysis/ModuleSummaryAnalysis.cpp120
-rw-r--r--lib/Analysis/TargetTransformInfo.cpp5
-rw-r--r--lib/Bitcode/Reader/BitcodeReader.cpp16
-rw-r--r--lib/Bitcode/Reader/BitstreamReader.cpp22
-rw-r--r--lib/Bitcode/Reader/MetadataLoader.cpp400
-rw-r--r--lib/Bitcode/Writer/BitcodeWriter.cpp191
-rw-r--r--lib/CodeGen/AsmPrinter/ARMException.cpp3
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinter.cpp24
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp2
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCFIException.cpp2
-rw-r--r--lib/CodeGen/GlobalISel/IRTranslator.cpp44
-rw-r--r--lib/CodeGen/GlobalISel/RegisterBankInfo.cpp13
-rw-r--r--lib/CodeGen/IfConversion.cpp42
-rw-r--r--lib/CodeGen/MIRPrinter.cpp8
-rw-r--r--lib/CodeGen/MachineBasicBlock.cpp9
-rw-r--r--lib/CodeGen/MachineVerifier.cpp18
-rw-r--r--lib/CodeGen/RegisterScavenging.cpp5
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp17
-rw-r--r--lib/DebugInfo/DWARF/DWARFDie.cpp5
-rw-r--r--lib/Fuzzer/FuzzerDriver.cpp1
-rw-r--r--lib/Fuzzer/FuzzerFlags.def1
-rw-r--r--lib/Fuzzer/FuzzerIO.h3
-rw-r--r--lib/Fuzzer/FuzzerIOPosix.cpp6
-rw-r--r--lib/Fuzzer/FuzzerIOWindows.cpp2
-rw-r--r--lib/Fuzzer/FuzzerInternal.h1
-rw-r--r--lib/Fuzzer/FuzzerLoop.cpp5
-rw-r--r--lib/Fuzzer/FuzzerMerge.cpp12
-rw-r--r--lib/Fuzzer/FuzzerOptions.h1
-rw-r--r--lib/Fuzzer/FuzzerTraceState.cpp47
-rw-r--r--lib/Fuzzer/FuzzerUtilPosix.cpp6
-rw-r--r--lib/Fuzzer/FuzzerUtilWindows.cpp1
-rw-r--r--lib/Fuzzer/test/merge.test8
-rw-r--r--lib/LTO/LTO.cpp41
-rw-r--r--lib/LTO/ThinLTOCodeGenerator.cpp42
-rw-r--r--lib/Support/APInt.cpp2
-rw-r--r--lib/Support/CMakeLists.txt1
-rw-r--r--lib/Support/Host.cpp20
-rw-r--r--lib/Support/TarWriter.cpp166
-rw-r--r--lib/Support/Unix/Signals.inc2
-rw-r--r--lib/Target/AArch64/AArch64CollectLOH.cpp1123
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.cpp110
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.cpp190
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.h4
-rw-r--r--lib/Target/AArch64/AArch64InstructionSelector.cpp10
-rw-r--r--lib/Target/AArch64/AArch64InstructionSelector.h8
-rw-r--r--lib/Target/AArch64/AArch64MachineFunctionInfo.h48
-rw-r--r--lib/Target/AArch64/AArch64Subtarget.cpp1
-rw-r--r--lib/Target/AArch64/AArch64TargetMachine.cpp38
-rw-r--r--lib/Target/AArch64/AArch64TargetTransformInfo.cpp7
-rw-r--r--lib/Target/AArch64/AArch64TargetTransformInfo.h2
-rw-r--r--lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp89
-rw-r--r--lib/Target/AArch64/Disassembler/AArch64Disassembler.h9
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp13
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp27
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp3
-rw-r--r--lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp10
-rw-r--r--lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp2
-rw-r--r--lib/Target/ARM/ARMTargetTransformInfo.cpp7
-rw-r--r--lib/Target/ARM/ARMTargetTransformInfo.h3
-rw-r--r--lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp48
-rw-r--r--lib/Target/Lanai/Disassembler/LanaiDisassembler.h7
-rw-r--r--lib/Target/Lanai/InstPrinter/LanaiInstPrinter.h13
-rw-r--r--lib/Target/Lanai/LanaiISelLowering.cpp42
-rw-r--r--lib/Target/Lanai/LanaiRegisterInfo.h9
-rw-r--r--lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp12
-rw-r--r--lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp29
-rw-r--r--lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp15
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h2
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td3
-rw-r--r--lib/Target/PowerPC/PPCInstrFormats.td6
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td8
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp172
-rw-r--r--lib/Target/X86/X86TargetTransformInfo.cpp630
-rw-r--r--lib/Target/X86/X86TargetTransformInfo.h3
-rw-r--r--lib/Transforms/IPO/FunctionImport.cpp182
-rw-r--r--lib/Transforms/IPO/LowerTypeTests.cpp316
-rw-r--r--lib/Transforms/InstCombine/InstCombineCalls.cpp14
-rw-r--r--lib/Transforms/Instrumentation/AddressSanitizer.cpp96
-rw-r--r--lib/Transforms/Scalar/GVN.cpp14
-rw-r--r--lib/Transforms/Scalar/LICM.cpp37
-rw-r--r--lib/Transforms/Scalar/LoopIdiomRecognize.cpp2
-rw-r--r--lib/Transforms/Scalar/LoopSink.cpp3
-rw-r--r--lib/Transforms/Utils/FunctionImportUtils.cpp24
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp59
-rw-r--r--test/Analysis/CostModel/AArch64/bswap.ll70
-rw-r--r--test/Analysis/CostModel/AArch64/falkor.ll26
-rw-r--r--test/Analysis/CostModel/AArch64/gep.ll66
-rw-r--r--test/Analysis/CostModel/X86/arith.ll4
-rw-r--r--test/Analysis/CostModel/X86/shuffle-broadcast.ll140
-rw-r--r--test/Analysis/CostModel/X86/vdiv-cost.ll66
-rw-r--r--test/Analysis/CostModel/X86/vshift-ashr-cost.ll256
-rw-r--r--test/Analysis/CostModel/X86/vshift-lshr-cost.ll259
-rw-r--r--test/Analysis/CostModel/X86/vshift-shl-cost.ll261
-rw-r--r--test/Bitcode/summary_version.ll2
-rw-r--r--test/Bitcode/thinlto-function-summary.ll2
-rw-r--r--test/Bitcode/thinlto-summary-section.ll8
-rw-r--r--test/CodeGen/AArch64/GlobalISel/arm64-instructionselect.mir6
-rw-r--r--test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll88
-rw-r--r--test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll4
-rw-r--r--test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-collect-loh-str.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-collect-loh.ll17
-rw-r--r--test/CodeGen/AArch64/loh.mir193
-rw-r--r--test/CodeGen/AArch64/machine-scheduler.mir5
-rw-r--r--test/CodeGen/AMDGPU/hsa-func.ll3
-rw-r--r--test/CodeGen/AMDGPU/hsa.ll4
-rw-r--r--test/CodeGen/Generic/cfi-sections.ll39
-rw-r--r--test/CodeGen/MIR/AArch64/spill-fold.mir82
-rw-r--r--test/CodeGen/MIR/X86/basic-block-liveins.mir9
-rw-r--r--test/CodeGen/MIR/X86/machine-verifier.mir3
-rw-r--r--test/CodeGen/NVPTX/tid-range.ll18
-rw-r--r--test/CodeGen/X86/GlobalISel/irtranslator-call.ll2
-rw-r--r--test/CodeGen/X86/avx512-intrinsics-upgrade.ll7
-rw-r--r--test/CodeGen/X86/avx512-trunc.ll107
-rw-r--r--test/CodeGen/X86/cmov.ll18
-rw-r--r--test/CodeGen/X86/lower-vec-shift-2.ll18
-rw-r--r--test/CodeGen/X86/shuffle-vs-trunc-128.ll481
-rw-r--r--test/CodeGen/X86/shuffle-vs-trunc-256.ll629
-rw-r--r--test/CodeGen/X86/shuffle-vs-trunc-512.ll537
-rw-r--r--test/CodeGen/X86/tail-call-conditional.mir3
-rw-r--r--test/CodeGen/X86/vector-rotate-128.ll2
-rw-r--r--test/CodeGen/X86/vector-shift-ashr-128.ll100
-rw-r--r--test/CodeGen/X86/vector-shift-ashr-256.ll74
-rw-r--r--test/CodeGen/X86/vector-shift-ashr-512.ll321
-rw-r--r--test/CodeGen/X86/vector-shift-lshr-128.ll74
-rw-r--r--test/CodeGen/X86/vector-shift-lshr-256.ll74
-rw-r--r--test/CodeGen/X86/vector-shift-lshr-512.ll317
-rw-r--r--test/CodeGen/X86/vector-shift-shl-128.ll74
-rw-r--r--test/CodeGen/X86/vector-shift-shl-256.ll74
-rw-r--r--test/CodeGen/X86/vector-shift-shl-512.ll317
-rw-r--r--test/CodeGen/X86/vector-shuffle-128-v16.ll89
-rw-r--r--test/CodeGen/X86/vector-shuffle-128-v4.ll83
-rw-r--r--test/CodeGen/X86/vector-shuffle-128-v8.ll112
-rw-r--r--test/CodeGen/X86/vector-shuffle-masked.ll450
-rw-r--r--test/CodeGen/X86/vector-tzcnt-128.ll425
-rw-r--r--test/CodeGen/X86/vshift-4.ll2
-rw-r--r--test/DebugInfo/Generic/licm-hoist-debug-loc.ll75
-rw-r--r--test/Instrumentation/AddressSanitizer/asan-masked-load-store.ll76
-rw-r--r--test/MC/AsmParser/Inputs/function.x3
-rw-r--r--test/MC/AsmParser/Inputs/module.x3
-rw-r--r--test/MC/AsmParser/include.ll13
-rw-r--r--test/MC/Disassembler/PowerPC/ppc64-encoding-fp.txt18
-rw-r--r--test/MC/PowerPC/ppc64-encoding-fp.s32
-rw-r--r--test/ThinLTO/X86/Inputs/deadstrip.ll22
-rw-r--r--test/ThinLTO/X86/Inputs/lazyload_metadata.ll12
-rw-r--r--test/ThinLTO/X86/deadstrip.ll109
-rw-r--r--test/ThinLTO/X86/lazyload_metadata.ll54
-rw-r--r--test/Transforms/GVN/PRE/phi-translate.ll13
-rw-r--r--test/Transforms/InstCombine/amdgcn-intrinsics.ll34
-rw-r--r--test/Transforms/InstCombine/cos-intrinsic.ll55
-rw-r--r--test/Transforms/InstCombine/icmp-shl-nsw.ll218
-rw-r--r--test/Transforms/InstCombine/icmp.ll63
-rw-r--r--test/Transforms/InstSimplify/select.ll28
-rw-r--r--test/Transforms/LICM/scalar_promote.ll192
-rw-r--r--test/Transforms/LoopVectorize/X86/strided_load_cost.ll54
-rw-r--r--test/Transforms/LowerTypeTests/Inputs/import-unsat.yaml10
-rw-r--r--test/Transforms/LowerTypeTests/export-nothing.ll7
-rw-r--r--test/Transforms/LowerTypeTests/function-disjoint.ll6
-rw-r--r--test/Transforms/LowerTypeTests/function-ext.ll3
-rw-r--r--test/Transforms/LowerTypeTests/function.ll2
-rw-r--r--test/Transforms/LowerTypeTests/import-unsat.ll23
-rw-r--r--test/Transforms/LowerTypeTests/simple.ll2
-rw-r--r--test/Transforms/LowerTypeTests/single-offset.ll2
-rw-r--r--test/Transforms/LowerTypeTests/unsat.ll3
-rw-r--r--tools/dsymutil/DwarfLinker.cpp12
-rw-r--r--tools/llc/llc.cpp3
-rw-r--r--tools/llvm-config/llvm-config.cpp10
-rw-r--r--unittests/ADT/APFloatTest.cpp275
-rw-r--r--unittests/ADT/IntrusiveRefCntPtrTest.cpp4
-rw-r--r--unittests/Bitcode/BitstreamReaderTest.cpp4
-rw-r--r--unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp121
-rw-r--r--unittests/DebugInfo/DWARF/DwarfGenerator.cpp4
-rw-r--r--unittests/DebugInfo/DWARF/DwarfGenerator.h3
-rw-r--r--utils/lit/lit/formats/googletest.py8
-rw-r--r--utils/unittest/CMakeLists.txt6
-rw-r--r--utils/unittest/googletest/README.LLVM16
-rw-r--r--utils/unittest/googletest/include/gtest/gtest-death-test.h17
-rw-r--r--utils/unittest/googletest/include/gtest/gtest-message.h112
-rw-r--r--utils/unittest/googletest/include/gtest/gtest-param-test.h56
-rw-r--r--utils/unittest/googletest/include/gtest/gtest-printers.h335
-rw-r--r--utils/unittest/googletest/include/gtest/gtest-spi.h7
-rw-r--r--utils/unittest/googletest/include/gtest/gtest-test-part.h23
-rw-r--r--utils/unittest/googletest/include/gtest/gtest-typed-test.h11
-rw-r--r--utils/unittest/googletest/include/gtest/gtest.h565
-rw-r--r--utils/unittest/googletest/include/gtest/gtest_pred_impl.h12
-rw-r--r--utils/unittest/googletest/include/gtest/internal/custom/gtest-port.h69
-rw-r--r--utils/unittest/googletest/include/gtest/internal/custom/gtest-printers.h42
-rw-r--r--utils/unittest/googletest/include/gtest/internal/custom/gtest.h41
-rw-r--r--utils/unittest/googletest/include/gtest/internal/gtest-death-test-internal.h29
-rw-r--r--utils/unittest/googletest/include/gtest/internal/gtest-filepath.h16
-rw-r--r--utils/unittest/googletest/include/gtest/internal/gtest-internal.h403
-rw-r--r--utils/unittest/googletest/include/gtest/internal/gtest-linked_ptr.h22
-rw-r--r--utils/unittest/googletest/include/gtest/internal/gtest-param-util-generated.h679
-rw-r--r--utils/unittest/googletest/include/gtest/internal/gtest-param-util.h190
-rw-r--r--utils/unittest/googletest/include/gtest/internal/gtest-port-arch.h97
-rw-r--r--utils/unittest/googletest/include/gtest/internal/gtest-port.h1219
-rw-r--r--utils/unittest/googletest/include/gtest/internal/gtest-string.h217
-rw-r--r--utils/unittest/googletest/include/gtest/internal/gtest-tuple.h100
-rw-r--r--utils/unittest/googletest/include/gtest/internal/gtest-type-util.h21
-rw-r--r--utils/unittest/googletest/src/gtest-death-test.cc344
-rw-r--r--utils/unittest/googletest/src/gtest-filepath.cc43
-rw-r--r--utils/unittest/googletest/src/gtest-internal-inl.h332
-rw-r--r--utils/unittest/googletest/src/gtest-port.cc699
-rw-r--r--utils/unittest/googletest/src/gtest-printers.cc115
-rw-r--r--utils/unittest/googletest/src/gtest-test-part.cc12
-rw-r--r--utils/unittest/googletest/src/gtest-typed-test.cc42
-rw-r--r--utils/unittest/googletest/src/gtest.cc2012
235 files changed, 14791 insertions, 6255 deletions
diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake
index 530a5ddaab4d..d76f1293d02c 100755
--- a/cmake/config-ix.cmake
+++ b/cmake/config-ix.cmake
@@ -167,7 +167,10 @@ check_symbol_exists(futimens sys/stat.h HAVE_FUTIMENS)
check_symbol_exists(futimes sys/time.h HAVE_FUTIMES)
check_symbol_exists(posix_fallocate fcntl.h HAVE_POSIX_FALLOCATE)
# AddressSanitizer conflicts with lib/Support/Unix/Signals.inc
-if( HAVE_SIGNAL_H AND NOT LLVM_USE_SANITIZER MATCHES ".*Address.*")
+# Avoid sigaltstack on Apple platforms, where backtrace() cannot handle it
+# (rdar://7089625) and _Unwind_Backtrace is unusable because it cannot unwind
+# past the signal handler after an assertion failure (rdar://29866587).
+if( HAVE_SIGNAL_H AND NOT LLVM_USE_SANITIZER MATCHES ".*Address.*" AND NOT APPLE )
check_symbol_exists(sigaltstack signal.h HAVE_SIGALTSTACK)
endif()
if( HAVE_SYS_UIO_H )
@@ -314,6 +317,8 @@ else()
endif()
check_cxx_compiler_flag("-Wno-variadic-macros" SUPPORTS_NO_VARIADIC_MACROS_FLAG)
+check_cxx_compiler_flag("-Wno-gnu-zero-variadic-macro-arguments"
+ SUPPORTS_NO_GNU_ZERO_VARIADIC_MACRO_ARGUMENTS_FLAG)
set(USE_NO_MAYBE_UNINITIALIZED 0)
set(USE_NO_UNINITIALIZED 0)
diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake
index f35fcf444d20..fbef1d04eac4 100755
--- a/cmake/modules/AddLLVM.cmake
+++ b/cmake/modules/AddLLVM.cmake
@@ -1014,6 +1014,10 @@ function(add_unittest test_suite test_name)
if (SUPPORTS_NO_VARIADIC_MACROS_FLAG)
list(APPEND LLVM_COMPILE_FLAGS "-Wno-variadic-macros")
endif ()
+ # Some parts of gtest rely on this GNU extension, don't warn on it.
+ if(SUPPORTS_NO_GNU_ZERO_VARIADIC_MACRO_ARGUMENTS_FLAG)
+ list(APPEND LLVM_COMPILE_FLAGS "-Wno-gnu-zero-variadic-macro-arguments")
+ endif()
set(LLVM_REQUIRES_RTTI OFF)
diff --git a/docs/CompileCudaWithLLVM.rst b/docs/CompileCudaWithLLVM.rst
index af681aeead66..6ad8652cfc1d 100644
--- a/docs/CompileCudaWithLLVM.rst
+++ b/docs/CompileCudaWithLLVM.rst
@@ -35,8 +35,8 @@ by many Linux package managers; you probably need to install nvidia's package.
You will need CUDA 7.0, 7.5, or 8.0 to compile with clang.
-CUDA compilation is supported on Linux, and on MacOS as of XXXX-XX-XX. Windows
-support is planned but not yet in place.
+CUDA compilation is supported on Linux, on MacOS as of 2016-11-18, and on
+Windows as of 2017-01-05.
Invoking clang
--------------
diff --git a/docs/Phabricator.rst b/docs/Phabricator.rst
index 06a9c6af9b4d..8d1984b65cd9 100644
--- a/docs/Phabricator.rst
+++ b/docs/Phabricator.rst
@@ -132,7 +132,7 @@ committed to trunk. If you do not have commit access, someone has to
commit the change for you (with attribution). It is sufficient to add
a comment to the approved review indicating you cannot commit the patch
yourself. If you have commit access, there are multiple workflows to commit the
-change. Whichever method you follow it is recommend that your commit message
+change. Whichever method you follow it is recommended that your commit message
ends with the line:
::
diff --git a/include/llvm/Analysis/CGSCCPassManager.h b/include/llvm/Analysis/CGSCCPassManager.h
index 54ef1a688d37..6fbe532112b2 100644
--- a/include/llvm/Analysis/CGSCCPassManager.h
+++ b/include/llvm/Analysis/CGSCCPassManager.h
@@ -128,7 +128,7 @@ extern template class PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager,
/// \brief The CGSCC pass manager.
///
/// See the documentation for the PassManager template for details. It runs
-/// a sequency of SCC passes over each SCC that the manager is run over. This
+/// a sequence of SCC passes over each SCC that the manager is run over. This
/// typedef serves as a convenient way to refer to this construct.
typedef PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &,
CGSCCUpdateResult &>
diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h
index d583614284ff..b4a6c5c2fae0 100644
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -36,6 +36,8 @@ namespace llvm {
class Function;
class GlobalValue;
class Loop;
+class ScalarEvolution;
+class SCEV;
class Type;
class User;
class Value;
@@ -613,10 +615,11 @@ public:
/// merged into the instruction indexing mode. Some targets might want to
/// distinguish between address computation for memory operations on vector
/// types and scalar types. Such targets should override this function.
- /// The 'IsComplex' parameter is a hint that the address computation is likely
- /// to involve multiple instructions and as such unlikely to be merged into
- /// the address indexing mode.
- int getAddressComputationCost(Type *Ty, bool IsComplex = false) const;
+ /// The 'SE' parameter holds pointer for the scalar evolution object which
+ /// is used in order to get the Ptr step value in case of constant stride.
+ /// The 'Ptr' parameter holds SCEV of the access pointer.
+ int getAddressComputationCost(Type *Ty, ScalarEvolution *SE = nullptr,
+ const SCEV *Ptr = nullptr) const;
/// \returns The cost, if any, of keeping values of the given types alive
/// over a callsite.
@@ -795,7 +798,8 @@ public:
virtual int getCallInstrCost(Function *F, Type *RetTy,
ArrayRef<Type *> Tys) = 0;
virtual unsigned getNumberOfParts(Type *Tp) = 0;
- virtual int getAddressComputationCost(Type *Ty, bool IsComplex) = 0;
+ virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
+ const SCEV *Ptr) = 0;
virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
MemIntrinsicInfo &Info) = 0;
@@ -1044,8 +1048,9 @@ public:
unsigned getNumberOfParts(Type *Tp) override {
return Impl.getNumberOfParts(Tp);
}
- int getAddressComputationCost(Type *Ty, bool IsComplex) override {
- return Impl.getAddressComputationCost(Ty, IsComplex);
+ int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
+ const SCEV *Ptr) override {
+ return Impl.getAddressComputationCost(Ty, SE, Ptr);
}
unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
return Impl.getCostOfKeepingLiveOverCall(Tys);
diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h
index 68b38a7fa538..1d7edbaf7df0 100644
--- a/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -15,6 +15,7 @@
#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
@@ -370,7 +371,10 @@ public:
unsigned getNumberOfParts(Type *Tp) { return 0; }
- unsigned getAddressComputationCost(Type *Tp, bool) { return 0; }
+ unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *,
+ const SCEV *) {
+ return 0;
+ }
unsigned getReductionCost(unsigned, Type *, bool) { return 1; }
@@ -422,6 +426,30 @@ public:
VectorType *VecTy) const {
return VF;
}
+protected:
+ bool isStridedAccess(const SCEV *Ptr) {
+ return Ptr && isa<SCEVAddRecExpr>(Ptr);
+ }
+
+ const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
+ const SCEV *Ptr) {
+ if (!isStridedAccess(Ptr))
+ return nullptr;
+ const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
+ return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
+ }
+
+ bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
+ int64_t MergeDistance) {
+ const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
+ if (!Step)
+ return false;
+ APInt StrideVal = Step->getAPInt();
+ if (StrideVal.getBitWidth() > 64)
+ return false;
+ // FIXME: need to take absolute value for negtive stride case
+ return StrideVal.getSExtValue() < MergeDistance;
+ }
};
/// \brief CRTP base class for use as a mix-in that aids implementing
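For reference, a minimal sketch of how a target override of the re-signatured getAddressComputationCost hook could use the stride helpers added above; the class name MyTargetTTIImpl and the cost constants are illustrative assumptions, not part of this patch:

  // Sketch only: charge a higher address-computation cost for vector accesses
  // whose pointer is not a small constant-stride SCEV, using the new helpers.
  int MyTargetTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                                 const SCEV *Ptr) {
    unsigned NumVectorInstToHideOverhead = 10; // assumed penalty
    if (Ty->isVectorTy() && SE && Ptr &&
        !isConstantStridedAccessLessThan(SE, Ptr, /*MergeDistance=*/64))
      return NumVectorInstToHideOverhead;
    return 1; // assumed base cost
  }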
diff --git a/include/llvm/Bitcode/BitCodes.h b/include/llvm/Bitcode/BitCodes.h
index cfc7a1d7d6bd..bf21e146e771 100644
--- a/include/llvm/Bitcode/BitCodes.h
+++ b/include/llvm/Bitcode/BitCodes.h
@@ -18,7 +18,6 @@
#ifndef LLVM_BITCODE_BITCODES_H
#define LLVM_BITCODE_BITCODES_H
-#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/ErrorHandling.h"
@@ -166,11 +165,8 @@ template <> struct isPodLike<BitCodeAbbrevOp> { static const bool value=true; };
/// BitCodeAbbrev - This class represents an abbreviation record. An
/// abbreviation allows a complex record that has redundancy to be stored in a
/// specialized format instead of the fully-general, fully-vbr, format.
-class BitCodeAbbrev : public RefCountedBase<BitCodeAbbrev> {
+class BitCodeAbbrev {
SmallVector<BitCodeAbbrevOp, 32> OperandList;
- // Only RefCountedBase is allowed to delete.
- ~BitCodeAbbrev() = default;
- friend class RefCountedBase<BitCodeAbbrev>;
public:
unsigned getNumOperandInfos() const {
diff --git a/include/llvm/Bitcode/BitstreamReader.h b/include/llvm/Bitcode/BitstreamReader.h
index 4d95a6ce8a16..fc06eeefbf26 100644
--- a/include/llvm/Bitcode/BitstreamReader.h
+++ b/include/llvm/Bitcode/BitstreamReader.h
@@ -16,7 +16,6 @@
#define LLVM_BITCODE_BITSTREAMREADER_H
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Bitcode/BitCodes.h"
#include "llvm/Support/Endian.h"
@@ -42,7 +41,7 @@ public:
/// describe abbreviations that all blocks of the specified ID inherit.
struct BlockInfo {
unsigned BlockID;
- std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> Abbrevs;
+ std::vector<std::shared_ptr<BitCodeAbbrev>> Abbrevs;
std::string Name;
std::vector<std::pair<unsigned, std::string> > RecordNames;
};
@@ -316,11 +315,11 @@ class BitstreamCursor : SimpleBitstreamCursor {
unsigned CurCodeSize = 2;
/// Abbrevs installed at in this block.
- std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> CurAbbrevs;
+ std::vector<std::shared_ptr<BitCodeAbbrev>> CurAbbrevs;
struct Block {
unsigned PrevCodeSize;
- std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> PrevAbbrevs;
+ std::vector<std::shared_ptr<BitCodeAbbrev>> PrevAbbrevs;
explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
};
@@ -478,8 +477,8 @@ public:
return CurAbbrevs[AbbrevNo].get();
}
- /// Read the current record and discard it.
- void skipRecord(unsigned AbbrevID);
+ /// Read the current record and discard it, returning the code for the record.
+ unsigned skipRecord(unsigned AbbrevID);
unsigned readRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals,
StringRef *Blob = nullptr);
diff --git a/include/llvm/Bitcode/BitstreamWriter.h b/include/llvm/Bitcode/BitstreamWriter.h
index 8eb6e8aef7a2..e276db5f92f6 100644
--- a/include/llvm/Bitcode/BitstreamWriter.h
+++ b/include/llvm/Bitcode/BitstreamWriter.h
@@ -43,12 +43,12 @@ class BitstreamWriter {
unsigned BlockInfoCurBID;
/// CurAbbrevs - Abbrevs installed at in this block.
- std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> CurAbbrevs;
+ std::vector<std::shared_ptr<BitCodeAbbrev>> CurAbbrevs;
struct Block {
unsigned PrevCodeSize;
size_t StartSizeWord;
- std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> PrevAbbrevs;
+ std::vector<std::shared_ptr<BitCodeAbbrev>> PrevAbbrevs;
Block(unsigned PCS, size_t SSW) : PrevCodeSize(PCS), StartSizeWord(SSW) {}
};
@@ -59,7 +59,7 @@ class BitstreamWriter {
/// These describe abbreviations that all blocks of the specified ID inherit.
struct BlockInfo {
unsigned BlockID;
- std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> Abbrevs;
+ std::vector<std::shared_ptr<BitCodeAbbrev>> Abbrevs;
};
std::vector<BlockInfo> BlockInfoRecords;
@@ -469,12 +469,12 @@ public:
private:
// Emit the abbreviation as a DEFINE_ABBREV record.
- void EncodeAbbrev(BitCodeAbbrev *Abbv) {
+ void EncodeAbbrev(const BitCodeAbbrev &Abbv) {
EmitCode(bitc::DEFINE_ABBREV);
- EmitVBR(Abbv->getNumOperandInfos(), 5);
- for (unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos());
+ EmitVBR(Abbv.getNumOperandInfos(), 5);
+ for (unsigned i = 0, e = static_cast<unsigned>(Abbv.getNumOperandInfos());
i != e; ++i) {
- const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
+ const BitCodeAbbrevOp &Op = Abbv.getOperandInfo(i);
Emit(Op.isLiteral(), 1);
if (Op.isLiteral()) {
EmitVBR64(Op.getLiteralValue(), 8);
@@ -489,10 +489,10 @@ public:
/// EmitAbbrev - This emits an abbreviation to the stream. Note that this
/// method takes ownership of the specified abbrev.
- unsigned EmitAbbrev(BitCodeAbbrev *Abbv) {
+ unsigned EmitAbbrev(std::shared_ptr<BitCodeAbbrev> Abbv) {
// Emit the abbreviation as a record.
- EncodeAbbrev(Abbv);
- CurAbbrevs.push_back(Abbv);
+ EncodeAbbrev(*Abbv);
+ CurAbbrevs.push_back(std::move(Abbv));
return static_cast<unsigned>(CurAbbrevs.size())-1 +
bitc::FIRST_APPLICATION_ABBREV;
}
@@ -532,13 +532,13 @@ public:
/// EmitBlockInfoAbbrev - Emit a DEFINE_ABBREV record for the specified
/// BlockID.
- unsigned EmitBlockInfoAbbrev(unsigned BlockID, BitCodeAbbrev *Abbv) {
+ unsigned EmitBlockInfoAbbrev(unsigned BlockID, std::shared_ptr<BitCodeAbbrev> Abbv) {
SwitchToBlockID(BlockID);
- EncodeAbbrev(Abbv);
+ EncodeAbbrev(*Abbv);
// Add the abbrev to the specified block record.
BlockInfo &Info = getOrCreateBlockInfo(BlockID);
- Info.Abbrevs.push_back(Abbv);
+ Info.Abbrevs.push_back(std::move(Abbv));
return Info.Abbrevs.size()-1+bitc::FIRST_APPLICATION_ABBREV;
}
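For reference, a minimal sketch of the caller-side pattern the new EmitAbbrev signature implies; Stream (a BitstreamWriter) and the abbreviation operands are illustrative assumptions, not taken from this patch:

  // Callers now build the abbreviation in a shared_ptr and hand ownership to
  // the writer, instead of passing a raw, newly allocated pointer.
  auto Abbv = std::make_shared<BitCodeAbbrev>();
  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); // example operand
  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));   // example operand
  unsigned AbbrevID = Stream.EmitAbbrev(std::move(Abbv));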
diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h
index be8822df3dba..f0be955110fb 100644
--- a/include/llvm/CodeGen/AsmPrinter.h
+++ b/include/llvm/CodeGen/AsmPrinter.h
@@ -140,6 +140,9 @@ private:
/// If the target supports dwarf debug info, this pointer is non-null.
DwarfDebug *DD;
+ /// If the current module uses dwarf CFI annotations strictly for debugging.
+ bool isCFIMoveForDebugging;
+
protected:
explicit AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer);
@@ -262,6 +265,10 @@ public:
enum CFIMoveType { CFI_M_None, CFI_M_EH, CFI_M_Debug };
CFIMoveType needsCFIMoves();
+ /// Returns false if needsCFIMoves() == CFI_M_EH for any function
+ /// in the module.
+ bool needsOnlyDebugCFIMoves() const { return isCFIMoveForDebugging; }
+
bool needsSEHMoves();
/// Print to the current output stream assembly representations of the
diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h
index df0dc1a38ae7..8e96336b981f 100644
--- a/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/include/llvm/CodeGen/BasicTTIImpl.h
@@ -925,7 +925,10 @@ public:
return LT.first;
}
- unsigned getAddressComputationCost(Type *Ty, bool IsComplex) { return 0; }
+ unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *,
+ const SCEV *) {
+ return 0;
+ }
unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise) {
assert(Ty->isVectorTy() && "Expect a vector type");
diff --git a/include/llvm/CodeGen/DIE.h b/include/llvm/CodeGen/DIE.h
index 1e3476cd8395..09c3bf6a1b56 100644
--- a/include/llvm/CodeGen/DIE.h
+++ b/include/llvm/CodeGen/DIE.h
@@ -651,6 +651,9 @@ class DIE : IntrusiveBackListNode, public DIEValueList {
unsigned AbbrevNumber = ~0u;
/// Dwarf tag code.
dwarf::Tag Tag = (dwarf::Tag)0;
+ /// Set to true to force a DIE to emit an abbreviation that says it has
+ /// children even when it doesn't. This is used for unit testing purposes.
+ bool ForceChildren;
/// Children DIEs.
IntrusiveBackList<DIE> Children;
@@ -659,7 +662,8 @@ class DIE : IntrusiveBackListNode, public DIEValueList {
PointerUnion<DIE *, DIEUnit *> Owner;
DIE() = delete;
- explicit DIE(dwarf::Tag Tag) : Offset(0), Size(0), Tag(Tag) {}
+ explicit DIE(dwarf::Tag Tag) : Offset(0), Size(0), Tag(Tag),
+ ForceChildren(false) {}
public:
static DIE *get(BumpPtrAllocator &Alloc, dwarf::Tag Tag) {
@@ -677,7 +681,8 @@ public:
/// Get the compile/type unit relative offset of this DIE.
unsigned getOffset() const { return Offset; }
unsigned getSize() const { return Size; }
- bool hasChildren() const { return !Children.empty(); }
+ bool hasChildren() const { return ForceChildren || !Children.empty(); }
+ void setForceChildren(bool B) { ForceChildren = B; }
typedef IntrusiveBackList<DIE>::iterator child_iterator;
typedef IntrusiveBackList<DIE>::const_iterator const_child_iterator;
diff --git a/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/include/llvm/CodeGen/GlobalISel/IRTranslator.h
index 76e0d47ceea3..26ba5c67beb5 100644
--- a/include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ b/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -180,6 +180,8 @@ private:
/// \pre \p U is a branch instruction.
bool translateBr(const User &U, MachineIRBuilder &MIRBuilder);
+ bool translateSwitch(const User &U, MachineIRBuilder &MIRBuilder);
+
bool translateExtractValue(const User &U, MachineIRBuilder &MIRBuilder);
bool translateInsertValue(const User &U, MachineIRBuilder &MIRBuilder);
@@ -292,12 +294,8 @@ private:
return translateBinaryOp(TargetOpcode::G_FREM, U, MIRBuilder);
}
-
// Stubs to keep the compiler happy while we implement the rest of the
// translation.
- bool translateSwitch(const User &U, MachineIRBuilder &MIRBuilder) {
- return false;
- }
bool translateIndirectBr(const User &U, MachineIRBuilder &MIRBuilder) {
return false;
}
diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h
index be811c6fe437..92a9896d7a18 100644
--- a/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/include/llvm/CodeGen/MachineBasicBlock.h
@@ -308,7 +308,7 @@ public:
// Iteration support for live in sets. These sets are kept in sorted
// order by their register number.
typedef LiveInVector::const_iterator livein_iterator;
- livein_iterator livein_begin() const { return LiveIns.begin(); }
+ livein_iterator livein_begin() const;
livein_iterator livein_end() const { return LiveIns.end(); }
bool livein_empty() const { return LiveIns.empty(); }
iterator_range<livein_iterator> liveins() const {
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDie.h b/include/llvm/DebugInfo/DWARF/DWARFDie.h
index f33758de6a55..5a24b7c87299 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDie.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDie.h
@@ -10,6 +10,8 @@
#ifndef LLVM_LIB_DEBUGINFO_DWARFDIE_H
#define LLVM_LIB_DEBUGINFO_DWARFDIE_H
+#include "llvm/ADT/iterator.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/ADT/Optional.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h"
@@ -40,9 +42,6 @@ public:
bool isValid() const { return U && Die; }
explicit operator bool() const { return isValid(); }
- bool operator ==(const DWARFDie &RHS) const {
- return Die == RHS.Die && U == RHS.U;
- }
const DWARFDebugInfoEntry *getDebugInfoEntry() const { return Die; }
DWARFUnit *getDwarfUnit() const { return U; }
@@ -361,8 +360,61 @@ public:
getInlinedChainForAddress(const uint64_t Address,
SmallVectorImpl<DWARFDie> &InlinedChain) const;
+ class iterator;
+
+ iterator begin() const;
+ iterator end() const;
+ iterator_range<iterator> children() const;
};
+
+inline bool operator==(const DWARFDie &LHS, const DWARFDie &RHS) {
+ return LHS.getDebugInfoEntry() == RHS.getDebugInfoEntry() &&
+ LHS.getDwarfUnit() == RHS.getDwarfUnit();
+}
+
+inline bool operator!=(const DWARFDie &LHS, const DWARFDie &RHS) {
+ return !(LHS == RHS);
+}
+
+class DWARFDie::iterator : public iterator_facade_base<iterator,
+ std::forward_iterator_tag,
+ const DWARFDie> {
+ DWARFDie Die;
+ void skipNull() {
+ if (Die && Die.isNULL())
+ Die = DWARFDie();
+ }
+public:
+ iterator() = default;
+ explicit iterator(DWARFDie D) : Die(D) {
+ // If we start out with only a Null DIE then invalidate.
+ skipNull();
+ }
+ iterator &operator++() {
+ Die = Die.getSibling();
+ // Don't include the NULL die when iterating.
+ skipNull();
+ return *this;
+ }
+ explicit operator bool() const { return Die.isValid(); }
+ const DWARFDie &operator*() const { return Die; }
+ bool operator==(const iterator &X) const { return Die == X.Die; }
+};
+
+// These inline functions must follow the DWARFDie::iterator definition above
+// as they use functions from that class.
+inline DWARFDie::iterator DWARFDie::begin() const {
+ return iterator(getFirstChild());
+}
+
+inline DWARFDie::iterator DWARFDie::end() const {
+ return iterator();
+}
+
+inline iterator_range<DWARFDie::iterator> DWARFDie::children() const {
+ return make_range(begin(), end());
+}
} // end namespace llvm
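For reference, a minimal sketch of the child iteration this adds; Die (a valid DWARFDie) and the tag filter are illustrative assumptions:

  // Iterate a DIE's children; the iterator skips the terminating NULL entry.
  unsigned NumSubprograms = 0;
  for (DWARFDie Child : Die.children())
    if (Child.getTag() == dwarf::DW_TAG_subprogram)
      ++NumSubprograms;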
diff --git a/include/llvm/ExecutionEngine/Orc/RawByteChannel.h b/include/llvm/ExecutionEngine/Orc/RawByteChannel.h
index 43b597de000f..83a7b9a844f2 100644
--- a/include/llvm/ExecutionEngine/Orc/RawByteChannel.h
+++ b/include/llvm/ExecutionEngine/Orc/RawByteChannel.h
@@ -47,9 +47,9 @@ public:
/// Locks the channel for writing.
template <typename FunctionIdT, typename SequenceIdT>
Error startSendMessage(const FunctionIdT &FnId, const SequenceIdT &SeqNo) {
+ writeLock.lock();
if (auto Err = serializeSeq(*this, FnId, SeqNo))
return Err;
- writeLock.lock();
return Error::success();
}
diff --git a/include/llvm/IR/ModuleSummaryIndex.h b/include/llvm/IR/ModuleSummaryIndex.h
index 2cfe673d970f..ecb0435a1e11 100644
--- a/include/llvm/IR/ModuleSummaryIndex.h
+++ b/include/llvm/IR/ModuleSummaryIndex.h
@@ -28,6 +28,10 @@
namespace llvm {
+namespace yaml {
+template <typename T> struct MappingTraits;
+}
+
/// \brief Class to accumulate and hold information about a callee.
struct CalleeInfo {
enum class HotnessType : uint8_t { Unknown = 0, Cold = 1, None = 2, Hot = 3 };
@@ -102,7 +106,7 @@ public:
/// \brief Sububclass discriminator (for dyn_cast<> et al.)
enum SummaryKind : unsigned { AliasKind, FunctionKind, GlobalVarKind };
- /// Group flags (Linkage, noRename, isOptSize, etc.) as a bitfield.
+ /// Group flags (Linkage, NotEligibleToImport, etc.) as a bitfield.
struct GVFlags {
/// \brief The linkage type of the associated global value.
///
@@ -113,39 +117,20 @@ public:
/// types based on global summary-based analysis.
unsigned Linkage : 4;
- /// Indicate if the global value cannot be renamed (in a specific section,
- /// possibly referenced from inline assembly, etc).
- unsigned NoRename : 1;
-
- /// Indicate if a function contains inline assembly (which is opaque),
- /// that may reference a local value. This is used to prevent importing
- /// of this function, since we can't promote and rename the uses of the
- /// local in the inline assembly. Use a flag rather than bloating the
- /// summary with references to every possible local value in the
- /// llvm.used set.
- unsigned HasInlineAsmMaybeReferencingInternal : 1;
+ /// Indicate if the global value cannot be imported (e.g. it cannot
+ /// be renamed or references something that can't be renamed).
+ unsigned NotEligibleToImport : 1;
- /// Indicate if the function is not viable to inline.
- unsigned IsNotViableToInline : 1;
+ /// Indicate that the global value must be considered a live root for
+ /// index-based liveness analysis. Used for special LLVM values such as
+ /// llvm.global_ctors that the linker does not know about.
+ unsigned LiveRoot : 1;
/// Convenience Constructors
- explicit GVFlags(GlobalValue::LinkageTypes Linkage, bool NoRename,
- bool HasInlineAsmMaybeReferencingInternal,
- bool IsNotViableToInline)
- : Linkage(Linkage), NoRename(NoRename),
- HasInlineAsmMaybeReferencingInternal(
- HasInlineAsmMaybeReferencingInternal),
- IsNotViableToInline(IsNotViableToInline) {}
-
- GVFlags(const GlobalValue &GV)
- : Linkage(GV.getLinkage()), NoRename(GV.hasSection()),
- HasInlineAsmMaybeReferencingInternal(false) {
- IsNotViableToInline = false;
- if (const auto *F = dyn_cast<Function>(&GV))
- // Inliner doesn't handle variadic functions.
- // FIXME: refactor this to use the same code that inliner is using.
- IsNotViableToInline = F->isVarArg();
- }
+ explicit GVFlags(GlobalValue::LinkageTypes Linkage,
+ bool NotEligibleToImport, bool LiveRoot)
+ : Linkage(Linkage), NotEligibleToImport(NotEligibleToImport),
+ LiveRoot(LiveRoot) {}
};
private:
@@ -213,31 +198,19 @@ public:
Flags.Linkage = Linkage;
}
- bool isNotViableToInline() const { return Flags.IsNotViableToInline; }
-
- /// Return true if this summary is for a GlobalValue that needs promotion
- /// to be referenced from another module.
- bool needsRenaming() const { return GlobalValue::isLocalLinkage(linkage()); }
+ /// Return true if this global value can't be imported.
+ bool notEligibleToImport() const { return Flags.NotEligibleToImport; }
- /// Return true if this global value cannot be renamed (in a specific section,
- /// possibly referenced from inline assembly, etc).
- bool noRename() const { return Flags.NoRename; }
+ /// Return true if this global value must be considered a root for live
+ /// value analysis on the index.
+ bool liveRoot() const { return Flags.LiveRoot; }
- /// Flag that this global value cannot be renamed (in a specific section,
- /// possibly referenced from inline assembly, etc).
- void setNoRename() { Flags.NoRename = true; }
+ /// Flag that this global value must be considered a root for live
+ /// value analysis on the index.
+ void setLiveRoot() { Flags.LiveRoot = true; }
- /// Return true if this global value possibly references another value
- /// that can't be renamed.
- bool hasInlineAsmMaybeReferencingInternal() const {
- return Flags.HasInlineAsmMaybeReferencingInternal;
- }
-
- /// Flag that this global value possibly references another value that
- /// can't be renamed.
- void setHasInlineAsmMaybeReferencingInternal() {
- Flags.HasInlineAsmMaybeReferencingInternal = true;
- }
+ /// Flag that this global value cannot be imported.
+ void setNotEligibleToImport() { Flags.NotEligibleToImport = true; }
/// Return the list of values referenced by this global value definition.
ArrayRef<ValueInfo> refs() const { return RefEdgeList; }
@@ -330,6 +303,30 @@ public:
}
};
+struct TypeTestResolution {
+ /// Specifies which kind of type check we should emit for this byte array.
+ /// See http://clang.llvm.org/docs/ControlFlowIntegrityDesign.html for full
+ /// details on each kind of check; the enumerators are described with
+ /// reference to that document.
+ enum Kind {
+ Unsat, ///< Unsatisfiable type (i.e. no global has this type metadata)
+ ByteArray, ///< Test a byte array (first example)
+ Inline, ///< Inlined bit vector ("Short Inline Bit Vectors")
+ Single, ///< Single element (last example in "Short Inline Bit Vectors")
+ AllOnes, ///< All-ones bit vector ("Eliminating Bit Vector Checks for
+ /// All-Ones Bit Vectors")
+ } TheKind = Unsat;
+
+ /// Range of the size expressed as a bit width. For example, if the size is in
+ /// range [0,256), this number will be 8. This helps generate the most compact
+ /// instruction sequences.
+ unsigned SizeBitWidth = 0;
+};
+
+struct TypeIdSummary {
+ TypeTestResolution TTRes;
+};
+
/// 160 bits SHA1
typedef std::array<uint32_t, 5> ModuleHash;
@@ -370,11 +367,20 @@ private:
/// Holds strings for combined index, mapping to the corresponding module ID.
ModulePathStringTableTy ModulePathStringTable;
+ /// Mapping from type identifiers to summary information for that type
+ /// identifier.
+ // FIXME: Add bitcode read/write support for this field.
+ std::map<std::string, TypeIdSummary> TypeIdMap;
+
+ // YAML I/O support.
+ friend yaml::MappingTraits<ModuleSummaryIndex>;
+
public:
gvsummary_iterator begin() { return GlobalValueMap.begin(); }
const_gvsummary_iterator begin() const { return GlobalValueMap.begin(); }
gvsummary_iterator end() { return GlobalValueMap.end(); }
const_gvsummary_iterator end() const { return GlobalValueMap.end(); }
+ size_t size() const { return GlobalValueMap.size(); }
/// Get the list of global value summary objects for a given value name.
const GlobalValueSummaryList &getGlobalValueSummaryList(StringRef ValueName) {
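For reference, a minimal sketch of constructing the consolidated flags with the new GVFlags constructor; GV (a GlobalValue) and the eligibility criterion shown are illustrative assumptions, the real computation lives in the summary analysis:

  // Producers now record a single "not eligible to import" bit plus a
  // "live root" bit instead of the old NoRename/inline-asm/inline-viability trio.
  bool NotEligibleForImport = GV.hasSection(); // illustrative criterion only
  bool LiveRoot = false;                       // e.g. set for llvm.global_ctors
  GlobalValueSummary::GVFlags Flags(GV.getLinkage(), NotEligibleForImport, LiveRoot);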
diff --git a/include/llvm/IR/ModuleSummaryIndexYAML.h b/include/llvm/IR/ModuleSummaryIndexYAML.h
new file mode 100644
index 000000000000..a8c8ff9ef2eb
--- /dev/null
+++ b/include/llvm/IR/ModuleSummaryIndexYAML.h
@@ -0,0 +1,111 @@
+//===-- llvm/ModuleSummaryIndexYAML.h - YAML I/O for summary ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_MODULESUMMARYINDEXYAML_H
+#define LLVM_IR_MODULESUMMARYINDEXYAML_H
+
+#include "llvm/IR/ModuleSummaryIndex.h"
+#include "llvm/Support/YAMLTraits.h"
+
+namespace llvm {
+namespace yaml {
+
+template <> struct ScalarEnumerationTraits<TypeTestResolution::Kind> {
+ static void enumeration(IO &io, TypeTestResolution::Kind &value) {
+ io.enumCase(value, "Unsat", TypeTestResolution::Unsat);
+ io.enumCase(value, "ByteArray", TypeTestResolution::ByteArray);
+ io.enumCase(value, "Inline", TypeTestResolution::Inline);
+ io.enumCase(value, "Single", TypeTestResolution::Single);
+ io.enumCase(value, "AllOnes", TypeTestResolution::AllOnes);
+ }
+};
+
+template <> struct MappingTraits<TypeTestResolution> {
+ static void mapping(IO &io, TypeTestResolution &res) {
+ io.mapRequired("Kind", res.TheKind);
+ io.mapRequired("SizeBitWidth", res.SizeBitWidth);
+ }
+};
+
+template <> struct MappingTraits<TypeIdSummary> {
+ static void mapping(IO &io, TypeIdSummary& summary) {
+ io.mapRequired("TTRes", summary.TTRes);
+ }
+};
+
+struct FunctionSummaryYaml {
+ std::vector<uint64_t> TypeTests;
+};
+
+} // End yaml namespace
+} // End llvm namespace
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(uint64_t)
+
+namespace llvm {
+namespace yaml {
+
+template <> struct MappingTraits<FunctionSummaryYaml> {
+ static void mapping(IO &io, FunctionSummaryYaml& summary) {
+ io.mapRequired("TypeTests", summary.TypeTests);
+ }
+};
+
+} // End yaml namespace
+} // End llvm namespace
+
+LLVM_YAML_IS_STRING_MAP(TypeIdSummary)
+LLVM_YAML_IS_SEQUENCE_VECTOR(FunctionSummaryYaml)
+
+namespace llvm {
+namespace yaml {
+
+// FIXME: Add YAML mappings for the rest of the module summary.
+template <> struct CustomMappingTraits<GlobalValueSummaryMapTy> {
+ static void inputOne(IO &io, StringRef Key, GlobalValueSummaryMapTy &V) {
+ std::vector<FunctionSummaryYaml> FSums;
+ io.mapRequired(Key.str().c_str(), FSums);
+ uint64_t KeyInt;
+ if (Key.getAsInteger(0, KeyInt)) {
+ io.setError("key not an integer");
+ return;
+ }
+ auto &Elem = V[KeyInt];
+ for (auto &FSum : FSums) {
+ GlobalValueSummary::GVFlags GVFlags(GlobalValue::ExternalLinkage, false,
+ false);
+ Elem.push_back(llvm::make_unique<FunctionSummary>(
+ GVFlags, 0, ArrayRef<ValueInfo>{},
+ ArrayRef<FunctionSummary::EdgeTy>{}, std::move(FSum.TypeTests)));
+ }
+ }
+ static void output(IO &io, GlobalValueSummaryMapTy &V) {
+ for (auto &P : V) {
+ std::vector<FunctionSummaryYaml> FSums;
+ for (auto &Sum : P.second) {
+ if (auto *FSum = dyn_cast<FunctionSummary>(Sum.get()))
+ FSums.push_back(FunctionSummaryYaml{FSum->type_tests()});
+ }
+ if (!FSums.empty())
+ io.mapRequired(llvm::utostr(P.first).c_str(), FSums);
+ }
+ }
+};
+
+template <> struct MappingTraits<ModuleSummaryIndex> {
+ static void mapping(IO &io, ModuleSummaryIndex& index) {
+ io.mapRequired("GlobalValueMap", index.GlobalValueMap);
+ io.mapRequired("TypeIdMap", index.TypeIdMap);
+ }
+};
+
+} // End yaml namespace
+} // End llvm namespace
+
+#endif
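For reference, a minimal sketch of a YAML round trip enabled by these traits; YamlBuffer (a StringRef of YAML text) and the output stream are illustrative assumptions:

  // Parse a summary index from YAML and write it back out, using the
  // MappingTraits defined in this header.
  ModuleSummaryIndex Index;
  yaml::Input Yin(YamlBuffer);
  Yin >> Index;
  if (!Yin.error()) {
    yaml::Output Yout(llvm::outs());
    Yout << Index;
  }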
diff --git a/include/llvm/IR/PassManager.h b/include/llvm/IR/PassManager.h
index 3e4edd893d3c..7a63956f1cdb 100644
--- a/include/llvm/IR/PassManager.h
+++ b/include/llvm/IR/PassManager.h
@@ -64,32 +64,31 @@ namespace llvm {
struct alignas(8) AnalysisKey {};
/// A special type used to provide an address that identifies a set of related
-/// analyses.
+/// analyses. These sets are primarily used below to mark sets of analyses as
+/// preserved.
///
-/// These sets are primarily used below to mark sets of analyses as preserved.
-/// An example would be analyses depending only on the CFG of a function.
-/// A transformation can mark that it is preserving the CFG of a function and
-/// then analyses can check for this rather than each transform having to fully
-/// enumerate every analysis preserved.
+/// For example, a transformation can indicate that it preserves the CFG of a
+/// function by preserving the appropriate AnalysisSetKey. An analysis that
+/// depends only on the CFG can then check if that AnalysisSetKey is preserved;
+/// if it is, the analysis knows that it itself is preserved.
struct alignas(8) AnalysisSetKey {};
-/// Class for tracking what analyses are preserved after a transformation pass
-/// runs over some unit of IR.
+/// A set of analyses that are preserved following a run of a transformation
+/// pass.
///
-/// Transformation passes build and return these objects when run over the IR
-/// to communicate which analyses remain valid afterward. For most passes this
-/// is fairly simple: if they don't change anything all analyses are preserved,
+/// Transformation passes build and return these objects to communicate which
+/// analyses are still valid after the transformation. For most passes this is
+/// fairly simple: if they don't change anything all analyses are preserved,
/// otherwise only a short list of analyses that have been explicitly updated
/// are preserved.
///
-/// This class also provides the ability to mark abstract *sets* of analyses as
-/// preserved. These sets allow passes to indicate that they preserve broad
-/// aspects of the IR (such as its CFG) and analyses to opt in to that being
-/// sufficient without the passes having to fully enumerate such analyses.
+/// This class also lets transformation passes mark abstract *sets* of analyses
+/// as preserved. A transformation that (say) does not alter the CFG can
+/// indicate such by marking a particular AnalysisSetKey as preserved, and
+/// then analyses can query whether that AnalysisSetKey is preserved.
///
-/// Finally, this class can represent "abandoning" an analysis, which marks it
-/// as not-preserved even if it would be covered by some abstract set of
-/// analyses.
+/// Finally, this class can represent an "abandoned" analysis, which is
+/// not preserved even if it would be covered by some abstract set of analyses.
///
/// Given a `PreservedAnalyses` object, an analysis will typically want to
/// figure out whether it is preserved. In the example below, MyAnalysisType is
@@ -120,7 +119,8 @@ public:
/// Mark an analysis as preserved.
template <typename AnalysisT> void preserve() { preserve(AnalysisT::ID()); }
- /// Mark an analysis as preserved using its ID.
+ /// \brief Given an analysis's ID, mark the analysis as preserved, adding it
+ /// to the set.
void preserve(AnalysisKey *ID) {
// Clear this ID from the explicit not-preserved set if present.
NotPreservedAnalysisIDs.erase(ID);
@@ -224,17 +224,17 @@ public:
: PA(PA), ID(ID), IsAbandoned(PA.NotPreservedAnalysisIDs.count(ID)) {}
public:
- /// Returns true if the checker's analysis was not abandoned and the
- /// analysis is either is explicitly preserved or all analyses are
- /// preserved.
+ /// Returns true if the checker's analysis was not abandoned and either
+ /// - the analysis is explicitly preserved or
+ /// - all analyses are preserved.
bool preserved() {
return !IsAbandoned && (PA.PreservedIDs.count(&AllAnalysesKey) ||
PA.PreservedIDs.count(ID));
}
- /// Returns true if the checker's analysis was not abandoned and either the
- /// provided set type is either explicitly preserved or all analyses are
- /// preserved.
+ /// Returns true if the checker's analysis was not abandoned and either
+ /// - \p AnalysisSetT is explicitly preserved or
+ /// - all analyses are preserved.
template <typename AnalysisSetT> bool preservedSet() {
AnalysisSetKey *SetID = AnalysisSetT::ID();
return !IsAbandoned && (PA.PreservedIDs.count(&AllAnalysesKey) ||
@@ -262,8 +262,8 @@ public:
/// Test whether all analyses are preserved (and none are abandoned).
///
- /// This lets analyses optimize for the common case where a transformation
- /// made no changes to the IR.
+ /// This is used primarily to optimize for the common case of a transformation
+ /// which makes no changes to the IR.
bool areAllPreserved() const {
return NotPreservedAnalysisIDs.empty() &&
PreservedIDs.count(&AllAnalysesKey);
@@ -307,9 +307,9 @@ template <typename IRUnitT, typename... ExtraArgTs> class AnalysisManager;
/// A CRTP mix-in to automatically provide informational APIs needed for
/// passes.
///
-/// This provides some boiler plate for types that are passes.
+/// This provides some boilerplate for types that are passes.
template <typename DerivedT> struct PassInfoMixin {
- /// Returns the name of the derived pass type.
+ /// Gets the name of the pass we are mixed into.
static StringRef name() {
StringRef Name = getTypeName<DerivedT>();
if (Name.startswith("llvm::"))
@@ -318,41 +318,35 @@ template <typename DerivedT> struct PassInfoMixin {
}
};
-/// A CRTP mix-in to automatically provide informational APIs needed for
-/// analysis passes.
+/// A CRTP mix-in that provides informational APIs needed for analysis passes.
///
-/// This provides some boiler plate for types that are analysis passes. It
-/// automatically mixes in \c PassInfoMixin and adds informational APIs
-/// specifically used for analyses.
+/// This provides some boilerplate for types that are analysis passes. It
+/// automatically mixes in \c PassInfoMixin.
template <typename DerivedT>
struct AnalysisInfoMixin : PassInfoMixin<DerivedT> {
/// Returns an opaque, unique ID for this analysis type.
///
- /// This ID is a pointer type that is guaranteed to be 8-byte aligned and
- /// thus suitable for use in sets, maps, and other data structures optimized
- /// for pointer-like types using the alignment-provided low bits.
+ /// This ID is a pointer type that is guaranteed to be 8-byte aligned and thus
+ /// suitable for use in sets, maps, and other data structures that use the low
+ /// bits of pointers.
///
/// Note that this requires the derived type provide a static \c AnalysisKey
/// member called \c Key.
///
- /// FIXME: The only reason the derived type needs to provide this rather than
- /// this mixin providing it is due to broken implementations which cannot
- /// correctly unique a templated static so that they have the same addresses
- /// for each instantiation and are definitively emitted once for each
- /// instantiation. The only currently known platform with this limitation are
- /// Windows DLL builds, specifically building each part of LLVM as a DLL. If
- /// we ever remove that build configuration, this mixin can provide the
- /// static key as well.
+ /// FIXME: The only reason the mixin type itself can't declare the Key value
+ /// is that some compilers cannot correctly unique a templated static variable
+ /// so it has the same addresses in each instantiation. The only currently
+ /// known platform with this limitation is Windows DLL builds, specifically
+ /// building each part of LLVM as a DLL. If we ever remove that build
+ /// configuration, this mixin can provide the static key as well.
static AnalysisKey *ID() { return &DerivedT::Key; }
};
-/// A class template to provide analysis sets for IR units.
+/// This templated class represents "all analyses that operate over \<a
+/// particular IR unit\>" (e.g. a Function or a Module) in instances of
+/// PreservedAnalysis.
///
-/// Analyses operate on units of IR. It is useful to be able to talk about
-/// preservation of all analyses for a given unit of IR as a set. This class
-/// template can be used with the \c PreservedAnalyses API for that purpose and
-/// the \c AnalysisManager will automatically check and use this set to skip
-/// invalidation events.
+/// This lets a transformation say e.g. "I preserved all function analyses".
///
/// Note that you must provide an explicit instantiation declaration and
/// definition for this template in order to get the correct behavior on
@@ -371,17 +365,18 @@ template <typename IRUnitT> AnalysisSetKey AllAnalysesOn<IRUnitT>::SetKey;
extern template class AllAnalysesOn<Module>;
extern template class AllAnalysesOn<Function>;
-/// \brief Manages a sequence of passes over units of IR.
+/// \brief Manages a sequence of passes over a particular unit of IR.
///
-/// A pass manager contains a sequence of passes to run over units of IR. It is
-/// itself a valid pass over that unit of IR, and when over some given IR will
-/// run each pass in sequence. This is the primary and most basic building
-/// block of a pass pipeline.
+/// A pass manager contains a sequence of passes to run over a particular unit
+/// of IR (e.g. Functions, Modules). It is itself a valid pass over that unit of
+/// IR, and when run over some given IR will run each of its contained passes in
+/// sequence. Pass managers are the primary and most basic building block of a
+/// pass pipeline.
///
-/// If it is run with an \c AnalysisManager<IRUnitT> argument, it will propagate
-/// that analysis manager to each pass it runs, as well as calling the analysis
-/// manager's invalidation routine with the PreservedAnalyses of each pass it
-/// runs.
+/// When you run a pass manager, you provide an \c AnalysisManager<IRUnitT>
+/// argument. The pass manager will propagate that analysis manager to each
+/// pass it runs, and will call the analysis manager's invalidation routine with
+/// the PreservedAnalyses of each pass it runs.
template <typename IRUnitT,
typename AnalysisManagerT = AnalysisManager<IRUnitT>,
typename... ExtraArgTs>
@@ -390,7 +385,7 @@ class PassManager : public PassInfoMixin<
public:
/// \brief Construct a pass manager.
///
- /// It can be passed a flag to get debug logging as the passes are run.
+ /// If \p DebugLogging is true, we'll log our progress to llvm::dbgs().
explicit PassManager(bool DebugLogging = false) : DebugLogging(DebugLogging) {}
// FIXME: These are equivalent to the default move constructor/move
@@ -400,13 +395,15 @@ public:
PassManager(PassManager &&Arg)
: Passes(std::move(Arg.Passes)),
DebugLogging(std::move(Arg.DebugLogging)) {}
+
PassManager &operator=(PassManager &&RHS) {
Passes = std::move(RHS.Passes);
DebugLogging = std::move(RHS.DebugLogging);
return *this;
}
- /// \brief Run all of the passes in this manager over the IR.
+ /// \brief Run all of the passes in this manager over the given unit of IR.
+ /// ExtraArgs are passed to each pass.
PreservedAnalyses run(IRUnitT &IR, AnalysisManagerT &AM,
ExtraArgTs... ExtraArgs) {
PreservedAnalyses PA = PreservedAnalyses::all();
@@ -425,7 +422,7 @@ public:
// invalidates analyses.
AM.invalidate(IR, PassPA);
- // Finally, we intersect the preserved analyses to compute the aggregate
+ // Finally, intersect the preserved analyses to compute the aggregate
// preserved set for this pass manager.
PA.intersect(std::move(PassPA));
@@ -473,30 +470,29 @@ extern template class PassManager<Function>;
/// \brief Convenience typedef for a pass manager over functions.
typedef PassManager<Function> FunctionPassManager;
-/// \brief A generic analysis pass manager with lazy running and caching of
+/// \brief A container for analyses that lazily runs them and caches their
/// results.
///
-/// This analysis manager can be used for any IR unit where the address of the
-/// IR unit sufficies as its identity. It manages the cache for a unit of IR via
-/// the address of each unit of IR cached.
+/// This class can manage analyses for any IR unit where the address of the IR
+/// unit sufficies as its identity.
template <typename IRUnitT, typename... ExtraArgTs> class AnalysisManager {
public:
class Invalidator;
private:
- // Now that we've defined our invalidator, we can build types for the concept
- // types.
+ // Now that we've defined our invalidator, we can define the concept types.
typedef detail::AnalysisResultConcept<IRUnitT, PreservedAnalyses, Invalidator>
ResultConceptT;
typedef detail::AnalysisPassConcept<IRUnitT, PreservedAnalyses, Invalidator,
ExtraArgTs...>
PassConceptT;
- /// \brief List of function analysis pass IDs and associated concept pointers.
+ /// \brief List of analysis pass IDs and associated concept pointers.
///
/// Requires iterators to be valid across appending new entries and arbitrary
- /// erases. Provides the analysis ID to enable finding iterators to a given entry
- /// in maps below, and provides the storage for the actual result concept.
+ /// erases. Provides the analysis ID to enable finding iterators to a given
+ /// entry in maps below, and provides the storage for the actual result
+ /// concept.
typedef std::list<std::pair<AnalysisKey *, std::unique_ptr<ResultConceptT>>>
AnalysisResultListT;
@@ -504,8 +500,8 @@ private:
typedef DenseMap<IRUnitT *, AnalysisResultListT> AnalysisResultListMapT;
/// \brief Map type from a pair of analysis ID and IRUnitT pointer to an
- /// iterator into a particular result list which is where the actual result
- /// is stored.
+ /// iterator into a particular result list (which is where the actual analysis
+ /// result is stored).
typedef DenseMap<std::pair<AnalysisKey *, IRUnitT *>,
typename AnalysisResultListT::iterator>
AnalysisResultMapT;
@@ -515,28 +511,28 @@ public:
///
/// When an analysis result embeds handles to other analysis results, it
/// needs to be invalidated both when its own information isn't preserved and
- /// if any of those embedded analysis results end up invalidated. We pass in
- /// an \c Invalidator object from the analysis manager in order to let the
- /// analysis results themselves define the dependency graph on the fly. This
- /// avoids building an explicit data structure representation of the
+ /// when any of its embedded analysis results end up invalidated. We pass an
+ /// \c Invalidator object as an argument to \c invalidate() in order to let
+ /// the analysis results themselves define the dependency graph on the fly.
+/// This lets us avoid building an explicit representation of the
/// dependencies between analysis results.
class Invalidator {
public:
/// Trigger the invalidation of some other analysis pass if not already
- /// handled and return whether it will in fact be invalidated.
+ /// handled and return whether it was in fact invalidated.
///
/// This is expected to be called from within a given analysis result's \c
/// invalidate method to trigger a depth-first walk of all inter-analysis
/// dependencies. The same \p IR unit and \p PA passed to that result's \c
/// invalidate method should in turn be provided to this routine.
///
- /// The first time this is called for a given analysis pass, it will
- /// trigger the corresponding result's \c invalidate method to be called.
- /// Subsequent calls will use a cache of the results of that initial call.
- /// It is an error to form cyclic dependencies between analysis results.
+ /// The first time this is called for a given analysis pass, it will call
+ /// the corresponding result's \c invalidate method. Subsequent calls will
+ /// use a cache of the results of that initial call. It is an error to form
+ /// cyclic dependencies between analysis results.
///
- /// This returns true if the given analysis pass's result is invalid and
- /// any dependecies on it will become invalid as a result.
+ /// This returns true if the given analysis's result is invalid. Any
+ /// dependencies on it will become invalid as a result.
template <typename PassT>
bool invalidate(IRUnitT &IR, const PreservedAnalyses &PA) {
typedef detail::AnalysisResultModel<IRUnitT, PassT,
@@ -577,10 +573,10 @@ public:
auto &Result = static_cast<ResultT &>(*RI->second->second);
- // Insert into the map whether the result should be invalidated and
- // return that. Note that we cannot re-use IMapI and must do a fresh
- // insert here as calling the invalidate routine could (recursively)
- // insert things into the map making any iterator or reference invalid.
+ // Insert into the map whether the result should be invalidated and return
+ // that. Note that we cannot reuse IMapI and must do a fresh insert here,
+ // as calling invalidate could (recursively) insert things into the map,
+ // making any iterator or reference invalid.
bool Inserted;
std::tie(IMapI, Inserted) =
IsResultInvalidated.insert({ID, Result.invalidate(IR, PA, *this)});
@@ -600,8 +596,7 @@ public:
/// \brief Construct an empty analysis manager.
///
- /// A flag can be passed to indicate that the manager should perform debug
- /// logging.
+ /// If \p DebugLogging is true, we'll log our progress to llvm::dbgs().
AnalysisManager(bool DebugLogging = false) : DebugLogging(DebugLogging) {}
AnalysisManager(AnalysisManager &&) = default;
AnalysisManager &operator=(AnalysisManager &&) = default;
@@ -614,11 +609,11 @@ public:
return AnalysisResults.empty();
}
- /// \brief Clear any results for a single unit of IR.
+ /// \brief Clear any cached analysis results for a single unit of IR.
///
- /// This doesn't invalidate but directly clears the results. It is useful
- /// when the IR is being removed and we want to clear out all the memory
- /// pinned for it.
+ /// This doesn't invalidate, but instead simply deletes, the relevant results.
+ /// It is useful when the IR is being removed and we want to clear out all the
+ /// memory pinned for it.
void clear(IRUnitT &IR) {
if (DebugLogging)
dbgs() << "Clearing all analysis results for: " << IR.getName() << "\n";
@@ -626,7 +621,7 @@ public:
auto ResultsListI = AnalysisResultLists.find(&IR);
if (ResultsListI == AnalysisResultLists.end())
return;
- // Clear the map pointing into the results list.
+ // Delete the map entries that point into the results list.
for (auto &IDAndResult : ResultsListI->second)
AnalysisResults.erase({IDAndResult.first, &IR});
@@ -634,21 +629,20 @@ public:
AnalysisResultLists.erase(ResultsListI);
}
- /// \brief Clear the analysis result cache.
+ /// \brief Clear all analysis results cached by this AnalysisManager.
///
- /// This routine allows cleaning up when the set of IR units itself has
- /// potentially changed, and thus we can't even look up a a result and
- /// invalidate it directly. Notably, this does *not* call invalidate
- /// functions as there is nothing to be done for them.
+ /// Like \c clear(IRUnitT&), this doesn't invalidate the results; it simply
+ /// deletes them. This lets you clean up the AnalysisManager when the set of
+ /// IR units itself has potentially changed, and thus we can't even look up
+ /// a result and invalidate/clear it directly.
void clear() {
AnalysisResults.clear();
AnalysisResultLists.clear();
}
- /// \brief Get the result of an analysis pass for this module.
+ /// \brief Get the result of an analysis pass for a given IR unit.
///
- /// If there is not a valid cached result in the manager already, this will
- /// re-run the analysis to produce a valid result.
+ /// Runs the analysis if a cached result is not available.
template <typename PassT>
typename PassT::Result &getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs) {
assert(AnalysisPasses.count(PassT::ID()) &&
@@ -661,7 +655,7 @@ public:
return static_cast<ResultModelT &>(ResultConcept).Result;
}
- /// \brief Get the cached result of an analysis pass for this module.
+ /// \brief Get the cached result of an analysis pass for a given IR unit.
///
/// This method never runs the analysis.
///
@@ -683,22 +677,21 @@ public:
/// \brief Register an analysis pass with the manager.
///
- /// The argument is a callable whose result is a pass. This allows passing in
- /// a lambda to construct the pass.
+ /// The parameter is a callable whose result is an analysis pass. This allows
+ /// passing in a lambda to construct the analysis.
///
- /// The pass type registered is the result type of calling the argument. If
- /// that pass has already been registered, then the argument will not be
- /// called and this function will return false. Otherwise, the pass type
- /// becomes registered, with the instance provided by calling the argument
- /// once, and this function returns true.
+ /// The analysis type to register is the type returned by calling the \c
+ /// PassBuilder argument. If that type has already been registered, then the
+ /// argument will not be called and this function will return false.
+ /// Otherwise, we register the analysis returned by calling \c PassBuilder(),
+ /// and this function returns true.
///
- /// While this returns whether or not the pass type was already registered,
- /// there in't an independent way to query that as that would be prone to
- /// risky use when *querying* the analysis manager. Instead, the only
- /// supported use case is avoiding duplicate registry of an analysis. This
- /// interface also lends itself to minimizing the number of times we have to
- /// do lookups for analyses or construct complex passes only to throw them
- /// away.
+ /// (Note: Although the return value of this function indicates whether or not
+ /// an analysis was previously registered, there intentionally isn't a way to
+ /// query this directly. Instead, you should just register all the analyses
+ /// you might want and let this class run them lazily. This idiom lets us
+ /// minimize the number of times we have to look up analyses in our
+ /// hashtable.)
template <typename PassBuilderT>
bool registerPass(PassBuilderT &&PassBuilder) {
typedef decltype(PassBuilder()) PassT;
@@ -718,17 +711,18 @@ public:
/// \brief Invalidate a specific analysis pass for an IR module.
///
- /// Note that the analysis result can disregard invalidation.
+ /// Note that the analysis result can disregard invalidation, if it determines
+ /// it is in fact still valid.
template <typename PassT> void invalidate(IRUnitT &IR) {
assert(AnalysisPasses.count(PassT::ID()) &&
"This analysis pass was not registered prior to being invalidated");
invalidateImpl(PassT::ID(), IR);
}
- /// \brief Invalidate analyses cached for an IR unit.
+ /// \brief Invalidate cached analyses for an IR unit.
///
/// Walk through all of the analyses pertaining to this unit of IR and
- /// invalidate them unless they are preserved by the PreservedAnalyses set.
+ /// invalidate them, unless they are preserved by the PreservedAnalyses set.
void invalidate(IRUnitT &IR, const PreservedAnalyses &PA) {
// We're done if all analyses on this IR unit are preserved.
if (PA.allAnalysesInSetPreserved<AllAnalysesOn<IRUnitT>>())
@@ -738,8 +732,8 @@ public:
dbgs() << "Invalidating all non-preserved analyses for: " << IR.getName()
<< "\n";
- // Track whether each pass's result is invalidated. Memoize the results
- // using the IsResultInvalidated map.
+ // Track whether each analysis's result is invalidated in
+ // IsResultInvalidated.
SmallDenseMap<AnalysisKey *, bool, 8> IsResultInvalidated;
Invalidator Inv(IsResultInvalidated, AnalysisResults);
AnalysisResultListT &ResultsList = AnalysisResultLists[&IR];
@@ -758,9 +752,9 @@ public:
// Try to invalidate the result, giving it the Invalidator so it can
// recursively query for any dependencies it has and record the result.
- // Note that we cannot re-use 'IMapI' here or pre-insert the ID as the
- // invalidate method may insert things into the map as well, invalidating
- // any iterator or pointer.
+ // Note that we cannot reuse 'IMapI' here or pre-insert the ID, as
+ // Result.invalidate may insert things into the map, invalidating our
+ // iterator.
bool Inserted =
IsResultInvalidated.insert({ID, Result.invalidate(IR, PA, Inv)})
.second;
@@ -873,7 +867,7 @@ private:
/// analysis result.
AnalysisResultMapT AnalysisResults;
- /// \brief A flag indicating whether debug logging is enabled.
+ /// \brief Indicates whether we log to \c llvm::dbgs().
bool DebugLogging;
};
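A minimal sketch of the registration and run flow documented above; MyAnalysis
and MyPass are hypothetical placeholders, not passes from this import:

#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"

using namespace llvm;

// Hypothetical analysis: counts instructions and lets the manager cache it.
struct MyAnalysis : AnalysisInfoMixin<MyAnalysis> {
  struct Result { unsigned NumInsts; };
  Result run(Function &F, FunctionAnalysisManager &) {
    unsigned N = 0;
    for (BasicBlock &BB : F)
      N += BB.size();
    return Result{N};
  }
  static AnalysisKey Key;
};
AnalysisKey MyAnalysis::Key;

// Hypothetical pass: queries the analysis through the manager.
struct MyPass : PassInfoMixin<MyPass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM) {
    (void)FAM.getResult<MyAnalysis>(F).NumInsts;
    // Nothing was modified, so all cached analyses remain valid.
    return PreservedAnalyses::all();
  }
};

void runPipeline(Function &F) {
  FunctionAnalysisManager FAM(/*DebugLogging=*/true);
  // registerPass takes a callable returning the analysis; registering the
  // same analysis type twice is a no-op that returns false.
  FAM.registerPass([] { return MyAnalysis(); });

  FunctionPassManager FPM(/*DebugLogging=*/true);
  FPM.addPass(MyPass());
  // run() forwards FAM to each pass and intersects the PreservedAnalyses
  // each pass returns, invalidating stale cached results along the way.
  PreservedAnalyses PA = FPM.run(F, FAM);
  (void)PA;
}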
diff --git a/include/llvm/LTO/LTO.h b/include/llvm/LTO/LTO.h
index bc435702157e..78ac73a7418c 100644
--- a/include/llvm/LTO/LTO.h
+++ b/include/llvm/LTO/LTO.h
@@ -382,6 +382,10 @@ private:
/// The unmangled name of the global.
std::string IRName;
+ /// Keep track of whether the symbol is visible outside of ThinLTO (i.e. in
+ /// either a regular object or the regular LTO partition).
+ bool VisibleOutsideThinLTO = false;
+
bool UnnamedAddr = true;
/// This field keeps track of the partition number of this global. The
@@ -405,6 +409,9 @@ private:
/// This global is either used by more than one partition or has an
/// external reference, and therefore cannot be internalized.
External = -2u,
+
+ /// The RegularLTO partition
+ RegularLTO = 0,
};
};
diff --git a/include/llvm/MC/MCTargetOptions.h b/include/llvm/MC/MCTargetOptions.h
index a300c4f6fb00..25642379ac9f 100644
--- a/include/llvm/MC/MCTargetOptions.h
+++ b/include/llvm/MC/MCTargetOptions.h
@@ -11,6 +11,7 @@
#define LLVM_MC_MCTARGETOPTIONS_H
#include <string>
+#include <vector>
namespace llvm {
@@ -51,11 +52,17 @@ public:
bool PreserveAsmComments : 1;
int DwarfVersion;
+
/// getABIName - If this returns a non-empty string this represents the
/// textual name of the ABI that we want the backend to use, e.g. o32, or
/// aapcs-linux.
StringRef getABIName() const;
std::string ABIName;
+
+ /// Additional paths to search for `.include` directives when using the
+ /// integrated assembler.
+ std::vector<std::string> IASSearchPaths;
+
MCTargetOptions();
};
@@ -75,7 +82,8 @@ inline bool operator==(const MCTargetOptions &LHS, const MCTargetOptions &RHS) {
ARE_EQUAL(ShowMCInst) &&
ARE_EQUAL(AsmVerbose) &&
ARE_EQUAL(DwarfVersion) &&
- ARE_EQUAL(ABIName));
+ ARE_EQUAL(ABIName) &&
+ ARE_EQUAL(IASSearchPaths));
#undef ARE_EQUAL
}
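A small sketch of how a driver might populate the new field; the directory
names are invented for illustration:

#include "llvm/MC/MCTargetOptions.h"

llvm::MCTargetOptions makeOptions() {
  llvm::MCTargetOptions Opts;
  // Directories the integrated assembler will search when resolving
  //   .include "foo.inc"
  // (hypothetical paths, for illustration only).
  Opts.IASSearchPaths.push_back("build/include");
  Opts.IASSearchPaths.push_back("/usr/local/share/asm");
  return Opts;
}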
diff --git a/include/llvm/Support/FileSystem.h b/include/llvm/Support/FileSystem.h
index 586999794d50..ad21d8af66e9 100644
--- a/include/llvm/Support/FileSystem.h
+++ b/include/llvm/Support/FileSystem.h
@@ -27,7 +27,6 @@
#ifndef LLVM_SUPPORT_FILESYSTEM_H
#define LLVM_SUPPORT_FILESYSTEM_H
-#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
@@ -37,6 +36,7 @@
#include <cassert>
#include <cstdint>
#include <ctime>
+#include <memory>
#include <stack>
#include <string>
#include <system_error>
@@ -829,28 +829,23 @@ public:
};
namespace detail {
- /// RecDirIterState - Keeps state for the recursive_directory_iterator. It is
- /// reference counted in order to preserve InputIterator semantics on copy.
- struct RecDirIterState : public RefCountedBase<RecDirIterState> {
- RecDirIterState()
- : Level(0)
- , HasNoPushRequest(false) {}
-
+ /// Keeps state for the recursive_directory_iterator.
+ struct RecDirIterState {
std::stack<directory_iterator, std::vector<directory_iterator>> Stack;
- uint16_t Level;
- bool HasNoPushRequest;
+ uint16_t Level = 0;
+ bool HasNoPushRequest = false;
};
} // end namespace detail
/// recursive_directory_iterator - Same as directory_iterator except for it
/// recurses down into child directories.
class recursive_directory_iterator {
- IntrusiveRefCntPtr<detail::RecDirIterState> State;
+ std::shared_ptr<detail::RecDirIterState> State;
public:
recursive_directory_iterator() = default;
explicit recursive_directory_iterator(const Twine &path, std::error_code &ec)
- : State(new detail::RecDirIterState) {
+ : State(std::make_shared<detail::RecDirIterState>()) {
State->Stack.push(directory_iterator(path, ec));
if (State->Stack.top() == directory_iterator())
State.reset();
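For reference, a minimal sketch of typical iterator usage, which the switch
from IntrusiveRefCntPtr to std::shared_ptr does not change:

#include "llvm/ADT/Twine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"

// Walk a directory tree, printing every path visited. The shared state keeps
// copies of the iterator in sync, preserving InputIterator semantics.
void walk(const llvm::Twine &Root) {
  std::error_code EC;
  for (llvm::sys::fs::recursive_directory_iterator I(Root, EC), E;
       I != E && !EC; I.increment(EC))
    llvm::outs() << I->path() << "\n";
}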
diff --git a/include/llvm/Support/TarWriter.h b/include/llvm/Support/TarWriter.h
new file mode 100644
index 000000000000..44bdcaf2c465
--- /dev/null
+++ b/include/llvm/Support/TarWriter.h
@@ -0,0 +1,32 @@
+//===-- llvm/Support/TarWriter.h - Tar archive file creator -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_TAR_WRITER_H
+#define LLVM_SUPPORT_TAR_WRITER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+class TarWriter {
+public:
+ static Expected<std::unique_ptr<TarWriter>> create(StringRef OutputPath,
+ StringRef BaseDir);
+
+ void append(StringRef Path, StringRef Data);
+
+private:
+ TarWriter(int FD, StringRef BaseDir);
+ raw_fd_ostream OS;
+ std::string BaseDir;
+};
+}
+
+#endif
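A short usage sketch for the new interface, following the usual llvm::Expected
error-handling idiom; the file names and contents are invented:

#include "llvm/Support/TarWriter.h"
#include <memory>

// Create foo.tar and append two members, using "basedir" as the base
// directory passed to TarWriter::create.
llvm::Error writeTar() {
  auto TarOrErr = llvm::TarWriter::create("foo.tar", "basedir");
  if (!TarOrErr)
    return TarOrErr.takeError();
  std::unique_ptr<llvm::TarWriter> Tar = std::move(*TarOrErr);
  Tar->append("readme.txt", "hello\n");
  Tar->append("sub/dir/data.txt", "payload\n");
  return llvm::Error::success();
}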
diff --git a/include/llvm/Transforms/IPO/FunctionImport.h b/include/llvm/Transforms/IPO/FunctionImport.h
index d7acbe883c5d..eaea092c9179 100644
--- a/include/llvm/Transforms/IPO/FunctionImport.h
+++ b/include/llvm/Transforms/IPO/FunctionImport.h
@@ -86,11 +86,15 @@ public:
/// \p ExportLists contains for each Module the set of globals (GUID) that will
/// be imported by another module, or referenced by such a function. I.e. this
/// is the set of globals that need to be promoted/renamed appropriately.
+///
+/// \p DeadSymbols (optional) contains a list of GUIDs that are deemed "dead" and
+/// will be ignored for the purpose of importing.
void ComputeCrossModuleImport(
const ModuleSummaryIndex &Index,
const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
StringMap<FunctionImporter::ImportMapTy> &ImportLists,
- StringMap<FunctionImporter::ExportSetTy> &ExportLists);
+ StringMap<FunctionImporter::ExportSetTy> &ExportLists,
+ const DenseSet<GlobalValue::GUID> *DeadSymbols = nullptr);
/// Compute all the imports for the given module using the Index.
///
@@ -100,6 +104,13 @@ void ComputeCrossModuleImportForModule(
StringRef ModulePath, const ModuleSummaryIndex &Index,
FunctionImporter::ImportMapTy &ImportList);
+/// Compute all the symbols that are "dead", i.e. those that can't be reached
+/// in the graph from any of the given symbols listed in
+/// \p GUIDPreservedSymbols.
+DenseSet<GlobalValue::GUID>
+computeDeadSymbols(const ModuleSummaryIndex &Index,
+ const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols);
+
/// Compute the set of summaries needed for a ThinLTO backend compilation of
/// \p ModulePath.
//
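A rough sketch of how a ThinLTO driver might chain the two entry points above,
feeding the computed dead set into the import computation; the wrapper
function and variable names are illustrative:

#include "llvm/ADT/DenseSet.h"
#include "llvm/Transforms/IPO/FunctionImport.h"

using namespace llvm;

void planImports(const ModuleSummaryIndex &Index,
                 const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
                 const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
                 StringMap<FunctionImporter::ImportMapTy> &ImportLists,
                 StringMap<FunctionImporter::ExportSetTy> &ExportLists) {
  // Symbols unreachable from any preserved root are dead and can be ignored
  // while computing the import/export lists.
  DenseSet<GlobalValue::GUID> DeadSymbols =
      computeDeadSymbols(Index, GUIDPreservedSymbols);
  ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
                           ExportLists, &DeadSymbols);
}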
diff --git a/include/llvm/Transforms/IPO/LowerTypeTests.h b/include/llvm/Transforms/IPO/LowerTypeTests.h
index 23c59c199a3b..ca6e1b878dff 100644
--- a/include/llvm/Transforms/IPO/LowerTypeTests.h
+++ b/include/llvm/Transforms/IPO/LowerTypeTests.h
@@ -60,10 +60,6 @@ struct BitSetInfo {
bool containsGlobalOffset(uint64_t Offset) const;
- bool containsValue(const DataLayout &DL,
- const DenseMap<GlobalObject *, uint64_t> &GlobalLayout,
- Value *V, uint64_t COffset = 0) const;
-
void print(raw_ostream &OS) const;
};
diff --git a/include/llvm/Transforms/Utils/FunctionImportUtils.h b/include/llvm/Transforms/Utils/FunctionImportUtils.h
index 57b7d0fcd7cc..f18cd92310b4 100644
--- a/include/llvm/Transforms/Utils/FunctionImportUtils.h
+++ b/include/llvm/Transforms/Utils/FunctionImportUtils.h
@@ -40,9 +40,20 @@ class FunctionImportGlobalProcessing {
/// as part of a different backend compilation process.
bool HasExportedFunctions = false;
+ /// Set of llvm.*used values, in order to validate that we don't try
+ /// to promote any non-renamable values.
+ SmallPtrSet<GlobalValue *, 8> Used;
+
/// Check if we should promote the given local value to global scope.
bool shouldPromoteLocalToGlobal(const GlobalValue *SGV);
+#ifndef NDEBUG
+ /// Check if the given value is a local that can't be renamed (promoted).
+ /// Only used in assertion checking, and disabled under NDEBUG since the Used
+ /// set will not be populated.
+ bool isNonRenamableLocal(const GlobalValue &GV) const;
+#endif
+
/// Helper methods to check if we are importing from or potentially
/// exporting from the current source module.
bool isPerformingImport() const { return GlobalsToImport != nullptr; }
@@ -82,6 +93,13 @@ public:
// may be exported to another backend compilation.
if (!GlobalsToImport)
HasExportedFunctions = ImportIndex.hasExportedFunctions(M);
+
+#ifndef NDEBUG
+ // First collect those in the llvm.used set.
+ collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ false);
+ // Next collect those in the llvm.compiler.used set.
+ collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ true);
+#endif
}
bool run();
diff --git a/include/llvm/module.modulemap b/include/llvm/module.modulemap
index a86bc7e7fcbf..29e6d66b27ff 100644
--- a/include/llvm/module.modulemap
+++ b/include/llvm/module.modulemap
@@ -151,6 +151,7 @@ module LLVM_intrinsic_gen {
module IR_NoFolder { header "IR/NoFolder.h" export * }
module IR_Module { header "IR/Module.h" export * }
module IR_ModuleSummaryIndex { header "IR/ModuleSummaryIndex.h" export * }
+ module IR_ModuleSummaryIndexYAML { header "IR/ModuleSummaryIndexYAML.h" export * }
module IR_Function { header "IR/Function.h" export * }
module IR_InstrTypes { header "IR/InstrTypes.h" export * }
module IR_Instructions { header "IR/Instructions.h" export * }
diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp
index 1d2ffc1abe1f..6387bb36166e 100644
--- a/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -80,10 +80,15 @@ static CalleeInfo::HotnessType getHotness(uint64_t ProfileCount,
return CalleeInfo::HotnessType::None;
}
-static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
- const Function &F, BlockFrequencyInfo *BFI,
- ProfileSummaryInfo *PSI,
- bool HasLocalsInUsed) {
+static bool isNonRenamableLocal(const GlobalValue &GV) {
+ return GV.hasSection() && GV.hasLocalLinkage();
+}
+
+static void
+computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
+ const Function &F, BlockFrequencyInfo *BFI,
+ ProfileSummaryInfo *PSI, bool HasLocalsInUsed,
+ DenseSet<GlobalValue::GUID> &CantBePromoted) {
// Summary not currently supported for anonymous functions, they should
// have been named.
assert(F.hasName());
@@ -178,37 +183,64 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
}
}
- GlobalValueSummary::GVFlags Flags(F);
+ bool NonRenamableLocal = isNonRenamableLocal(F);
+ bool NotEligibleForImport =
+ NonRenamableLocal || HasInlineAsmMaybeReferencingInternal ||
+ // Inliner doesn't handle variadic functions.
+ // FIXME: refactor this to use the same code that inliner is using.
+ F.isVarArg();
+ GlobalValueSummary::GVFlags Flags(F.getLinkage(), NotEligibleForImport,
+ /* LiveRoot = */ false);
auto FuncSummary = llvm::make_unique<FunctionSummary>(
Flags, NumInsts, RefEdges.takeVector(), CallGraphEdges.takeVector(),
TypeTests.takeVector());
- if (HasInlineAsmMaybeReferencingInternal)
- FuncSummary->setHasInlineAsmMaybeReferencingInternal();
+ if (NonRenamableLocal)
+ CantBePromoted.insert(F.getGUID());
Index.addGlobalValueSummary(F.getName(), std::move(FuncSummary));
}
-static void computeVariableSummary(ModuleSummaryIndex &Index,
- const GlobalVariable &V) {
+static void
+computeVariableSummary(ModuleSummaryIndex &Index, const GlobalVariable &V,
+ DenseSet<GlobalValue::GUID> &CantBePromoted) {
SetVector<ValueInfo> RefEdges;
SmallPtrSet<const User *, 8> Visited;
findRefEdges(&V, RefEdges, Visited);
- GlobalValueSummary::GVFlags Flags(V);
+ bool NonRenamableLocal = isNonRenamableLocal(V);
+ GlobalValueSummary::GVFlags Flags(V.getLinkage(), NonRenamableLocal,
+ /* LiveRoot = */ false);
auto GVarSummary =
llvm::make_unique<GlobalVarSummary>(Flags, RefEdges.takeVector());
+ if (NonRenamableLocal)
+ CantBePromoted.insert(V.getGUID());
Index.addGlobalValueSummary(V.getName(), std::move(GVarSummary));
}
-static void computeAliasSummary(ModuleSummaryIndex &Index,
- const GlobalAlias &A) {
- GlobalValueSummary::GVFlags Flags(A);
+static void
+computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A,
+ DenseSet<GlobalValue::GUID> &CantBePromoted) {
+ bool NonRenamableLocal = isNonRenamableLocal(A);
+ GlobalValueSummary::GVFlags Flags(A.getLinkage(), NonRenamableLocal,
+ /* LiveRoot = */ false);
auto AS = llvm::make_unique<AliasSummary>(Flags, ArrayRef<ValueInfo>{});
auto *Aliasee = A.getBaseObject();
auto *AliaseeSummary = Index.getGlobalValueSummary(*Aliasee);
assert(AliaseeSummary && "Alias expects aliasee summary to be parsed");
AS->setAliasee(AliaseeSummary);
+ if (NonRenamableLocal)
+ CantBePromoted.insert(A.getGUID());
Index.addGlobalValueSummary(A.getName(), std::move(AS));
}
+// Set LiveRoot flag on entries matching the given value name.
+static void setLiveRoot(ModuleSummaryIndex &Index, StringRef Name) {
+ auto SummaryList =
+ Index.findGlobalValueSummaryList(GlobalValue::getGUID(Name));
+ if (SummaryList == Index.end())
+ return;
+ for (auto &Summary : SummaryList->second)
+ Summary->setLiveRoot();
+}
+
ModuleSummaryIndex llvm::buildModuleSummaryIndex(
const Module &M,
std::function<BlockFrequencyInfo *(const Function &F)> GetBFICallback,
@@ -226,9 +258,12 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ false);
// Next collect those in the llvm.compiler.used set.
collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ true);
+ DenseSet<GlobalValue::GUID> CantBePromoted;
for (auto *V : Used) {
- if (V->hasLocalLinkage())
+ if (V->hasLocalLinkage()) {
LocalsUsed.insert(V);
+ CantBePromoted.insert(V->getGUID());
+ }
}
// Compute summaries for all functions defined in module, and save in the
@@ -248,7 +283,8 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
BFI = BFIPtr.get();
}
- computeFunctionSummary(Index, M, F, BFI, PSI, !LocalsUsed.empty());
+ computeFunctionSummary(Index, M, F, BFI, PSI, !LocalsUsed.empty(),
+ CantBePromoted);
}
// Compute summaries for all variables defined in module, and save in the
@@ -256,20 +292,29 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
for (const GlobalVariable &G : M.globals()) {
if (G.isDeclaration())
continue;
- computeVariableSummary(Index, G);
+ computeVariableSummary(Index, G, CantBePromoted);
}
// Compute summaries for all aliases defined in module, and save in the
// index.
for (const GlobalAlias &A : M.aliases())
- computeAliasSummary(Index, A);
+ computeAliasSummary(Index, A, CantBePromoted);
for (auto *V : LocalsUsed) {
auto *Summary = Index.getGlobalValueSummary(*V);
assert(Summary && "Missing summary for global value");
- Summary->setNoRename();
+ Summary->setNotEligibleToImport();
}
+ // The linker doesn't know about these LLVM-produced values, so we need
+ // to flag them as live in the index to ensure that index-based dead value
+ // analysis treats them as live roots.
+ setLiveRoot(Index, "llvm.used");
+ setLiveRoot(Index, "llvm.compiler.used");
+ setLiveRoot(Index, "llvm.global_ctors");
+ setLiveRoot(Index, "llvm.global_dtors");
+ setLiveRoot(Index, "llvm.global.annotations");
+
if (!M.getModuleInlineAsm().empty()) {
// Collect the local values defined by module level asm, and set up
// summaries for these symbols so that they can be marked as NoRename,
@@ -282,7 +327,8 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
// referenced from there.
ModuleSymbolTable::CollectAsmSymbols(
Triple(M.getTargetTriple()), M.getModuleInlineAsm(),
- [&M, &Index](StringRef Name, object::BasicSymbolRef::Flags Flags) {
+ [&M, &Index, &CantBePromoted](StringRef Name,
+ object::BasicSymbolRef::Flags Flags) {
// Symbols not marked as Weak or Global are local definitions.
if (Flags & (object::BasicSymbolRef::SF_Weak |
object::BasicSymbolRef::SF_Global))
@@ -291,11 +337,10 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
if (!GV)
return;
assert(GV->isDeclaration() && "Def in module asm already has definition");
- GlobalValueSummary::GVFlags GVFlags(
- GlobalValue::InternalLinkage,
- /* NoRename */ true,
- /* HasInlineAsmMaybeReferencingInternal */ false,
- /* IsNotViableToInline */ true);
+ GlobalValueSummary::GVFlags GVFlags(GlobalValue::InternalLinkage,
+ /* NotEligibleToImport */ true,
+ /* LiveRoot */ true);
+ CantBePromoted.insert(GlobalValue::getGUID(Name));
// Create the appropriate summary type.
if (isa<Function>(GV)) {
std::unique_ptr<FunctionSummary> Summary =
@@ -303,18 +348,41 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
GVFlags, 0, ArrayRef<ValueInfo>{},
ArrayRef<FunctionSummary::EdgeTy>{},
ArrayRef<GlobalValue::GUID>{});
- Summary->setNoRename();
Index.addGlobalValueSummary(Name, std::move(Summary));
} else {
std::unique_ptr<GlobalVarSummary> Summary =
llvm::make_unique<GlobalVarSummary>(GVFlags,
ArrayRef<ValueInfo>{});
- Summary->setNoRename();
Index.addGlobalValueSummary(Name, std::move(Summary));
}
});
}
+ for (auto &GlobalList : Index) {
+ assert(GlobalList.second.size() == 1 &&
+ "Expected module's index to have one summary per GUID");
+ auto &Summary = GlobalList.second[0];
+ bool AllRefsCanBeExternallyReferenced =
+ llvm::all_of(Summary->refs(), [&](const ValueInfo &VI) {
+ return !CantBePromoted.count(VI.getValue()->getGUID());
+ });
+ if (!AllRefsCanBeExternallyReferenced) {
+ Summary->setNotEligibleToImport();
+ continue;
+ }
+
+ if (auto *FuncSummary = dyn_cast<FunctionSummary>(Summary.get())) {
+ bool AllCallsCanBeExternallyReferenced = llvm::all_of(
+ FuncSummary->calls(), [&](const FunctionSummary::EdgeTy &Edge) {
+ auto GUID = Edge.first.isGUID() ? Edge.first.getGUID()
+ : Edge.first.getValue()->getGUID();
+ return !CantBePromoted.count(GUID);
+ });
+ if (!AllCallsCanBeExternallyReferenced)
+ Summary->setNotEligibleToImport();
+ }
+ }
+
return Index;
}
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 2a15b9b264e3..cd8c24630df1 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -389,8 +389,9 @@ unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {
}
int TargetTransformInfo::getAddressComputationCost(Type *Tp,
- bool IsComplex) const {
- int Cost = TTIImpl->getAddressComputationCost(Tp, IsComplex);
+ ScalarEvolution *SE,
+ const SCEV *Ptr) const {
+ int Cost = TTIImpl->getAddressComputationCost(Tp, SE, Ptr);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 03aefcf57118..d9e249aad21d 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -801,12 +801,12 @@ static GlobalValueSummary::GVFlags getDecodedGVSummaryFlags(uint64_t RawFlags,
// to getDecodedLinkage() will need to be taken into account here as above.
auto Linkage = GlobalValue::LinkageTypes(RawFlags & 0xF); // 4 bits
RawFlags = RawFlags >> 4;
- bool NoRename = RawFlags & 0x1;
- bool IsNotViableToInline = RawFlags & 0x2;
- bool HasInlineAsmMaybeReferencingInternal = RawFlags & 0x4;
- return GlobalValueSummary::GVFlags(Linkage, NoRename,
- HasInlineAsmMaybeReferencingInternal,
- IsNotViableToInline);
+ bool NotEligibleToImport = (RawFlags & 0x1) || Version < 3;
+ // The LiveRoot flag wasn't introduced until version 3. For dead stripping
+ // to work correctly on earlier versions, we must conservatively treat all
+ // values as live.
+ bool LiveRoot = (RawFlags & 0x2) || Version < 3;
+ return GlobalValueSummary::GVFlags(Linkage, NotEligibleToImport, LiveRoot);
}
static GlobalValue::VisibilityTypes getDecodedVisibility(unsigned Val) {
@@ -4838,9 +4838,9 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(
}
const uint64_t Version = Record[0];
const bool IsOldProfileFormat = Version == 1;
- if (!IsOldProfileFormat && Version != 2)
+ if (Version < 1 || Version > 3)
return error("Invalid summary version " + Twine(Version) +
- ", 1 or 2 expected");
+ ", 1, 2 or 3 expected");
Record.clear();
// Keep around the last seen summary to be used when we see an optional
diff --git a/lib/Bitcode/Reader/BitstreamReader.cpp b/lib/Bitcode/Reader/BitstreamReader.cpp
index 43c9aebd79ef..771cf3d927bc 100644
--- a/lib/Bitcode/Reader/BitstreamReader.cpp
+++ b/lib/Bitcode/Reader/BitstreamReader.cpp
@@ -93,20 +93,29 @@ static void skipAbbreviatedField(BitstreamCursor &Cursor,
}
/// skipRecord - Read the current record and discard it.
-void BitstreamCursor::skipRecord(unsigned AbbrevID) {
+unsigned BitstreamCursor::skipRecord(unsigned AbbrevID) {
// Skip unabbreviated records by reading past their entries.
if (AbbrevID == bitc::UNABBREV_RECORD) {
unsigned Code = ReadVBR(6);
- (void)Code;
unsigned NumElts = ReadVBR(6);
for (unsigned i = 0; i != NumElts; ++i)
(void)ReadVBR64(6);
- return;
+ return Code;
}
const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID);
+ const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
+ unsigned Code;
+ if (CodeOp.isLiteral())
+ Code = CodeOp.getLiteralValue();
+ else {
+ if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
+ CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
+ report_fatal_error("Abbreviation starts with an Array or a Blob");
+ Code = readAbbreviatedField(*this, CodeOp);
+ }
- for (unsigned i = 0, e = Abbv->getNumOperandInfos(); i != e; ++i) {
+ for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i < e; ++i) {
const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
if (Op.isLiteral())
continue;
@@ -164,6 +173,7 @@ void BitstreamCursor::skipRecord(unsigned AbbrevID) {
// Skip over the blob.
JumpToBit(NewEnd);
}
+ return Code;
}
unsigned BitstreamCursor::readRecord(unsigned AbbrevID,
@@ -273,7 +283,7 @@ unsigned BitstreamCursor::readRecord(unsigned AbbrevID,
}
void BitstreamCursor::ReadAbbrevRecord() {
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
unsigned NumOpInfo = ReadVBR(5);
for (unsigned i = 0; i != NumOpInfo; ++i) {
bool IsLiteral = Read(1);
@@ -307,7 +317,7 @@ void BitstreamCursor::ReadAbbrevRecord() {
if (Abbv->getNumOperandInfos() == 0)
report_fatal_error("Abbrev record with no operands");
- CurAbbrevs.push_back(Abbv);
+ CurAbbrevs.push_back(std::move(Abbv));
}
Optional<BitstreamBlockInfo>
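A brief sketch of the pattern the new skipRecord return value enables (and
which the metadata loader below uses): skim records cheaply, and only rewind
to fully read the ones of interest. The helper and the chosen record code are
illustrative:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Bitcode/BitstreamReader.h"
#include "llvm/Bitcode/LLVMBitCodes.h"

using namespace llvm;

// Skip a record's operands, then rewind and fully read it only if the
// returned code says it is one we care about.
void skimRecord(BitstreamCursor &Cursor, const BitstreamEntry &Entry) {
  uint64_t StartBit = Cursor.GetCurrentBitNo();
  unsigned Code = Cursor.skipRecord(Entry.ID);
  if (Code != bitc::METADATA_STRINGS)
    return;
  Cursor.JumpToBit(StartBit);
  SmallVector<uint64_t, 64> Record;
  StringRef Blob;
  Cursor.readRecord(Entry.ID, Record, &Blob);
}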
diff --git a/lib/Bitcode/Reader/MetadataLoader.cpp b/lib/Bitcode/Reader/MetadataLoader.cpp
index 5da421a79b7b..460d39cc28d8 100644
--- a/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -14,10 +14,12 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
@@ -86,12 +88,23 @@
using namespace llvm;
+#define DEBUG_TYPE "bitcode-reader"
+
+STATISTIC(NumMDStringLoaded, "Number of MDStrings loaded");
+STATISTIC(NumMDNodeTemporary, "Number of MDNode::Temporary created");
+STATISTIC(NumMDRecordLoaded, "Number of Metadata records loaded");
+
/// Flag whether we need to import full type definitions for ThinLTO.
/// Currently needed for Darwin and LLDB.
static cl::opt<bool> ImportFullTypeDefinitions(
"import-full-type-definitions", cl::init(false), cl::Hidden,
cl::desc("Import full type definitions for ThinLTO."));
+static cl::opt<bool> DisableLazyLoading(
+ "disable-ondemand-mds-loading", cl::init(false), cl::Hidden,
+ cl::desc("Force disable the lazy-loading on-demand of metadata when "
+ "loading bitcode for importing."));
+
namespace {
static int64_t unrotateSign(uint64_t U) { return U & 1 ? ~(U >> 1) : U >> 1; }
@@ -165,6 +178,10 @@ public:
void assignValue(Metadata *MD, unsigned Idx);
void tryToResolveCycles();
bool hasFwdRefs() const { return !ForwardReference.empty(); }
+ int getNextFwdRef() {
+ assert(hasFwdRefs());
+ return *ForwardReference.begin();
+ }
/// Upgrade a type that had an MDString reference.
void addTypeRef(MDString &UUID, DICompositeType &CT);
@@ -215,6 +232,7 @@ Metadata *BitcodeReaderMetadataList::getMetadataFwdRef(unsigned Idx) {
ForwardReference.insert(Idx);
// Create and return a placeholder, which will later be RAUW'd.
+ ++NumMDNodeTemporary;
Metadata *MD = MDNode::getTemporary(Context, None).release();
MetadataPtrs[Idx].reset(MD);
return MD;
@@ -340,8 +358,26 @@ class PlaceholderQueue {
std::deque<DistinctMDOperandPlaceholder> PHs;
public:
+ bool empty() { return PHs.empty(); }
DistinctMDOperandPlaceholder &getPlaceholderOp(unsigned ID);
void flush(BitcodeReaderMetadataList &MetadataList);
+
+ /// Return the list of temporary nodes in the queue; these need to be
+ /// loaded before we can flush the queue.
+ void getTemporaries(BitcodeReaderMetadataList &MetadataList,
+ DenseSet<unsigned> &Temporaries) {
+ for (auto &PH : PHs) {
+ auto ID = PH.getID();
+ auto *MD = MetadataList.lookup(ID);
+ if (!MD) {
+ Temporaries.insert(ID);
+ continue;
+ }
+ auto *N = dyn_cast_or_null<MDNode>(MD);
+ if (N && N->isTemporary())
+ Temporaries.insert(ID);
+ }
+ }
};
} // end anonymous namespace
@@ -375,6 +411,30 @@ class MetadataLoader::MetadataLoaderImpl {
Module &TheModule;
std::function<Type *(unsigned)> getTypeByID;
+ /// Cursor associated with the lazy-loading of Metadata. This is the easy way
+ /// to keep around the right "context" (Abbrev list) to be able to jump into
+ /// the middle of the metadata block and load any record.
+ BitstreamCursor IndexCursor;
+
+ /// Index that keeps track of MDString values.
+ std::vector<StringRef> MDStringRef;
+
+ /// On-demand loading of a single MDString. Requires the index above to be
+ /// populated.
+ MDString *lazyLoadOneMDString(unsigned Idx);
+
+ /// Index that keeps track of where to find a metadata record in the stream.
+ std::vector<uint64_t> GlobalMetadataBitPosIndex;
+
+ /// Populate the index above to enable lazily loading of metadata, and load
+ /// the named metadata as well as the transitively referenced global
+ /// Metadata.
+ Expected<bool> lazyLoadModuleMetadataBlock(PlaceholderQueue &Placeholders);
+
+ /// On-demand loading of a single metadata. Requires the index above to be
+ /// populated.
+ void lazyLoadOneMetadata(unsigned Idx, PlaceholderQueue &Placeholders);
+
// Keep a mapping of seen pairs of old-style CU <-> SP, and update pointers to
// point from SP to CU after a block is completely parsed.
std::vector<std::pair<DICompileUnit *, Metadata *>> CUSubprograms;
@@ -394,13 +454,25 @@ class MetadataLoader::MetadataLoaderImpl {
Error parseOneMetadata(SmallVectorImpl<uint64_t> &Record, unsigned Code,
PlaceholderQueue &Placeholders, StringRef Blob,
- bool ModuleLevel, unsigned &NextMetadataNo);
+ unsigned &NextMetadataNo);
Error parseMetadataStrings(ArrayRef<uint64_t> Record, StringRef Blob,
- unsigned &NextMetadataNo);
+ std::function<void(StringRef)> CallBack);
Error parseGlobalObjectAttachment(GlobalObject &GO,
ArrayRef<uint64_t> Record);
Error parseMetadataKindRecord(SmallVectorImpl<uint64_t> &Record);
+ void resolveForwardRefsAndPlaceholders(PlaceholderQueue &Placeholders);
+
+ /// Upgrade old-style CU <-> SP pointers to point from SP to CU.
+ void upgradeCUSubprograms() {
+ for (auto CU_SP : CUSubprograms)
+ if (auto *SPs = dyn_cast_or_null<MDTuple>(CU_SP.second))
+ for (auto &Op : SPs->operands())
+ if (auto *SP = dyn_cast_or_null<MDNode>(Op))
+ SP->replaceOperandWith(7, CU_SP.first);
+ CUSubprograms.clear();
+ }
+
public:
MetadataLoaderImpl(BitstreamCursor &Stream, Module &TheModule,
BitcodeReaderValueList &ValueList,
@@ -444,20 +516,217 @@ Error error(const Twine &Message) {
Message, make_error_code(BitcodeError::CorruptedBitcode));
}
+Expected<bool> MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock(
+ PlaceholderQueue &Placeholders) {
+ IndexCursor = Stream;
+ SmallVector<uint64_t, 64> Record;
+ // Get the abbrevs, and preload record positions to make them lazy-loadable.
+ while (true) {
+ BitstreamEntry Entry = IndexCursor.advanceSkippingSubblocks(
+ BitstreamCursor::AF_DontPopBlockAtEnd);
+ switch (Entry.Kind) {
+ case BitstreamEntry::SubBlock: // Handled for us already.
+ case BitstreamEntry::Error:
+ return error("Malformed block");
+ case BitstreamEntry::EndBlock: {
+ return true;
+ }
+ case BitstreamEntry::Record: {
+ // The interesting case.
+ ++NumMDRecordLoaded;
+ uint64_t CurrentPos = IndexCursor.GetCurrentBitNo();
+ auto Code = IndexCursor.skipRecord(Entry.ID);
+ switch (Code) {
+ case bitc::METADATA_STRINGS: {
+ // Rewind and parse the strings.
+ IndexCursor.JumpToBit(CurrentPos);
+ StringRef Blob;
+ Record.clear();
+ IndexCursor.readRecord(Entry.ID, Record, &Blob);
+ unsigned NumStrings = Record[0];
+ MDStringRef.reserve(NumStrings);
+ auto IndexNextMDString = [&](StringRef Str) {
+ MDStringRef.push_back(Str);
+ };
+ if (auto Err = parseMetadataStrings(Record, Blob, IndexNextMDString))
+ return std::move(Err);
+ break;
+ }
+ case bitc::METADATA_INDEX_OFFSET: {
+ // This is the offset to the index; when we see this we skip all the
+ // records and load only an index to them.
+ IndexCursor.JumpToBit(CurrentPos);
+ Record.clear();
+ IndexCursor.readRecord(Entry.ID, Record);
+ if (Record.size() != 2)
+ return error("Invalid record");
+ auto Offset = Record[0] + (Record[1] << 32);
+ auto BeginPos = IndexCursor.GetCurrentBitNo();
+ IndexCursor.JumpToBit(BeginPos + Offset);
+ Entry = IndexCursor.advanceSkippingSubblocks(
+ BitstreamCursor::AF_DontPopBlockAtEnd);
+ assert(Entry.Kind == BitstreamEntry::Record &&
+ "Corrupted bitcode: Expected `Record` when trying to find the "
+ "Metadata index");
+ Record.clear();
+ auto Code = IndexCursor.readRecord(Entry.ID, Record);
+ (void)Code;
+ assert(Code == bitc::METADATA_INDEX && "Corrupted bitcode: Expected "
+ "`METADATA_INDEX` when trying "
+ "to find the Metadata index");
+
+ // Delta unpack
+ auto CurrentValue = BeginPos;
+ GlobalMetadataBitPosIndex.reserve(Record.size());
+ for (auto &Elt : Record) {
+ CurrentValue += Elt;
+ GlobalMetadataBitPosIndex.push_back(CurrentValue);
+ }
+ break;
+ }
+ case bitc::METADATA_INDEX:
+ // We don't expect to get here; the index is loaded when we encounter
+ // the offset.
+ return error("Corrupted Metadata block");
+ case bitc::METADATA_NAME: {
+ // Named metadata need to be materialized now and aren't deferred.
+ IndexCursor.JumpToBit(CurrentPos);
+ Record.clear();
+ unsigned Code = IndexCursor.readRecord(Entry.ID, Record);
+ assert(Code == bitc::METADATA_NAME);
+
+ // Read name of the named metadata.
+ SmallString<8> Name(Record.begin(), Record.end());
+ Code = IndexCursor.ReadCode();
+
+ // Named metadata comes in two parts; we expect the name to be followed
+ // by the node.
+ Record.clear();
+ unsigned NextBitCode = IndexCursor.readRecord(Code, Record);
+ assert(NextBitCode == bitc::METADATA_NAMED_NODE);
+ (void)NextBitCode;
+
+ // Read named metadata elements.
+ unsigned Size = Record.size();
+ NamedMDNode *NMD = TheModule.getOrInsertNamedMetadata(Name);
+ for (unsigned i = 0; i != Size; ++i) {
+ // FIXME: We could use a placeholder here, however NamedMDNode takes
+ // MDNode operands rather than using the Metadata infrastructure. This is
+ // acknowledged by the 'TODO: Inherit from Metadata' in the
+ // NamedMDNode class definition.
+ MDNode *MD = MetadataList.getMDNodeFwdRefOrNull(Record[i]);
+ assert(MD && "Invalid record");
+ NMD->addOperand(MD);
+ }
+ break;
+ }
+ case bitc::METADATA_GLOBAL_DECL_ATTACHMENT: {
+ // FIXME: we need to do this early because we don't materialize global
+ // values explicitly.
+ IndexCursor.JumpToBit(CurrentPos);
+ Record.clear();
+ IndexCursor.readRecord(Entry.ID, Record);
+ if (Record.size() % 2 == 0)
+ return error("Invalid record");
+ unsigned ValueID = Record[0];
+ if (ValueID >= ValueList.size())
+ return error("Invalid record");
+ if (auto *GO = dyn_cast<GlobalObject>(ValueList[ValueID]))
+ if (Error Err = parseGlobalObjectAttachment(
+ *GO, ArrayRef<uint64_t>(Record).slice(1)))
+ return std::move(Err);
+ break;
+ }
+ case bitc::METADATA_KIND:
+ case bitc::METADATA_STRING_OLD:
+ case bitc::METADATA_OLD_FN_NODE:
+ case bitc::METADATA_OLD_NODE:
+ case bitc::METADATA_VALUE:
+ case bitc::METADATA_DISTINCT_NODE:
+ case bitc::METADATA_NODE:
+ case bitc::METADATA_LOCATION:
+ case bitc::METADATA_GENERIC_DEBUG:
+ case bitc::METADATA_SUBRANGE:
+ case bitc::METADATA_ENUMERATOR:
+ case bitc::METADATA_BASIC_TYPE:
+ case bitc::METADATA_DERIVED_TYPE:
+ case bitc::METADATA_COMPOSITE_TYPE:
+ case bitc::METADATA_SUBROUTINE_TYPE:
+ case bitc::METADATA_MODULE:
+ case bitc::METADATA_FILE:
+ case bitc::METADATA_COMPILE_UNIT:
+ case bitc::METADATA_SUBPROGRAM:
+ case bitc::METADATA_LEXICAL_BLOCK:
+ case bitc::METADATA_LEXICAL_BLOCK_FILE:
+ case bitc::METADATA_NAMESPACE:
+ case bitc::METADATA_MACRO:
+ case bitc::METADATA_MACRO_FILE:
+ case bitc::METADATA_TEMPLATE_TYPE:
+ case bitc::METADATA_TEMPLATE_VALUE:
+ case bitc::METADATA_GLOBAL_VAR:
+ case bitc::METADATA_LOCAL_VAR:
+ case bitc::METADATA_EXPRESSION:
+ case bitc::METADATA_OBJC_PROPERTY:
+ case bitc::METADATA_IMPORTED_ENTITY:
+ case bitc::METADATA_GLOBAL_VAR_EXPR:
+ // We don't expect to see any of these; if we see one, give up on
+ // lazy-loading and fall back.
+ MDStringRef.clear();
+ GlobalMetadataBitPosIndex.clear();
+ return false;
+ }
+ break;
+ }
+ }
+ }
+}
+
/// Parse a METADATA_BLOCK. If ModuleLevel is true then we are parsing
/// module level metadata.
Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) {
if (!ModuleLevel && MetadataList.hasFwdRefs())
return error("Invalid metadata: fwd refs into function blocks");
+ // Record the entry position so that we can jump back here and efficiently
+ // skip the whole block in case we lazy-load.
+ auto EntryPos = Stream.GetCurrentBitNo();
+
if (Stream.EnterSubBlock(bitc::METADATA_BLOCK_ID))
return error("Invalid record");
- unsigned NextMetadataNo = MetadataList.size();
SmallVector<uint64_t, 64> Record;
-
PlaceholderQueue Placeholders;
+ // We lazy-load module-level metadata: we build an index for each record, and
+ // then load individual records as needed, starting with the named metadata.
+ if (ModuleLevel && IsImporting && MetadataList.empty() &&
+ !DisableLazyLoading) {
+ auto SuccessOrErr = lazyLoadModuleMetadataBlock(Placeholders);
+ if (!SuccessOrErr)
+ return SuccessOrErr.takeError();
+ if (SuccessOrErr.get()) {
+ // An index was successfully created and we will be able to load metadata
+ // on-demand.
+ MetadataList.resize(MDStringRef.size() +
+ GlobalMetadataBitPosIndex.size());
+
+ // Reading the named metadata created forward references and/or
+ // placeholders, which we flush here.
+ resolveForwardRefsAndPlaceholders(Placeholders);
+ upgradeCUSubprograms();
+ // Return at the beginning of the block, since it is easy to skip it
+ // entirely from there.
+ Stream.ReadBlockEnd(); // Pop the abbrev block context.
+ Stream.JumpToBit(EntryPos);
+ if (Stream.SkipBlock())
+ return error("Invalid record");
+ return Error::success();
+ }
+ // Couldn't load an index; fall back to loading the whole block "old-style".
+ }
+
+ unsigned NextMetadataNo = MetadataList.size();
+
// Read all the records.
while (true) {
BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
@@ -467,16 +736,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) {
case BitstreamEntry::Error:
return error("Malformed block");
case BitstreamEntry::EndBlock:
- // Upgrade old-style CU <-> SP pointers to point from SP to CU.
- for (auto CU_SP : CUSubprograms)
- if (auto *SPs = dyn_cast_or_null<MDTuple>(CU_SP.second))
- for (auto &Op : SPs->operands())
- if (auto *SP = dyn_cast_or_null<MDNode>(Op))
- SP->replaceOperandWith(7, CU_SP.first);
- CUSubprograms.clear();
-
- MetadataList.tryToResolveCycles();
- Placeholders.flush(MetadataList);
+ resolveForwardRefsAndPlaceholders(Placeholders);
+ upgradeCUSubprograms();
return Error::success();
case BitstreamEntry::Record:
// The interesting case.
@@ -486,20 +747,86 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) {
// Read a record.
Record.clear();
StringRef Blob;
+ ++NumMDRecordLoaded;
unsigned Code = Stream.readRecord(Entry.ID, Record, &Blob);
- if (Error Err = parseOneMetadata(Record, Code, Placeholders, Blob,
- ModuleLevel, NextMetadataNo))
+ if (Error Err =
+ parseOneMetadata(Record, Code, Placeholders, Blob, NextMetadataNo))
return Err;
}
}
+MDString *MetadataLoader::MetadataLoaderImpl::lazyLoadOneMDString(unsigned ID) {
+ ++NumMDStringLoaded;
+ if (Metadata *MD = MetadataList.lookup(ID))
+ return cast<MDString>(MD);
+ auto MDS = MDString::get(Context, MDStringRef[ID]);
+ MetadataList.assignValue(MDS, ID);
+ return MDS;
+}
+
+void MetadataLoader::MetadataLoaderImpl::lazyLoadOneMetadata(
+ unsigned ID, PlaceholderQueue &Placeholders) {
+ assert(ID < (MDStringRef.size()) + GlobalMetadataBitPosIndex.size());
+ assert(ID >= MDStringRef.size() && "Unexpected lazy-loading of MDString");
+#ifndef NDEBUG
+ // Lookup first if the metadata hasn't already been loaded.
+ if (auto *MD = MetadataList.lookup(ID)) {
+ auto *N = dyn_cast_or_null<MDNode>(MD);
+ assert(N && N->isTemporary() && "Lazy loading an already loaded metadata");
+ }
+#endif
+ SmallVector<uint64_t, 64> Record;
+ StringRef Blob;
+ IndexCursor.JumpToBit(GlobalMetadataBitPosIndex[ID - MDStringRef.size()]);
+ auto Entry = IndexCursor.advanceSkippingSubblocks();
+ ++NumMDRecordLoaded;
+ unsigned Code = IndexCursor.readRecord(Entry.ID, Record, &Blob);
+ if (Error Err = parseOneMetadata(Record, Code, Placeholders, Blob, ID))
+ report_fatal_error("Can't lazyload MD");
+}
+
+/// Ensure that all forward-references and placeholders are resolved,
+/// iteratively lazy-loading metadata on-demand if needed.
+void MetadataLoader::MetadataLoaderImpl::resolveForwardRefsAndPlaceholders(
+ PlaceholderQueue &Placeholders) {
+ DenseSet<unsigned> Temporaries;
+ while (1) {
+ // Populate Temporaries with the placeholders that haven't been loaded yet.
+ Placeholders.getTemporaries(MetadataList, Temporaries);
+
+ // If we don't have any temporaries or forward references, we're done!
+ if (Temporaries.empty() && !MetadataList.hasFwdRefs())
+ break;
+
+ // First, load all the temporaries. This can add new placeholders or
+ // forward references.
+ for (auto ID : Temporaries)
+ lazyLoadOneMetadata(ID, Placeholders);
+ Temporaries.clear();
+
+ // Second, load the forward-references. This can also add new placeholders
+ // or forward references.
+ while (MetadataList.hasFwdRefs())
+ lazyLoadOneMetadata(MetadataList.getNextFwdRef(), Placeholders);
+ }
+ // At this point we don't have any forward references remaining, or
+ // temporaries that haven't been loaded. We can safely drop RAUW support and
+ // mark cycles as resolved.
+ MetadataList.tryToResolveCycles();
+
+ // Finally, everything is in place: we can replace the placeholders' operands
+ // with the final nodes they refer to.
+ Placeholders.flush(MetadataList);
+}
+
Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
SmallVectorImpl<uint64_t> &Record, unsigned Code,
- PlaceholderQueue &Placeholders, StringRef Blob, bool ModuleLevel,
- unsigned &NextMetadataNo) {
+ PlaceholderQueue &Placeholders, StringRef Blob, unsigned &NextMetadataNo) {
bool IsDistinct = false;
auto getMD = [&](unsigned ID) -> Metadata * {
+ if (ID < MDStringRef.size())
+ return lazyLoadOneMDString(ID);
if (!IsDistinct)
return MetadataList.getMetadataFwdRef(ID);
if (auto *MD = MetadataList.getMetadataIfResolved(ID))
@@ -519,7 +846,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
auto getMDString = [&](unsigned ID) -> MDString * {
// This requires that the ID is not really a forward reference. In
// particular, the MDString must already have been resolved.
- return cast_or_null<MDString>(getMDOrNull(ID));
+ auto MDS = getMDOrNull(ID);
+ return cast_or_null<MDString>(MDS);
};
// Support for old type refs.
@@ -539,6 +867,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
Record.clear();
Code = Stream.ReadCode();
+ ++NumMDRecordLoaded;
unsigned NextBitCode = Stream.readRecord(Code, Record);
if (NextBitCode != bitc::METADATA_NAMED_NODE)
return error("METADATA_NAME not followed by METADATA_NAMED_NODE");
@@ -1137,15 +1466,20 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
// Test for upgrading !llvm.loop.
HasSeenOldLoopTags |= mayBeOldLoopAttachmentTag(String);
-
+ ++NumMDStringLoaded;
Metadata *MD = MDString::get(Context, String);
MetadataList.assignValue(MD, NextMetadataNo++);
break;
}
- case bitc::METADATA_STRINGS:
- if (Error Err = parseMetadataStrings(Record, Blob, NextMetadataNo))
+ case bitc::METADATA_STRINGS: {
+ auto CreateNextMDString = [&](StringRef Str) {
+ ++NumMDStringLoaded;
+ MetadataList.assignValue(MDString::get(Context, Str), NextMetadataNo++);
+ };
+ if (Error Err = parseMetadataStrings(Record, Blob, CreateNextMDString))
return Err;
break;
+ }
case bitc::METADATA_GLOBAL_DECL_ATTACHMENT: {
if (Record.size() % 2 == 0)
return error("Invalid record");
@@ -1166,12 +1500,13 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
break;
}
}
-#undef GET_OR_DISTINCT
return Error::success();
+#undef GET_OR_DISTINCT
}
Error MetadataLoader::MetadataLoaderImpl::parseMetadataStrings(
- ArrayRef<uint64_t> Record, StringRef Blob, unsigned &NextMetadataNo) {
+ ArrayRef<uint64_t> Record, StringRef Blob,
+ std::function<void(StringRef)> CallBack) {
// All the MDStrings in the block are emitted together in a single
// record. The strings are concatenated and stored in a blob along with
// their sizes.
@@ -1197,8 +1532,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataStrings(
if (Strings.size() < Size)
return error("Invalid record: metadata strings truncated chars");
- MetadataList.assignValue(MDString::get(Context, Strings.slice(0, Size)),
- NextMetadataNo++);
+ CallBack(Strings.slice(0, Size));
Strings = Strings.drop_front(Size);
} while (--NumStrings);
@@ -1228,6 +1562,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataAttachment(
SmallVector<uint64_t, 64> Record;
+ PlaceholderQueue Placeholders;
+
while (true) {
BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
@@ -1236,6 +1572,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataAttachment(
case BitstreamEntry::Error:
return error("Malformed block");
case BitstreamEntry::EndBlock:
+ resolveForwardRefsAndPlaceholders(Placeholders);
return Error::success();
case BitstreamEntry::Record:
// The interesting case.
@@ -1244,6 +1581,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataAttachment(
// Read a metadata attachment record.
Record.clear();
+ ++NumMDRecordLoaded;
switch (Stream.readRecord(Entry.ID, Record)) {
default: // Default behavior: ignore.
break;
@@ -1268,7 +1606,14 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataAttachment(
if (I->second == LLVMContext::MD_tbaa && StripTBAA)
continue;
- Metadata *Node = MetadataList.getMetadataFwdRef(Record[i + 1]);
+ auto Idx = Record[i + 1];
+ if (Idx < (MDStringRef.size() + GlobalMetadataBitPosIndex.size()) &&
+ !MetadataList.lookup(Idx))
+ // Load the attachment if it is in the lazy-loadable range and hasn't
+ // been loaded yet.
+ lazyLoadOneMetadata(Idx, Placeholders);
+
+ Metadata *Node = MetadataList.getMetadataFwdRef(Idx);
if (isa<LocalAsMetadata>(Node))
// Drop the attachment. This used to be legal, but there's no
// upgrade path.
@@ -1331,6 +1676,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataKinds() {
// Read a record.
Record.clear();
+ ++NumMDRecordLoaded;
unsigned Code = Stream.readRecord(Entry.ID, Record);
switch (Code) {
default: // Default behavior: ignore.
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index c10ba2399e71..ebb2022551f7 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -784,53 +784,53 @@ void ModuleBitcodeWriter::writeTypeTable() {
uint64_t NumBits = VE.computeBitsRequiredForTypeIndicies();
// Abbrev for TYPE_CODE_POINTER.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_POINTER));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
Abbv->Add(BitCodeAbbrevOp(0)); // Addrspace = 0
- unsigned PtrAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned PtrAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for TYPE_CODE_FUNCTION.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_FUNCTION));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // isvararg
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
- unsigned FunctionAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned FunctionAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for TYPE_CODE_STRUCT_ANON.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_ANON));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
- unsigned StructAnonAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned StructAnonAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for TYPE_CODE_STRUCT_NAME.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_NAME));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
- unsigned StructNameAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned StructNameAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for TYPE_CODE_STRUCT_NAMED.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_NAMED));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
- unsigned StructNamedAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned StructNamedAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for TYPE_CODE_ARRAY.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_ARRAY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // size
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
- unsigned ArrayAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned ArrayAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Emit an entry count so the reader can reserve space.
TypeVals.push_back(TypeList.size());
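Editor's note: throughout the writer, abbreviation definitions switch from raw new BitCodeAbbrev() pointers to std::make_shared<BitCodeAbbrev>(), with ownership handed to the stream via std::move when the abbreviation is emitted. A small standalone sketch of that ownership hand-off, using placeholder types rather than the real BitstreamWriter:

#include <memory>
#include <vector>

struct Abbrev {            // stand-in for BitCodeAbbrev
  std::vector<int> Ops;
  void Add(int Op) { Ops.push_back(Op); }
};

struct Stream {            // stand-in for BitstreamWriter
  std::vector<std::shared_ptr<Abbrev>> Abbrevs;
  // Takes shared ownership; block-info consumers could hold another reference.
  unsigned EmitAbbrev(std::shared_ptr<Abbrev> A) {
    Abbrevs.push_back(std::move(A));
    return static_cast<unsigned>(Abbrevs.size() - 1);
  }
};

int main() {
  Stream S;
  auto Abbv = std::make_shared<Abbrev>();
  Abbv->Add(1);
  Abbv->Add(2);
  unsigned Id = S.EmitAbbrev(std::move(Abbv)); // the caller no longer owns it
  (void)Id;
}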
@@ -971,9 +971,8 @@ static unsigned getEncodedLinkage(const GlobalValue &GV) {
static uint64_t getEncodedGVSummaryFlags(GlobalValueSummary::GVFlags Flags) {
uint64_t RawFlags = 0;
- RawFlags |= Flags.NoRename; // bool
- RawFlags |= (Flags.IsNotViableToInline << 1);
- RawFlags |= (Flags.HasInlineAsmMaybeReferencingInternal << 2);
+ RawFlags |= Flags.NotEligibleToImport; // bool
+ RawFlags |= (Flags.LiveRoot << 1);
  // Linkage doesn't need to be remapped at this time for the summary. Any future
// change to the getEncodedLinkage() function will need to be taken into
// account here as well.
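Editor's note: with the summary flag change, bit 0 of the raw word carries NotEligibleToImport and bit 1 carries LiveRoot, replacing the old three-bit layout. A small round-trip sketch of that packing; the shift used for the linkage field below is an assumption for illustration only and is not taken from the patch:

#include <cassert>
#include <cstdint>

struct GVFlags {
  bool NotEligibleToImport;
  bool LiveRoot;
  unsigned Linkage; // linkage encoding; its bit position here is illustrative
};

static uint64_t encode(const GVFlags &F) {
  uint64_t Raw = 0;
  Raw |= F.NotEligibleToImport;        // bit 0
  Raw |= uint64_t(F.LiveRoot) << 1;    // bit 1
  Raw |= uint64_t(F.Linkage) << 4;     // assumed position, for the sketch only
  return Raw;
}

static GVFlags decode(uint64_t Raw) {
  return {bool(Raw & 1), bool(Raw & 2), unsigned(Raw >> 4)};
}

int main() {
  GVFlags F{true, false, 7};
  GVFlags G = decode(encode(F));
  assert(G.NotEligibleToImport == F.NotEligibleToImport &&
         G.LiveRoot == F.LiveRoot && G.Linkage == F.Linkage);
}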
@@ -1059,13 +1058,13 @@ void BitcodeWriterBase::writeValueSymbolTableForwardDecl() {
// which is written after the function blocks so that it can include
// the offset of each function. The placeholder offset will be
// updated when the real VST is written.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_VSTOFFSET));
// Blocks are 32-bit aligned, so we can use a 32-bit word offset to
// hold the real VST offset. Must use fixed instead of VBR as we don't
// know how many VBR chunks to reserve ahead of time.
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
- unsigned VSTOffsetAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned VSTOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Emit the placeholder
uint64_t Vals[] = {bitc::MODULE_CODE_VSTOFFSET, 0};
@@ -1155,7 +1154,7 @@ void ModuleBitcodeWriter::writeModuleInfo() {
unsigned SimpleGVarAbbrev = 0;
if (!M.global_empty()) {
// Add an abbrev for common globals with no visibility or thread localness.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_GLOBALVAR));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
Log2_32_Ceil(MaxGlobalType+1)));
@@ -1177,7 +1176,7 @@ void ModuleBitcodeWriter::writeModuleInfo() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
Log2_32_Ceil(SectionMap.size()+1)));
// Don't bother emitting vis + thread local.
- SimpleGVarAbbrev = Stream.EmitAbbrev(Abbv);
+ SimpleGVarAbbrev = Stream.EmitAbbrev(std::move(Abbv));
}
// Emit the global variable information.
@@ -1285,11 +1284,11 @@ void ModuleBitcodeWriter::writeModuleInfo() {
AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7);
// MODULE_CODE_SOURCE_FILENAME: [namechar x N]
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_SOURCE_FILENAME));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(AbbrevOpToUse);
- unsigned FilenameAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned FilenameAbbrev = Stream.EmitAbbrev(std::move(Abbv));
for (const auto P : M.getSourceFileName())
Vals.push_back((unsigned char)P);
@@ -1360,14 +1359,14 @@ void ModuleBitcodeWriter::writeMDTuple(const MDTuple *N,
unsigned ModuleBitcodeWriter::createDILocationAbbrev() {
// Assume the column is usually under 128, and always output the inlined-at
// location (it's never more expensive than building an array size 1).
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_LOCATION));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
- return Stream.EmitAbbrev(Abbv);
+ return Stream.EmitAbbrev(std::move(Abbv));
}
void ModuleBitcodeWriter::writeDILocation(const DILocation *N,
@@ -1389,7 +1388,7 @@ void ModuleBitcodeWriter::writeDILocation(const DILocation *N,
unsigned ModuleBitcodeWriter::createGenericDINodeAbbrev() {
// Assume the column is usually under 128, and always output the inlined-at
// location (it's never more expensive than building an array size 1).
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_GENERIC_DEBUG));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
@@ -1397,7 +1396,7 @@ unsigned ModuleBitcodeWriter::createGenericDINodeAbbrev() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
- return Stream.EmitAbbrev(Abbv);
+ return Stream.EmitAbbrev(std::move(Abbv));
}
void ModuleBitcodeWriter::writeGenericDINode(const GenericDINode *N,
@@ -1790,11 +1789,11 @@ void ModuleBitcodeWriter::writeDIImportedEntity(
}
unsigned ModuleBitcodeWriter::createNamedMetadataAbbrev() {
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_NAME));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
- return Stream.EmitAbbrev(Abbv);
+ return Stream.EmitAbbrev(std::move(Abbv));
}
void ModuleBitcodeWriter::writeNamedMetadata(
@@ -1819,12 +1818,12 @@ void ModuleBitcodeWriter::writeNamedMetadata(
}
unsigned ModuleBitcodeWriter::createMetadataStringsAbbrev() {
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_STRINGS));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // # of strings
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // offset to chars
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
- return Stream.EmitAbbrev(Abbv);
+ return Stream.EmitAbbrev(std::move(Abbv));
}
/// Write out a record for MDString.
@@ -1918,17 +1917,17 @@ void ModuleBitcodeWriter::writeModuleMetadata() {
MDAbbrevs[MetadataAbbrev::GenericDINodeAbbrevID] =
createGenericDINodeAbbrev();
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_INDEX_OFFSET));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
- unsigned OffsetAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned OffsetAbbrev = Stream.EmitAbbrev(std::move(Abbv));
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_INDEX));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
- unsigned IndexAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned IndexAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Emit MDStrings together upfront.
writeMetadataStrings(VE.getMDStrings(), Record);
@@ -2125,30 +2124,30 @@ void ModuleBitcodeWriter::writeConstants(unsigned FirstVal, unsigned LastVal,
// If this is a constant pool for the module, emit module-specific abbrevs.
if (isGlobal) {
// Abbrev for CST_CODE_AGGREGATE.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_AGGREGATE));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, Log2_32_Ceil(LastVal+1)));
- AggregateAbbrev = Stream.EmitAbbrev(Abbv);
+ AggregateAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for CST_CODE_STRING.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_STRING));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
- String8Abbrev = Stream.EmitAbbrev(Abbv);
+ String8Abbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for CST_CODE_CSTRING.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CSTRING));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
- CString7Abbrev = Stream.EmitAbbrev(Abbv);
+ CString7Abbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for CST_CODE_CSTRING.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CSTRING));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
- CString6Abbrev = Stream.EmitAbbrev(Abbv);
+ CString6Abbrev = Stream.EmitAbbrev(std::move(Abbv));
}
SmallVector<uint64_t, 64> Record;
@@ -2858,39 +2857,39 @@ void ModuleBitcodeWriter::writeValueSymbolTable(
unsigned GUIDEntryAbbrev;
if (IsModuleLevel && hasVSTOffsetPlaceholder()) {
// 8-bit fixed-width VST_CODE_FNENTRY function strings.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
- FnEntry8BitAbbrev = Stream.EmitAbbrev(Abbv);
+ FnEntry8BitAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// 7-bit fixed width VST_CODE_FNENTRY function strings.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
- FnEntry7BitAbbrev = Stream.EmitAbbrev(Abbv);
+ FnEntry7BitAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// 6-bit char6 VST_CODE_FNENTRY function strings.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
- FnEntry6BitAbbrev = Stream.EmitAbbrev(Abbv);
+ FnEntry6BitAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// FIXME: Change the name of this record as it is now used by
// the per-module index as well.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_ENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // refguid
- GUIDEntryAbbrev = Stream.EmitAbbrev(Abbv);
+ GUIDEntryAbbrev = Stream.EmitAbbrev(std::move(Abbv));
}
// FIXME: Set up the abbrev, we know how many values there are!
@@ -2984,11 +2983,11 @@ void IndexBitcodeWriter::writeCombinedValueSymbolTable() {
Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4);
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_ENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // refguid
- unsigned EntryAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned EntryAbbrev = Stream.EmitAbbrev(std::move(Abbv));
SmallVector<uint64_t, 64> NameVals;
for (const auto &GVI : valueIds()) {
@@ -3121,7 +3120,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
Stream.EnterBlockInfoBlock();
{ // 8-bit fixed-width VST_CODE_ENTRY/VST_CODE_BBENTRY strings.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
@@ -3132,7 +3131,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
}
{ // 7-bit fixed width VST_CODE_ENTRY strings.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
@@ -3142,7 +3141,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
llvm_unreachable("Unexpected abbrev ordering!");
}
{ // 6-bit char6 VST_CODE_ENTRY strings.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
@@ -3152,7 +3151,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
llvm_unreachable("Unexpected abbrev ordering!");
}
{ // 6-bit char6 VST_CODE_BBENTRY strings.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_BBENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
@@ -3165,7 +3164,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
{ // SETTYPE abbrev for CONSTANTS_BLOCK.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_SETTYPE));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
VE.computeBitsRequiredForTypeIndicies()));
@@ -3175,7 +3174,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
}
{ // INTEGER abbrev for CONSTANTS_BLOCK.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_INTEGER));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID, Abbv) !=
@@ -3184,7 +3183,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
}
{ // CE_CAST abbrev for CONSTANTS_BLOCK.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CE_CAST));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // cast opc
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // typeid
@@ -3196,7 +3195,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
llvm_unreachable("Unexpected abbrev ordering!");
}
{ // NULL abbrev for CONSTANTS_BLOCK.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_NULL));
if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID, Abbv) !=
CONSTANTS_NULL_Abbrev)
@@ -3206,7 +3205,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
// FIXME: This should only use space for first class types!
{ // INST_LOAD abbrev for FUNCTION_BLOCK.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_LOAD));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Ptr
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // dest ty
@@ -3218,7 +3217,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
llvm_unreachable("Unexpected abbrev ordering!");
}
{ // INST_BINOP abbrev for FUNCTION_BLOCK.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_BINOP));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LHS
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // RHS
@@ -3228,7 +3227,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
llvm_unreachable("Unexpected abbrev ordering!");
}
{ // INST_BINOP_FLAGS abbrev for FUNCTION_BLOCK.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_BINOP));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LHS
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // RHS
@@ -3239,7 +3238,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
llvm_unreachable("Unexpected abbrev ordering!");
}
{ // INST_CAST abbrev for FUNCTION_BLOCK.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_CAST));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // OpVal
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // dest ty
@@ -3251,14 +3250,14 @@ void ModuleBitcodeWriter::writeBlockInfo() {
}
{ // INST_RET abbrev for FUNCTION_BLOCK.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_RET));
if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) !=
FUNCTION_INST_RET_VOID_ABBREV)
llvm_unreachable("Unexpected abbrev ordering!");
}
{ // INST_RET abbrev for FUNCTION_BLOCK.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_RET));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // ValID
if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) !=
@@ -3266,14 +3265,14 @@ void ModuleBitcodeWriter::writeBlockInfo() {
llvm_unreachable("Unexpected abbrev ordering!");
}
{ // INST_UNREACHABLE abbrev for FUNCTION_BLOCK.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_UNREACHABLE));
if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) !=
FUNCTION_INST_UNREACHABLE_ABBREV)
llvm_unreachable("Unexpected abbrev ordering!");
}
{
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_GEP));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // dest ty
@@ -3296,38 +3295,38 @@ void IndexBitcodeWriter::writeModStrings() {
// TODO: See which abbrev sizes we actually need to emit
// 8-bit fixed-width MST_ENTRY strings.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_ENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
- unsigned Abbrev8Bit = Stream.EmitAbbrev(Abbv);
+ unsigned Abbrev8Bit = Stream.EmitAbbrev(std::move(Abbv));
// 7-bit fixed width MST_ENTRY strings.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_ENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
- unsigned Abbrev7Bit = Stream.EmitAbbrev(Abbv);
+ unsigned Abbrev7Bit = Stream.EmitAbbrev(std::move(Abbv));
// 6-bit char6 MST_ENTRY strings.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_ENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
- unsigned Abbrev6Bit = Stream.EmitAbbrev(Abbv);
+ unsigned Abbrev6Bit = Stream.EmitAbbrev(std::move(Abbv));
// Module Hash, 160 bits SHA1. Optionally, emitted after each MST_CODE_ENTRY.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_HASH));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
- unsigned AbbrevHash = Stream.EmitAbbrev(Abbv);
+ unsigned AbbrevHash = Stream.EmitAbbrev(std::move(Abbv));
SmallVector<unsigned, 64> Vals;
for (const auto &MPSE : Index.modulePaths()) {
@@ -3435,7 +3434,7 @@ void ModuleBitcodeWriter::writeModuleLevelReferences(
// Current version for the summary.
// This is bumped whenever we introduce changes in the way some record are
// interpreted, like flags for instance.
-static const uint64_t INDEX_VERSION = 2;
+static const uint64_t INDEX_VERSION = 3;
/// Emit the per-module summary section alongside the rest of
/// the module's bitcode.
@@ -3450,7 +3449,7 @@ void ModuleBitcodeWriter::writePerModuleGlobalValueSummary() {
}
// Abbrev for FS_PERMODULE.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
@@ -3459,10 +3458,10 @@ void ModuleBitcodeWriter::writePerModuleGlobalValueSummary() {
// numrefs x valueid, n x (valueid)
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
- unsigned FSCallsAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned FSCallsAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for FS_PERMODULE_PROFILE.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_PROFILE));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
@@ -3471,24 +3470,24 @@ void ModuleBitcodeWriter::writePerModuleGlobalValueSummary() {
// numrefs x valueid, n x (valueid, hotness)
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
- unsigned FSCallsProfileAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned FSCallsProfileAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for FS_PERMODULE_GLOBALVAR_INIT_REFS.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_GLOBALVAR_INIT_REFS));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); // valueids
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
- unsigned FSModRefsAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned FSModRefsAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for FS_ALIAS.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_ALIAS));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
- unsigned FSAliasAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned FSAliasAbbrev = Stream.EmitAbbrev(std::move(Abbv));
SmallVector<uint64_t, 64> NameVals;
// Iterate over the list of functions instead of the Index to
@@ -3542,7 +3541,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Stream.EmitRecord(bitc::FS_VERSION, ArrayRef<uint64_t>{INDEX_VERSION});
// Abbrev for FS_COMBINED.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // modid
@@ -3552,10 +3551,10 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
// numrefs x valueid, n x (valueid)
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
- unsigned FSCallsAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned FSCallsAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for FS_COMBINED_PROFILE.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_PROFILE));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // modid
@@ -3565,26 +3564,26 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
// numrefs x valueid, n x (valueid, hotness)
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
- unsigned FSCallsProfileAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned FSCallsProfileAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for FS_COMBINED_GLOBALVAR_INIT_REFS.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_GLOBALVAR_INIT_REFS));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // modid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); // valueids
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
- unsigned FSModRefsAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned FSModRefsAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for FS_COMBINED_ALIAS.
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_ALIAS));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // modid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
- unsigned FSAliasAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned FSAliasAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// The aliases are emitted as a post-pass, and will point to the value
// id of the aliasee. Save them in a vector for post-processing.
@@ -3702,19 +3701,19 @@ void writeIdentificationBlock(BitstreamWriter &Stream) {
Stream.EnterSubblock(bitc::IDENTIFICATION_BLOCK_ID, 5);
// Write the "user readable" string identifying the bitcode producer
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::IDENTIFICATION_CODE_STRING));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
- auto StringAbbrev = Stream.EmitAbbrev(Abbv);
+ auto StringAbbrev = Stream.EmitAbbrev(std::move(Abbv));
writeStringRecord(Stream, bitc::IDENTIFICATION_CODE_STRING,
"LLVM" LLVM_VERSION_STRING, StringAbbrev);
// Write the epoch version
- Abbv = new BitCodeAbbrev();
+ Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::IDENTIFICATION_CODE_EPOCH));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
- auto EpochAbbrev = Stream.EmitAbbrev(Abbv);
+ auto EpochAbbrev = Stream.EmitAbbrev(std::move(Abbv));
SmallVector<unsigned, 1> Vals = {bitc::BITCODE_CURRENT_EPOCH};
Stream.EmitRecord(bitc::IDENTIFICATION_CODE_EPOCH, Vals, EpochAbbrev);
Stream.ExitBlock();
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
index 0c79def87933..61149d9229b7 100644
--- a/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -53,7 +53,8 @@ void ARMException::beginFunction(const MachineFunction *MF) {
if (MoveType == AsmPrinter::CFI_M_Debug) {
if (!hasEmittedCFISections) {
- Asm->OutStreamer->EmitCFISections(false, true);
+ if (Asm->needsOnlyDebugCFIMoves())
+ Asm->OutStreamer->EmitCFISections(false, true);
hasEmittedCFISections = true;
}
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 5f15ac1d503b..9f6caa95a9ed 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -108,7 +108,7 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &DL,
AsmPrinter::AsmPrinter(TargetMachine &tm, std::unique_ptr<MCStreamer> Streamer)
: MachineFunctionPass(ID), TM(tm), MAI(tm.getMCAsmInfo()),
OutContext(Streamer->getContext()), OutStreamer(std::move(Streamer)),
- LastMI(nullptr), LastFn(0), Counter(~0U) {
+ isCFIMoveForDebugging(false), LastMI(nullptr), LastFn(0), Counter(~0U) {
DD = nullptr;
MMI = nullptr;
LI = nullptr;
@@ -264,6 +264,28 @@ bool AsmPrinter::doInitialization(Module &M) {
}
}
+ switch (MAI->getExceptionHandlingType()) {
+ case ExceptionHandling::SjLj:
+ case ExceptionHandling::DwarfCFI:
+ case ExceptionHandling::ARM:
+ isCFIMoveForDebugging = true;
+ if (MAI->getExceptionHandlingType() != ExceptionHandling::DwarfCFI)
+ break;
+ for (auto &F: M.getFunctionList()) {
+ // If the module contains any function with unwind data,
+ // .eh_frame has to be emitted.
+ // Ignore functions that won't get emitted.
+ if (!F.isDeclarationForLinker() && F.needsUnwindTableEntry()) {
+ isCFIMoveForDebugging = false;
+ break;
+ }
+ }
+ break;
+ default:
+ isCFIMoveForDebugging = false;
+ break;
+ }
+
EHStreamer *ES = nullptr;
switch (MAI->getExceptionHandlingType()) {
case ExceptionHandling::None:
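Editor's note: the new doInitialization logic marks CFI moves as debug-only unless some emitted function actually needs an unwind table entry under DwarfCFI-style exception handling. A reduced sketch of that decision; the Function type here is a stand-in, not the LLVM IR class:

#include <vector>

struct Function {
  bool DeclarationForLinker;
  bool NeedsUnwindTable;
};

// Returns true when .eh_frame is only needed for debuggers, i.e. no emitted
// function in the module carries unwind data.
static bool cfiMovesAreForDebuggingOnly(const std::vector<Function> &Module) {
  for (const Function &F : Module)
    if (!F.DeclarationForLinker && F.NeedsUnwindTable)
      return false;    // real EH data required, emit full CFI
  return true;
}

int main() {
  std::vector<Function> M = {{false, false}, {true, true}};
  return cfiMovesAreForDebuggingOnly(M) ? 0 : 1;
}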
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 20075e41977f..57864e4e4d4f 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -100,6 +100,8 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
}
SourceMgr SrcMgr;
+ SrcMgr.setIncludeDirs(MCOptions.IASSearchPaths);
+
SrcMgrDiagInfo DiagInfo;
// If the current LLVMContext has an inline asm handler, set it in SourceMgr.
diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index ef30e279aed2..e08306b001fb 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -137,7 +137,7 @@ void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB,
return;
if (!hasEmittedCFISections) {
- if (Asm->needsCFIMoves() == AsmPrinter::CFI_M_Debug)
+ if (Asm->needsOnlyDebugCFIMoves())
Asm->OutStreamer->EmitCFISections(false, true);
hasEmittedCFISections = true;
}
diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp
index cf35afbc6e5f..89a042ffc477 100644
--- a/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -125,8 +125,11 @@ unsigned IRTranslator::getMemOpAlignment(const Instruction &I) {
MachineBasicBlock &IRTranslator::getOrCreateBB(const BasicBlock &BB) {
MachineBasicBlock *&MBB = BBToMBB[&BB];
if (!MBB) {
- MBB = MF->CreateMachineBasicBlock();
+ MBB = MF->CreateMachineBasicBlock(&BB);
MF->push_back(MBB);
+
+ if (BB.hasAddressTaken())
+ MBB->setHasAddressTaken();
}
return *MBB;
}
@@ -195,6 +198,45 @@ bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
return true;
}
+bool IRTranslator::translateSwitch(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ // For now, just translate as a chain of conditional branches.
+ // FIXME: could we share most of the logic/code in
+ // SelectionDAGBuilder::visitSwitch between SelectionDAG and GlobalISel?
+ // At first sight, it seems most of the logic in there is independent of
+ // SelectionDAG-specifics and a lot of work went in to optimize switch
+ // lowering in there.
+
+ const SwitchInst &SwInst = cast<SwitchInst>(U);
+ const unsigned SwCondValue = getOrCreateVReg(*SwInst.getCondition());
+
+ LLT LLTi1 = LLT(*Type::getInt1Ty(U.getContext()), *DL);
+ for (auto &CaseIt : SwInst.cases()) {
+ const unsigned CaseValueReg = getOrCreateVReg(*CaseIt.getCaseValue());
+ const unsigned Tst = MRI->createGenericVirtualRegister(LLTi1);
+ MIRBuilder.buildICmp(CmpInst::ICMP_EQ, Tst, CaseValueReg, SwCondValue);
+ MachineBasicBlock &CurBB = MIRBuilder.getMBB();
+ MachineBasicBlock &TrueBB = getOrCreateBB(*CaseIt.getCaseSuccessor());
+
+ MIRBuilder.buildBrCond(Tst, TrueBB);
+ CurBB.addSuccessor(&TrueBB);
+
+ MachineBasicBlock *FalseBB =
+ MF->CreateMachineBasicBlock(SwInst.getParent());
+ MF->push_back(FalseBB);
+ MIRBuilder.buildBr(*FalseBB);
+ CurBB.addSuccessor(FalseBB);
+
+ MIRBuilder.setMBB(*FalseBB);
+ }
+ // handle default case
+ MachineBasicBlock &DefaultBB = getOrCreateBB(*SwInst.getDefaultDest());
+ MIRBuilder.buildBr(DefaultBB);
+ MIRBuilder.getMBB().addSuccessor(&DefaultBB);
+
+ return true;
+}
+
bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
const LoadInst &LI = cast<LoadInst>(U);
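Editor's note: translateSwitch lowers a switch into one compare-and-conditional-branch per case, each falling through into a freshly created block for the next comparison, ending with an unconditional branch to the default destination. The control flow it produces is equivalent to the following source-level chain (a sketch of the shape, not generated code):

// Equivalent control flow for: switch (X) { case 1: ...; case 2: ...; default: ... }
int lowerAsChain(int X) {
  if (X == 1) goto case1;   // G_ICMP + G_BRCOND, then fall into a fresh block
  if (X == 2) goto case2;
  goto def;                 // final G_BR to the default destination
case1:
  return 10;
case2:
  return 20;
def:
  return -1;
}

int main() { return lowerAsChain(2) == 20 ? 0 : 1; }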
diff --git a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
index a6c93bc0f3d7..7d405dd92ac3 100644
--- a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -55,11 +55,10 @@ const unsigned RegisterBankInfo::InvalidMappingID = UINT_MAX - 1;
RegisterBankInfo::RegisterBankInfo(RegisterBank **RegBanks,
unsigned NumRegBanks)
: RegBanks(RegBanks), NumRegBanks(NumRegBanks) {
- DEBUG(for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx) {
+#ifndef NDEBUG
+ for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx)
assert(RegBanks[Idx] != nullptr && "Invalid RegisterBank");
- assert(!RegBanks[Idx]->isValid() &&
- "RegisterBank should be invalid before initialization");
- });
+#endif // NDEBUG
}
RegisterBankInfo::~RegisterBankInfo() {
@@ -70,13 +69,15 @@ RegisterBankInfo::~RegisterBankInfo() {
}
bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const {
- DEBUG(for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx) {
+#ifndef NDEBUG
+ for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx) {
const RegisterBank &RegBank = getRegBank(Idx);
assert(Idx == RegBank.getID() &&
"ID does not match the index in the array");
dbgs() << "Verify " << RegBank << '\n';
assert(RegBank.verify(TRI) && "RegBank is invalid");
- });
+ }
+#endif // NDEBUG
return true;
}
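Editor's note: the constructor and verify() now guard their sanity checks with #ifndef NDEBUG instead of the DEBUG() macro, so the asserts run in every asserts-enabled build rather than only when the pass's debug type is active. The pattern, reduced to a standalone form:

#include <cassert>
#include <cstddef>

static void verifyBanks(const int *const *Banks, size_t NumBanks) {
#ifndef NDEBUG
  // Runs whenever asserts are compiled in, independent of any -debug flag.
  for (size_t I = 0; I != NumBanks; ++I)
    assert(Banks[I] != nullptr && "Invalid RegisterBank");
#else
  (void)Banks;
  (void)NumBanks;
#endif
}

int main() {
  int A = 0, B = 1;
  const int *Banks[] = {&A, &B};
  verifyBanks(Banks, 2);
}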
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 0cac7b71e241..b9f3d86eabd8 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -1495,16 +1495,18 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
if (TII->reverseBranchCondition(Cond))
llvm_unreachable("Unable to reverse branch condition!");
- // Initialize liveins to the first BB. These are potentiall redefined by
- // predicated instructions.
Redefs.init(*TRI);
- Redefs.addLiveIns(CvtMBB);
- Redefs.addLiveIns(NextMBB);
-
- // Compute a set of registers which must not be killed by instructions in
- // BB1: This is everything live-in to BB2.
DontKill.init(*TRI);
- DontKill.addLiveIns(NextMBB);
+
+ if (MRI->tracksLiveness()) {
+    // Initialize liveins to the first BB. These are potentially redefined by
+ // predicated instructions.
+ Redefs.addLiveIns(CvtMBB);
+ Redefs.addLiveIns(NextMBB);
+ // Compute a set of registers which must not be killed by instructions in
+ // BB1: This is everything live-in to BB2.
+ DontKill.addLiveIns(NextMBB);
+ }
if (CvtMBB.pred_size() > 1) {
BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
@@ -1602,8 +1604,10 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
// Initialize liveins to the first BB. These are potentially redefined by
// predicated instructions.
Redefs.init(*TRI);
- Redefs.addLiveIns(CvtMBB);
- Redefs.addLiveIns(NextMBB);
+ if (MRI->tracksLiveness()) {
+ Redefs.addLiveIns(CvtMBB);
+ Redefs.addLiveIns(NextMBB);
+ }
DontKill.clear();
@@ -1766,8 +1770,10 @@ bool IfConverter::IfConvertDiamondCommon(
// instructions. We start with BB1 live-ins so we have the live-out regs
// after tracking the BB1 instructions.
Redefs.init(*TRI);
- Redefs.addLiveIns(MBB1);
- Redefs.addLiveIns(MBB2);
+ if (MRI->tracksLiveness()) {
+ Redefs.addLiveIns(MBB1);
+ Redefs.addLiveIns(MBB2);
+ }
// Remove the duplicated instructions at the beginnings of both paths.
// Skip dbg_value instructions
@@ -1792,12 +1798,14 @@ bool IfConverter::IfConvertDiamondCommon(
// This is everything used+live in BB2 after the duplicated instructions. We
// can compute this set by simulating liveness backwards from the end of BB2.
DontKill.init(*TRI);
- for (const MachineInstr &MI : make_range(MBB2.rbegin(), ++DI2.getReverse()))
- DontKill.stepBackward(MI);
+ if (MRI->tracksLiveness()) {
+ for (const MachineInstr &MI : make_range(MBB2.rbegin(), ++DI2.getReverse()))
+ DontKill.stepBackward(MI);
- for (const MachineInstr &MI : make_range(MBB1.begin(), DI1)) {
- SmallVector<std::pair<unsigned, const MachineOperand*>, 4> IgnoredClobbers;
- Redefs.stepForward(MI, IgnoredClobbers);
+ for (const MachineInstr &MI : make_range(MBB1.begin(), DI1)) {
+ SmallVector<std::pair<unsigned, const MachineOperand*>, 4> Dummy;
+ Redefs.stepForward(MI, Dummy);
+ }
}
BBI.BB->splice(BBI.BB->end(), &MBB1, MBB1.begin(), DI1);
MBB2.erase(MBB2.begin(), DI2);
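Editor's note: all three if-conversion paths now consult MachineRegisterInfo::tracksLiveness() before touching live-in sets, so the pass stays usable after passes that no longer maintain accurate liveness. A stripped-down sketch of the guard; the types below are stand-ins for the MachineRegisterInfo and MachineBasicBlock interfaces:

#include <set>
#include <vector>

struct BasicBlock { std::vector<int> LiveIns; };
struct RegisterInfo {
  bool TracksLiveness;
  bool tracksLiveness() const { return TracksLiveness; }
};

static void addLiveInsIfTracked(const RegisterInfo &MRI, const BasicBlock &MBB,
                                std::set<int> &Redefs) {
  // Skip the bookkeeping entirely when liveness is no longer accurate.
  if (!MRI.tracksLiveness())
    return;
  Redefs.insert(MBB.LiveIns.begin(), MBB.LiveIns.end());
}

int main() {
  RegisterInfo MRI{false};
  BasicBlock MBB{{1, 2, 3}};
  std::set<int> Redefs;
  addLiveInsIfTracked(MRI, MBB, Redefs);
  return Redefs.empty() ? 0 : 1;
}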
diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp
index eb13d2d3ec0c..db87092177ca 100644
--- a/lib/CodeGen/MIRPrinter.cpp
+++ b/lib/CodeGen/MIRPrinter.cpp
@@ -488,16 +488,16 @@ void MIPrinter::print(const MachineBasicBlock &MBB) {
}
// Print the live in registers.
- const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
- assert(TRI && "Expected target register info");
- if (!MBB.livein_empty()) {
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ if (MRI.tracksLiveness() && !MBB.livein_empty()) {
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
OS.indent(2) << "liveins: ";
bool First = true;
for (const auto &LI : MBB.liveins()) {
if (!First)
OS << ", ";
First = false;
- printReg(LI.PhysReg, OS, TRI);
+ printReg(LI.PhysReg, OS, &TRI);
if (!LI.LaneMask.all())
OS << ":0x" << PrintLaneMask(LI.LaneMask);
}
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 549424d257fe..3869f976854d 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -286,7 +286,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (!livein_empty()) {
if (Indexes) OS << '\t';
OS << " Live Ins:";
- for (const auto &LI : make_range(livein_begin(), livein_end())) {
+ for (const auto &LI : LiveIns) {
OS << ' ' << PrintReg(LI.PhysReg, TRI);
if (!LI.LaneMask.all())
OS << ':' << PrintLaneMask(LI.LaneMask);
@@ -1292,3 +1292,10 @@ MachineBasicBlock::getEndClobberMask(const TargetRegisterInfo *TRI) const {
void MachineBasicBlock::clearLiveIns() {
LiveIns.clear();
}
+
+MachineBasicBlock::livein_iterator MachineBasicBlock::livein_begin() const {
+ assert(getParent()->getProperties().hasProperty(
+ MachineFunctionProperties::Property::TracksLiveness) &&
+ "Liveness information is accurate");
+ return LiveIns.begin();
+}
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 426a4666c649..a98139f9e5af 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -566,7 +566,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
FirstTerminator = nullptr;
if (!MF->getProperties().hasProperty(
- MachineFunctionProperties::Property::NoPHIs)) {
+ MachineFunctionProperties::Property::NoPHIs) && MRI->tracksLiveness()) {
// If this block has allocatable physical registers live-in, check that
// it is an entry block or landing pad.
for (const auto &LI : MBB->liveins()) {
@@ -741,14 +741,16 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
}
regsLive.clear();
- for (const auto &LI : MBB->liveins()) {
- if (!TargetRegisterInfo::isPhysicalRegister(LI.PhysReg)) {
- report("MBB live-in list contains non-physical register", MBB);
- continue;
+ if (MRI->tracksLiveness()) {
+ for (const auto &LI : MBB->liveins()) {
+ if (!TargetRegisterInfo::isPhysicalRegister(LI.PhysReg)) {
+ report("MBB live-in list contains non-physical register", MBB);
+ continue;
+ }
+ for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true);
+ SubRegs.isValid(); ++SubRegs)
+ regsLive.insert(*SubRegs);
}
- for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- regsLive.insert(*SubRegs);
}
regsLiveInButUnused = regsLive;
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index de1c35caa1a0..fdf741fd58f7 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -48,11 +48,6 @@ void RegScavenger::init(MachineBasicBlock &MBB) {
assert((NumRegUnits == 0 || NumRegUnits == TRI->getNumRegUnits()) &&
"Target changed?");
- // It is not possible to use the register scavenger after late optimization
- // passes that don't preserve accurate liveness information.
- assert(MRI->tracksLiveness() &&
- "Cannot use register scavenger with inaccurate liveness");
-
// Self-initialize.
if (!this->MBB) {
NumRegUnits = TRI->getNumRegUnits();
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index cb803585282f..a07bd8f83546 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -40,6 +40,7 @@
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
@@ -7339,19 +7340,23 @@ SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
if (!Range)
return Op;
- Constant *Lo = cast<ConstantAsMetadata>(Range->getOperand(0))->getValue();
- if (!Lo->isNullValue())
+ ConstantRange CR = getConstantRangeFromMetadata(*Range);
+ if (CR.isFullSet() || CR.isEmptySet() || CR.isWrappedSet())
return Op;
- Constant *Hi = cast<ConstantAsMetadata>(Range->getOperand(1))->getValue();
- unsigned Bits = cast<ConstantInt>(Hi)->getValue().logBase2();
+ APInt Lo = CR.getUnsignedMin();
+ if (!Lo.isMinValue())
+ return Op;
+
+ APInt Hi = CR.getUnsignedMax();
+ unsigned Bits = Hi.getActiveBits();
EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), Bits);
SDLoc SL = getCurSDLoc();
- SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(),
- Op, DAG.getValueType(SmallVT));
+ SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(), Op,
+ DAG.getValueType(SmallVT));
unsigned NumVals = Op.getNode()->getNumValues();
if (NumVals == 1)
return ZExt;
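Editor's note: lowerRangeToAssertZExt now derives the zero-extension width from a ConstantRange: it gives up on full, empty, or wrapped ranges, requires the unsigned minimum to be zero, and sizes the assert to the active bits of the unsigned maximum. A standalone sketch of the width computation with plain integers standing in for APInt:

#include <cassert>
#include <cstdint>

// Number of bits needed to represent Hi, i.e. what APInt::getActiveBits() returns.
static unsigned activeBits(uint64_t Hi) {
  unsigned Bits = 0;
  while (Hi) {
    ++Bits;
    Hi >>= 1;
  }
  return Bits;
}

// Returns 0 when no AssertZext should be emitted for the [Lo, Hi] range.
static unsigned assertZExtWidth(uint64_t Lo, uint64_t Hi) {
  if (Lo != 0)          // only ranges starting at zero are handled
    return 0;
  return activeBits(Hi);
}

int main() {
  assert(assertZExtWidth(0, 255) == 8);
  assert(assertZExtWidth(1, 255) == 0);
}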
diff --git a/lib/DebugInfo/DWARF/DWARFDie.cpp b/lib/DebugInfo/DWARF/DWARFDie.cpp
index deec16330224..2aac3474654f 100644
--- a/lib/DebugInfo/DWARF/DWARFDie.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDie.cpp
@@ -299,11 +299,8 @@ DWARFDie::collectChildrenAddressRanges(DWARFAddressRangesVector& Ranges) const {
Ranges.insert(Ranges.end(), DIERanges.begin(), DIERanges.end());
}
- DWARFDie Child = getFirstChild();
- while (Child) {
+ for (auto Child: children())
Child.collectChildrenAddressRanges(Ranges);
- Child = Child.getSibling();
- }
}
bool DWARFDie::addressRangeContainsAddress(const uint64_t Address) const {
diff --git a/lib/Fuzzer/FuzzerDriver.cpp b/lib/Fuzzer/FuzzerDriver.cpp
index e6c9764f1133..2bbcb25275e4 100644
--- a/lib/Fuzzer/FuzzerDriver.cpp
+++ b/lib/Fuzzer/FuzzerDriver.cpp
@@ -468,6 +468,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
Options.HandleInt = Flags.handle_int;
Options.HandleSegv = Flags.handle_segv;
Options.HandleTerm = Flags.handle_term;
+ Options.HandleXfsz = Flags.handle_xfsz;
SetSignalHandler(Options);
if (Flags.minimize_crash_internal_step)
diff --git a/lib/Fuzzer/FuzzerFlags.def b/lib/Fuzzer/FuzzerFlags.def
index 08eaad9856be..22aad353acec 100644
--- a/lib/Fuzzer/FuzzerFlags.def
+++ b/lib/Fuzzer/FuzzerFlags.def
@@ -91,6 +91,7 @@ FUZZER_FLAG_INT(handle_ill, 1, "If 1, try to intercept SIGILL.")
FUZZER_FLAG_INT(handle_fpe, 1, "If 1, try to intercept SIGFPE.")
FUZZER_FLAG_INT(handle_int, 1, "If 1, try to intercept SIGINT.")
FUZZER_FLAG_INT(handle_term, 1, "If 1, try to intercept SIGTERM.")
+FUZZER_FLAG_INT(handle_xfsz, 1, "If 1, try to intercept SIGXFSZ.")
FUZZER_FLAG_INT(close_fd_mask, 0, "If 1, close stdout at startup; "
"if 2, close stderr; if 3, close both. "
"Be careful, this will also close e.g. asan's stderr/stdout.")
diff --git a/lib/Fuzzer/FuzzerIO.h b/lib/Fuzzer/FuzzerIO.h
index 741fecf415b0..15bfd3d34727 100644
--- a/lib/Fuzzer/FuzzerIO.h
+++ b/lib/Fuzzer/FuzzerIO.h
@@ -37,6 +37,9 @@ std::string DirPlusFile(const std::string &DirPath,
// Returns the name of the dir, similar to the 'dirname' utility.
std::string DirName(const std::string &FileName);
+// Returns path to a TmpDir.
+std::string TmpDir();
+
void DupAndCloseStderr();
void CloseStdout();
diff --git a/lib/Fuzzer/FuzzerIOPosix.cpp b/lib/Fuzzer/FuzzerIOPosix.cpp
index 720bc1304594..6d8edf6ff538 100644
--- a/lib/Fuzzer/FuzzerIOPosix.cpp
+++ b/lib/Fuzzer/FuzzerIOPosix.cpp
@@ -83,6 +83,12 @@ std::string DirName(const std::string &FileName) {
return Res;
}
+std::string TmpDir() {
+ if (auto Env = getenv("TMPDIR"))
+ return Env;
+ return "/tmp";
+}
+
} // namespace fuzzer
#endif // LIBFUZZER_POSIX
diff --git a/lib/Fuzzer/FuzzerIOWindows.cpp b/lib/Fuzzer/FuzzerIOWindows.cpp
index a4738eb9dfe5..056f0721a336 100644
--- a/lib/Fuzzer/FuzzerIOWindows.cpp
+++ b/lib/Fuzzer/FuzzerIOWindows.cpp
@@ -277,6 +277,8 @@ std::string DirName(const std::string &FileName) {
return FileName.substr(0, LocationLen + DirLen);
}
+std::string TmpDir() { return "TODO: implement TmpDir"; }
+
} // namespace fuzzer
#endif // LIBFUZZER_WINDOWS
diff --git a/lib/Fuzzer/FuzzerInternal.h b/lib/Fuzzer/FuzzerInternal.h
index c041706092db..0d2c7a78aca8 100644
--- a/lib/Fuzzer/FuzzerInternal.h
+++ b/lib/Fuzzer/FuzzerInternal.h
@@ -82,6 +82,7 @@ public:
static void StaticAlarmCallback();
static void StaticCrashSignalCallback();
static void StaticInterruptCallback();
+ static void StaticFileSizeExceedCallback();
void ExecuteCallback(const uint8_t *Data, size_t Size);
size_t RunOne(const uint8_t *Data, size_t Size);
diff --git a/lib/Fuzzer/FuzzerLoop.cpp b/lib/Fuzzer/FuzzerLoop.cpp
index 1336f5e4aeeb..9f49d1557990 100644
--- a/lib/Fuzzer/FuzzerLoop.cpp
+++ b/lib/Fuzzer/FuzzerLoop.cpp
@@ -266,6 +266,11 @@ void Fuzzer::StaticInterruptCallback() {
F->InterruptCallback();
}
+void Fuzzer::StaticFileSizeExceedCallback() {
+ Printf("==%lu== ERROR: libFuzzer: file size exceeded\n", GetPid());
+ exit(1);
+}
+
void Fuzzer::CrashCallback() {
Printf("==%lu== ERROR: libFuzzer: deadly signal\n", GetPid());
if (EF->__sanitizer_print_stack_trace)
diff --git a/lib/Fuzzer/FuzzerMerge.cpp b/lib/Fuzzer/FuzzerMerge.cpp
index 84660e0fe53f..9e559115680c 100644
--- a/lib/Fuzzer/FuzzerMerge.cpp
+++ b/lib/Fuzzer/FuzzerMerge.cpp
@@ -220,8 +220,8 @@ void Fuzzer::CrashResistantMerge(const std::vector<std::string> &Args,
ListFilesInDirRecursive(Corpora[i], nullptr, &AllFiles, /*TopDir*/true);
Printf("MERGE-OUTER: %zd files, %zd in the initial corpus\n",
AllFiles.size(), NumFilesInFirstCorpus);
- std::string CFPath =
- "libFuzzerTemp." + std::to_string(GetPid()) + ".txt";
+ auto CFPath = DirPlusFile(TmpDir(),
+ "libFuzzerTemp." + std::to_string(GetPid()) + ".txt");
// Write the control file.
RemoveFile(CFPath);
std::ofstream ControlFile(CFPath);
@@ -229,6 +229,11 @@ void Fuzzer::CrashResistantMerge(const std::vector<std::string> &Args,
ControlFile << NumFilesInFirstCorpus << "\n";
for (auto &Path: AllFiles)
ControlFile << Path << "\n";
+ if (!ControlFile) {
+ Printf("MERGE-OUTER: failed to write to the control file: %s\n",
+ CFPath.c_str());
+ exit(1);
+ }
ControlFile.close();
   // Execute the inner process until it passes.
@@ -246,6 +251,9 @@ void Fuzzer::CrashResistantMerge(const std::vector<std::string> &Args,
// Read the control file and do the merge.
Merger M;
std::ifstream IF(CFPath);
+ IF.seekg(0, IF.end);
+ Printf("MERGE-OUTER: the control file has %zd bytes\n", (size_t)IF.tellg());
+ IF.seekg(0, IF.beg);
M.ParseOrExit(IF, true);
IF.close();
std::vector<std::string> NewFiles;
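Editor's note: CrashResistantMerge now builds the control-file path under TmpDir(), bails out if the file could not be written, and reports the file size before parsing it back. A reduced, self-contained sketch of that write-check-then-read round trip (the file name is illustrative):

#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <string>

static std::string tmpDir() {
  if (const char *Env = std::getenv("TMPDIR"))
    return Env;
  return "/tmp";
}

int main() {
  std::string CFPath = tmpDir() + "/libFuzzerTemp.sketch.txt";

  std::ofstream ControlFile(CFPath);
  ControlFile << 2 << "\n" << "fileA\n" << "fileB\n";
  if (!ControlFile) {                 // catches a bad TMPDIR or a full disk
    std::printf("failed to write to the control file: %s\n", CFPath.c_str());
    return 1;
  }
  ControlFile.close();

  std::ifstream IF(CFPath);
  IF.seekg(0, IF.end);
  std::printf("the control file has %zu bytes\n", (size_t)IF.tellg());
  IF.seekg(0, IF.beg);
  return 0;
}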
diff --git a/lib/Fuzzer/FuzzerOptions.h b/lib/Fuzzer/FuzzerOptions.h
index cb702d285200..6f72205600b9 100644
--- a/lib/Fuzzer/FuzzerOptions.h
+++ b/lib/Fuzzer/FuzzerOptions.h
@@ -62,6 +62,7 @@ struct FuzzingOptions {
bool HandleInt = false;
bool HandleSegv = false;
bool HandleTerm = false;
+ bool HandleXfsz = false;
};
} // namespace fuzzer
diff --git a/lib/Fuzzer/FuzzerTraceState.cpp b/lib/Fuzzer/FuzzerTraceState.cpp
index be62a6624b27..2ad9702fab0e 100644
--- a/lib/Fuzzer/FuzzerTraceState.cpp
+++ b/lib/Fuzzer/FuzzerTraceState.cpp
@@ -46,10 +46,6 @@ public:
void TraceMemcmpCallback(size_t CmpSize, const uint8_t *Data1,
const uint8_t *Data2);
- void TraceSwitchCallback(uintptr_t PC, size_t ValSizeInBits, uint64_t Val,
- size_t NumCases, uint64_t *Cases);
- int TryToAddDesiredData(uint64_t PresentData, uint64_t DesiredData,
- size_t DataSize);
int TryToAddDesiredData(const uint8_t *PresentData,
const uint8_t *DesiredData, size_t DataSize);
@@ -147,29 +143,6 @@ public:
size_t AutoDictAdds = 0;
};
-int TraceState::TryToAddDesiredData(uint64_t PresentData, uint64_t DesiredData,
- size_t DataSize) {
- if (NumMutations >= kMaxMutations || !WantToHandleOneMoreMutation()) return 0;
- ScopedDoingMyOwnMemmem scoped_doing_my_own_memmem;
- const uint8_t *UnitData;
- auto UnitSize = F->GetCurrentUnitInFuzzingThead(&UnitData);
- int Res = 0;
- const uint8_t *Beg = UnitData;
- const uint8_t *End = Beg + UnitSize;
- for (const uint8_t *Cur = Beg; Cur < End; Cur++) {
- Cur = (uint8_t *)SearchMemory(Cur, End - Cur, &PresentData, DataSize);
- if (!Cur)
- break;
- size_t Pos = Cur - Beg;
- assert(Pos < UnitSize);
- AddMutation(Pos, DataSize, DesiredData);
- AddMutation(Pos, DataSize, DesiredData + 1);
- AddMutation(Pos, DataSize, DesiredData - 1);
- Res++;
- }
- return Res;
-}
-
int TraceState::TryToAddDesiredData(const uint8_t *PresentData,
const uint8_t *DesiredData,
size_t DataSize) {
@@ -206,26 +179,6 @@ void TraceState::TraceMemcmpCallback(size_t CmpSize, const uint8_t *Data1,
}
}
-void TraceState::TraceSwitchCallback(uintptr_t PC, size_t ValSizeInBits,
- uint64_t Val, size_t NumCases,
- uint64_t *Cases) {
- if (F->InFuzzingThread()) return;
- size_t ValSize = ValSizeInBits / 8;
- bool TryShort = IsTwoByteData(Val);
- for (size_t i = 0; i < NumCases; i++)
- TryShort &= IsTwoByteData(Cases[i]);
-
- if (Options.Verbosity >= 3)
- Printf("TraceSwitch: %p %zd # %zd; TryShort %d\n", PC, Val, NumCases,
- TryShort);
-
- for (size_t i = 0; i < NumCases; i++) {
- TryToAddDesiredData(Val, Cases[i], ValSize);
- if (TryShort)
- TryToAddDesiredData(Val, Cases[i], 2);
- }
-}
-
static TraceState *TS;
void Fuzzer::StartTraceRecording() {
diff --git a/lib/Fuzzer/FuzzerUtilPosix.cpp b/lib/Fuzzer/FuzzerUtilPosix.cpp
index 8b484b8effa4..e8d48dc81a3b 100644
--- a/lib/Fuzzer/FuzzerUtilPosix.cpp
+++ b/lib/Fuzzer/FuzzerUtilPosix.cpp
@@ -41,6 +41,10 @@ static void InterruptHandler(int, siginfo_t *, void *) {
Fuzzer::StaticInterruptCallback();
}
+static void FileSizeExceedHandler(int, siginfo_t *, void *) {
+ Fuzzer::StaticFileSizeExceedCallback();
+}
+
static void SetSigaction(int signum,
void (*callback)(int, siginfo_t *, void *)) {
struct sigaction sigact;
@@ -80,6 +84,8 @@ void SetSignalHandler(const FuzzingOptions& Options) {
SetSigaction(SIGILL, CrashHandler);
if (Options.HandleFpe)
SetSigaction(SIGFPE, CrashHandler);
+ if (Options.HandleXfsz)
+ SetSigaction(SIGXFSZ, FileSizeExceedHandler);
}
void SleepSeconds(int Seconds) {
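Editor's note: libFuzzer now installs a dedicated SIGXFSZ handler when handle_xfsz is set, so exceeding a file-size limit (for example under ulimit -f) is reported as an error rather than silently killing the process. A minimal POSIX-only sketch of registering such a handler; it mirrors the shape of SetSigaction above but is not the libFuzzer code itself:

#include <signal.h>
#include <cstdio>
#include <cstdlib>
#include <cstring>

static void FileSizeExceedHandler(int, siginfo_t *, void *) {
  // Report and exit, mirroring StaticFileSizeExceedCallback().
  std::fprintf(stderr, "ERROR: file size exceeded\n");
  std::_Exit(1);
}

static void SetSigaction(int Signum, void (*Callback)(int, siginfo_t *, void *)) {
  struct sigaction SigAct;
  std::memset(&SigAct, 0, sizeof(SigAct));
  SigAct.sa_sigaction = Callback;
  SigAct.sa_flags = SA_SIGINFO;
  if (sigaction(Signum, &SigAct, nullptr) != 0)
    std::perror("sigaction");
}

int main() {
  SetSigaction(SIGXFSZ, FileSizeExceedHandler);
  // The handler fires when a write would exceed the RLIMIT_FSIZE soft limit.
  return 0;
}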
diff --git a/lib/Fuzzer/FuzzerUtilWindows.cpp b/lib/Fuzzer/FuzzerUtilWindows.cpp
index 64adb7cd1380..3ca1f2c8f562 100644
--- a/lib/Fuzzer/FuzzerUtilWindows.cpp
+++ b/lib/Fuzzer/FuzzerUtilWindows.cpp
@@ -58,6 +58,7 @@ LONG CALLBACK ExceptionHandler(PEXCEPTION_POINTERS ExceptionInfo) {
if (HandlerOpt->HandleFpe)
Fuzzer::StaticCrashSignalCallback();
break;
+ // TODO: handle (Options.HandleXfsz)
}
return EXCEPTION_CONTINUE_SEARCH;
}
diff --git a/lib/Fuzzer/test/merge.test b/lib/Fuzzer/test/merge.test
index 1f1810eb0195..5c7d30e41caa 100644
--- a/lib/Fuzzer/test/merge.test
+++ b/lib/Fuzzer/test/merge.test
@@ -44,3 +44,11 @@ MERGE_WITH_CRASH: MERGE-OUTER: 3 new files
# Check that we actually limit the size with max_len
RUN: LLVMFuzzer-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 -max_len=5 2>&1 | FileCheck %s --check-prefix=MERGE_LEN5
MERGE_LEN5: MERGE-OUTER: succesfull in 1 attempt(s)
+
+# Check that we honor TMPDIR
+RUN: TMPDIR=DIR_DOES_NOT_EXIST not LLVMFuzzer-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 2>&1 | FileCheck %s --check-prefix=TMPDIR
+TMPDIR: MERGE-OUTER: failed to write to the control file: DIR_DOES_NOT_EXIST/libFuzzerTemp
+
+# Check that we can report an error if file size exceeded
+RUN: (ulimit -f 1; not LLVMFuzzer-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 2>&1 | FileCheck %s --check-prefix=SIGXFSZ)
+SIGXFSZ: ERROR: libFuzzer: file size exceeded
diff --git a/lib/LTO/LTO.cpp b/lib/LTO/LTO.cpp
index 42b3a344352b..e3e2f9f806c8 100644
--- a/lib/LTO/LTO.cpp
+++ b/lib/LTO/LTO.cpp
@@ -337,12 +337,21 @@ void LTO::addSymbolToGlobalRes(SmallPtrSet<GlobalValue *, 8> &Used,
if (Res.Prevailing)
GlobalRes.IRName = GV->getName();
}
+ // Set the partition to external if we know it is used elsewhere, e.g.
+ // it is visible to a regular object, is referenced from llvm.compiler_used,
+ // or was already recorded as being referenced from a different partition.
if (Res.VisibleToRegularObj || (GV && Used.count(GV)) ||
(GlobalRes.Partition != GlobalResolution::Unknown &&
- GlobalRes.Partition != Partition))
+ GlobalRes.Partition != Partition)) {
GlobalRes.Partition = GlobalResolution::External;
- else
+ } else
+ // First recorded reference, save the current partition.
GlobalRes.Partition = Partition;
+
+ // Flag as visible outside of ThinLTO if visible from a regular object or
+ // if this is a reference in the regular LTO partition.
+ GlobalRes.VisibleOutsideThinLTO |=
+ (Res.VisibleToRegularObj || (Partition == GlobalResolution::RegularLTO));
}
static void writeToResolutionFile(raw_ostream &OS, InputFile *Input,
@@ -848,6 +857,19 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
if (!ModuleToDefinedGVSummaries.count(Mod.first))
ModuleToDefinedGVSummaries.try_emplace(Mod.first);
+ // Compute "dead" symbols, we don't want to import/export these!
+ DenseSet<GlobalValue::GUID> GUIDPreservedSymbols;
+ for (auto &Res : GlobalResolutions) {
+ if (Res.second.VisibleOutsideThinLTO &&
+ // IRName will be defined if we have seen the prevailing copy of
+ // this value. If not, no need to preserve any ThinLTO copies.
+ !Res.second.IRName.empty())
+ GUIDPreservedSymbols.insert(GlobalValue::getGUID(Res.second.IRName));
+ }
+
+ auto DeadSymbols =
+ computeDeadSymbols(ThinLTO.CombinedIndex, GUIDPreservedSymbols);
+
StringMap<FunctionImporter::ImportMapTy> ImportLists(
ThinLTO.ModuleMap.size());
StringMap<FunctionImporter::ExportSetTy> ExportLists(
@@ -856,12 +878,21 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
if (Conf.OptLevel > 0) {
ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
- ImportLists, ExportLists);
+ ImportLists, ExportLists, &DeadSymbols);
std::set<GlobalValue::GUID> ExportedGUIDs;
for (auto &Res : GlobalResolutions) {
- if (!Res.second.IRName.empty() &&
- Res.second.Partition == GlobalResolution::External)
+ // First check if the symbol was flagged as having external references.
+ if (Res.second.Partition != GlobalResolution::External)
+ continue;
+ // IRName will be defined if we have seen the prevailing copy of
+ // this value. If not, no need to mark as exported from a ThinLTO
+ // partition (and we can't get the GUID).
+ if (Res.second.IRName.empty())
+ continue;
+ auto GUID = GlobalValue::getGUID(Res.second.IRName);
+ // Mark exported unless index-based analysis determined it to be dead.
+ if (!DeadSymbols.count(GUID))
ExportedGUIDs.insert(GlobalValue::getGUID(Res.second.IRName));
}
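Editor's note: runThinLTO now seeds the liveness analysis with every prevailing symbol that is visible outside ThinLTO, computes the dead set once, and later refuses to mark dead GUIDs as exported. A container-level sketch of that filtering, with GUIDs reduced to hashed integers and a hard-coded stand-in for computeDeadSymbols():

#include <cstdint>
#include <functional>
#include <map>
#include <set>
#include <string>

using GUID = uint64_t;
static GUID getGUID(const std::string &Name) { return std::hash<std::string>{}(Name); }

struct Resolution {
  std::string IRName;          // empty if the prevailing copy was never seen
  bool VisibleOutsideThinLTO;
  bool External;
};

int main() {
  std::map<int, Resolution> GlobalResolutions = {
      {0, {"foo", true, true}}, {1, {"", true, true}}, {2, {"bar", false, true}}};

  // Preserve every prevailing symbol that is visible outside ThinLTO.
  std::set<GUID> Preserved;
  for (auto &Res : GlobalResolutions)
    if (Res.second.VisibleOutsideThinLTO && !Res.second.IRName.empty())
      Preserved.insert(getGUID(Res.second.IRName));

  // Stand-in for computeDeadSymbols(): here, "bar" was proven dead by the index.
  std::set<GUID> Dead = {getGUID("bar")};

  std::set<GUID> ExportedGUIDs;
  for (auto &Res : GlobalResolutions) {
    if (!Res.second.External || Res.second.IRName.empty())
      continue;
    GUID G = getGUID(Res.second.IRName);
    if (!Dead.count(G))          // never export what the index proved dead
      ExportedGUIDs.insert(G);
  }
  return ExportedGUIDs.count(getGUID("foo")) ? 0 : 1;
}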
diff --git a/lib/LTO/ThinLTOCodeGenerator.cpp b/lib/LTO/ThinLTOCodeGenerator.cpp
index 880dc3dfae98..66ffe6db29d6 100644
--- a/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -581,11 +581,18 @@ void ThinLTOCodeGenerator::promote(Module &TheModule,
StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries;
Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
+ // Convert the preserved symbols set from string to GUID
+ auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
+ PreservedSymbols, Triple(TheModule.getTargetTriple()));
+
+ // Compute "dead" symbols, we don't want to import/export these!
+ auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols);
+
// Generate import/export list
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
- ExportLists);
+ ExportLists, &DeadSymbols);
// Resolve LinkOnce/Weak symbols.
StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
@@ -594,10 +601,6 @@ void ThinLTOCodeGenerator::promote(Module &TheModule,
thinLTOResolveWeakForLinkerModule(
TheModule, ModuleToDefinedGVSummaries[ModuleIdentifier]);
- // Convert the preserved symbols set from string to GUID
- auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
- PreservedSymbols, Triple(TheModule.getTargetTriple()));
-
// Promote the exported values in the index, so that they are promoted
// in the module.
auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) {
@@ -623,11 +626,18 @@ void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule,
StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
+ // Convert the preserved symbols set from string to GUID
+ auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
+ PreservedSymbols, Triple(TheModule.getTargetTriple()));
+
+ // Compute "dead" symbols, we don't want to import/export these!
+ auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols);
+
// Generate import/export list
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
- ExportLists);
+ ExportLists, &DeadSymbols);
auto &ImportList = ImportLists[TheModule.getModuleIdentifier()];
crossImportIntoModule(TheModule, Index, ModuleMap, ImportList);
@@ -697,11 +707,14 @@ void ThinLTOCodeGenerator::internalize(Module &TheModule,
StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
+ // Compute "dead" symbols, we don't want to import/export these!
+ auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols);
+
// Generate import/export list
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
- ExportLists);
+ ExportLists, &DeadSymbols);
auto &ExportList = ExportLists[ModuleIdentifier];
// Be friendly and don't nuke totally the module when the client didn't
@@ -836,17 +849,20 @@ void ThinLTOCodeGenerator::run() {
StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
Index->collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
+ // Convert the preserved symbols set from string to GUID; this is needed for
+ // computing the caching hash and the internalization.
+ auto GUIDPreservedSymbols =
+ computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple);
+
+ // Compute "dead" symbols, we don't want to import/export these!
+ auto DeadSymbols = computeDeadSymbols(*Index, GUIDPreservedSymbols);
+
// Collect the import/export lists for all modules from the call-graph in the
// combined index.
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
ComputeCrossModuleImport(*Index, ModuleToDefinedGVSummaries, ImportLists,
- ExportLists);
-
- // Convert the preserved symbols set from string to GUID, this is needed for
- // computing the caching hash and the internalization.
- auto GUIDPreservedSymbols =
- computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple);
+ ExportLists, &DeadSymbols);
// We use a std::map here to be able to have a defined ordering when
// producing a hash for the cache entry.
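For reference, promote(), crossModuleImport(), internalize() and run() now all perform roughly the same setup before building the import/export lists; a sketch of that recurring sequence is below (names follow the patch, but the triple and the source of the preserved symbols differ per entry point, and internalize() reuses an already-computed GUIDPreservedSymbols):

  auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
      PreservedSymbols, Triple(TheModule.getTargetTriple()));
  // Compute "dead" symbols so they are neither imported nor exported.
  auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols);
  StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
  StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
  ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
                           ExportLists, &DeadSymbols);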
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 0c0b498f1375..fb8b45166a41 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -205,7 +205,7 @@ APInt& APInt::operator++() {
/// This function subtracts a single "digit" (64-bit word), y, from
/// the multi-digit integer array, x[], propagating the borrowed 1 value until
-/// no further borrowing is neeeded or it runs out of "digits" in x. The result
+/// no further borrowing is needed or it runs out of "digits" in x. The result
/// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted.
/// In other words, if y > x then this function returns 1, otherwise 0.
/// @returns the borrow out of the subtraction
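Beyond the spelling fix, the behaviour this comment documents is easier to see in a small standalone sketch (an illustration of the described borrow chain, not the APInt implementation itself):

  // Subtract a single word y from the n-word integer x[], propagating the
  // borrow; return 1 only if the borrow falls off the end of x (i.e. y > x).
  static unsigned subOneWord(uint64_t x[], unsigned n, uint64_t y) {
    for (unsigned i = 0; i != n; ++i) {
      uint64_t Prev = x[i];
      x[i] -= y;
      if (Prev >= y)   // no underflow: the borrow chain stops here
        return 0;
      y = 1;           // underflow: borrow one from the next word
    }
    return 1;          // every word was exhausted
  }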
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
index ca344b1dc058..15418ad2fd06 100644
--- a/lib/Support/CMakeLists.txt
+++ b/lib/Support/CMakeLists.txt
@@ -90,6 +90,7 @@ add_llvm_library(LLVMSupport
StringSaver.cpp
StringRef.cpp
SystemUtils.cpp
+ TarWriter.cpp
TargetParser.cpp
ThreadPool.cpp
Timer.cpp
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index 49d0ed55a716..8a09589aa884 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -474,15 +474,25 @@ getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model,
break;
// Skylake:
- case 0x4e:
- *Type = INTEL_COREI7; // "skylake-avx512"
- *Subtype = INTEL_COREI7_SKYLAKE_AVX512;
- break;
- case 0x5e:
+ case 0x4e: // Skylake mobile
+ case 0x5e: // Skylake desktop
+ case 0x8e: // Kaby Lake mobile
+ case 0x9e: // Kaby Lake desktop
*Type = INTEL_COREI7; // "skylake"
*Subtype = INTEL_COREI7_SKYLAKE;
break;
+ // Skylake Xeon:
+ case 0x55:
+ *Type = INTEL_COREI7;
+ // Check that we really have AVX512
+ if (Features & (1 << FEATURE_AVX512)) {
+ *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512"
+ } else {
+ *Subtype = INTEL_COREI7_SKYLAKE; // "skylake"
+ }
+ break;
+
case 0x1c: // Most 45 nm Intel Atom processors
case 0x26: // 45 nm Atom Lincroft
case 0x27: // 32 nm Atom Medfield
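The Skylake hunk folds the client parts (0x4e, 0x5e) together with the new Kaby Lake models (0x8e, 0x9e) onto plain "skylake", and only reports "skylake-avx512" for the Xeon model 0x55 when the AVX-512 feature bit was actually detected. Condensed, the subtype selection now amounts to the following (illustrative; the real code also sets *Type = INTEL_COREI7):

  switch (Model) {
  case 0x4e: case 0x5e: case 0x8e: case 0x9e: // Skylake / Kaby Lake client
    Subtype = INTEL_COREI7_SKYLAKE;
    break;
  case 0x55:                                  // Skylake Xeon
    Subtype = (Features & (1 << FEATURE_AVX512)) ? INTEL_COREI7_SKYLAKE_AVX512
                                                 : INTEL_COREI7_SKYLAKE;
    break;
  }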
diff --git a/lib/Support/TarWriter.cpp b/lib/Support/TarWriter.cpp
new file mode 100644
index 000000000000..5fc17d276377
--- /dev/null
+++ b/lib/Support/TarWriter.cpp
@@ -0,0 +1,166 @@
+//===-- TarWriter.cpp - Tar archive file creator --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The TarWriter class provides a way to create a tar archive file.
+//
+// I put emphasis on simplicity over comprehensiveness when implementing this
+// class because we don't need a full-fledged archive file generator in LLVM
+// at the moment.
+//
+// The filename field in the Unix V7 tar header is 100 bytes. Longer filenames
+// are stored using the PAX extension. The PAX header is standardized in
+// POSIX.1-2001.
+//
+// The struct definition of UstarHeader is copied from
+// https://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/TarWriter.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MathExtras.h"
+
+using namespace llvm;
+
+// Each file in an archive must be aligned to this block size.
+static const int BlockSize = 512;
+
+struct UstarHeader {
+ char Name[100];
+ char Mode[8];
+ char Uid[8];
+ char Gid[8];
+ char Size[12];
+ char Mtime[12];
+ char Checksum[8];
+ char TypeFlag;
+ char Linkname[100];
+ char Magic[6];
+ char Version[2];
+ char Uname[32];
+ char Gname[32];
+ char DevMajor[8];
+ char DevMinor[8];
+ char Prefix[155];
+ char Pad[12];
+};
+static_assert(sizeof(UstarHeader) == BlockSize, "invalid Ustar header");
+
+// A PAX attribute is in the form of "<length> <key>=<value>\n"
+// where <length> is the length of the entire string including
+// the length field itself. An example string is this.
+//
+// 25 ctime=1084839148.1212\n
+//
+// This function creates such a string.
+static std::string formatPax(StringRef Key, StringRef Val) {
+ int Len = Key.size() + Val.size() + 3; // +3 for " ", "=" and "\n"
+
+ // We need to compute total size twice because appending
+ // a length field could change total size by one.
+ int Total = Len + Twine(Len).str().size();
+ Total = Len + Twine(Total).str().size();
+ return (Twine(Total) + " " + Key + "=" + Val + "\n").str();
+}
+
+// Headers in tar files must be aligned to 512-byte boundaries.
+// This function forwards the current file position to the next boundary.
+static void pad(raw_fd_ostream &OS) {
+ uint64_t Pos = OS.tell();
+ OS.seek(alignTo(Pos, BlockSize));
+}
+
+// Computes a checksum for a tar header.
+static void computeChecksum(UstarHeader &Hdr) {
+ // Before computing a checksum, the checksum field must be
+ // filled with space characters.
+ memset(Hdr.Checksum, ' ', sizeof(Hdr.Checksum));
+
+ // Compute a checksum and store it in the checksum field.
+ unsigned Chksum = 0;
+ for (size_t I = 0; I < sizeof(Hdr); ++I)
+ Chksum += reinterpret_cast<uint8_t *>(&Hdr)[I];
+ snprintf(Hdr.Checksum, sizeof(Hdr.Checksum), "%06o", Chksum);
+}
+
+// Create a tar header and write it to a given output stream.
+static void writePaxHeader(raw_fd_ostream &OS, StringRef Path) {
+ // A PAX header consists of a 512-byte header followed
+ // by key-value strings. First, create key-value strings.
+ std::string PaxAttr = formatPax("path", Path);
+
+ // Create a 512-byte header.
+ UstarHeader Hdr = {};
+ snprintf(Hdr.Size, sizeof(Hdr.Size), "%011zo", PaxAttr.size());
+ Hdr.TypeFlag = 'x'; // PAX magic
+ memcpy(Hdr.Magic, "ustar", 6); // Ustar magic
+ computeChecksum(Hdr);
+
+ // Write them down.
+ OS << StringRef(reinterpret_cast<char *>(&Hdr), sizeof(Hdr));
+ OS << PaxAttr;
+ pad(OS);
+}
+
+// The PAX header is an extended format, so a PAX header needs
+// to be followed by a "real" header.
+static void writeUstarHeader(raw_fd_ostream &OS, StringRef Path, size_t Size) {
+ UstarHeader Hdr = {};
+ memcpy(Hdr.Name, Path.data(), Path.size());
+ memcpy(Hdr.Mode, "0000664", 8);
+ snprintf(Hdr.Size, sizeof(Hdr.Size), "%011zo", Size);
+ memcpy(Hdr.Magic, "ustar", 6);
+ computeChecksum(Hdr);
+ OS << StringRef(reinterpret_cast<char *>(&Hdr), sizeof(Hdr));
+}
+
+// We want to use '/' as a path separator even on Windows.
+// This function canonicalizes a given path.
+static std::string canonicalize(std::string S) {
+#ifdef LLVM_ON_WIN32
+ std::replace(S.begin(), S.end(), '\\', '/');
+#endif
+ return S;
+}
+
+// Creates a TarWriter instance and returns it.
+Expected<std::unique_ptr<TarWriter>> TarWriter::create(StringRef OutputPath,
+ StringRef BaseDir) {
+ int FD;
+ if (std::error_code EC = openFileForWrite(OutputPath, FD, sys::fs::F_None))
+ return make_error<StringError>("cannot open " + OutputPath, EC);
+ return std::unique_ptr<TarWriter>(new TarWriter(FD, BaseDir));
+}
+
+TarWriter::TarWriter(int FD, StringRef BaseDir)
+ : OS(FD, /*shouldClose=*/true, /*unbuffered=*/false), BaseDir(BaseDir) {}
+
+// Append a given file to an archive.
+void TarWriter::append(StringRef Path, StringRef Data) {
+ // Write Path and Data.
+ std::string S = BaseDir + "/" + canonicalize(Path) + "\0";
+ if (S.size() <= sizeof(UstarHeader::Name)) {
+ writeUstarHeader(OS, S, Data.size());
+ } else {
+ writePaxHeader(OS, S);
+ writeUstarHeader(OS, "", Data.size());
+ }
+
+ OS << Data;
+ pad(OS);
+
+ // POSIX requires tar archives end with two null blocks.
+ // Here, we write the terminator and then seek back, so that
+ // the file being output is terminated correctly at any moment.
+ uint64_t Pos = OS.tell();
+ OS << std::string(BlockSize * 2, '\0');
+ OS.seek(Pos);
+ OS.flush();
+}
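Two details of the new TarWriter are easy to miss. First, the length field of a PAX record counts itself, which is why formatPax computes the total twice; for example formatPax("path", "foo/bar") produces "16 path=foo/bar\n" (two digits, a space, "path=foo/bar" and the newline add up to 16 bytes). Second, append() emits a PAX "path" record only when the combined BaseDir/Path name no longer fits the 100-byte ustar Name field; short names get a plain ustar header. A rough usage sketch, assuming the interface declared in llvm/Support/TarWriter.h (error handling abbreviated):

  if (auto TarOrErr = TarWriter::create("repro.tar", "repro")) {
    std::unique_ptr<TarWriter> &Tar = *TarOrErr;
    Tar->append("input.o", ObjectContents);  // stored as repro/input.o
    Tar->append(VeryLongPath, MoreContents); // long name -> PAX + ustar header
  } else
    consumeError(TarOrErr.takeError());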
diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc
index 3750d7f4c09d..9752b70644c6 100644
--- a/lib/Support/Unix/Signals.inc
+++ b/lib/Support/Unix/Signals.inc
@@ -48,7 +48,7 @@
// _Unwind_Backtrace function, but on FreeBSD the configure test passes
// despite the function not existing, and on Android, <unwind.h> conflicts
// with <link.h>.
-#if defined(__GLIBC__) || defined(__APPLE__)
+#ifdef __GLIBC__
#include <unwind.h>
#else
#undef HAVE__UNWIND_BACKTRACE
diff --git a/lib/Target/AArch64/AArch64CollectLOH.cpp b/lib/Target/AArch64/AArch64CollectLOH.cpp
index 7666011f75b6..17aafa0c3d6e 100644
--- a/lib/Target/AArch64/AArch64CollectLOH.cpp
+++ b/lib/Target/AArch64/AArch64CollectLOH.cpp
@@ -110,72 +110,34 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
#define DEBUG_TYPE "aarch64-collect-loh"
-static cl::opt<bool>
-PreCollectRegister("aarch64-collect-loh-pre-collect-register", cl::Hidden,
- cl::desc("Restrict analysis to registers invovled"
- " in LOHs"),
- cl::init(true));
-
-static cl::opt<bool>
-BasicBlockScopeOnly("aarch64-collect-loh-bb-only", cl::Hidden,
- cl::desc("Restrict analysis at basic block scope"),
- cl::init(true));
-
STATISTIC(NumADRPSimpleCandidate,
"Number of simplifiable ADRP dominate by another");
-#ifndef NDEBUG
-STATISTIC(NumADRPComplexCandidate2,
- "Number of simplifiable ADRP reachable by 2 defs");
-STATISTIC(NumADRPComplexCandidate3,
- "Number of simplifiable ADRP reachable by 3 defs");
-STATISTIC(NumADRPComplexCandidateOther,
- "Number of simplifiable ADRP reachable by 4 or more defs");
-STATISTIC(NumADDToSTRWithImm,
- "Number of simplifiable STR with imm reachable by ADD");
-STATISTIC(NumLDRToSTRWithImm,
- "Number of simplifiable STR with imm reachable by LDR");
STATISTIC(NumADDToSTR, "Number of simplifiable STR reachable by ADD");
STATISTIC(NumLDRToSTR, "Number of simplifiable STR reachable by LDR");
-STATISTIC(NumADDToLDRWithImm,
- "Number of simplifiable LDR with imm reachable by ADD");
-STATISTIC(NumLDRToLDRWithImm,
- "Number of simplifiable LDR with imm reachable by LDR");
STATISTIC(NumADDToLDR, "Number of simplifiable LDR reachable by ADD");
STATISTIC(NumLDRToLDR, "Number of simplifiable LDR reachable by LDR");
-#endif // NDEBUG
STATISTIC(NumADRPToLDR, "Number of simplifiable LDR reachable by ADRP");
-#ifndef NDEBUG
-STATISTIC(NumCplxLvl1, "Number of complex case of level 1");
-STATISTIC(NumTooCplxLvl1, "Number of too complex case of level 1");
-STATISTIC(NumCplxLvl2, "Number of complex case of level 2");
-STATISTIC(NumTooCplxLvl2, "Number of too complex case of level 2");
-#endif // NDEBUG
STATISTIC(NumADRSimpleCandidate, "Number of simplifiable ADRP + ADD");
-STATISTIC(NumADRComplexCandidate, "Number of too complex ADRP + ADD");
#define AARCH64_COLLECT_LOH_NAME "AArch64 Collect Linker Optimization Hint (LOH)"
namespace {
+
struct AArch64CollectLOH : public MachineFunctionPass {
static char ID;
- AArch64CollectLOH() : MachineFunctionPass(ID) {
- initializeAArch64CollectLOHPass(*PassRegistry::getPassRegistry());
- }
+ AArch64CollectLOH() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -187,351 +149,57 @@ struct AArch64CollectLOH : public MachineFunctionPass {
StringRef getPassName() const override { return AARCH64_COLLECT_LOH_NAME; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
- AU.addRequired<MachineDominatorTree>();
+ AU.setPreservesAll();
}
-
-private:
};
-/// A set of MachineInstruction.
-typedef SetVector<const MachineInstr *> SetOfMachineInstr;
-/// Map a basic block to a set of instructions per register.
-/// This is used to represent the exposed uses of a basic block
-/// per register.
-typedef MapVector<const MachineBasicBlock *,
- std::unique_ptr<SetOfMachineInstr[]>>
-BlockToSetOfInstrsPerColor;
-/// Map a basic block to an instruction per register.
-/// This is used to represent the live-out definitions of a basic block
-/// per register.
-typedef MapVector<const MachineBasicBlock *,
- std::unique_ptr<const MachineInstr *[]>>
-BlockToInstrPerColor;
-/// Map an instruction to a set of instructions. Used to represent the
-/// mapping def to reachable uses or use to definitions.
-typedef MapVector<const MachineInstr *, SetOfMachineInstr> InstrToInstrs;
-/// Map a basic block to a BitVector.
-/// This is used to record the kill registers per basic block.
-typedef MapVector<const MachineBasicBlock *, BitVector> BlockToRegSet;
-
-/// Map a register to a dense id.
-typedef DenseMap<unsigned, unsigned> MapRegToId;
-/// Map a dense id to a register. Used for debug purposes.
-typedef SmallVector<unsigned, 32> MapIdToReg;
-} // end anonymous namespace.
-
char AArch64CollectLOH::ID = 0;
-INITIALIZE_PASS_BEGIN(AArch64CollectLOH, "aarch64-collect-loh",
- AARCH64_COLLECT_LOH_NAME, false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_END(AArch64CollectLOH, "aarch64-collect-loh",
- AARCH64_COLLECT_LOH_NAME, false, false)
-
-/// Given a couple (MBB, reg) get the corresponding set of instruction from
-/// the given "sets".
-/// If this couple does not reference any set, an empty set is added to "sets"
-/// for this couple and returned.
-/// \param nbRegs is used internally allocate some memory. It must be consistent
-/// with the way sets is used.
-static SetOfMachineInstr &getSet(BlockToSetOfInstrsPerColor &sets,
- const MachineBasicBlock &MBB, unsigned reg,
- unsigned nbRegs) {
- SetOfMachineInstr *result;
- BlockToSetOfInstrsPerColor::iterator it = sets.find(&MBB);
- if (it != sets.end())
- result = it->second.get();
- else
- result = (sets[&MBB] = make_unique<SetOfMachineInstr[]>(nbRegs)).get();
-
- return result[reg];
-}
-
-/// Given a couple (reg, MI) get the corresponding set of instructions from the
-/// the given "sets".
-/// This is used to get the uses record in sets of a definition identified by
-/// MI and reg, i.e., MI defines reg.
-/// If the couple does not reference anything, an empty set is added to
-/// "sets[reg]".
-/// \pre set[reg] is valid.
-static SetOfMachineInstr &getUses(InstrToInstrs *sets, unsigned reg,
- const MachineInstr &MI) {
- return sets[reg][&MI];
-}
-
-/// Same as getUses but does not modify the input map: sets.
-/// \return NULL if the couple (reg, MI) is not in sets.
-static const SetOfMachineInstr *getUses(const InstrToInstrs *sets, unsigned reg,
- const MachineInstr &MI) {
- InstrToInstrs::const_iterator Res = sets[reg].find(&MI);
- if (Res != sets[reg].end())
- return &(Res->second);
- return nullptr;
-}
-
-/// Initialize the reaching definition algorithm:
-/// For each basic block BB in MF, record:
-/// - its kill set.
-/// - its reachable uses (uses that are exposed to BB's predecessors).
-/// - its the generated definitions.
-/// \param DummyOp if not NULL, specifies a Dummy Operation to be added to
-/// the list of uses of exposed defintions.
-/// \param ADRPMode specifies to only consider ADRP instructions for generated
-/// definition. It also consider definitions of ADRP instructions as uses and
-/// ignore other uses. The ADRPMode is used to collect the information for LHO
-/// that involve ADRP operation only.
-static void initReachingDef(const MachineFunction &MF,
- InstrToInstrs *ColorOpToReachedUses,
- BlockToInstrPerColor &Gen, BlockToRegSet &Kill,
- BlockToSetOfInstrsPerColor &ReachableUses,
- const MapRegToId &RegToId,
- const MachineInstr *DummyOp, bool ADRPMode) {
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- unsigned NbReg = RegToId.size();
-
- for (const MachineBasicBlock &MBB : MF) {
- auto &BBGen = Gen[&MBB];
- BBGen = make_unique<const MachineInstr *[]>(NbReg);
- std::fill(BBGen.get(), BBGen.get() + NbReg, nullptr);
-
- BitVector &BBKillSet = Kill[&MBB];
- BBKillSet.resize(NbReg);
- for (const MachineInstr &MI : MBB) {
- bool IsADRP = MI.getOpcode() == AArch64::ADRP;
-
- // Process uses first.
- if (IsADRP || !ADRPMode)
- for (const MachineOperand &MO : MI.operands()) {
- // Treat ADRP def as use, as the goal of the analysis is to find
- // ADRP defs reached by other ADRP defs.
- if (!MO.isReg() || (!ADRPMode && !MO.isUse()) ||
- (ADRPMode && (!IsADRP || !MO.isDef())))
- continue;
- unsigned CurReg = MO.getReg();
- MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg);
- if (ItCurRegId == RegToId.end())
- continue;
- CurReg = ItCurRegId->second;
-
- // if CurReg has not been defined, this use is reachable.
- if (!BBGen[CurReg] && !BBKillSet.test(CurReg))
- getSet(ReachableUses, MBB, CurReg, NbReg).insert(&MI);
- // current basic block definition for this color, if any, is in Gen.
- if (BBGen[CurReg])
- getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(&MI);
- }
-
- // Process clobbers.
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isRegMask())
- continue;
- // Clobbers kill the related colors.
- const uint32_t *PreservedRegs = MO.getRegMask();
-
- // Set generated regs.
- for (const auto &Entry : RegToId) {
- unsigned Reg = Entry.second;
- // Use the global register ID when querying APIs external to this
- // pass.
- if (MachineOperand::clobbersPhysReg(PreservedRegs, Entry.first)) {
- // Do not register clobbered definition for no ADRP.
- // This definition is not used anyway (otherwise register
- // allocation is wrong).
- BBGen[Reg] = ADRPMode ? &MI : nullptr;
- BBKillSet.set(Reg);
- }
- }
- }
-
- // Process register defs.
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg() || !MO.isDef())
- continue;
- unsigned CurReg = MO.getReg();
- MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg);
- if (ItCurRegId == RegToId.end())
- continue;
-
- for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI) {
- MapRegToId::const_iterator ItRegId = RegToId.find(*AI);
- // If this alias has not been recorded, then it is not interesting
- // for the current analysis.
- // We can end up in this situation because of tuple registers.
- // E.g., Let say we are interested in S1. When we register
- // S1, we will also register its aliases and in particular
- // the tuple Q1_Q2.
- // Now, when we encounter Q1_Q2, we will look through its aliases
- // and will find that S2 is not registered.
- if (ItRegId == RegToId.end())
- continue;
-
- BBKillSet.set(ItRegId->second);
- BBGen[ItRegId->second] = &MI;
- }
- BBGen[ItCurRegId->second] = &MI;
- }
- }
-
- // If we restrict our analysis to basic block scope, conservatively add a
- // dummy
- // use for each generated value.
- if (!ADRPMode && DummyOp && !MBB.succ_empty())
- for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg)
- if (BBGen[CurReg])
- getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(DummyOp);
- }
-}
-
-/// Reaching def core algorithm:
-/// while an Out has changed
-/// for each bb
-/// for each color
-/// In[bb][color] = U Out[bb.predecessors][color]
-/// insert reachableUses[bb][color] in each in[bb][color]
-/// op.reachedUses
-///
-/// Out[bb] = Gen[bb] U (In[bb] - Kill[bb])
-static void reachingDefAlgorithm(const MachineFunction &MF,
- InstrToInstrs *ColorOpToReachedUses,
- BlockToSetOfInstrsPerColor &In,
- BlockToSetOfInstrsPerColor &Out,
- BlockToInstrPerColor &Gen, BlockToRegSet &Kill,
- BlockToSetOfInstrsPerColor &ReachableUses,
- unsigned NbReg) {
- bool HasChanged;
- do {
- HasChanged = false;
- for (const MachineBasicBlock &MBB : MF) {
- unsigned CurReg;
- for (CurReg = 0; CurReg < NbReg; ++CurReg) {
- SetOfMachineInstr &BBInSet = getSet(In, MBB, CurReg, NbReg);
- SetOfMachineInstr &BBReachableUses =
- getSet(ReachableUses, MBB, CurReg, NbReg);
- SetOfMachineInstr &BBOutSet = getSet(Out, MBB, CurReg, NbReg);
- unsigned Size = BBOutSet.size();
- // In[bb][color] = U Out[bb.predecessors][color]
- for (const MachineBasicBlock *PredMBB : MBB.predecessors()) {
- SetOfMachineInstr &PredOutSet = getSet(Out, *PredMBB, CurReg, NbReg);
- BBInSet.insert(PredOutSet.begin(), PredOutSet.end());
- }
- // insert reachableUses[bb][color] in each in[bb][color] op.reachedses
- for (const MachineInstr *MI : BBInSet) {
- SetOfMachineInstr &OpReachedUses =
- getUses(ColorOpToReachedUses, CurReg, *MI);
- OpReachedUses.insert(BBReachableUses.begin(), BBReachableUses.end());
- }
- // Out[bb] = Gen[bb] U (In[bb] - Kill[bb])
- if (!Kill[&MBB].test(CurReg))
- BBOutSet.insert(BBInSet.begin(), BBInSet.end());
- if (Gen[&MBB][CurReg])
- BBOutSet.insert(Gen[&MBB][CurReg]);
- HasChanged |= BBOutSet.size() != Size;
- }
- }
- } while (HasChanged);
-}
-
-/// Reaching definition algorithm.
-/// \param MF function on which the algorithm will operate.
-/// \param[out] ColorOpToReachedUses will contain the result of the reaching
-/// def algorithm.
-/// \param ADRPMode specify whether the reaching def algorithm should be tuned
-/// for ADRP optimization. \see initReachingDef for more details.
-/// \param DummyOp if not NULL, the algorithm will work at
-/// basic block scope and will set for every exposed definition a use to
-/// @p DummyOp.
-/// \pre ColorOpToReachedUses is an array of at least number of registers of
-/// InstrToInstrs.
-static void reachingDef(const MachineFunction &MF,
- InstrToInstrs *ColorOpToReachedUses,
- const MapRegToId &RegToId, bool ADRPMode = false,
- const MachineInstr *DummyOp = nullptr) {
- // structures:
- // For each basic block.
- // Out: a set per color of definitions that reach the
- // out boundary of this block.
- // In: Same as Out but for in boundary.
- // Gen: generated color in this block (one operation per color).
- // Kill: register set of killed color in this block.
- // ReachableUses: a set per color of uses (operation) reachable
- // for "In" definitions.
- BlockToSetOfInstrsPerColor Out, In, ReachableUses;
- BlockToInstrPerColor Gen;
- BlockToRegSet Kill;
-
- // Initialize Gen, kill and reachableUses.
- initReachingDef(MF, ColorOpToReachedUses, Gen, Kill, ReachableUses, RegToId,
- DummyOp, ADRPMode);
-
- // Algo.
- if (!DummyOp)
- reachingDefAlgorithm(MF, ColorOpToReachedUses, In, Out, Gen, Kill,
- ReachableUses, RegToId.size());
-}
+} // end anonymous namespace.
-#ifndef NDEBUG
-/// print the result of the reaching definition algorithm.
-static void printReachingDef(const InstrToInstrs *ColorOpToReachedUses,
- unsigned NbReg, const TargetRegisterInfo *TRI,
- const MapIdToReg &IdToReg) {
- unsigned CurReg;
- for (CurReg = 0; CurReg < NbReg; ++CurReg) {
- if (ColorOpToReachedUses[CurReg].empty())
- continue;
- DEBUG(dbgs() << "*** Reg " << PrintReg(IdToReg[CurReg], TRI) << " ***\n");
+INITIALIZE_PASS(AArch64CollectLOH, "aarch64-collect-loh",
+ AARCH64_COLLECT_LOH_NAME, false, false)
- for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) {
- DEBUG(dbgs() << "Def:\n");
- DEBUG(DefsIt.first->print(dbgs()));
- DEBUG(dbgs() << "Reachable uses:\n");
- for (const MachineInstr *MI : DefsIt.second) {
- DEBUG(MI->print(dbgs()));
- }
- }
+static bool canAddBePartOfLOH(const MachineInstr &MI) {
+ // Check immediate to see if the immediate is an address.
+ switch (MI.getOperand(2).getType()) {
+ default:
+ return false;
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_JumpTableIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_BlockAddress:
+ return true;
}
}
-#endif // NDEBUG
/// Answer the following question: Can Def be one of the definition
/// involved in a part of a LOH?
-static bool canDefBePartOfLOH(const MachineInstr *Def) {
- unsigned Opc = Def->getOpcode();
+static bool canDefBePartOfLOH(const MachineInstr &MI) {
// Accept ADRP, ADDLow and LOADGot.
- switch (Opc) {
+ switch (MI.getOpcode()) {
default:
return false;
case AArch64::ADRP:
return true;
case AArch64::ADDXri:
- // Check immediate to see if the immediate is an address.
- switch (Def->getOperand(2).getType()) {
- default:
- return false;
- case MachineOperand::MO_GlobalAddress:
- case MachineOperand::MO_JumpTableIndex:
- case MachineOperand::MO_ConstantPoolIndex:
- case MachineOperand::MO_BlockAddress:
- return true;
- }
+ return canAddBePartOfLOH(MI);
case AArch64::LDRXui:
// Check immediate to see if the immediate is an address.
- switch (Def->getOperand(2).getType()) {
+ switch (MI.getOperand(2).getType()) {
default:
return false;
case MachineOperand::MO_GlobalAddress:
- return true;
+ return MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT;
}
}
- // Unreachable.
- return false;
}
/// Check whether the given instruction can be the end of a LOH chain involving a
/// store.
-static bool isCandidateStore(const MachineInstr *Instr) {
- switch (Instr->getOpcode()) {
+static bool isCandidateStore(const MachineInstr &MI, const MachineOperand &MO) {
+ switch (MI.getOpcode()) {
default:
return false;
case AArch64::STRBBui:
@@ -543,109 +211,19 @@ static bool isCandidateStore(const MachineInstr *Instr) {
case AArch64::STRSui:
case AArch64::STRDui:
case AArch64::STRQui:
+ // We can only optimize the index operand.
// In case we have str xA, [xA, #imm], this is two different uses
// of xA and we cannot fold, otherwise the xA stored may be wrong,
// even if #imm == 0.
- if (Instr->getOperand(0).getReg() != Instr->getOperand(1).getReg())
- return true;
- }
- return false;
-}
-
-/// Given the result of a reaching definition algorithm in ColorOpToReachedUses,
-/// Build the Use to Defs information and filter out obvious non-LOH candidates.
-/// In ADRPMode, non-LOH candidates are "uses" with non-ADRP definitions.
-/// In non-ADRPMode, non-LOH candidates are "uses" with several definition,
-/// i.e., no simple chain.
-/// \param ADRPMode -- \see initReachingDef.
-static void reachedUsesToDefs(InstrToInstrs &UseToReachingDefs,
- const InstrToInstrs *ColorOpToReachedUses,
- const MapRegToId &RegToId,
- bool ADRPMode = false) {
-
- SetOfMachineInstr NotCandidate;
- unsigned NbReg = RegToId.size();
- MapRegToId::const_iterator EndIt = RegToId.end();
- for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg) {
- // If this color is never defined, continue.
- if (ColorOpToReachedUses[CurReg].empty())
- continue;
-
- for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) {
- for (const MachineInstr *MI : DefsIt.second) {
- const MachineInstr *Def = DefsIt.first;
- MapRegToId::const_iterator It;
- // if all the reaching defs are not adrp, this use will not be
- // simplifiable.
- if ((ADRPMode && Def->getOpcode() != AArch64::ADRP) ||
- (!ADRPMode && !canDefBePartOfLOH(Def)) ||
- (!ADRPMode && isCandidateStore(MI) &&
- // store are LOH candidate iff the end of the chain is used as
- // base.
- ((It = RegToId.find((MI)->getOperand(1).getReg())) == EndIt ||
- It->second != CurReg))) {
- NotCandidate.insert(MI);
- continue;
- }
- // Do not consider self reaching as a simplifiable case for ADRP.
- if (!ADRPMode || MI != DefsIt.first) {
- UseToReachingDefs[MI].insert(DefsIt.first);
- // If UsesIt has several reaching definitions, it is not
- // candidate for simplificaton in non-ADRPMode.
- if (!ADRPMode && UseToReachingDefs[MI].size() > 1)
- NotCandidate.insert(MI);
- }
- }
- }
- }
- for (const MachineInstr *Elem : NotCandidate) {
- DEBUG(dbgs() << "Too many reaching defs: " << *Elem << "\n");
- // It would have been better if we could just remove the entry
- // from the map. Because of that, we have to filter the garbage
- // (second.empty) in the subsequence analysis.
- UseToReachingDefs[Elem].clear();
- }
-}
-
-/// Based on the use to defs information (in ADRPMode), compute the
-/// opportunities of LOH ADRP-related.
-static void computeADRP(const InstrToInstrs &UseToDefs,
- AArch64FunctionInfo &AArch64FI,
- const MachineDominatorTree *MDT) {
- DEBUG(dbgs() << "*** Compute LOH for ADRP\n");
- for (const auto &Entry : UseToDefs) {
- unsigned Size = Entry.second.size();
- if (Size == 0)
- continue;
- if (Size == 1) {
- const MachineInstr *L2 = *Entry.second.begin();
- const MachineInstr *L1 = Entry.first;
- if (!MDT->dominates(L2, L1)) {
- DEBUG(dbgs() << "Dominance check failed:\n" << *L2 << '\n' << *L1
- << '\n');
- continue;
- }
- DEBUG(dbgs() << "Record AdrpAdrp:\n" << *L2 << '\n' << *L1 << '\n');
- AArch64FI.addLOHDirective(MCLOH_AdrpAdrp, {L2, L1});
- ++NumADRPSimpleCandidate;
- }
-#ifndef NDEBUG
- else if (Size == 2)
- ++NumADRPComplexCandidate2;
- else if (Size == 3)
- ++NumADRPComplexCandidate3;
- else
- ++NumADRPComplexCandidateOther;
-#endif
- // if Size < 1, the use should have been removed from the candidates
- assert(Size >= 1 && "No reaching defs for that use!");
+ return MI.getOperandNo(&MO) == 1 &&
+ MI.getOperand(0).getReg() != MI.getOperand(1).getReg();
}
}
/// Check whether the given instruction can be the end of a LOH chain
/// involving a load.
-static bool isCandidateLoad(const MachineInstr *Instr) {
- switch (Instr->getOpcode()) {
+static bool isCandidateLoad(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
default:
return false;
case AArch64::LDRSBWui:
@@ -660,17 +238,13 @@ static bool isCandidateLoad(const MachineInstr *Instr) {
case AArch64::LDRSui:
case AArch64::LDRDui:
case AArch64::LDRQui:
- if (Instr->getOperand(2).getTargetFlags() & AArch64II::MO_GOT)
- return false;
- return true;
+ return !(MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT);
}
- // Unreachable.
- return false;
}
/// Check whether the given instruction can load a literal.
-static bool supportLoadFromLiteral(const MachineInstr *Instr) {
- switch (Instr->getOpcode()) {
+static bool supportLoadFromLiteral(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
default:
return false;
case AArch64::LDRSWui:
@@ -681,353 +255,233 @@ static bool supportLoadFromLiteral(const MachineInstr *Instr) {
case AArch64::LDRQui:
return true;
}
- // Unreachable.
- return false;
}
-/// Check whether the given instruction is a LOH candidate.
-/// \param UseToDefs is used to check that Instr is at the end of LOH supported
-/// chain.
-/// \pre UseToDefs contains only on def per use, i.e., obvious non candidate are
-/// already been filtered out.
-static bool isCandidate(const MachineInstr *Instr,
- const InstrToInstrs &UseToDefs,
- const MachineDominatorTree *MDT) {
- if (!isCandidateLoad(Instr) && !isCandidateStore(Instr))
- return false;
+/// Number of GPR registers tracked by mapRegToGPRIndex().
+static const unsigned N_GPR_REGS = 31;
+/// Map register number to index from 0-30.
+static int mapRegToGPRIndex(MCPhysReg Reg) {
+ static_assert(AArch64::X28 - AArch64::X0 + 3 == N_GPR_REGS, "Number of GPRs");
+ static_assert(AArch64::W30 - AArch64::W0 + 1 == N_GPR_REGS, "Number of GPRs");
+ if (AArch64::X0 <= Reg && Reg <= AArch64::X28)
+ return Reg - AArch64::X0;
+ if (AArch64::W0 <= Reg && Reg <= AArch64::W30)
+ return Reg - AArch64::W0;
+ // TableGen gives "FP" and "LR" an index not adjacent to X28 so we have to
+ // handle them as special cases.
+ if (Reg == AArch64::FP)
+ return 29;
+ if (Reg == AArch64::LR)
+ return 30;
+ return -1;
+}
- const MachineInstr *Def = *UseToDefs.find(Instr)->second.begin();
- if (Def->getOpcode() != AArch64::ADRP) {
- // At this point, Def is ADDXri or LDRXui of the right type of
- // symbol, because we filtered out the uses that were not defined
- // by these kind of instructions (+ ADRP).
+/// State tracked per register.
+/// The main algorithm walks backwards over a basic block maintaining this
+/// data structure for each tracked general-purpose register.
+struct LOHInfo {
+ MCLOHType Type : 8; ///< "Best" type of LOH possible.
+ bool IsCandidate : 1; ///< Possible LOH candidate.
+ bool OneUser : 1; ///< Found exactly one user (yet).
+ bool MultiUsers : 1; ///< Found multiple users.
+ const MachineInstr *MI0; ///< First instruction involved in the LOH.
+ const MachineInstr *MI1; ///< Second instruction involved in the LOH
+ /// (if any).
+ const MachineInstr *LastADRP; ///< Last ADRP in same register.
+};
- // Check if this forms a simple chain: each intermediate node must
- // dominates the next one.
- if (!MDT->dominates(Def, Instr))
- return false;
- // Move one node up in the simple chain.
- if (UseToDefs.find(Def) ==
- UseToDefs.end()
- // The map may contain garbage we have to ignore.
- ||
- UseToDefs.find(Def)->second.empty())
- return false;
- Instr = Def;
- Def = *UseToDefs.find(Def)->second.begin();
+/// Update state \p Info given \p MI uses the tracked register.
+static void handleUse(const MachineInstr &MI, const MachineOperand &MO,
+ LOHInfo &Info) {
+ // We have multiple uses if we already found one before.
+ if (Info.MultiUsers || Info.OneUser) {
+ Info.IsCandidate = false;
+ Info.MultiUsers = true;
+ return;
}
- // Check if we reached the top of the simple chain:
- // - top is ADRP.
- // - check the simple chain property: each intermediate node must
- // dominates the next one.
- if (Def->getOpcode() == AArch64::ADRP)
- return MDT->dominates(Def, Instr);
- return false;
-}
-
-static bool registerADRCandidate(const MachineInstr &Use,
- const InstrToInstrs &UseToDefs,
- const InstrToInstrs *DefsPerColorToUses,
- AArch64FunctionInfo &AArch64FI,
- SetOfMachineInstr *InvolvedInLOHs,
- const MapRegToId &RegToId) {
- // Look for opportunities to turn ADRP -> ADD or
- // ADRP -> LDR GOTPAGEOFF into ADR.
- // If ADRP has more than one use. Give up.
- if (Use.getOpcode() != AArch64::ADDXri &&
- (Use.getOpcode() != AArch64::LDRXui ||
- !(Use.getOperand(2).getTargetFlags() & AArch64II::MO_GOT)))
- return false;
- InstrToInstrs::const_iterator It = UseToDefs.find(&Use);
- // The map may contain garbage that we need to ignore.
- if (It == UseToDefs.end() || It->second.empty())
- return false;
- const MachineInstr &Def = **It->second.begin();
- if (Def.getOpcode() != AArch64::ADRP)
- return false;
- // Check the number of users of ADRP.
- const SetOfMachineInstr *Users =
- getUses(DefsPerColorToUses,
- RegToId.find(Def.getOperand(0).getReg())->second, Def);
- if (Users->size() > 1) {
- ++NumADRComplexCandidate;
- return false;
+ Info.OneUser = true;
+
+ // Start new LOHInfo if applicable.
+ if (isCandidateLoad(MI)) {
+ Info.Type = MCLOH_AdrpLdr;
+ Info.IsCandidate = true;
+ Info.MI0 = &MI;
+ // Note that even though this is AdrpLdr now, we can switch to an Ldr variant
+ // later.
+ } else if (isCandidateStore(MI, MO)) {
+ Info.Type = MCLOH_AdrpAddStr;
+ Info.IsCandidate = true;
+ Info.MI0 = &MI;
+ Info.MI1 = nullptr;
+ } else if (MI.getOpcode() == AArch64::ADDXri) {
+ Info.Type = MCLOH_AdrpAdd;
+ Info.IsCandidate = true;
+ Info.MI0 = &MI;
+ } else if (MI.getOpcode() == AArch64::LDRXui &&
+ MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) {
+ Info.Type = MCLOH_AdrpLdrGot;
+ Info.IsCandidate = true;
+ Info.MI0 = &MI;
}
- ++NumADRSimpleCandidate;
- assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Def)) &&
- "ADRP already involved in LOH.");
- assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Use)) &&
- "ADD already involved in LOH.");
- DEBUG(dbgs() << "Record AdrpAdd\n" << Def << '\n' << Use << '\n');
-
- AArch64FI.addLOHDirective(
- Use.getOpcode() == AArch64::ADDXri ? MCLOH_AdrpAdd : MCLOH_AdrpLdrGot,
- {&Def, &Use});
- return true;
}
-/// Based on the use to defs information (in non-ADRPMode), compute the
-/// opportunities of LOH non-ADRP-related
-static void computeOthers(const InstrToInstrs &UseToDefs,
- const InstrToInstrs *DefsPerColorToUses,
- AArch64FunctionInfo &AArch64FI, const MapRegToId &RegToId,
- const MachineDominatorTree *MDT) {
- SetOfMachineInstr *InvolvedInLOHs = nullptr;
-#ifndef NDEBUG
- SetOfMachineInstr InvolvedInLOHsStorage;
- InvolvedInLOHs = &InvolvedInLOHsStorage;
-#endif // NDEBUG
- DEBUG(dbgs() << "*** Compute LOH for Others\n");
- // ADRP -> ADD/LDR -> LDR/STR pattern.
- // Fall back to ADRP -> ADD pattern if we fail to catch the bigger pattern.
+/// Update state \p Info given the tracked register is clobbered.
+static void handleClobber(LOHInfo &Info) {
+ Info.IsCandidate = false;
+ Info.OneUser = false;
+ Info.MultiUsers = false;
+ Info.LastADRP = nullptr;
+}
- // FIXME: When the statistics are not important,
- // This initial filtering loop can be merged into the next loop.
- // Currently, we didn't do it to have the same code for both DEBUG and
- // NDEBUG builds. Indeed, the iterator of the second loop would need
- // to be changed.
- SetOfMachineInstr PotentialCandidates;
- SetOfMachineInstr PotentialADROpportunities;
- for (auto &Use : UseToDefs) {
- // If no definition is available, this is a non candidate.
- if (Use.second.empty())
- continue;
- // Keep only instructions that are load or store and at the end of
- // a ADRP -> ADD/LDR/Nothing chain.
- // We already filtered out the no-chain cases.
- if (!isCandidate(Use.first, UseToDefs, MDT)) {
- PotentialADROpportunities.insert(Use.first);
- continue;
+/// Update state \p Info given that \p MI is possibly the middle instruction
+/// of an LOH involving 3 instructions.
+static bool handleMiddleInst(const MachineInstr &MI, LOHInfo &DefInfo,
+ LOHInfo &OpInfo) {
+ if (!DefInfo.IsCandidate || (&DefInfo != &OpInfo && OpInfo.OneUser))
+ return false;
+ // Copy LOHInfo for dest register to LOHInfo for source register.
+ if (&DefInfo != &OpInfo) {
+ OpInfo = DefInfo;
+ // Invalidate \p DefInfo because we track it in \p OpInfo now.
+ handleClobber(DefInfo);
+ } else
+ DefInfo.LastADRP = nullptr;
+
+ // Advance state machine.
+ assert(OpInfo.IsCandidate && "Expect valid state");
+ if (MI.getOpcode() == AArch64::ADDXri && canAddBePartOfLOH(MI)) {
+ if (OpInfo.Type == MCLOH_AdrpLdr) {
+ OpInfo.Type = MCLOH_AdrpAddLdr;
+ OpInfo.IsCandidate = true;
+ OpInfo.MI1 = &MI;
+ return true;
+ } else if (OpInfo.Type == MCLOH_AdrpAddStr && OpInfo.MI1 == nullptr) {
+ OpInfo.Type = MCLOH_AdrpAddStr;
+ OpInfo.IsCandidate = true;
+ OpInfo.MI1 = &MI;
+ return true;
}
- PotentialCandidates.insert(Use.first);
- }
-
- // Make the following distinctions for statistics as the linker does
- // know how to decode instructions:
- // - ADD/LDR/Nothing make there different patterns.
- // - LDR/STR make two different patterns.
- // Hence, 6 - 1 base patterns.
- // (because ADRP-> Nothing -> STR is not simplifiable)
-
- // The linker is only able to have a simple semantic, i.e., if pattern A
- // do B.
- // However, we want to see the opportunity we may miss if we were able to
- // catch more complex cases.
-
- // PotentialCandidates are result of a chain ADRP -> ADD/LDR ->
- // A potential candidate becomes a candidate, if its current immediate
- // operand is zero and all nodes of the chain have respectively only one user
-#ifndef NDEBUG
- SetOfMachineInstr DefsOfPotentialCandidates;
-#endif
- for (const MachineInstr *Candidate : PotentialCandidates) {
- // Get the definition of the candidate i.e., ADD or LDR.
- const MachineInstr *Def = *UseToDefs.find(Candidate)->second.begin();
- // Record the elements of the chain.
- const MachineInstr *L1 = Def;
- const MachineInstr *L2 = nullptr;
- unsigned ImmediateDefOpc = Def->getOpcode();
- if (Def->getOpcode() != AArch64::ADRP) {
- // Check the number of users of this node.
- const SetOfMachineInstr *Users =
- getUses(DefsPerColorToUses,
- RegToId.find(Def->getOperand(0).getReg())->second, *Def);
- if (Users->size() > 1) {
-#ifndef NDEBUG
- // if all the uses of this def are in potential candidate, this is
- // a complex candidate of level 2.
- bool IsLevel2 = true;
- for (const MachineInstr *MI : *Users) {
- if (!PotentialCandidates.count(MI)) {
- ++NumTooCplxLvl2;
- IsLevel2 = false;
- break;
- }
- }
- if (IsLevel2)
- ++NumCplxLvl2;
-#endif // NDEBUG
- PotentialADROpportunities.insert(Def);
- continue;
- }
- L2 = Def;
- Def = *UseToDefs.find(Def)->second.begin();
- L1 = Def;
- } // else the element in the middle of the chain is nothing, thus
- // Def already contains the first element of the chain.
-
- // Check the number of users of the first node in the chain, i.e., ADRP
- const SetOfMachineInstr *Users =
- getUses(DefsPerColorToUses,
- RegToId.find(Def->getOperand(0).getReg())->second, *Def);
- if (Users->size() > 1) {
-#ifndef NDEBUG
- // if all the uses of this def are in the defs of the potential candidate,
- // this is a complex candidate of level 1
- if (DefsOfPotentialCandidates.empty()) {
- // lazy init
- DefsOfPotentialCandidates = PotentialCandidates;
- for (const MachineInstr *Candidate : PotentialCandidates) {
- if (!UseToDefs.find(Candidate)->second.empty())
- DefsOfPotentialCandidates.insert(
- *UseToDefs.find(Candidate)->second.begin());
- }
- }
- bool Found = false;
- for (auto &Use : *Users) {
- if (!DefsOfPotentialCandidates.count(Use)) {
- ++NumTooCplxLvl1;
- Found = true;
- break;
- }
- }
- if (!Found)
- ++NumCplxLvl1;
-#endif // NDEBUG
- continue;
+ } else {
+ assert(MI.getOpcode() == AArch64::LDRXui && "Expect LDRXui");
+ assert((MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) &&
+ "Expected GOT relocation");
+ if (OpInfo.Type == MCLOH_AdrpAddStr && OpInfo.MI1 == nullptr) {
+ OpInfo.Type = MCLOH_AdrpLdrGotStr;
+ OpInfo.IsCandidate = true;
+ OpInfo.MI1 = &MI;
+ return true;
+ } else if (OpInfo.Type == MCLOH_AdrpLdr) {
+ OpInfo.Type = MCLOH_AdrpLdrGotLdr;
+ OpInfo.IsCandidate = true;
+ OpInfo.MI1 = &MI;
+ return true;
}
+ }
+ return false;
+}
- bool IsL2Add = (ImmediateDefOpc == AArch64::ADDXri);
- // If the chain is three instructions long and ldr is the second element,
- // then this ldr must load form GOT, otherwise this is not a correct chain.
- if (L2 && !IsL2Add &&
- !(L2->getOperand(2).getTargetFlags() & AArch64II::MO_GOT))
- continue;
- SmallVector<const MachineInstr *, 3> Args;
- MCLOHType Kind;
- if (isCandidateLoad(Candidate)) {
- if (!L2) {
- // At this point, the candidate LOH indicates that the ldr instruction
- // may use a direct access to the symbol. There is not such encoding
- // for loads of byte and half.
- if (!supportLoadFromLiteral(Candidate))
- continue;
+/// Update state when seeing an ADRP instruction.
+static void handleADRP(const MachineInstr &MI, AArch64FunctionInfo &AFI,
+ LOHInfo &Info) {
+ if (Info.LastADRP != nullptr) {
+ DEBUG(dbgs() << "Adding MCLOH_AdrpAdrp:\n" << '\t' << MI << '\t'
+ << *Info.LastADRP);
+ AFI.addLOHDirective(MCLOH_AdrpAdrp, {&MI, Info.LastADRP});
+ ++NumADRPSimpleCandidate;
+ }
- DEBUG(dbgs() << "Record AdrpLdr:\n" << *L1 << '\n' << *Candidate
- << '\n');
- Kind = MCLOH_AdrpLdr;
- Args.push_back(L1);
- Args.push_back(Candidate);
- assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
- "L1 already involved in LOH.");
- assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
- "Candidate already involved in LOH.");
+ // Produce LOH directive if possible.
+ if (Info.IsCandidate) {
+ switch (Info.Type) {
+ case MCLOH_AdrpAdd:
+ DEBUG(dbgs() << "Adding MCLOH_AdrpAdd:\n" << '\t' << MI << '\t'
+ << *Info.MI0);
+ AFI.addLOHDirective(MCLOH_AdrpAdd, {&MI, Info.MI0});
+ ++NumADRSimpleCandidate;
+ break;
+ case MCLOH_AdrpLdr:
+ if (supportLoadFromLiteral(*Info.MI0)) {
+ DEBUG(dbgs() << "Adding MCLOH_AdrpLdr:\n" << '\t' << MI << '\t'
+ << *Info.MI0);
+ AFI.addLOHDirective(MCLOH_AdrpLdr, {&MI, Info.MI0});
++NumADRPToLDR;
- } else {
- DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot")
- << "Ldr:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate
- << '\n');
-
- Kind = IsL2Add ? MCLOH_AdrpAddLdr : MCLOH_AdrpLdrGotLdr;
- Args.push_back(L1);
- Args.push_back(L2);
- Args.push_back(Candidate);
-
- PotentialADROpportunities.remove(L2);
- assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
- "L1 already involved in LOH.");
- assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) &&
- "L2 already involved in LOH.");
- assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
- "Candidate already involved in LOH.");
-#ifndef NDEBUG
- // get the immediate of the load
- if (Candidate->getOperand(2).getImm() == 0)
- if (ImmediateDefOpc == AArch64::ADDXri)
- ++NumADDToLDR;
- else
- ++NumLDRToLDR;
- else if (ImmediateDefOpc == AArch64::ADDXri)
- ++NumADDToLDRWithImm;
- else
- ++NumLDRToLDRWithImm;
-#endif // NDEBUG
}
- } else {
- if (ImmediateDefOpc == AArch64::ADRP)
- continue;
- else {
-
- DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot")
- << "Str:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate
- << '\n');
-
- Kind = IsL2Add ? MCLOH_AdrpAddStr : MCLOH_AdrpLdrGotStr;
- Args.push_back(L1);
- Args.push_back(L2);
- Args.push_back(Candidate);
-
- PotentialADROpportunities.remove(L2);
- assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
- "L1 already involved in LOH.");
- assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) &&
- "L2 already involved in LOH.");
- assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
- "Candidate already involved in LOH.");
-#ifndef NDEBUG
- // get the immediate of the store
- if (Candidate->getOperand(2).getImm() == 0)
- if (ImmediateDefOpc == AArch64::ADDXri)
- ++NumADDToSTR;
- else
- ++NumLDRToSTR;
- else if (ImmediateDefOpc == AArch64::ADDXri)
- ++NumADDToSTRWithImm;
- else
- ++NumLDRToSTRWithImm;
-#endif // DEBUG
+ break;
+ case MCLOH_AdrpAddLdr:
+ DEBUG(dbgs() << "Adding MCLOH_AdrpAddLdr:\n" << '\t' << MI << '\t'
+ << *Info.MI1 << '\t' << *Info.MI0);
+ AFI.addLOHDirective(MCLOH_AdrpAddLdr, {&MI, Info.MI1, Info.MI0});
+ ++NumADDToLDR;
+ break;
+ case MCLOH_AdrpAddStr:
+ if (Info.MI1 != nullptr) {
+ DEBUG(dbgs() << "Adding MCLOH_AdrpAddStr:\n" << '\t' << MI << '\t'
+ << *Info.MI1 << '\t' << *Info.MI0);
+ AFI.addLOHDirective(MCLOH_AdrpAddStr, {&MI, Info.MI1, Info.MI0});
+ ++NumADDToSTR;
}
+ break;
+ case MCLOH_AdrpLdrGotLdr:
+ DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGotLdr:\n" << '\t' << MI << '\t'
+ << *Info.MI1 << '\t' << *Info.MI0);
+ AFI.addLOHDirective(MCLOH_AdrpLdrGotLdr, {&MI, Info.MI1, Info.MI0});
+ ++NumLDRToLDR;
+ break;
+ case MCLOH_AdrpLdrGotStr:
+ DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGotStr:\n" << '\t' << MI << '\t'
+ << *Info.MI1 << '\t' << *Info.MI0);
+ AFI.addLOHDirective(MCLOH_AdrpLdrGotStr, {&MI, Info.MI1, Info.MI0});
+ ++NumLDRToSTR;
+ break;
+ case MCLOH_AdrpLdrGot:
+ DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGot:\n" << '\t' << MI << '\t'
+ << *Info.MI0);
+ AFI.addLOHDirective(MCLOH_AdrpLdrGot, {&MI, Info.MI0});
+ break;
+ case MCLOH_AdrpAdrp:
+ llvm_unreachable("MCLOH_AdrpAdrp not used in state machine");
}
- AArch64FI.addLOHDirective(Kind, Args);
}
- // Now, we grabbed all the big patterns, check ADR opportunities.
- for (const MachineInstr *Candidate : PotentialADROpportunities)
- registerADRCandidate(*Candidate, UseToDefs, DefsPerColorToUses, AArch64FI,
- InvolvedInLOHs, RegToId);
+ handleClobber(Info);
+ Info.LastADRP = &MI;
}
-/// Look for every register defined by potential LOHs candidates.
-/// Map these registers with dense id in @p RegToId and vice-versa in
-/// @p IdToReg. @p IdToReg is populated only in DEBUG mode.
-static void collectInvolvedReg(const MachineFunction &MF, MapRegToId &RegToId,
- MapIdToReg &IdToReg,
- const TargetRegisterInfo *TRI) {
- unsigned CurRegId = 0;
- if (!PreCollectRegister) {
- unsigned NbReg = TRI->getNumRegs();
- for (; CurRegId < NbReg; ++CurRegId) {
- RegToId[CurRegId] = CurRegId;
- DEBUG(IdToReg.push_back(CurRegId));
- DEBUG(assert(IdToReg[CurRegId] == CurRegId && "Reg index mismatches"));
- }
+static void handleRegMaskClobber(const uint32_t *RegMask, MCPhysReg Reg,
+ LOHInfo *LOHInfos) {
+ if (!MachineOperand::clobbersPhysReg(RegMask, Reg))
return;
- }
-
- DEBUG(dbgs() << "** Collect Involved Register\n");
- for (const auto &MBB : MF) {
- for (const MachineInstr &MI : MBB) {
- if (!canDefBePartOfLOH(&MI) &&
- !isCandidateLoad(&MI) && !isCandidateStore(&MI))
- continue;
+ int Idx = mapRegToGPRIndex(Reg);
+ if (Idx >= 0)
+ handleClobber(LOHInfos[Idx]);
+}
- // Process defs
- for (MachineInstr::const_mop_iterator IO = MI.operands_begin(),
- IOEnd = MI.operands_end();
- IO != IOEnd; ++IO) {
- if (!IO->isReg() || !IO->isDef())
- continue;
- unsigned CurReg = IO->getReg();
- for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI)
- if (RegToId.find(*AI) == RegToId.end()) {
- DEBUG(IdToReg.push_back(*AI);
- assert(IdToReg[CurRegId] == *AI &&
- "Reg index mismatches insertion index."));
- RegToId[*AI] = CurRegId++;
- DEBUG(dbgs() << "Register: " << PrintReg(*AI, TRI) << '\n');
- }
- }
+static void handleNormalInst(const MachineInstr &MI, LOHInfo *LOHInfos) {
+ // Handle defs and regmasks.
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isRegMask()) {
+ const uint32_t *RegMask = MO.getRegMask();
+ for (MCPhysReg Reg : AArch64::GPR32RegClass)
+ handleRegMaskClobber(RegMask, Reg, LOHInfos);
+ for (MCPhysReg Reg : AArch64::GPR64RegClass)
+ handleRegMaskClobber(RegMask, Reg, LOHInfos);
+ continue;
}
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ int Idx = mapRegToGPRIndex(MO.getReg());
+ if (Idx < 0)
+ continue;
+ handleClobber(LOHInfos[Idx]);
+ }
+ // Handle uses.
+ for (const MachineOperand &MO : MI.uses()) {
+ if (!MO.isReg() || !MO.readsReg())
+ continue;
+ int Idx = mapRegToGPRIndex(MO.getReg());
+ if (Idx < 0)
+ continue;
+ handleUse(MI, MO, LOHInfos[Idx]);
}
}
@@ -1035,74 +489,59 @@ bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(*MF.getFunction()))
return false;
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- const MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>();
-
- MapRegToId RegToId;
- MapIdToReg IdToReg;
- AArch64FunctionInfo *AArch64FI = MF.getInfo<AArch64FunctionInfo>();
- assert(AArch64FI && "No MachineFunctionInfo for this function!");
-
- DEBUG(dbgs() << "Looking for LOH in " << MF.getName() << '\n');
+ DEBUG(dbgs() << "********** AArch64 Collect LOH **********\n"
+ << "Looking in function " << MF.getName() << '\n');
- collectInvolvedReg(MF, RegToId, IdToReg, TRI);
- if (RegToId.empty())
- return false;
+ LOHInfo LOHInfos[N_GPR_REGS];
+ AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
+ for (const MachineBasicBlock &MBB : MF) {
+ // Reset register tracking state.
+ memset(LOHInfos, 0, sizeof(LOHInfos));
+ // Live-out registers are used.
+ for (const MachineBasicBlock *Succ : MBB.successors()) {
+ for (const auto &LI : Succ->liveins()) {
+ int RegIdx = mapRegToGPRIndex(LI.PhysReg);
+ if (RegIdx >= 0)
+ LOHInfos[RegIdx].OneUser = true;
+ }
+ }
- MachineInstr *DummyOp = nullptr;
- if (BasicBlockScopeOnly) {
- const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
- // For local analysis, create a dummy operation to record uses that are not
- // local.
- DummyOp = MF.CreateMachineInstr(TII->get(AArch64::COPY), DebugLoc());
+ // Walk the basic block backwards and update the per register state machine
+ // in the process.
+ for (const MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ case AArch64::ADDXri:
+ case AArch64::LDRXui:
+ if (canDefBePartOfLOH(MI)) {
+ const MachineOperand &Def = MI.getOperand(0);
+ const MachineOperand &Op = MI.getOperand(1);
+ assert(Def.isReg() && Def.isDef() && "Expected reg def");
+ assert(Op.isReg() && Op.isUse() && "Expected reg use");
+ int DefIdx = mapRegToGPRIndex(Def.getReg());
+ int OpIdx = mapRegToGPRIndex(Op.getReg());
+ if (DefIdx >= 0 && OpIdx >= 0 &&
+ handleMiddleInst(MI, LOHInfos[DefIdx], LOHInfos[OpIdx]))
+ continue;
+ }
+ break;
+ case AArch64::ADRP:
+ const MachineOperand &Op0 = MI.getOperand(0);
+ int Idx = mapRegToGPRIndex(Op0.getReg());
+ if (Idx >= 0) {
+ handleADRP(MI, AFI, LOHInfos[Idx]);
+ continue;
+ }
+ break;
+ }
+ handleNormalInst(MI, LOHInfos);
+ }
}
- unsigned NbReg = RegToId.size();
- bool Modified = false;
-
- // Start with ADRP.
- InstrToInstrs *ColorOpToReachedUses = new InstrToInstrs[NbReg];
-
- // Compute the reaching def in ADRP mode, meaning ADRP definitions
- // are first considered as uses.
- reachingDef(MF, ColorOpToReachedUses, RegToId, true, DummyOp);
- DEBUG(dbgs() << "ADRP reaching defs\n");
- DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg));
-
- // Translate the definition to uses map into a use to definitions map to ease
- // statistic computation.
- InstrToInstrs ADRPToReachingDefs;
- reachedUsesToDefs(ADRPToReachingDefs, ColorOpToReachedUses, RegToId, true);
-
- // Compute LOH for ADRP.
- computeADRP(ADRPToReachingDefs, *AArch64FI, MDT);
- delete[] ColorOpToReachedUses;
-
- // Continue with general ADRP -> ADD/LDR -> LDR/STR pattern.
- ColorOpToReachedUses = new InstrToInstrs[NbReg];
-
- // first perform a regular reaching def analysis.
- reachingDef(MF, ColorOpToReachedUses, RegToId, false, DummyOp);
- DEBUG(dbgs() << "All reaching defs\n");
- DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg));
-
- // Turn that into a use to defs to ease statistic computation.
- InstrToInstrs UsesToReachingDefs;
- reachedUsesToDefs(UsesToReachingDefs, ColorOpToReachedUses, RegToId, false);
-
- // Compute other than AdrpAdrp LOH.
- computeOthers(UsesToReachingDefs, ColorOpToReachedUses, *AArch64FI, RegToId,
- MDT);
- delete[] ColorOpToReachedUses;
-
- if (BasicBlockScopeOnly)
- MF.DeleteMachineInstr(DummyOp);
-
- return Modified;
+ // Return "no change": The pass only collects information.
+ return false;
}
-/// createAArch64CollectLOHPass - returns an instance of the Statistic for
-/// linker optimization pass.
FunctionPass *llvm::createAArch64CollectLOHPass() {
return new AArch64CollectLOH();
}
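
Editor's note: the rewritten runOnMachineFunction above replaces the reaching-def analysis with a fixed array of per-GPR states that is reset at the top of every basic block, seeded from the successors' live-ins, and updated while walking the block's instructions in reverse. The following is a minimal, self-contained sketch of that traversal shape only; Block, Inst, LOHState and the register indexing are invented stand-ins, not the LLVM classes used in the hunk.

    // Sketch: per-register state machine driven backwards over each block.
    #include <array>
    #include <vector>

    constexpr int NumGPRs = 32;        // assumption: fixed GPR index space

    struct LOHState {                  // per-register tracking state
      bool OneUser = false;            // real pass keeps more fields
    };

    struct Inst  { int Def = -1; int Use = -1; };
    struct Block { std::vector<Inst> Insts; std::vector<int> LiveOutRegs; };

    void collect(const std::vector<Block> &Fn) {
      std::array<LOHState, NumGPRs> States;
      for (const Block &B : Fn) {
        States.fill(LOHState{});                 // reset: analysis is block-local
        for (int R : B.LiveOutRegs)              // live-outs count as a use
          States[R].OneUser = true;
        for (auto It = B.Insts.rbegin(); It != B.Insts.rend(); ++It) {
          // Walking backwards, a def closes the chain for the uses seen so far.
          if (It->Def >= 0) {
            // record a candidate here in the real pass, then restart tracking
            States[It->Def] = LOHState{};
          }
          if (It->Use >= 0)
            States[It->Use].OneUser = true;      // remember the use for defs above
        }
      }
    }

    int main() { collect({}); }
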
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4c98253878e4..74a01835171b 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -11,28 +11,79 @@
//
//===----------------------------------------------------------------------===//
-#include "AArch64ISelLowering.h"
#include "AArch64CallingConvention.h"
#include "AArch64MachineFunctionInfo.h"
+#include "AArch64ISelLowering.h"
#include "AArch64PerfectShuffle.h"
+#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
-#include "AArch64TargetMachine.h"
-#include "AArch64TargetObjectFile.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetCallingConv.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include <algorithm>
+#include <bitset>
+#include <cassert>
+#include <cctype>
+#include <cstdint>
+#include <cstdlib>
+#include <iterator>
+#include <limits>
+#include <tuple>
+#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "aarch64-lower"
@@ -59,7 +110,6 @@ static const MVT MVT_CC = MVT::i32;
AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
const AArch64Subtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
-
// AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
// we have to make something up. Arbitrarily, choose ZeroOrOne.
setBooleanContents(ZeroOrOneBooleanContent);
@@ -218,7 +268,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
-
setOperationAction(ISD::CTPOP, MVT::i32, Custom);
setOperationAction(ISD::CTPOP, MVT::i64, Custom);
@@ -3632,6 +3681,7 @@ SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
llvm_unreachable("Unexpected platform trying to use TLS");
}
+
SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
@@ -4549,7 +4599,6 @@ SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op,
return DAG.getMergeValues(Ops, dl);
}
-
/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
/// i64 values and take a 2 x i64 value to shift plus a shift amount.
SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
@@ -5074,10 +5123,11 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
int WindowBase;
int WindowScale;
- bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
ShuffleSourceInfo(SDValue Vec)
- : Vec(Vec), MinElt(UINT_MAX), MaxElt(0), ShuffleVec(Vec), WindowBase(0),
- WindowScale(1) {}
+ : Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
+ ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
+
+ bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
};
// First gather all vectors used as an immediate source for this BUILD_VECTOR
@@ -7028,7 +7078,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
return true;
}
case Intrinsic::aarch64_ldaxp:
- case Intrinsic::aarch64_ldxp: {
+ case Intrinsic::aarch64_ldxp:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i128;
Info.ptrVal = I.getArgOperand(0);
@@ -7038,9 +7088,8 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.readMem = true;
Info.writeMem = false;
return true;
- }
case Intrinsic::aarch64_stlxp:
- case Intrinsic::aarch64_stxp: {
+ case Intrinsic::aarch64_stxp:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i128;
Info.ptrVal = I.getArgOperand(2);
@@ -7050,7 +7099,6 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.readMem = false;
Info.writeMem = true;
return true;
- }
default:
break;
}
@@ -8044,13 +8092,13 @@ static SDValue tryCombineToEXTR(SDNode *N,
SDValue LHS;
uint32_t ShiftLHS = 0;
- bool LHSFromHi = 0;
+ bool LHSFromHi = false;
if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi))
return SDValue();
SDValue RHS;
uint32_t ShiftRHS = 0;
- bool RHSFromHi = 0;
+ bool RHSFromHi = false;
if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi))
return SDValue();
@@ -9732,52 +9780,51 @@ static bool isEquivalentMaskless(unsigned CC, unsigned width,
switch(CC) {
case AArch64CC::LE:
- case AArch64CC::GT: {
+ case AArch64CC::GT:
if ((AddConstant == 0) ||
(CompConstant == MaxUInt - 1 && AddConstant < 0) ||
(AddConstant >= 0 && CompConstant < 0) ||
(AddConstant <= 0 && CompConstant <= 0 && CompConstant < AddConstant))
return true;
- } break;
+ break;
case AArch64CC::LT:
- case AArch64CC::GE: {
+ case AArch64CC::GE:
if ((AddConstant == 0) ||
(AddConstant >= 0 && CompConstant <= 0) ||
(AddConstant <= 0 && CompConstant <= 0 && CompConstant <= AddConstant))
return true;
- } break;
+ break;
case AArch64CC::HI:
- case AArch64CC::LS: {
+ case AArch64CC::LS:
if ((AddConstant >= 0 && CompConstant < 0) ||
(AddConstant <= 0 && CompConstant >= -1 &&
CompConstant < AddConstant + MaxUInt))
return true;
- } break;
+ break;
case AArch64CC::PL:
- case AArch64CC::MI: {
+ case AArch64CC::MI:
if ((AddConstant == 0) ||
(AddConstant > 0 && CompConstant <= 0) ||
(AddConstant < 0 && CompConstant <= AddConstant))
return true;
- } break;
+ break;
case AArch64CC::LO:
- case AArch64CC::HS: {
+ case AArch64CC::HS:
if ((AddConstant >= 0 && CompConstant <= 0) ||
(AddConstant <= 0 && CompConstant >= 0 &&
CompConstant <= AddConstant + MaxUInt))
return true;
- } break;
+ break;
case AArch64CC::EQ:
- case AArch64CC::NE: {
+ case AArch64CC::NE:
if ((AddConstant > 0 && CompConstant < 0) ||
(AddConstant < 0 && CompConstant >= 0 &&
CompConstant < AddConstant + MaxUInt) ||
(AddConstant >= 0 && CompConstant >= 0 &&
CompConstant >= AddConstant) ||
(AddConstant <= 0 && CompConstant < 0 && CompConstant < AddConstant))
-
return true;
- } break;
+ break;
case AArch64CC::VS:
case AArch64CC::VC:
case AArch64CC::AL:
@@ -10501,7 +10548,7 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
if (ValTy->getPrimitiveSizeInBits() == 128) {
Intrinsic::ID Int =
IsAcquire ? Intrinsic::aarch64_ldaxp : Intrinsic::aarch64_ldxp;
- Function *Ldxr = llvm::Intrinsic::getDeclaration(M, Int);
+ Function *Ldxr = Intrinsic::getDeclaration(M, Int);
Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
Value *LoHi = Builder.CreateCall(Ldxr, Addr, "lohi");
@@ -10517,7 +10564,7 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
Type *Tys[] = { Addr->getType() };
Intrinsic::ID Int =
IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;
- Function *Ldxr = llvm::Intrinsic::getDeclaration(M, Int, Tys);
+ Function *Ldxr = Intrinsic::getDeclaration(M, Int, Tys);
return Builder.CreateTruncOrBitCast(
Builder.CreateCall(Ldxr, Addr),
@@ -10527,8 +10574,7 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
void AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
IRBuilder<> &Builder) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
- Builder.CreateCall(
- llvm::Intrinsic::getDeclaration(M, Intrinsic::aarch64_clrex));
+ Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::aarch64_clrex));
}
Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index 626c934f236e..5c8acba26aab 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -14,16 +14,37 @@
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
-#include <algorithm>
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <utility>
using namespace llvm;
@@ -529,19 +550,19 @@ void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
default:
llvm_unreachable("Unknown branch opcode in Cond");
case AArch64::CBZW:
- Is64Bit = 0;
+ Is64Bit = false;
CC = AArch64CC::EQ;
break;
case AArch64::CBZX:
- Is64Bit = 1;
+ Is64Bit = true;
CC = AArch64CC::EQ;
break;
case AArch64::CBNZW:
- Is64Bit = 0;
+ Is64Bit = false;
CC = AArch64CC::NE;
break;
case AArch64::CBNZX:
- Is64Bit = 1;
+ Is64Bit = true;
CC = AArch64CC::NE;
break;
}
@@ -1044,7 +1065,7 @@ static unsigned sForm(MachineInstr &Instr) {
case AArch64::SUBSWri:
case AArch64::SUBSXrr:
case AArch64::SUBSXri:
- return Instr.getOpcode();;
+ return Instr.getOpcode();
case AArch64::ADDWrr: return AArch64::ADDSWrr;
case AArch64::ADDWri: return AArch64::ADDSWri;
@@ -1072,12 +1093,15 @@ static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
}
namespace {
+
struct UsedNZCV {
- bool N;
- bool Z;
- bool C;
- bool V;
- UsedNZCV(): N(false), Z(false), C(false), V(false) {}
+ bool N = false;
+ bool Z = false;
+ bool C = false;
+ bool V = false;
+
+ UsedNZCV() = default;
+
UsedNZCV& operator |=(const UsedNZCV& UsedFlags) {
this->N |= UsedFlags.N;
this->Z |= UsedFlags.Z;
@@ -1086,6 +1110,7 @@ struct UsedNZCV {
return *this;
}
};
+
} // end anonymous namespace
/// Find a condition code used by the instruction.
@@ -1561,7 +1586,7 @@ bool AArch64InstrInfo::isScaledAddr(const MachineInstr &MI) const {
/// Check all MachineMemOperands for a hint to suppress pairing.
bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) const {
- return any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
+ return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
return MMO->getFlags() & MOSuppressPair;
});
}
@@ -1994,7 +2019,7 @@ static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
void AArch64InstrInfo::copyPhysRegTuple(
MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode,
- llvm::ArrayRef<unsigned> Indices) const {
+ ArrayRef<unsigned> Indices) const {
assert(Subtarget.hasNEON() &&
"Unexpected register copy without NEON");
const TargetRegisterInfo *TRI = &getRegisterInfo();
@@ -2583,7 +2608,7 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
//
// <rdar://problem/11522048>
//
- if (MI.isCopy()) {
+ if (MI.isFullCopy()) {
unsigned DstReg = MI.getOperand(0).getReg();
unsigned SrcReg = MI.getOperand(1).getReg();
if (SrcReg == AArch64::SP &&
@@ -2598,7 +2623,7 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
}
}
- // Handle the case where a copy is being spilled or refilled but the source
+ // Handle the case where a copy is being spilled or filled but the source
// and destination register class don't match. For example:
//
// %vreg0<def> = COPY %XZR; GPR64common:%vreg0
@@ -2613,7 +2638,7 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
//
// %vreg0<def> = COPY %vreg1; GPR64:%vreg0, FPR64:%vreg1
//
- // will be refilled as
+ // will be filled as
//
// LDRDui %vreg0, fi<#0>
//
@@ -2622,9 +2647,11 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
// LDRXui %vregTemp, fi<#0>
// %vreg0 = FMOV %vregTemp
//
- if (MI.isFullCopy() && Ops.size() == 1 &&
+ if (MI.isCopy() && Ops.size() == 1 &&
// Make sure we're only folding the explicit COPY defs/uses.
(Ops[0] == 0 || Ops[0] == 1)) {
+ bool IsSpill = Ops[0] == 0;
+ bool IsFill = !IsSpill;
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
const MachineRegisterInfo &MRI = MF.getRegInfo();
MachineBasicBlock &MBB = *MI.getParent();
@@ -2632,21 +2659,112 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
const MachineOperand &SrcMO = MI.getOperand(1);
unsigned DstReg = DstMO.getReg();
unsigned SrcReg = SrcMO.getReg();
+ // This is slightly expensive to compute for physical regs since
+ // getMinimalPhysRegClass is slow.
auto getRegClass = [&](unsigned Reg) {
return TargetRegisterInfo::isVirtualRegister(Reg)
? MRI.getRegClass(Reg)
: TRI.getMinimalPhysRegClass(Reg);
};
- const TargetRegisterClass &DstRC = *getRegClass(DstReg);
- const TargetRegisterClass &SrcRC = *getRegClass(SrcReg);
- if (DstRC.getSize() == SrcRC.getSize()) {
- if (Ops[0] == 0)
+
+ if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
+ assert(getRegClass(DstReg)->getSize() == getRegClass(SrcReg)->getSize() &&
+ "Mismatched register size in non subreg COPY");
+ if (IsSpill)
storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
- &SrcRC, &TRI);
+ getRegClass(SrcReg), &TRI);
else
- loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, &DstRC, &TRI);
+ loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
+ getRegClass(DstReg), &TRI);
return &*--InsertPt;
}
+
+ // Handle cases like spilling def of:
+ //
+ // %vreg0:sub_32<def,read-undef> = COPY %WZR; GPR64common:%vreg0
+ //
+ // where the physical register source can be widened and stored to the full
+ // virtual reg destination stack slot, in this case producing:
+ //
+ // STRXui %XZR, <fi#0>
+ //
+ if (IsSpill && DstMO.isUndef() &&
+ TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
+ assert(SrcMO.getSubReg() == 0 &&
+ "Unexpected subreg on physical register");
+ const TargetRegisterClass *SpillRC;
+ unsigned SpillSubreg;
+ switch (DstMO.getSubReg()) {
+ default:
+ SpillRC = nullptr;
+ break;
+ case AArch64::sub_32:
+ case AArch64::ssub:
+ if (AArch64::GPR32RegClass.contains(SrcReg)) {
+ SpillRC = &AArch64::GPR64RegClass;
+ SpillSubreg = AArch64::sub_32;
+ } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
+ SpillRC = &AArch64::FPR64RegClass;
+ SpillSubreg = AArch64::ssub;
+ } else
+ SpillRC = nullptr;
+ break;
+ case AArch64::dsub:
+ if (AArch64::FPR64RegClass.contains(SrcReg)) {
+ SpillRC = &AArch64::FPR128RegClass;
+ SpillSubreg = AArch64::dsub;
+ } else
+ SpillRC = nullptr;
+ break;
+ }
+
+ if (SpillRC)
+ if (unsigned WidenedSrcReg =
+ TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
+ storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
+ FrameIndex, SpillRC, &TRI);
+ return &*--InsertPt;
+ }
+ }
+
+ // Handle cases like filling use of:
+ //
+ // %vreg0:sub_32<def,read-undef> = COPY %vreg1; GPR64:%vreg0, GPR32:%vreg1
+ //
+ // where we can load the full virtual reg source stack slot, into the subreg
+ // destination, in this case producing:
+ //
+ // LDRWui %vreg0:sub_32<def,read-undef>, <fi#0>
+ //
+ if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
+ const TargetRegisterClass *FillRC;
+ switch (DstMO.getSubReg()) {
+ default:
+ FillRC = nullptr;
+ break;
+ case AArch64::sub_32:
+ FillRC = &AArch64::GPR32RegClass;
+ break;
+ case AArch64::ssub:
+ FillRC = &AArch64::FPR32RegClass;
+ break;
+ case AArch64::dsub:
+ FillRC = &AArch64::FPR64RegClass;
+ break;
+ }
+
+ if (FillRC) {
+ assert(getRegClass(SrcReg)->getSize() == FillRC->getSize() &&
+ "Mismatched regclass size on folded subreg COPY");
+ loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
+ MachineInstr &LoadMI = *--InsertPt;
+ MachineOperand &LoadDst = LoadMI.getOperand(0);
+ assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
+ LoadDst.setSubReg(DstMO.getSubReg());
+ LoadDst.setIsUndef();
+ return &LoadMI;
+ }
+ }
}
// Cannot fold.
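
Editor's note: the hunk above lets foldMemoryOperandImpl handle COPYs whose operands carry sub-register indices: an undef-def spill from a physical source is widened to the matching super-register and stored with the larger class (the %WZR example becomes STRXui %XZR), and a fill into an undef subreg def loads with the narrower class and then rewrites the load's destination to carry the sub-register index. Below is a hedged sketch of just the "which class do we actually store" decision; the enums and helper name are simplified stand-ins, not the AArch64 definitions.

    // Sketch: map (destination subreg index, source class) to the spill class.
    #include <cstdio>
    #include <optional>

    enum class RC { GPR32, GPR64, FPR32, FPR64, FPR128 };
    enum class SubIdx { sub_32, ssub, dsub, other };

    std::optional<RC> spillClassFor(SubIdx DstSub, RC SrcRC) {
      switch (DstSub) {
      case SubIdx::sub_32:
      case SubIdx::ssub:
        if (SrcRC == RC::GPR32) return RC::GPR64;   // e.g. spill WZR as STRXui XZR
        if (SrcRC == RC::FPR32) return RC::FPR64;
        return std::nullopt;
      case SubIdx::dsub:
        if (SrcRC == RC::FPR64) return RC::FPR128;
        return std::nullopt;
      default:
        return std::nullopt;                        // cannot widen, do not fold
      }
    }

    int main() {
      std::printf("widen GPR32 under sub_32 -> GPR64? %d\n",
                  spillClassFor(SubIdx::sub_32, RC::GPR32) == RC::GPR64);
    }
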
@@ -2936,7 +3054,7 @@ bool AArch64InstrInfo::useMachineCombiner() const {
return true;
}
-//
+
// True when Opc sets flag
static bool isCombineInstrSettingFlag(unsigned Opc) {
switch (Opc) {
@@ -2955,7 +3073,7 @@ static bool isCombineInstrSettingFlag(unsigned Opc) {
}
return false;
}
-//
+
// 32b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate32(unsigned Opc) {
switch (Opc) {
@@ -2974,7 +3092,7 @@ static bool isCombineInstrCandidate32(unsigned Opc) {
}
return false;
}
-//
+
// 64b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate64(unsigned Opc) {
switch (Opc) {
@@ -2993,7 +3111,7 @@ static bool isCombineInstrCandidate64(unsigned Opc) {
}
return false;
}
-//
+
// FP Opcodes that can be combined with a FMUL
static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
switch (Inst.getOpcode()) {
@@ -3009,13 +3127,13 @@ static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
case AArch64::FSUBv2f32:
case AArch64::FSUBv2f64:
case AArch64::FSUBv4f32:
- TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
- return (Options.UnsafeFPMath ||
- Options.AllowFPOpFusion == FPOpFusion::Fast);
+ TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
+ return (Options.UnsafeFPMath ||
+ Options.AllowFPOpFusion == FPOpFusion::Fast);
}
return false;
}
-//
+
// Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate(unsigned Opc) {
return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
@@ -3205,7 +3323,7 @@ static bool getFMAPatterns(MachineInstr &Root,
SmallVectorImpl<MachineCombinerPattern> &Patterns) {
if (!isCombineInstrCandidateFP(Root))
- return 0;
+ return false;
MachineBasicBlock &MBB = *Root.getParent();
bool Found = false;
@@ -3971,8 +4089,6 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
// Record MUL and ADD/SUB for deletion
DelInstrs.push_back(MUL);
DelInstrs.push_back(&Root);
-
- return;
}
/// \brief Replace csincr-branch sequence by simple conditional branch
@@ -4148,6 +4264,7 @@ AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
using namespace AArch64II;
+
static const std::pair<unsigned, const char *> TargetFlags[] = {
{MO_PAGE, "aarch64-page"},
{MO_PAGEOFF, "aarch64-pageoff"},
@@ -4162,6 +4279,7 @@ AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
using namespace AArch64II;
+
static const std::pair<unsigned, const char *> TargetFlags[] = {
{MO_GOT, "aarch64-got"},
{MO_NC, "aarch64-nc"},
diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h
index 90b2c0896872..5037866925d3 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/lib/Target/AArch64/AArch64InstrInfo.h
@@ -162,6 +162,10 @@ public:
int FrameIndex, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
+ // This tells target independent code that it is okay to pass instructions
+ // with subreg operands to foldMemoryOperandImpl.
+ bool isSubregFoldable() const override { return true; }
+
using TargetInstrInfo::foldMemoryOperandImpl;
MachineInstr *
foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
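
Editor's note: the new isSubregFoldable() hook advertises to target-independent folding code that this target's foldMemoryOperandImpl can cope with operands carrying sub-register indices; targets that do not override it keep the conservative default. A tiny illustrative sketch follows, with a stand-in base class rather than the real TargetInstrInfo.

    // Hypothetical target opting in to subreg folding; the "return false"
    // default is an assumption mirroring the conservative base behaviour.
    struct FakeTargetInstrInfo {                    // stand-in for TargetInstrInfo
      virtual ~FakeTargetInstrInfo() = default;
      virtual bool isSubregFoldable() const { return false; }
    };

    struct MyTargetInstrInfo : FakeTargetInstrInfo {
      // Generic code may now pass instructions with subreg operands to this
      // target's foldMemoryOperandImpl, which must handle or reject them.
      bool isSubregFoldable() const override { return true; }
    };

    int main() { return MyTargetInstrInfo().isSubregFoldable() ? 0 : 1; }
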
diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 20de07424c53..b51473524c72 100644
--- a/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -1071,8 +1071,12 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
return false;
}
- const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
- (CmpInst::Predicate)I.getOperand(1).getPredicate());
+ // CSINC increments the result by one when the condition code is false.
+ // Therefore, we have to invert the predicate to get an increment by 1 when
+ // the predicate is true.
+ const AArch64CC::CondCode invCC =
+ changeICMPPredToAArch64CC(CmpInst::getInversePredicate(
+ (CmpInst::Predicate)I.getOperand(1).getPredicate()));
MachineInstr &CmpMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
.addDef(ZReg)
@@ -1084,7 +1088,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
.addDef(I.getOperand(0).getReg())
.addUse(AArch64::WZR)
.addUse(AArch64::WZR)
- .addImm(CC);
+ .addImm(invCC);
constrainSelectedInstRegOperands(CmpMI, TII, TRI, RBI);
constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
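
Editor's note: the comment in the hunk explains the inversion. The compare result is materialized with CSINC wzr, wzr, cc, which yields 0 when cc holds and 1 when it does not, so the selector must feed the inverse of the predicate's condition code to get 1 exactly when the predicate is true. A toy model of that arithmetic, with the condition code reduced to a bool purely for illustration:

    // CSINC dst, a, b, cc  =>  cc ? a : b + 1
    #include <cassert>

    unsigned csinc(unsigned a, unsigned b, bool cc) { return cc ? a : b + 1; }

    unsigned materializeICmp(bool predicateHolds) {
      // CSET wd, cc is an alias of CSINC wd, wzr, wzr, inverse(cc), so pass the
      // inverted condition, exactly as the selection code above does.
      bool invCC = !predicateHolds;
      return csinc(/*wzr*/ 0, /*wzr*/ 0, invCC);
    }

    int main() {
      assert(materializeICmp(true) == 1);   // predicate true  -> result 1
      assert(materializeICmp(false) == 0);  // predicate false -> result 0
    }
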
diff --git a/lib/Target/AArch64/AArch64InstructionSelector.h b/lib/Target/AArch64/AArch64InstructionSelector.h
index 0d44e696ac20..2c6e5a912fb7 100644
--- a/lib/Target/AArch64/AArch64InstructionSelector.h
+++ b/lib/Target/AArch64/AArch64InstructionSelector.h
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
namespace llvm {
+
class AArch64InstrInfo;
class AArch64RegisterBankInfo;
class AArch64RegisterInfo;
@@ -29,7 +30,7 @@ public:
const AArch64Subtarget &STI,
const AArch64RegisterBankInfo &RBI);
- virtual bool select(MachineInstr &I) const override;
+ bool select(MachineInstr &I) const override;
private:
/// tblgen-erated 'select' implementation, used as the initial selector for
@@ -43,5 +44,6 @@ private:
const AArch64RegisterBankInfo &RBI;
};
-} // End llvm namespace.
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AARCH64_AARCH64INSTRUCTIONSELECTOR_H
diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index ca2860afe13d..f0bffe544158 100644
--- a/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -14,17 +14,18 @@
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64MACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64MACHINEFUNCTIONINFO_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/MC/MCLinkerOptimizationHint.h"
+#include <cassert>
namespace llvm {
/// AArch64FunctionInfo - This class is derived from MachineFunctionInfo and
/// contains private AArch64-specific information for each MachineFunction.
class AArch64FunctionInfo final : public MachineFunctionInfo {
-
/// Number of bytes of arguments this function has on the stack. If the callee
/// is expected to restore the argument stack this should be a multiple of 16,
/// all usable during a tail call.
@@ -34,16 +35,16 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
/// space to a function with 16-bytes then misalignment of this value would
/// make a stack adjustment necessary, which could not be undone by the
/// callee.
- unsigned BytesInStackArgArea;
+ unsigned BytesInStackArgArea = 0;
/// The number of bytes to restore to deallocate space for incoming
/// arguments. Canonically 0 in the C calling convention, but non-zero when
/// callee is expected to pop the args.
- unsigned ArgumentStackToRestore;
+ unsigned ArgumentStackToRestore = 0;
/// HasStackFrame - True if this function has a stack frame. Set by
/// determineCalleeSaves().
- bool HasStackFrame;
+ bool HasStackFrame = false;
/// \brief Amount of stack frame size, not including callee-saved registers.
unsigned LocalStackSize;
@@ -53,54 +54,44 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
/// \brief Number of TLS accesses using the special (combinable)
/// _TLS_MODULE_BASE_ symbol.
- unsigned NumLocalDynamicTLSAccesses;
+ unsigned NumLocalDynamicTLSAccesses = 0;
/// \brief FrameIndex for start of varargs area for arguments passed on the
/// stack.
- int VarArgsStackIndex;
+ int VarArgsStackIndex = 0;
/// \brief FrameIndex for start of varargs area for arguments passed in
/// general purpose registers.
- int VarArgsGPRIndex;
+ int VarArgsGPRIndex = 0;
/// \brief Size of the varargs area for arguments passed in general purpose
/// registers.
- unsigned VarArgsGPRSize;
+ unsigned VarArgsGPRSize = 0;
/// \brief FrameIndex for start of varargs area for arguments passed in
/// floating-point registers.
- int VarArgsFPRIndex;
+ int VarArgsFPRIndex = 0;
/// \brief Size of the varargs area for arguments passed in floating-point
/// registers.
- unsigned VarArgsFPRSize;
+ unsigned VarArgsFPRSize = 0;
/// True if this function has a subset of CSRs that is handled explicitly via
/// copies.
- bool IsSplitCSR;
+ bool IsSplitCSR = false;
/// True when the stack gets realigned dynamically because the size of stack
/// frame is unknown at compile time. e.g., in case of VLAs.
- bool StackRealigned;
+ bool StackRealigned = false;
/// True when the callee-save stack area has unused gaps that may be used for
/// other stack allocations.
- bool CalleeSaveStackHasFreeSpace;
+ bool CalleeSaveStackHasFreeSpace = false;
public:
- AArch64FunctionInfo()
- : BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false),
- NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0),
- VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0),
- IsSplitCSR(false), StackRealigned(false),
- CalleeSaveStackHasFreeSpace(false) {}
-
- explicit AArch64FunctionInfo(MachineFunction &MF)
- : BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false),
- NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0),
- VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0),
- IsSplitCSR(false), StackRealigned(false),
- CalleeSaveStackHasFreeSpace(false) {
+ AArch64FunctionInfo() = default;
+
+ explicit AArch64FunctionInfo(MachineFunction &MF) {
(void)MF;
}
@@ -193,6 +184,7 @@ private:
MILOHContainer LOHContainerSet;
SetOfInstructions LOHRelated;
};
-} // End llvm namespace
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AARCH64_AARCH64MACHINEFUNCTIONINFO_H
diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp
index f58bbbd26132..03e01329e036 100644
--- a/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -71,6 +71,7 @@ void AArch64Subtarget::initializeProperties() {
break;
case Falkor:
MaxInterleaveFactor = 4;
+ VectorInsertExtractBaseCost = 2;
break;
case Kryo:
MaxInterleaveFactor = 4;
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index e4ef0d4bb8db..d2883941e2c4 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -15,24 +15,35 @@
#include "AArch64InstructionSelector.h"
#include "AArch64LegalizerInfo.h"
#include "AArch64RegisterBankInfo.h"
+#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "AArch64TargetObjectFile.h"
#include "AArch64TargetTransformInfo.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Scalar.h"
+#include <memory>
+#include <string>
+
using namespace llvm;
static cl::opt<bool> EnableCCMP("aarch64-enable-ccmp",
@@ -154,9 +165,9 @@ extern "C" void LLVMInitializeAArch64Target() {
//===----------------------------------------------------------------------===//
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
if (TT.isOSBinFormatMachO())
- return make_unique<AArch64_MachoTargetObjectFile>();
+ return llvm::make_unique<AArch64_MachoTargetObjectFile>();
- return make_unique<AArch64_ELFTargetObjectFile>();
+ return llvm::make_unique<AArch64_ELFTargetObjectFile>();
}
// Helper function to build a DataLayout string
@@ -202,29 +213,35 @@ AArch64TargetMachine::AArch64TargetMachine(
initAsmInfo();
}
-AArch64TargetMachine::~AArch64TargetMachine() {}
+AArch64TargetMachine::~AArch64TargetMachine() = default;
#ifdef LLVM_BUILD_GLOBAL_ISEL
namespace {
+
struct AArch64GISelActualAccessor : public GISelAccessor {
std::unique_ptr<CallLowering> CallLoweringInfo;
std::unique_ptr<InstructionSelector> InstSelector;
std::unique_ptr<LegalizerInfo> Legalizer;
std::unique_ptr<RegisterBankInfo> RegBankInfo;
+
const CallLowering *getCallLowering() const override {
return CallLoweringInfo.get();
}
+
const InstructionSelector *getInstructionSelector() const override {
return InstSelector.get();
}
+
const LegalizerInfo *getLegalizerInfo() const override {
return Legalizer.get();
}
+
const RegisterBankInfo *getRegBankInfo() const override {
return RegBankInfo.get();
}
};
-} // End anonymous namespace.
+
+} // end anonymous namespace
#endif
const AArch64Subtarget *
@@ -287,6 +304,7 @@ AArch64beTargetMachine::AArch64beTargetMachine(
: AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
namespace {
+
/// AArch64 Code Generator Pass Configuration Options.
class AArch64PassConfig : public TargetPassConfig {
public:
@@ -324,7 +342,8 @@ public:
void addPreSched2() override;
void addPreEmitPass() override;
};
-} // namespace
+
+} // end anonymous namespace
TargetIRAnalysis AArch64TargetMachine::getTargetIRAnalysis() {
return TargetIRAnalysis([this](const Function &F) {
@@ -414,14 +433,17 @@ bool AArch64PassConfig::addIRTranslator() {
addPass(new IRTranslator());
return false;
}
+
bool AArch64PassConfig::addLegalizeMachineIR() {
addPass(new Legalizer());
return false;
}
+
bool AArch64PassConfig::addRegBankSelect() {
addPass(new RegBankSelect());
return false;
}
+
bool AArch64PassConfig::addGlobalInstructionSelect() {
addPass(new InstructionSelect());
return false;
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 88c98865bbc6..1a17691fc584 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -417,14 +417,17 @@ int AArch64TTIImpl::getArithmeticInstrCost(
}
}
-int AArch64TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
+int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
+ const SCEV *Ptr) {
// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
// extra micro-ops can significantly decrease throughput.
unsigned NumVectorInstToHideOverhead = 10;
+ int MaxMergeDistance = 64;
- if (Ty->isVectorTy() && IsComplex)
+ if (Ty->isVectorTy() && SE &&
+ !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
return NumVectorInstToHideOverhead;
// In many cases the address computation is not merged into the instruction
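
Editor's note: the updated hook replaces the old IsComplex flag with a ScalarEvolution query, so the extra NumVectorInstToHideOverhead micro-ops are only charged when the pointer is not a constant-strided access within the merge distance. The sketch below reproduces the shape of that heuristic with ScalarEvolution reduced to an optional known stride; the constants match the hunk, but the types and helper are invented for illustration and this is not the TTI API itself.

    // Sketch of the address-computation cost heuristic.
    #include <cstdio>
    #include <cstdlib>
    #include <optional>

    int addressComputationCost(bool isVectorTy, std::optional<long> strideBytes) {
      const int NumVectorInstToHideOverhead = 10;
      const int MaxMergeDistance = 64;

      // Unknown or large strides in vector code tend to defeat addressing-mode
      // folding, so charge extra micro-ops; small constant strides stay cheap.
      if (isVectorTy &&
          (!strideBytes || std::abs(*strideBytes) > MaxMergeDistance))
        return NumVectorInstToHideOverhead;

      // Otherwise assume the address computation folds into the memory op.
      return 1;
    }

    int main() {
      std::printf("%d %d\n", addressComputationCost(true, std::nullopt),
                  addressComputationCost(true, 8));
    }
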
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 24642cb1698e..849fd3d9b44a 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -104,7 +104,7 @@ public:
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
- int getAddressComputationCost(Type *Ty, bool IsComplex);
+ int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr);
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index db84afacf30e..b86a283b40d4 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -9,45 +9,62 @@
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCExpr.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "MCTargetDesc/AArch64TargetStreamer.h"
#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCLinkerOptimizationHint.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cctype>
+#include <cstdint>
#include <cstdio>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
+
using namespace llvm;
namespace {
-class AArch64Operand;
-
class AArch64AsmParser : public MCTargetAsmParser {
private:
StringRef Mnemonic; ///< Instruction mnemonic.
// Map of register aliases registers via the .req directive.
- StringMap<std::pair<bool, unsigned> > RegisterReqs;
+ StringMap<std::pair<bool, unsigned>> RegisterReqs;
AArch64TargetStreamer &getTargetStreamer() {
MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
@@ -118,6 +135,7 @@ public:
#include "AArch64GenAsmMatcher.inc"
};
bool IsILP32;
+
AArch64AsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
const MCInstrInfo &MII, const MCTargetOptions &Options)
: MCTargetAsmParser(Options, STI) {
@@ -143,9 +161,6 @@ public:
MCSymbolRefExpr::VariantKind &DarwinRefKind,
int64_t &Addend);
};
-} // end anonymous namespace
-
-namespace {
/// AArch64Operand - Instances of this class represent a parsed AArch64 machine
/// instruction.
@@ -531,6 +546,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 0 && Val < 2);
}
+
bool isImm0_7() const {
if (!isImm())
return false;
@@ -540,6 +556,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 0 && Val < 8);
}
+
bool isImm1_8() const {
if (!isImm())
return false;
@@ -549,6 +566,7 @@ public:
int64_t Val = MCE->getValue();
return (Val > 0 && Val < 9);
}
+
bool isImm0_15() const {
if (!isImm())
return false;
@@ -558,6 +576,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 0 && Val < 16);
}
+
bool isImm1_16() const {
if (!isImm())
return false;
@@ -567,6 +586,7 @@ public:
int64_t Val = MCE->getValue();
return (Val > 0 && Val < 17);
}
+
bool isImm0_31() const {
if (!isImm())
return false;
@@ -576,6 +596,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 0 && Val < 32);
}
+
bool isImm1_31() const {
if (!isImm())
return false;
@@ -585,6 +606,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 1 && Val < 32);
}
+
bool isImm1_32() const {
if (!isImm())
return false;
@@ -594,6 +616,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 1 && Val < 33);
}
+
bool isImm0_63() const {
if (!isImm())
return false;
@@ -603,6 +626,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 0 && Val < 64);
}
+
bool isImm1_63() const {
if (!isImm())
return false;
@@ -612,6 +636,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 1 && Val < 64);
}
+
bool isImm1_64() const {
if (!isImm())
return false;
@@ -621,6 +646,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 1 && Val < 65);
}
+
bool isImm0_127() const {
if (!isImm())
return false;
@@ -630,6 +656,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 0 && Val < 128);
}
+
bool isImm0_255() const {
if (!isImm())
return false;
@@ -639,6 +666,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 0 && Val < 256);
}
+
bool isImm0_65535() const {
if (!isImm())
return false;
@@ -648,6 +676,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 0 && Val < 65536);
}
+
bool isImm32_63() const {
if (!isImm())
return false;
@@ -657,6 +686,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 32 && Val < 64);
}
+
bool isLogicalImm32() const {
if (!isImm())
return false;
@@ -669,6 +699,7 @@ public:
Val &= 0xFFFFFFFF;
return AArch64_AM::isLogicalImmediate(Val, 32);
}
+
bool isLogicalImm64() const {
if (!isImm())
return false;
@@ -677,6 +708,7 @@ public:
return false;
return AArch64_AM::isLogicalImmediate(MCE->getValue(), 64);
}
+
bool isLogicalImm32Not() const {
if (!isImm())
return false;
@@ -686,6 +718,7 @@ public:
int64_t Val = ~MCE->getValue() & 0xFFFFFFFF;
return AArch64_AM::isLogicalImmediate(Val, 32);
}
+
bool isLogicalImm64Not() const {
if (!isImm())
return false;
@@ -694,7 +727,9 @@ public:
return false;
return AArch64_AM::isLogicalImmediate(~MCE->getValue(), 64);
}
+
bool isShiftedImm() const { return Kind == k_ShiftedImm; }
+
bool isAddSubImm() const {
if (!isShiftedImm() && !isImm())
return false;
@@ -737,6 +772,7 @@ public:
// code deal with it.
return true;
}
+
bool isAddSubImmNeg() const {
if (!isShiftedImm() && !isImm())
return false;
@@ -756,7 +792,9 @@ public:
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr);
return CE != nullptr && CE->getValue() < 0 && -CE->getValue() <= 0xfff;
}
+
bool isCondCode() const { return Kind == k_CondCode; }
+
bool isSIMDImmType10() const {
if (!isImm())
return false;
@@ -765,6 +803,7 @@ public:
return false;
return AArch64_AM::isAdvSIMDModImmType10(MCE->getValue());
}
+
bool isBranchTarget26() const {
if (!isImm())
return false;
@@ -776,6 +815,7 @@ public:
return false;
return (Val >= -(0x2000000 << 2) && Val <= (0x1ffffff << 2));
}
+
bool isPCRelLabel19() const {
if (!isImm())
return false;
@@ -787,6 +827,7 @@ public:
return false;
return (Val >= -(0x40000 << 2) && Val <= (0x3ffff << 2));
}
+
bool isBranchTarget14() const {
if (!isImm())
return false;
@@ -891,40 +932,49 @@ public:
bool isFPImm() const { return Kind == k_FPImm; }
bool isBarrier() const { return Kind == k_Barrier; }
bool isSysReg() const { return Kind == k_SysReg; }
+
bool isMRSSystemRegister() const {
if (!isSysReg()) return false;
return SysReg.MRSReg != -1U;
}
+
bool isMSRSystemRegister() const {
if (!isSysReg()) return false;
return SysReg.MSRReg != -1U;
}
+
bool isSystemPStateFieldWithImm0_1() const {
if (!isSysReg()) return false;
return (SysReg.PStateField == AArch64PState::PAN ||
SysReg.PStateField == AArch64PState::UAO);
}
+
bool isSystemPStateFieldWithImm0_15() const {
if (!isSysReg() || isSystemPStateFieldWithImm0_1()) return false;
return SysReg.PStateField != -1U;
}
+
bool isReg() const override { return Kind == k_Register && !Reg.isVector; }
bool isVectorReg() const { return Kind == k_Register && Reg.isVector; }
+
bool isVectorRegLo() const {
return Kind == k_Register && Reg.isVector &&
AArch64MCRegisterClasses[AArch64::FPR128_loRegClassID].contains(
Reg.RegNum);
}
+
bool isGPR32as64() const {
return Kind == k_Register && !Reg.isVector &&
AArch64MCRegisterClasses[AArch64::GPR64RegClassID].contains(Reg.RegNum);
}
+
bool isWSeqPair() const {
return Kind == k_Register && !Reg.isVector &&
AArch64MCRegisterClasses[AArch64::WSeqPairsClassRegClassID].contains(
Reg.RegNum);
}
+
bool isXSeqPair() const {
return Kind == k_Register && !Reg.isVector &&
AArch64MCRegisterClasses[AArch64::XSeqPairsClassRegClassID].contains(
@@ -957,19 +1007,25 @@ public:
bool isVectorIndex1() const {
return Kind == k_VectorIndex && VectorIndex.Val == 1;
}
+
bool isVectorIndexB() const {
return Kind == k_VectorIndex && VectorIndex.Val < 16;
}
+
bool isVectorIndexH() const {
return Kind == k_VectorIndex && VectorIndex.Val < 8;
}
+
bool isVectorIndexS() const {
return Kind == k_VectorIndex && VectorIndex.Val < 4;
}
+
bool isVectorIndexD() const {
return Kind == k_VectorIndex && VectorIndex.Val < 2;
}
+
bool isToken() const override { return Kind == k_Token; }
+
bool isTokenEqual(StringRef Str) const {
return Kind == k_Token && getToken() == Str;
}
@@ -1006,6 +1062,7 @@ public:
AArch64_AM::ShiftExtendType ET = getShiftExtendType();
return ET != AArch64_AM::UXTX && ET != AArch64_AM::SXTX;
}
+
bool isExtendLSL64() const {
if (!isExtend())
return false;
@@ -1836,11 +1893,10 @@ void AArch64Operand::print(raw_ostream &OS) const {
OS << "<prfop invalid #" << getPrefetch() << ">";
break;
}
- case k_PSBHint: {
+ case k_PSBHint:
OS << getPSBHintName();
break;
- }
- case k_ShiftExtend: {
+ case k_ShiftExtend:
OS << "<" << AArch64_AM::getShiftExtendName(getShiftExtendType()) << " #"
<< getShiftExtendAmount();
if (!hasShiftExtendAmount())
@@ -1848,7 +1904,6 @@ void AArch64Operand::print(raw_ostream &OS) const {
OS << '>';
break;
}
- }
}
/// @name Auto-generated Match Functions
@@ -2469,7 +2524,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
Expr = MCConstantExpr::create(op2, getContext()); \
Operands.push_back( \
AArch64Operand::CreateImm(Expr, S, getLoc(), getContext())); \
- } while (0)
+ } while (false)
if (Mnemonic == "ic") {
if (!Op.compare_lower("ialluis")) {
@@ -3979,7 +4034,6 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
}
}
-
switch (MatchResult) {
case Match_Success: {
// Perform range checking and other semantic validations
@@ -4550,7 +4604,6 @@ unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
return Match_InvalidOperand;
}
-
OperandMatchResultTy
AArch64AsmParser::tryParseGPRSeqPair(OperandVector &Operands) {
@@ -4601,7 +4654,7 @@ AArch64AsmParser::tryParseGPRSeqPair(OperandVector &Operands) {
return MatchOperand_ParseFail;
}
- if (RI->getEncodingValue(SecondReg) != FirstEncoding + 1 ||
+ if (RI->getEncodingValue(SecondReg) != FirstEncoding + 1 ||
(isXReg && !XRegClass.contains(SecondReg)) ||
(isWReg && !WRegClass.contains(SecondReg))) {
Error(E,"expected second odd register of a "
@@ -4610,7 +4663,7 @@ AArch64AsmParser::tryParseGPRSeqPair(OperandVector &Operands) {
}
unsigned Pair = 0;
- if(isXReg) {
+ if (isXReg) {
Pair = RI->getMatchingSuperReg(FirstReg, AArch64::sube64,
&AArch64MCRegisterClasses[AArch64::XSeqPairsClassRegClassID]);
} else {
diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.h b/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
index 24e353cf4b96..bc2f7f181699 100644
--- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
+++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
@@ -17,15 +17,12 @@
namespace llvm {
-class MCInst;
-class raw_ostream;
-
class AArch64Disassembler : public MCDisassembler {
public:
AArch64Disassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
: MCDisassembler(STI, Ctx) {}
- ~AArch64Disassembler() {}
+ ~AArch64Disassembler() override = default;
MCDisassembler::DecodeStatus
getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes,
@@ -33,6 +30,6 @@ public:
raw_ostream &CStream) const override;
};
-} // namespace llvm
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_AARCH64_DISASSEMBLER_AARCH64DISASSEMBLER_H
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index a1edb3cef46a..c954c0eb2c6b 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -17,25 +17,30 @@
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
namespace {
+
class AArch64ELFObjectWriter : public MCELFObjectTargetWriter {
public:
AArch64ELFObjectWriter(uint8_t OSABI, bool IsLittleEndian, bool IsILP32);
- ~AArch64ELFObjectWriter() override;
+ ~AArch64ELFObjectWriter() override = default;
protected:
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup, bool IsPCRel) const override;
bool IsILP32;
-private:
};
-}
+
+} // end anonymous namespace
AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI,
bool IsLittleEndian,
@@ -44,8 +49,6 @@ AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI,
/*HasRelocationAddend*/ true),
IsILP32(IsILP32) {}
-AArch64ELFObjectWriter::~AArch64ELFObjectWriter() {}
-
#define R_CLS(rtype) \
IsILP32 ? ELF::R_AARCH64_P32_##rtype : ELF::R_AARCH64_##rtype
#define BAD_ILP32_MOV(lp64rtype) "ILP32 absolute MOV relocation not "\
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index f7058cdf2373..62dfa59483eb 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -15,15 +15,23 @@
#include "MCTargetDesc/AArch64FixupKinds.h"
#include "MCTargetDesc/AArch64MCExpr.h"
#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
+
using namespace llvm;
#define DEBUG_TYPE "mccodeemitter"
@@ -37,13 +45,12 @@ class AArch64MCCodeEmitter : public MCCodeEmitter {
MCContext &Ctx;
const MCInstrInfo &MCII;
- AArch64MCCodeEmitter(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT
- void operator=(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT
public:
AArch64MCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
: Ctx(ctx), MCII(mcii) {}
-
- ~AArch64MCCodeEmitter() override {}
+ AArch64MCCodeEmitter(const AArch64MCCodeEmitter &) = delete;
+ void operator=(const AArch64MCCodeEmitter &) = delete;
+ ~AArch64MCCodeEmitter() override = default;
// getBinaryCodeForInstr - TableGen'erated function for getting the
// binary encoding for an instruction.
@@ -181,12 +188,6 @@ private:
} // end anonymous namespace
-MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- MCContext &Ctx) {
- return new AArch64MCCodeEmitter(MCII, Ctx);
-}
-
/// getMachineOpValue - Return binary encoding of operand. If the machine
/// operand requires relocation, record the relocation and return zero.
unsigned
@@ -601,3 +602,9 @@ unsigned AArch64MCCodeEmitter::fixOneOperandFPComparison(
#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "AArch64GenMCCodeEmitter.inc"
+
+MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
+ return new AArch64MCCodeEmitter(MCII, Ctx);
+}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
index 3e86a42d5be6..1b949b54590c 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
@@ -13,6 +13,7 @@
#include "AArch64TargetStreamer.h"
#include "llvm/MC/ConstantPools.h"
+
using namespace llvm;
//
@@ -21,7 +22,7 @@ using namespace llvm;
AArch64TargetStreamer::AArch64TargetStreamer(MCStreamer &S)
: MCTargetStreamer(S), ConstantPools(new AssemblerConstantPools()) {}
-AArch64TargetStreamer::~AArch64TargetStreamer() {}
+AArch64TargetStreamer::~AArch64TargetStreamer() = default;
// The constant pool handling is shared by all AArch64TargetStreamer
// implementations.
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index a8e6902c252b..4acd55eb6120 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -176,12 +176,14 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
SetupMachineFunction(MF);
+ const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
MCContext &Context = getObjFileLowering().getContext();
- MCSectionELF *ConfigSection =
- Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
- OutStreamer->SwitchSection(ConfigSection);
+ if (!STM.isAmdHsaOS()) {
+ MCSectionELF *ConfigSection =
+ Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
+ OutStreamer->SwitchSection(ConfigSection);
+ }
- const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
SIProgramInfo KernelInfo;
if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
getSIProgramInfo(KernelInfo, MF);
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 85cbadf0a570..5f651d4da5d2 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -269,7 +269,7 @@ unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt) {
unsigned encodeWaitcnt(IsaVersion Version,
unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
- unsigned Waitcnt = getWaitcntBitMask(Version);;
+ unsigned Waitcnt = getWaitcntBitMask(Version);
Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 10e6297ef1ed..cc001b596785 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -338,14 +338,17 @@ int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
-int ARMTTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
+int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
+ const SCEV *Ptr) {
// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
// extra micro-ops can significantly decrease throughput.
unsigned NumVectorInstToHideOverhead = 10;
+ int MaxMergeDistance = 64;
- if (Ty->isVectorTy() && IsComplex)
+ if (Ty->isVectorTy() && SE &&
+ !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
return NumVectorInstToHideOverhead;
// In many cases the address computation is not merged into the instruction
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h
index d83228afb0ab..731a5adf3d73 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -104,7 +104,8 @@ public:
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
- int getAddressComputationCost(Type *Val, bool IsComplex);
+ int getAddressComputationCost(Type *Val, ScalarEvolution *SE,
+ const SCEV *Ptr);
int getFPOpCost(Type *Ty);
diff --git a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
index 903f92a04431..57ead973b56e 100644
--- a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
+++ b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
@@ -8,23 +8,41 @@
//===----------------------------------------------------------------------===//
#include "Lanai.h"
+#include "LanaiAluCode.h"
+#include "LanaiCondCode.h"
#include "MCTargetDesc/LanaiMCExpr.h"
-#include "MCTargetDesc/LanaiMCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetRegistry.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
namespace llvm {
+
+// Auto-generated by TableGen
+static unsigned MatchRegisterName(StringRef Name);
+
namespace {
+
struct LanaiOperand;
class LanaiAsmParser : public MCTargetAsmParser {
@@ -80,9 +98,6 @@ private:
const MCSubtargetInfo &SubtargetInfo;
};
-// Auto-generated by TableGen
-static unsigned MatchRegisterName(llvm::StringRef Name);
-
// LanaiOperand - Instances of this class represented a parsed machine
// instruction
struct LanaiOperand : public MCParsedAsmOperand {
@@ -627,6 +642,8 @@ public:
}
};
+} // end anonymous namespace
+
bool LanaiAsmParser::ParseDirective(AsmToken /*DirectiveId*/) { return true; }
bool LanaiAsmParser::MatchAndEmitInstruction(SMLoc IdLoc, unsigned &Opcode,
@@ -680,11 +697,11 @@ std::unique_ptr<LanaiOperand> LanaiAsmParser::parseRegister() {
if (Lexer.getKind() == AsmToken::Identifier) {
RegNum = MatchRegisterName(Lexer.getTok().getIdentifier());
if (RegNum == 0)
- return 0;
+ return nullptr;
Parser.Lex(); // Eat identifier token
return LanaiOperand::createReg(RegNum, Start, End);
}
- return 0;
+ return nullptr;
}
bool LanaiAsmParser::ParseRegister(unsigned &RegNum, SMLoc &StartLoc,
@@ -701,15 +718,15 @@ bool LanaiAsmParser::ParseRegister(unsigned &RegNum, SMLoc &StartLoc,
std::unique_ptr<LanaiOperand> LanaiAsmParser::parseIdentifier() {
SMLoc Start = Parser.getTok().getLoc();
SMLoc End = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
- const MCExpr *Res, *RHS = 0;
+ const MCExpr *Res, *RHS = nullptr;
LanaiMCExpr::VariantKind Kind = LanaiMCExpr::VK_Lanai_None;
if (Lexer.getKind() != AsmToken::Identifier)
- return 0;
+ return nullptr;
StringRef Identifier;
if (Parser.parseIdentifier(Identifier))
- return 0;
+ return nullptr;
// Check if identifier has a modifier
if (Identifier.equals_lower("hi"))
@@ -722,24 +739,24 @@ std::unique_ptr<LanaiOperand> LanaiAsmParser::parseIdentifier() {
if (Kind != LanaiMCExpr::VK_Lanai_None) {
if (Lexer.getKind() != AsmToken::LParen) {
Error(Lexer.getLoc(), "Expected '('");
- return 0;
+ return nullptr;
}
Lexer.Lex(); // lex '('
// Parse identifier
if (Parser.parseIdentifier(Identifier))
- return 0;
+ return nullptr;
}
// If there is an addition, parse the RHS.
if (Lexer.getKind() == AsmToken::Plus && Parser.parseExpression(RHS))
- return 0;
+ return nullptr;
// For variants parse the final ')'
if (Kind != LanaiMCExpr::VK_Lanai_None) {
if (Lexer.getKind() != AsmToken::RParen) {
Error(Lexer.getLoc(), "Expected ')'");
- return 0;
+ return nullptr;
}
Lexer.Lex(); // lex ')'
}
@@ -771,7 +788,7 @@ std::unique_ptr<LanaiOperand> LanaiAsmParser::parseImmediate() {
if (!Parser.parseExpression(ExprVal))
return LanaiOperand::createImm(ExprVal, Start, End);
default:
- return 0;
+ return nullptr;
}
}
@@ -1204,10 +1221,9 @@ bool LanaiAsmParser::ParseInstruction(ParseInstructionInfo & /*Info*/,
#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#include "LanaiGenAsmMatcher.inc"
-} // namespace
extern "C" void LLVMInitializeLanaiAsmParser() {
RegisterMCAsmParser<LanaiAsmParser> x(getTheLanaiTarget());
}
-} // namespace llvm
+} // end namespace llvm
diff --git a/lib/Target/Lanai/Disassembler/LanaiDisassembler.h b/lib/Target/Lanai/Disassembler/LanaiDisassembler.h
index a317cd88ad63..e0c19e8ea644 100644
--- a/lib/Target/Lanai/Disassembler/LanaiDisassembler.h
+++ b/lib/Target/Lanai/Disassembler/LanaiDisassembler.h
@@ -20,14 +20,11 @@
namespace llvm {
-class MCInst;
-class raw_ostream;
-
class LanaiDisassembler : public MCDisassembler {
public:
LanaiDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx);
- ~LanaiDisassembler() override {}
+ ~LanaiDisassembler() override = default;
// getInstruction - See MCDisassembler.
MCDisassembler::DecodeStatus
@@ -36,6 +33,6 @@ public:
raw_ostream &CStream) const override;
};
-} // namespace llvm
+} // end namespace llvm
#endif // LLVM_LIB_TARGET_LANAI_DISASSEMBLER_LANAIDISASSEMBLER_H
diff --git a/lib/Target/Lanai/InstPrinter/LanaiInstPrinter.h b/lib/Target/Lanai/InstPrinter/LanaiInstPrinter.h
index 1c9d186ad819..59904fbaa318 100644
--- a/lib/Target/Lanai/InstPrinter/LanaiInstPrinter.h
+++ b/lib/Target/Lanai/InstPrinter/LanaiInstPrinter.h
@@ -14,10 +14,10 @@
#ifndef LLVM_LIB_TARGET_LANAI_INSTPRINTER_LANAIINSTPRINTER_H
#define LLVM_LIB_TARGET_LANAI_INSTPRINTER_LANAIINSTPRINTER_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
-class MCOperand;
class LanaiInstPrinter : public MCInstPrinter {
public:
@@ -28,14 +28,14 @@ public:
void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
const MCSubtargetInfo &STI) override;
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
- const char *Modifier = 0);
+ const char *Modifier = nullptr);
void printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printMemRiOperand(const MCInst *MI, int OpNo, raw_ostream &O,
- const char *Modifier = 0);
+ const char *Modifier = nullptr);
void printMemRrOperand(const MCInst *MI, int OpNo, raw_ostream &O,
- const char *Modifier = 0);
+ const char *Modifier = nullptr);
void printMemSplsOperand(const MCInst *MI, int OpNo, raw_ostream &O,
- const char *Modifier = 0);
+ const char *Modifier = nullptr);
void printCCOperand(const MCInst *MI, int OpNo, raw_ostream &O);
void printAluOperand(const MCInst *MI, int OpNo, raw_ostream &O);
void printHi16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
@@ -60,6 +60,7 @@ private:
bool printMemoryStoreIncrement(const MCInst *MI, raw_ostream &Ostream,
StringRef Opcode, int AddOffset);
};
-} // namespace llvm
+
+} // end namespace llvm
#endif // LLVM_LIB_TARGET_LANAI_INSTPRINTER_LANAIINSTPRINTER_H
diff --git a/lib/Target/Lanai/LanaiISelLowering.cpp b/lib/Target/Lanai/LanaiISelLowering.cpp
index ae7870e07d42..d156294a0b0c 100644
--- a/lib/Target/Lanai/LanaiISelLowering.cpp
+++ b/lib/Target/Lanai/LanaiISelLowering.cpp
@@ -11,31 +11,46 @@
//
//===----------------------------------------------------------------------===//
-#include "LanaiISelLowering.h"
-
#include "Lanai.h"
+#include "LanaiCondCode.h"
+#include "LanaiISelLowering.h"
#include "LanaiMachineFunctionInfo.h"
#include "LanaiSubtarget.h"
-#include "LanaiTargetMachine.h"
#include "LanaiTargetObjectFile.h"
+#include "MCTargetDesc/LanaiBaseInfo.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalAlias.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetCallingConv.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+#include <cmath>
+#include <cstdint>
+#include <cstdlib>
+#include <utility>
#define DEBUG_TYPE "lanai-lower"
@@ -195,6 +210,7 @@ SDValue LanaiTargetLowering::LowerOperation(SDValue Op,
llvm_unreachable("unimplemented operand");
}
}
+
//===----------------------------------------------------------------------===//
// Lanai Inline Assembly Support
//===----------------------------------------------------------------------===//
@@ -244,7 +260,7 @@ LanaiTargetLowering::getSingleConstraintMatchWeight(
Value *CallOperandVal = Info.CallOperandVal;
// If we don't have a value, we can't do a match,
// but allow it at the lowest weight.
- if (CallOperandVal == NULL)
+ if (CallOperandVal == nullptr)
return CW_Default;
// Look at the constraint type.
switch (*Constraint) {
@@ -270,7 +286,7 @@ LanaiTargetLowering::getSingleConstraintMatchWeight(
void LanaiTargetLowering::LowerAsmOperandForConstraint(
SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
- SDValue Result(0, 0);
+ SDValue Result(nullptr, 0);
// Only support length 1 constraints for now.
if (Constraint.length() > 1)
@@ -676,7 +692,7 @@ SDValue LanaiTargetLowering::LowerCCCCallTo(
} else {
assert(VA.isMemLoc());
- if (StackPtr.getNode() == 0)
+ if (StackPtr.getNode() == nullptr)
StackPtr = DAG.getCopyFromReg(Chain, DL, Lanai::SP,
getPointerTy(DAG.getDataLayout()));
@@ -1120,7 +1136,7 @@ const char *LanaiTargetLowering::getTargetNodeName(unsigned Opcode) const {
case LanaiISD::SMALL:
return "LanaiISD::SMALL";
default:
- return NULL;
+ return nullptr;
}
}
diff --git a/lib/Target/Lanai/LanaiRegisterInfo.h b/lib/Target/Lanai/LanaiRegisterInfo.h
index 8b84bbc460e8..c6e459076ebc 100644
--- a/lib/Target/Lanai/LanaiRegisterInfo.h
+++ b/lib/Target/Lanai/LanaiRegisterInfo.h
@@ -21,9 +21,6 @@
namespace llvm {
-class TargetInstrInfo;
-class Type;
-
struct LanaiRegisterInfo : public LanaiGenRegisterInfo {
LanaiRegisterInfo();
@@ -32,7 +29,7 @@ struct LanaiRegisterInfo : public LanaiGenRegisterInfo {
// Code Generation virtual methods.
const uint16_t *
- getCalleeSavedRegs(const MachineFunction *MF = 0) const override;
+ getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
@@ -42,7 +39,7 @@ struct LanaiRegisterInfo : public LanaiGenRegisterInfo {
void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
unsigned FIOperandNum,
- RegScavenger *RS = NULL) const override;
+ RegScavenger *RS = nullptr) const override;
bool canRealignStack(const MachineFunction &MF) const override;
@@ -58,6 +55,6 @@ struct LanaiRegisterInfo : public LanaiGenRegisterInfo {
int getDwarfRegNum(unsigned RegNum, bool IsEH) const;
};
-} // namespace llvm
+} // end namespace llvm
#endif // LLVM_LIB_TARGET_LANAI_LANAIREGISTERINFO_H
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp
index e30d5e9a18eb..e02bba529bd5 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp
@@ -9,20 +9,19 @@
#include "MCTargetDesc/LanaiBaseInfo.h"
#include "MCTargetDesc/LanaiFixupKinds.h"
-#include "MCTargetDesc/LanaiMCTargetDesc.h"
#include "llvm/MC/MCELFObjectWriter.h"
-#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
using namespace llvm;
namespace {
+
class LanaiELFObjectWriter : public MCELFObjectTargetWriter {
public:
explicit LanaiELFObjectWriter(uint8_t OSABI);
- ~LanaiELFObjectWriter() override;
+ ~LanaiELFObjectWriter() override = default;
protected:
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
@@ -30,14 +29,13 @@ protected:
bool needsRelocateWithSymbol(const MCSymbol &SD,
unsigned Type) const override;
};
-} // namespace
+
+} // end anonymous namespace
LanaiELFObjectWriter::LanaiELFObjectWriter(uint8_t OSABI)
: MCELFObjectTargetWriter(/*Is64Bit_=*/false, OSABI, ELF::EM_LANAI,
/*HasRelocationAddend=*/true) {}
-LanaiELFObjectWriter::~LanaiELFObjectWriter() {}
-
unsigned LanaiELFObjectWriter::getRelocType(MCContext & /*Ctx*/,
const MCValue & /*Target*/,
const MCFixup &Fixup,
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
index ce68b7e24dba..f5b5335bb989 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
@@ -12,37 +12,38 @@
//===----------------------------------------------------------------------===//
#include "Lanai.h"
+#include "LanaiAluCode.h"
#include "MCTargetDesc/LanaiBaseInfo.h"
#include "MCTargetDesc/LanaiFixupKinds.h"
#include "MCTargetDesc/LanaiMCExpr.h"
-#include "MCTargetDesc/LanaiMCTargetDesc.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
#define DEBUG_TYPE "mccodeemitter"
STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
namespace llvm {
+
namespace {
-class LanaiMCCodeEmitter : public MCCodeEmitter {
- LanaiMCCodeEmitter(const LanaiMCCodeEmitter &); // DO NOT IMPLEMENT
- void operator=(const LanaiMCCodeEmitter &); // DO NOT IMPLEMENT
- const MCInstrInfo &InstrInfo;
- MCContext &Context;
+class LanaiMCCodeEmitter : public MCCodeEmitter {
public:
- LanaiMCCodeEmitter(const MCInstrInfo &MCII, MCContext &C)
- : InstrInfo(MCII), Context(C) {}
-
- ~LanaiMCCodeEmitter() override {}
+ LanaiMCCodeEmitter(const MCInstrInfo &MCII, MCContext &C) {}
+ LanaiMCCodeEmitter(const LanaiMCCodeEmitter &) = delete;
+ void operator=(const LanaiMCCodeEmitter &) = delete;
+ ~LanaiMCCodeEmitter() override = default;
// The functions below are called by TableGen generated functions for getting
// the binary encoding of instructions/operands.
@@ -86,6 +87,8 @@ public:
const MCSubtargetInfo &STI) const;
};
+} // end anonymous namespace
+
Lanai::Fixups FixupKind(const MCExpr *Expr) {
if (isa<MCSymbolRefExpr>(Expr))
return Lanai::FIXUP_LANAI_21;
@@ -298,8 +301,8 @@ unsigned LanaiMCCodeEmitter::getBranchTargetOpValue(
}
#include "LanaiGenMCCodeEmitter.inc"
-} // namespace
-} // namespace llvm
+
+} // end namespace llvm
llvm::MCCodeEmitter *
llvm::createLanaiMCCodeEmitter(const MCInstrInfo &InstrInfo,
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
index c2f8c0f7ad50..a47ff9ff3d61 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
@@ -11,16 +11,21 @@
//
//===----------------------------------------------------------------------===//
+#include "LanaiMCAsmInfo.h"
#include "LanaiMCTargetDesc.h"
-
#include "InstPrinter/LanaiInstPrinter.h"
-#include "LanaiMCAsmInfo.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
+#include <cstdint>
+#include <string>
#define GET_INSTRINFO_MC_DESC
#include "LanaiGenInstrInfo.inc"
@@ -70,7 +75,7 @@ static MCInstPrinter *createLanaiMCInstPrinter(const Triple & /*T*/,
const MCRegisterInfo &MRI) {
if (SyntaxVariant == 0)
return new LanaiInstPrinter(MAI, MII, MRI);
- return 0;
+ return nullptr;
}
static MCRelocationInfo *createLanaiElfRelocation(const Triple &TheTriple,
@@ -79,6 +84,7 @@ static MCRelocationInfo *createLanaiElfRelocation(const Triple &TheTriple,
}
namespace {
+
class LanaiMCInstrAnalysis : public MCInstrAnalysis {
public:
explicit LanaiMCInstrAnalysis(const MCInstrInfo *Info)
@@ -107,6 +113,7 @@ public:
}
}
};
+
} // end anonymous namespace
static MCInstrAnalysis *createLanaiInstrAnalysis(const MCInstrInfo *Info) {
@@ -131,7 +138,7 @@ extern "C" void LLVMInitializeLanaiTargetMC() {
// Register the MC code emitter
TargetRegistry::RegisterMCCodeEmitter(getTheLanaiTarget(),
- llvm::createLanaiMCCodeEmitter);
+ createLanaiMCCodeEmitter);
// Register the ASM Backend
TargetRegistry::RegisterMCAsmBackend(getTheLanaiTarget(),
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index d3c88482f092..05acd25ae5fc 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -47,7 +47,7 @@ namespace llvm {
FCTIDZ, FCTIWZ,
/// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for
- /// unsigned integers.
+ /// unsigned integers with round toward zero.
FCTIDUZ, FCTIWUZ,
/// VEXTS, ByteWidth - takes an input in VSFRC and produces an output in
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 03b2257a88a8..fbec8787ef8d 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1154,6 +1154,9 @@ defm FCFID : XForm_26r<63, 846, (outs f8rc:$frD), (ins f8rc:$frB),
defm FCTID : XForm_26r<63, 814, (outs f8rc:$frD), (ins f8rc:$frB),
"fctid", "$frD, $frB", IIC_FPGeneral,
[]>, isPPC64;
+defm FCTIDU : XForm_26r<63, 942, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fctidu", "$frD, $frB", IIC_FPGeneral,
+ []>, isPPC64;
defm FCTIDZ : XForm_26r<63, 815, (outs f8rc:$frD), (ins f8rc:$frB),
"fctidz", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64;
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index 99689f656c2d..ef7d2012a233 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -603,6 +603,12 @@ class XForm_17<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = 0;
}
+class XForm_17a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : XForm_17<opcode, xo, OOL, IOL, asmstr, itin > {
+ let FRA = 0;
+}
+
// Used for QPX
class XForm_18<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index a7231bd2e2c0..90111bbea07d 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -2172,11 +2172,19 @@ let isCompare = 1, hasSideEffects = 0 in {
"fcmpu $crD, $fA, $fB", IIC_FPCompare>;
}
+def FTDIV: XForm_17<63, 128, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB),
+ "ftdiv $crD, $fA, $fB", IIC_FPCompare>;
+def FTSQRT: XForm_17a<63, 160, (outs crrc:$crD), (ins f8rc:$fB),
+ "ftsqrt $crD, $fB", IIC_FPCompare>;
+
let Uses = [RM] in {
let hasSideEffects = 0 in {
defm FCTIW : XForm_26r<63, 14, (outs f8rc:$frD), (ins f8rc:$frB),
"fctiw", "$frD, $frB", IIC_FPGeneral,
[]>;
+ defm FCTIWU : XForm_26r<63, 142, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fctiwu", "$frD, $frB", IIC_FPGeneral,
+ []>;
defm FCTIWZ : XForm_26r<63, 15, (outs f8rc:$frD), (ins f8rc:$frB),
"fctiwz", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (PPCfctiwz f64:$frB))]>;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index fd2189397279..7f72ab17f619 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -16985,10 +16985,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, newSelect, zeroConst);
}
- if (Cond.getOpcode() == ISD::SETCC) {
+ if (Cond.getOpcode() == ISD::SETCC)
if (SDValue NewCond = LowerSETCC(Cond, DAG))
Cond = NewCond;
- }
// (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y
// (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y
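// Worked check of the folds above (illustrative, not part of this patch),
// reading "sign_bit (x - 1)" as "all-ones when x - 1 borrows, i.e. x == 0,
// and zero otherwise":
//   x == 0: the mask is all-ones, so mask | y == -1 and ~mask | y == y,
//           matching the two selects.
//   x != 0: the mask is zero, so mask | y == y and ~mask | y == -1,
//           again matching the two selects.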
@@ -18289,6 +18288,7 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT,
/// constant. Takes immediate version of shift as input.
static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
SDValue SrcOp, SDValue ShAmt,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT SVT = ShAmt.getSimpleValueType();
assert((SVT == MVT::i32 || SVT == MVT::i64) && "Unexpected value type!");
@@ -18306,27 +18306,32 @@ static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
case X86ISD::VSRAI: Opc = X86ISD::VSRA; break;
}
- const X86Subtarget &Subtarget =
- static_cast<const X86Subtarget &>(DAG.getSubtarget());
- if (Subtarget.hasSSE41() && ShAmt.getOpcode() == ISD::ZERO_EXTEND &&
- ShAmt.getOperand(0).getSimpleValueType() == MVT::i16) {
- // Let the shuffle legalizer expand this shift amount node.
+ // Need to build a vector containing shift amount.
+ // SSE/AVX packed shifts only use the lower 64-bit of the shift count.
+ // +=================+============+=======================================+
+ // | ShAmt is | HasSSE4.1? | Construct ShAmt vector as |
+ // +=================+============+=======================================+
+ // | i64 | Yes, No | Use ShAmt as lowest elt |
+ // | i32 | Yes | zero-extend in-reg |
+ // | (i32 zext(i16)) | Yes | zero-extend in-reg |
+ // | i16/i32 | No | v4i32 build_vector(ShAmt, 0, ud, ud) |
+ // +=================+============+=======================================+
+
+ if (SVT == MVT::i64)
+ ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v2i64, ShAmt);
+ else if (Subtarget.hasSSE41() && ShAmt.getOpcode() == ISD::ZERO_EXTEND &&
+ ShAmt.getOperand(0).getSimpleValueType() == MVT::i16) {
SDValue Op0 = ShAmt.getOperand(0);
Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(Op0), MVT::v8i16, Op0);
- ShAmt = getShuffleVectorZeroOrUndef(Op0, 0, true, Subtarget, DAG);
+ ShAmt = DAG.getZeroExtendVectorInReg(Op0, SDLoc(Op0), MVT::v2i64);
+ } else if (Subtarget.hasSSE41() &&
+ ShAmt.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v4i32, ShAmt);
+ ShAmt = DAG.getZeroExtendVectorInReg(ShAmt, SDLoc(ShAmt), MVT::v2i64);
} else {
- // Need to build a vector containing shift amount.
- // SSE/AVX packed shifts only use the lower 64-bit of the shift count.
- SmallVector<SDValue, 4> ShOps;
- ShOps.push_back(ShAmt);
- if (SVT == MVT::i32) {
- ShOps.push_back(DAG.getConstant(0, dl, SVT));
- ShOps.push_back(DAG.getUNDEF(SVT));
- }
- ShOps.push_back(DAG.getUNDEF(SVT));
-
- MVT BVT = SVT == MVT::i32 ? MVT::v4i32 : MVT::v2i64;
- ShAmt = DAG.getBuildVector(BVT, dl, ShOps);
+ SmallVector<SDValue, 4> ShOps = {ShAmt, DAG.getConstant(0, dl, SVT),
+ DAG.getUNDEF(SVT), DAG.getUNDEF(SVT)};
+ ShAmt = DAG.getBuildVector(MVT::v4i32, dl, ShOps);
}
// The return type has to be a 128-bit type with the same element
@@ -19014,7 +19019,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
}
case VSHIFT:
return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(),
- Op.getOperand(1), Op.getOperand(2), DAG);
+ Op.getOperand(1), Op.getOperand(2), Subtarget,
+ DAG);
case COMPRESS_EXPAND_IN_REG: {
SDValue Mask = Op.getOperand(3);
SDValue DataToCompress = Op.getOperand(1);
@@ -21276,7 +21282,7 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
else if (EltVT.bitsLT(MVT::i32))
BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);
- return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, DAG);
+ return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, Subtarget, DAG);
}
}
@@ -25951,12 +25957,11 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N,
// instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
+ bool FloatDomain,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &SrcVT, MVT &DstVT) {
unsigned NumMaskElts = Mask.size();
unsigned MaskEltSize = MaskVT.getScalarSizeInBits();
- bool FloatDomain = MaskVT.isFloatingPoint() ||
- (!Subtarget.hasAVX2() && MaskVT.is256BitVector());
// Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS).
if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2())) &&
@@ -26067,11 +26072,11 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
// permute instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
+ bool FloatDomain,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &ShuffleVT,
unsigned &PermuteImm) {
unsigned NumMaskElts = Mask.size();
- bool FloatDomain = MaskVT.isFloatingPoint();
bool ContainsZeros = false;
SmallBitVector Zeroable(NumMaskElts, false);
@@ -26211,11 +26216,10 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
// shuffle instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
- SDValue &V1, SDValue &V2,
+ bool FloatDomain, SDValue &V1, SDValue &V2,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &ShuffleVT,
bool IsUnary) {
- bool FloatDomain = MaskVT.isFloatingPoint();
unsigned EltSizeInBits = MaskVT.getScalarSizeInBits();
if (MaskVT.is128BitVector()) {
@@ -26310,13 +26314,13 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
}
static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
+ bool FloatDomain,
SDValue &V1, SDValue &V2,
SDLoc &DL, SelectionDAG &DAG,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &ShuffleVT,
unsigned &PermuteImm) {
unsigned NumMaskElts = Mask.size();
- bool FloatDomain = MaskVT.isFloatingPoint();
// Attempt to match against PALIGNR byte rotate.
if (!FloatDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSSE3()) ||
@@ -26594,8 +26598,8 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
}
}
- if (matchUnaryVectorShuffle(MaskVT, Mask, Subtarget, Shuffle, ShuffleSrcVT,
- ShuffleVT)) {
+ if (matchUnaryVectorShuffle(MaskVT, Mask, FloatDomain, Subtarget, Shuffle,
+ ShuffleSrcVT, ShuffleVT)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
@@ -26609,8 +26613,8 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
return true;
}
- if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, Subtarget, Shuffle,
- ShuffleVT, PermuteImm)) {
+ if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, FloatDomain, Subtarget,
+ Shuffle, ShuffleVT, PermuteImm)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
@@ -26626,8 +26630,8 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
}
}
- if (matchBinaryVectorShuffle(MaskVT, Mask, V1, V2, Subtarget, Shuffle,
- ShuffleVT, UnaryShuffle)) {
+ if (matchBinaryVectorShuffle(MaskVT, Mask, FloatDomain, V1, V2, Subtarget,
+ Shuffle, ShuffleVT, UnaryShuffle)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
@@ -26643,8 +26647,9 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
return true;
}
- if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, V1, V2, DL, DAG, Subtarget,
- Shuffle, ShuffleVT, PermuteImm)) {
+ if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, FloatDomain, V1, V2, DL,
+ DAG, Subtarget, Shuffle, ShuffleVT,
+ PermuteImm)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
@@ -28742,6 +28747,27 @@ static bool combineBitcastForMaskedOp(SDValue OrigOp, SelectionDAG &DAG,
DAG.getConstant(Imm, DL, MVT::i8)));
return true;
}
+ case ISD::EXTRACT_SUBVECTOR: {
+ unsigned EltSize = EltVT.getSizeInBits();
+ if (EltSize != 32 && EltSize != 64)
+ return false;
+ MVT OpEltVT = Op.getSimpleValueType().getVectorElementType();
+ // Only change element size, not type.
+ if (VT.isInteger() != OpEltVT.isInteger())
+ return false;
+ uint64_t Imm = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ Imm = (Imm * OpEltVT.getSizeInBits()) / EltSize;
+ // Op0 needs to be bitcasted to a larger vector with the same element type.
+ SDValue Op0 = Op.getOperand(0);
+ MVT Op0VT = MVT::getVectorVT(EltVT,
+ Op0.getSimpleValueType().getSizeInBits() / EltSize);
+ Op0 = DAG.getBitcast(Op0VT, Op0);
+ DCI.AddToWorklist(Op0.getNode());
+ DCI.CombineTo(OrigOp.getNode(),
+ DAG.getNode(Opcode, DL, VT, Op0,
+ DAG.getConstant(Imm, DL, MVT::i8)));
+ return true;
+ }
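// Worked example (illustrative, not part of this patch): extracting the
// upper half of a v16i32 source (Imm == 8, OpEltVT == i32) for a masked op
// whose element type is i64 (EltSize == 64) rescales the index to
// Imm == (8 * 32) / 64 == 4, i.e. the same byte offset expressed in the
// wider element units seen after the bitcast of Op0.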
}
return false;
@@ -30921,6 +30947,59 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones);
}
+/// Check if truncation with saturation from type \p SrcVT to \p DstVT
+/// is valid for the given \p Subtarget.
+static bool
+isSATValidOnSubtarget(EVT SrcVT, EVT DstVT, const X86Subtarget &Subtarget) {
+ if (!Subtarget.hasAVX512())
+ return false;
+ EVT SrcElVT = SrcVT.getScalarType();
+ EVT DstElVT = DstVT.getScalarType();
+ if (SrcElVT.getSizeInBits() < 16 || SrcElVT.getSizeInBits() > 64)
+ return false;
+ if (DstElVT.getSizeInBits() < 8 || DstElVT.getSizeInBits() > 32)
+ return false;
+ if (SrcVT.is512BitVector() || Subtarget.hasVLX())
+ return SrcElVT.getSizeInBits() >= 32 || Subtarget.hasBWI();
+ return false;
+}
+
+/// Detect a pattern of truncation with saturation:
+/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
+/// Return the source value to be truncated or SDValue() if the pattern was not
+/// matched or it is unsupported on the current target.
+static SDValue
+detectUSatPattern(SDValue In, EVT VT, const X86Subtarget &Subtarget) {
+ if (In.getOpcode() != ISD::UMIN)
+ return SDValue();
+
+ EVT InVT = In.getValueType();
+ // FIXME: Scalar type may be supported if we move it to vector register.
+ if (!InVT.isVector() || !InVT.isSimple())
+ return SDValue();
+
+ if (!isSATValidOnSubtarget(InVT, VT, Subtarget))
+ return SDValue();
+
+ // Saturation with truncation. We truncate from InVT to VT.
+ assert(InVT.getScalarSizeInBits() > VT.getScalarSizeInBits() &&
+ "Unexpected types for truncate operation");
+
+ SDValue SrcVal;
+ APInt C;
+ if (ISD::isConstantSplatVector(In.getOperand(0).getNode(), C))
+ SrcVal = In.getOperand(1);
+ else if (ISD::isConstantSplatVector(In.getOperand(1).getNode(), C))
+ SrcVal = In.getOperand(0);
+ else
+ return SDValue();
+
+ // C should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according to
+ // the element size of the destination type.
+ return (C == ((uint64_t)1 << VT.getScalarSizeInBits()) - 1) ?
+ SrcVal : SDValue();
+}
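// Illustrative instance of the pattern detectUSatPattern matches (a sketch,
// not part of this patch), for a v16i32 -> v16i8 unsigned-saturating
// truncation:
//
//   t1 = umin t0, splat(255)     // 255 == (1 << 8) - 1 for i8 elements
//   t2 = truncate t1 to v16i8
//
// C == 255 here, so t0 is returned as the value to truncate; the caller then
// emits X86ISD::VTRUNCUS (or a truncating store with unsigned saturation).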
+
/// This function detects the AVG pattern between vectors of unsigned i8/i16,
/// which is c = (a + b + 1) / 2, and replace this operation with the efficient
/// X86ISD::AVG instruction.
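// For example (illustrative): a = 10, b = 13 gives (10 + 13 + 1) / 2 == 12,
// i.e. the average rounded up, computed per unsigned i8/i16 lane without the
// intermediate sum overflowing the lane.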
@@ -31487,6 +31566,12 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
St->getPointerInfo(), St->getAlignment(),
St->getMemOperand()->getFlags());
+ if (SDValue Val =
+ detectUSatPattern(St->getValue(), St->getMemoryVT(), Subtarget))
+ return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(),
+ dl, Val, St->getBasePtr(),
+ St->getMemoryVT(), St->getMemOperand(), DAG);
+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned NumElems = VT.getVectorNumElements();
assert(StVT != VT && "Cannot truncate to the same type");
@@ -31967,7 +32052,8 @@ combineVectorTruncationWithPACKUS(SDNode *N, SelectionDAG &DAG,
/// Truncate a group of v4i32 into v8i16 using X86ISD::PACKSS.
static SDValue
-combineVectorTruncationWithPACKSS(SDNode *N, SelectionDAG &DAG,
+combineVectorTruncationWithPACKSS(SDNode *N, const X86Subtarget &Subtarget,
+ SelectionDAG &DAG,
SmallVector<SDValue, 8> &Regs) {
assert(Regs.size() > 0 && Regs[0].getValueType() == MVT::v4i32);
EVT OutVT = N->getValueType(0);
@@ -31976,8 +32062,10 @@ combineVectorTruncationWithPACKSS(SDNode *N, SelectionDAG &DAG,
// Shift left by 16 bits, then arithmetic-shift right by 16 bits.
SDValue ShAmt = DAG.getConstant(16, DL, MVT::i32);
for (auto &Reg : Regs) {
- Reg = getTargetVShiftNode(X86ISD::VSHLI, DL, MVT::v4i32, Reg, ShAmt, DAG);
- Reg = getTargetVShiftNode(X86ISD::VSRAI, DL, MVT::v4i32, Reg, ShAmt, DAG);
+ Reg = getTargetVShiftNode(X86ISD::VSHLI, DL, MVT::v4i32, Reg, ShAmt,
+ Subtarget, DAG);
+ Reg = getTargetVShiftNode(X86ISD::VSRAI, DL, MVT::v4i32, Reg, ShAmt,
+ Subtarget, DAG);
}
for (unsigned i = 0, e = Regs.size() / 2; i < e; i++)
@@ -32046,7 +32134,7 @@ static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG,
if (Subtarget.hasSSE41() || OutSVT == MVT::i8)
return combineVectorTruncationWithPACKUS(N, DAG, SubVec);
else if (InSVT == MVT::i32)
- return combineVectorTruncationWithPACKSS(N, DAG, SubVec);
+ return combineVectorTruncationWithPACKSS(N, Subtarget, DAG, SubVec);
else
return SDValue();
}
@@ -32104,6 +32192,10 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))
return Avg;
+ // Try the truncation with unsigned saturation.
+ if (SDValue Val = detectUSatPattern(Src, VT, Subtarget))
+ return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, Val);
+
// The bitcast source is a direct mmx result.
// Detect bitcasts between i32 to x86mmx
if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) {
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index d7792e296a58..de4839432b9a 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -80,9 +80,12 @@ unsigned X86TTIImpl::getNumberOfRegisters(bool Vector) {
unsigned X86TTIImpl::getRegisterBitWidth(bool Vector) {
if (Vector) {
- if (ST->hasAVX512()) return 512;
- if (ST->hasAVX()) return 256;
- if (ST->hasSSE1()) return 128;
+ if (ST->hasAVX512())
+ return 512;
+ if (ST->hasAVX())
+ return 256;
+ if (ST->hasSSE1())
+ return 128;
return 0;
}
@@ -211,11 +214,9 @@ int X86TTIImpl::getArithmeticInstrCost(
};
// Look for AVX512DQ lowering tricks for custom cases.
- if (ST->hasDQI()) {
- if (const auto *Entry = CostTableLookup(AVX512DQCostTable, ISD,
- LT.second))
+ if (ST->hasDQI())
+ if (const auto *Entry = CostTableLookup(AVX512DQCostTable, ISD, LT.second))
return LT.first * Entry->Cost;
- }
static const CostTblEntry AVX512BWCostTable[] = {
{ ISD::MUL, MVT::v64i8, 11 }, // extend/pmullw/trunc sequence.
@@ -225,37 +226,38 @@ int X86TTIImpl::getArithmeticInstrCost(
// Vectorizing division is a bad idea. See the SSE2 table for more comments.
{ ISD::SDIV, MVT::v64i8, 64*20 },
{ ISD::SDIV, MVT::v32i16, 32*20 },
- { ISD::SDIV, MVT::v16i32, 16*20 },
- { ISD::SDIV, MVT::v8i64, 8*20 },
{ ISD::UDIV, MVT::v64i8, 64*20 },
- { ISD::UDIV, MVT::v32i16, 32*20 },
- { ISD::UDIV, MVT::v16i32, 16*20 },
- { ISD::UDIV, MVT::v8i64, 8*20 },
+ { ISD::UDIV, MVT::v32i16, 32*20 }
};
// Look for AVX512BW lowering tricks for custom cases.
- if (ST->hasBWI()) {
- if (const auto *Entry = CostTableLookup(AVX512BWCostTable, ISD,
- LT.second))
+ if (ST->hasBWI())
+ if (const auto *Entry = CostTableLookup(AVX512BWCostTable, ISD, LT.second))
return LT.first * Entry->Cost;
- }
static const CostTblEntry AVX512CostTable[] = {
- { ISD::SHL, MVT::v16i32, 1 },
- { ISD::SRL, MVT::v16i32, 1 },
- { ISD::SRA, MVT::v16i32, 1 },
- { ISD::SHL, MVT::v8i64, 1 },
- { ISD::SRL, MVT::v8i64, 1 },
- { ISD::SRA, MVT::v8i64, 1 },
-
- { ISD::MUL, MVT::v32i8, 13 }, // extend/pmullw/trunc sequence.
- { ISD::MUL, MVT::v16i8, 5 }, // extend/pmullw/trunc sequence.
+ { ISD::SHL, MVT::v16i32, 1 },
+ { ISD::SRL, MVT::v16i32, 1 },
+ { ISD::SRA, MVT::v16i32, 1 },
+ { ISD::SHL, MVT::v8i64, 1 },
+ { ISD::SRL, MVT::v8i64, 1 },
+ { ISD::SRA, MVT::v8i64, 1 },
+
+ { ISD::MUL, MVT::v32i8, 13 }, // extend/pmullw/trunc sequence.
+ { ISD::MUL, MVT::v16i8, 5 }, // extend/pmullw/trunc sequence.
+ { ISD::MUL, MVT::v16i32, 1 }, // pmulld
+ { ISD::MUL, MVT::v8i64, 8 }, // 3*pmuludq/3*shift/2*add
+
+ // Vectorizing division is a bad idea. See the SSE2 table for more comments.
+ { ISD::SDIV, MVT::v16i32, 16*20 },
+ { ISD::SDIV, MVT::v8i64, 8*20 },
+ { ISD::UDIV, MVT::v16i32, 16*20 },
+ { ISD::UDIV, MVT::v8i64, 8*20 }
};
- if (ST->hasAVX512()) {
+ if (ST->hasAVX512())
if (const auto *Entry = CostTableLookup(AVX512CostTable, ISD, LT.second))
return LT.first * Entry->Cost;
- }
static const CostTblEntry AVX2CostTable[] = {
// Shifts on v4i64/v8i32 on AVX2 is legal even though we declare to
@@ -315,10 +317,9 @@ int X86TTIImpl::getArithmeticInstrCost(
};
// Look for XOP lowering tricks.
- if (ST->hasXOP()) {
+ if (ST->hasXOP())
if (const auto *Entry = CostTableLookup(XOPCostTable, ISD, LT.second))
return LT.first * Entry->Cost;
- }
static const CostTblEntry AVX2CustomCostTable[] = {
{ ISD::SHL, MVT::v32i8, 11 }, // vpblendvb sequence.
@@ -334,6 +335,8 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::MUL, MVT::v32i8, 17 }, // extend/pmullw/trunc sequence.
{ ISD::MUL, MVT::v16i8, 7 }, // extend/pmullw/trunc sequence.
+ { ISD::MUL, MVT::v8i32, 1 }, // pmulld
+ { ISD::MUL, MVT::v4i64, 8 }, // 3*pmuludq/3*shift/2*add
{ ISD::FDIV, MVT::f32, 7 }, // Haswell from http://www.agner.org/
{ ISD::FDIV, MVT::v4f32, 7 }, // Haswell from http://www.agner.org/
@@ -344,11 +347,10 @@ int X86TTIImpl::getArithmeticInstrCost(
};
// Look for AVX2 lowering tricks for custom cases.
- if (ST->hasAVX2()) {
+ if (ST->hasAVX2())
if (const auto *Entry = CostTableLookup(AVX2CustomCostTable, ISD,
LT.second))
return LT.first * Entry->Cost;
- }
static const CostTblEntry AVXCustomCostTable[] = {
{ ISD::MUL, MVT::v32i8, 26 }, // extend/pmullw/trunc sequence.
@@ -372,24 +374,10 @@ int X86TTIImpl::getArithmeticInstrCost(
};
// Look for AVX lowering tricks for custom cases.
- if (ST->hasAVX()) {
+ if (ST->hasAVX())
if (const auto *Entry = CostTableLookup(AVXCustomCostTable, ISD,
LT.second))
return LT.first * Entry->Cost;
- }
-
- static const CostTblEntry SSE42FloatCostTable[] = {
- { ISD::FDIV, MVT::f32, 14 }, // Nehalem from http://www.agner.org/
- { ISD::FDIV, MVT::v4f32, 14 }, // Nehalem from http://www.agner.org/
- { ISD::FDIV, MVT::f64, 22 }, // Nehalem from http://www.agner.org/
- { ISD::FDIV, MVT::v2f64, 22 }, // Nehalem from http://www.agner.org/
- };
-
- if (ST->hasSSE42()) {
- if (const auto *Entry = CostTableLookup(SSE42FloatCostTable, ISD,
- LT.second))
- return LT.first * Entry->Cost;
- }
static const CostTblEntry
SSE2UniformCostTable[] = {
@@ -452,6 +440,17 @@ int X86TTIImpl::getArithmeticInstrCost(
ISD = ISD::MUL;
}
+ static const CostTblEntry SSE42CostTable[] = {
+ { ISD::FDIV, MVT::f32, 14 }, // Nehalem from http://www.agner.org/
+ { ISD::FDIV, MVT::v4f32, 14 }, // Nehalem from http://www.agner.org/
+ { ISD::FDIV, MVT::f64, 22 }, // Nehalem from http://www.agner.org/
+ { ISD::FDIV, MVT::v2f64, 22 }, // Nehalem from http://www.agner.org/
+ };
+
+ if (ST->hasSSE42())
+ if (const auto *Entry = CostTableLookup(SSE42CostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
+
static const CostTblEntry SSE41CostTable[] = {
{ ISD::SHL, MVT::v16i8, 11 }, // pblendvb sequence.
{ ISD::SHL, MVT::v32i8, 2*11 }, // pblendvb sequence.
@@ -471,44 +470,39 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::SRA, MVT::v16i16, 2*14 }, // pblendvb sequence.
{ ISD::SRA, MVT::v4i32, 12 }, // Shift each lane + blend.
{ ISD::SRA, MVT::v8i32, 2*12 }, // Shift each lane + blend.
+
+ { ISD::MUL, MVT::v4i32, 1 } // pmulld
};
- if (ST->hasSSE41()) {
+ if (ST->hasSSE41())
if (const auto *Entry = CostTableLookup(SSE41CostTable, ISD, LT.second))
return LT.first * Entry->Cost;
- }
static const CostTblEntry SSE2CostTable[] = {
// We don't correctly identify costs of casts because they are marked as
// custom.
{ ISD::SHL, MVT::v16i8, 26 }, // cmpgtb sequence.
- { ISD::SHL, MVT::v32i8, 2*26 }, // cmpgtb sequence.
{ ISD::SHL, MVT::v8i16, 32 }, // cmpgtb sequence.
- { ISD::SHL, MVT::v16i16, 2*32 }, // cmpgtb sequence.
{ ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul.
{ ISD::SHL, MVT::v8i32, 2*2*5 }, // We optimized this using mul.
{ ISD::SHL, MVT::v2i64, 4 }, // splat+shuffle sequence.
{ ISD::SHL, MVT::v4i64, 2*4 }, // splat+shuffle sequence.
{ ISD::SRL, MVT::v16i8, 26 }, // cmpgtb sequence.
- { ISD::SRL, MVT::v32i8, 2*26 }, // cmpgtb sequence.
{ ISD::SRL, MVT::v8i16, 32 }, // cmpgtb sequence.
- { ISD::SRL, MVT::v16i16, 2*32 }, // cmpgtb sequence.
{ ISD::SRL, MVT::v4i32, 16 }, // Shift each lane + blend.
- { ISD::SRL, MVT::v8i32, 2*16 }, // Shift each lane + blend.
{ ISD::SRL, MVT::v2i64, 4 }, // splat+shuffle sequence.
{ ISD::SRL, MVT::v4i64, 2*4 }, // splat+shuffle sequence.
{ ISD::SRA, MVT::v16i8, 54 }, // unpacked cmpgtb sequence.
- { ISD::SRA, MVT::v32i8, 2*54 }, // unpacked cmpgtb sequence.
{ ISD::SRA, MVT::v8i16, 32 }, // cmpgtb sequence.
- { ISD::SRA, MVT::v16i16, 2*32 }, // cmpgtb sequence.
{ ISD::SRA, MVT::v4i32, 16 }, // Shift each lane + blend.
- { ISD::SRA, MVT::v8i32, 2*16 }, // Shift each lane + blend.
{ ISD::SRA, MVT::v2i64, 12 }, // srl/xor/sub sequence.
{ ISD::SRA, MVT::v4i64, 2*12 }, // srl/xor/sub sequence.
{ ISD::MUL, MVT::v16i8, 12 }, // extend/pmullw/trunc sequence.
+ { ISD::MUL, MVT::v4i32, 6 }, // 3*pmuludq/4*shuffle
+ { ISD::MUL, MVT::v2i64, 8 }, // 3*pmuludq/3*shift/2*add
{ ISD::FDIV, MVT::f32, 23 }, // Pentium IV from http://www.agner.org/
{ ISD::FDIV, MVT::v4f32, 39 }, // Pentium IV from http://www.agner.org/
@@ -531,10 +525,9 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::UDIV, MVT::v2i64, 2*20 },
};
- if (ST->hasSSE2()) {
+ if (ST->hasSSE2())
if (const auto *Entry = CostTableLookup(SSE2CostTable, ISD, LT.second))
return LT.first * Entry->Cost;
- }
static const CostTblEntry AVX1CostTable[] = {
// We don't have to scalarize unsupported ops. We can issue two half-sized
@@ -553,307 +546,278 @@ int X86TTIImpl::getArithmeticInstrCost(
// A v4i64 multiply is custom lowered as two split v2i64 vectors that then
// are lowered as a series of long multiplies(3), shifts(3) and adds(2)
// Because we believe v4i64 to be a legal type, we must also include the
- // split factor of two in the cost table. Therefore, the cost here is 16
+ // extract+insert in the cost table. Therefore, the cost here is 18
// instead of 8.
- { ISD::MUL, MVT::v4i64, 16 },
+ { ISD::MUL, MVT::v4i64, 18 },
};
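// Cost accounting for the v4i64 MUL entry above (illustrative): each v2i64
// half is 3 multiplies + 3 shifts + 2 adds = 8, two halves make 16, and the
// extract/insert needed to split and rejoin the 256-bit value add 2 more,
// giving 18.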
// Look for AVX1 lowering tricks.
- if (ST->hasAVX() && !ST->hasAVX2()) {
- MVT VT = LT.second;
-
- if (const auto *Entry = CostTableLookup(AVX1CostTable, ISD, VT))
+ if (ST->hasAVX() && !ST->hasAVX2())
+ if (const auto *Entry = CostTableLookup(AVX1CostTable, ISD, LT.second))
return LT.first * Entry->Cost;
- }
- // Custom lowering of vectors.
- static const CostTblEntry CustomLowered[] = {
- // A v2i64/v4i64 and multiply is custom lowered as a series of long
- // multiplies(3), shifts(3) and adds(2).
- { ISD::MUL, MVT::v2i64, 8 },
- { ISD::MUL, MVT::v4i64, 8 },
- { ISD::MUL, MVT::v8i64, 8 }
- };
- if (const auto *Entry = CostTableLookup(CustomLowered, ISD, LT.second))
- return LT.first * Entry->Cost;
-
- // Special lowering of v4i32 mul on sse2, sse3: Lower v4i32 mul as 2x shuffle,
- // 2x pmuludq, 2x shuffle.
- if (ISD == ISD::MUL && LT.second == MVT::v4i32 && ST->hasSSE2() &&
- !ST->hasSSE41())
- return LT.first * 6;
-
- static const CostTblEntry SSE1FloatCostTable[] = {
+ static const CostTblEntry SSE1CostTable[] = {
{ ISD::FDIV, MVT::f32, 17 }, // Pentium III from http://www.agner.org/
{ ISD::FDIV, MVT::v4f32, 34 }, // Pentium III from http://www.agner.org/
};
if (ST->hasSSE1())
- if (const auto *Entry = CostTableLookup(SSE1FloatCostTable, ISD,
- LT.second))
+ if (const auto *Entry = CostTableLookup(SSE1CostTable, ISD, LT.second))
return LT.first * Entry->Cost;
+
// Fallback to the default implementation.
return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info);
}
int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
- if (Kind == TTI::SK_Reverse || Kind == TTI::SK_Alternate) {
- // 64-bit packed float vectors (v2f32) are widened to type v4f32.
- // 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
- std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
-
- static const CostTblEntry AVX512VBMIShuffleTbl[] = {
- { TTI::SK_Reverse, MVT::v64i8, 1 }, // vpermb
- { TTI::SK_Reverse, MVT::v32i8, 1 } // vpermb
- };
-
- if (ST->hasVBMI())
- if (const auto *Entry =
- CostTableLookup(AVX512VBMIShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
+ // 64-bit packed float vectors (v2f32) are widened to type v4f32.
+ // 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+
+ // For Broadcasts we are splatting the first element from the first input
+ // register, so we only need to reference that input, and all the output
+ // registers are the same.
+ if (Kind == TTI::SK_Broadcast)
+ LT.first = 1;
+
+ // We are going to permute multiple sources and the result will be in
+ // multiple destinations. We provide an accurate cost only for splits where
+ // the element type remains the same.
+ if (Kind == TTI::SK_PermuteSingleSrc && LT.first != 1) {
+ MVT LegalVT = LT.second;
+ if (LegalVT.getVectorElementType().getSizeInBits() ==
+ Tp->getVectorElementType()->getPrimitiveSizeInBits() &&
+ LegalVT.getVectorNumElements() < Tp->getVectorNumElements()) {
+
+ unsigned VecTySize = DL.getTypeStoreSize(Tp);
+ unsigned LegalVTSize = LegalVT.getStoreSize();
+ // Number of source vectors after legalization:
+ unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize;
+ // Number of destination vectors after legalization:
+ unsigned NumOfDests = LT.first;
+
+ Type *SingleOpTy = VectorType::get(Tp->getVectorElementType(),
+ LegalVT.getVectorNumElements());
+
+ unsigned NumOfShuffles = (NumOfSrcs - 1) * NumOfDests;
+ return NumOfShuffles *
+ getShuffleCost(TTI::SK_PermuteTwoSrc, SingleOpTy, 0, nullptr);
+ }
- static const CostTblEntry AVX512BWShuffleTbl[] = {
- { TTI::SK_Reverse, MVT::v32i16, 1 }, // vpermw
- { TTI::SK_Reverse, MVT::v16i16, 1 }, // vpermw
- { TTI::SK_Reverse, MVT::v64i8, 6 } // vextracti64x4 + 2*vperm2i128
- // + 2*pshufb + vinserti64x4
- };
+ return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+ }
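// Worked example (illustrative, not part of this patch): a single-source
// permute of v32i16 on a subtarget whose widest legal vector is v16i16
// legalizes with LT.first == 2. Then VecTySize == 64 bytes, LegalVTSize ==
// 32 bytes, NumOfSrcs == 2 and NumOfDests == 2, so NumOfShuffles ==
// (2 - 1) * 2 == 2 and the cost is 2 * getShuffleCost(SK_PermuteTwoSrc, v16i16).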
- if (ST->hasBWI())
- if (const auto *Entry =
- CostTableLookup(AVX512BWShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
+ // For 2-input shuffles, we must account for splitting the 2 inputs into many.
+ if (Kind == TTI::SK_PermuteTwoSrc && LT.first != 1) {
+ // We assume that source and destination have the same vector type.
+ int NumOfDests = LT.first;
+ int NumOfShufflesPerDest = LT.first * 2 - 1;
+ LT.first = NumOfDests * NumOfShufflesPerDest;
+ }
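// Worked example (illustrative): a two-source shuffle whose operands each
// legalize into LT.first == 2 registers gives NumOfDests == 2 and
// NumOfShufflesPerDest == 2 * 2 - 1 == 3, so LT.first becomes 6 and the
// matching per-type table cost below is scaled by 6.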
- static const CostTblEntry AVX512ShuffleTbl[] = {
- { TTI::SK_Reverse, MVT::v8f64, 1 }, // vpermpd
- { TTI::SK_Reverse, MVT::v16f32, 1 }, // vpermps
- { TTI::SK_Reverse, MVT::v8i64, 1 }, // vpermq
- { TTI::SK_Reverse, MVT::v16i32, 1 }, // vpermd
- };
+ static const CostTblEntry AVX512VBMIShuffleTbl[] = {
+ { TTI::SK_Reverse, MVT::v64i8, 1 }, // vpermb
+ { TTI::SK_Reverse, MVT::v32i8, 1 }, // vpermb
- if (ST->hasAVX512())
- if (const auto *Entry =
- CostTableLookup(AVX512ShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
+ { TTI::SK_PermuteSingleSrc, MVT::v64i8, 1 }, // vpermb
+ { TTI::SK_PermuteSingleSrc, MVT::v32i8, 1 }, // vpermb
- static const CostTblEntry AVX2ShuffleTbl[] = {
- { TTI::SK_Reverse, MVT::v4f64, 1 }, // vpermpd
- { TTI::SK_Reverse, MVT::v8f32, 1 }, // vpermps
- { TTI::SK_Reverse, MVT::v4i64, 1 }, // vpermq
- { TTI::SK_Reverse, MVT::v8i32, 1 }, // vpermd
- { TTI::SK_Reverse, MVT::v16i16, 2 }, // vperm2i128 + pshufb
- { TTI::SK_Reverse, MVT::v32i8, 2 }, // vperm2i128 + pshufb
+ { TTI::SK_PermuteTwoSrc, MVT::v64i8, 1 }, // vpermt2b
+ { TTI::SK_PermuteTwoSrc, MVT::v32i8, 1 }, // vpermt2b
+ { TTI::SK_PermuteTwoSrc, MVT::v16i8, 1 } // vpermt2b
+ };
- { TTI::SK_Alternate, MVT::v16i16, 1 }, // vpblendw
- { TTI::SK_Alternate, MVT::v32i8, 1 } // vpblendvb
- };
+ if (ST->hasVBMI())
+ if (const auto *Entry =
+ CostTableLookup(AVX512VBMIShuffleTbl, Kind, LT.second))
+ return LT.first * Entry->Cost;
- if (ST->hasAVX2())
- if (const auto *Entry = CostTableLookup(AVX2ShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
+ static const CostTblEntry AVX512BWShuffleTbl[] = {
+ { TTI::SK_Broadcast, MVT::v32i16, 1 }, // vpbroadcastw
+ { TTI::SK_Broadcast, MVT::v64i8, 1 }, // vpbroadcastb
+
+ { TTI::SK_Reverse, MVT::v32i16, 1 }, // vpermw
+ { TTI::SK_Reverse, MVT::v16i16, 1 }, // vpermw
+ { TTI::SK_Reverse, MVT::v64i8, 6 }, // vextracti64x4 + 2*vperm2i128
+ // + 2*pshufb + vinserti64x4
+
+ { TTI::SK_PermuteSingleSrc, MVT::v32i16, 1 }, // vpermw
+ { TTI::SK_PermuteSingleSrc, MVT::v16i16, 1 }, // vpermw
+ { TTI::SK_PermuteSingleSrc, MVT::v8i16, 1 }, // vpermw
+ { TTI::SK_PermuteSingleSrc, MVT::v64i8, 8 }, // extend to v32i16
+ { TTI::SK_PermuteSingleSrc, MVT::v32i8, 3 }, // vpermw + zext/trunc
+
+ { TTI::SK_PermuteTwoSrc, MVT::v32i16, 1 }, // vpermt2w
+ { TTI::SK_PermuteTwoSrc, MVT::v16i16, 1 }, // vpermt2w
+ { TTI::SK_PermuteTwoSrc, MVT::v8i16, 1 }, // vpermt2w
+ { TTI::SK_PermuteTwoSrc, MVT::v32i8, 3 }, // zext + vpermt2w + trunc
+ { TTI::SK_PermuteTwoSrc, MVT::v64i8, 19 }, // 6 * v32i8 + 1
+ { TTI::SK_PermuteTwoSrc, MVT::v16i8, 3 } // zext + vpermt2w + trunc
+ };
- static const CostTblEntry AVX1ShuffleTbl[] = {
- { TTI::SK_Reverse, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd
- { TTI::SK_Reverse, MVT::v8f32, 2 }, // vperm2f128 + vpermilps
- { TTI::SK_Reverse, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd
- { TTI::SK_Reverse, MVT::v8i32, 2 }, // vperm2f128 + vpermilps
- { TTI::SK_Reverse, MVT::v16i16, 4 }, // vextractf128 + 2*pshufb
- // + vinsertf128
- { TTI::SK_Reverse, MVT::v32i8, 4 }, // vextractf128 + 2*pshufb
- // + vinsertf128
-
- { TTI::SK_Alternate, MVT::v4i64, 1 }, // vblendpd
- { TTI::SK_Alternate, MVT::v4f64, 1 }, // vblendpd
- { TTI::SK_Alternate, MVT::v8i32, 1 }, // vblendps
- { TTI::SK_Alternate, MVT::v8f32, 1 }, // vblendps
- { TTI::SK_Alternate, MVT::v16i16, 3 }, // vpand + vpandn + vpor
- { TTI::SK_Alternate, MVT::v32i8, 3 } // vpand + vpandn + vpor
- };
+ if (ST->hasBWI())
+ if (const auto *Entry =
+ CostTableLookup(AVX512BWShuffleTbl, Kind, LT.second))
+ return LT.first * Entry->Cost;
- if (ST->hasAVX())
- if (const auto *Entry = CostTableLookup(AVX1ShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
+ static const CostTblEntry AVX512ShuffleTbl[] = {
+ { TTI::SK_Broadcast, MVT::v8f64, 1 }, // vbroadcastpd
+ { TTI::SK_Broadcast, MVT::v16f32, 1 }, // vbroadcastps
+ { TTI::SK_Broadcast, MVT::v8i64, 1 }, // vpbroadcastq
+ { TTI::SK_Broadcast, MVT::v16i32, 1 }, // vpbroadcastd
+
+ { TTI::SK_Reverse, MVT::v8f64, 1 }, // vpermpd
+ { TTI::SK_Reverse, MVT::v16f32, 1 }, // vpermps
+ { TTI::SK_Reverse, MVT::v8i64, 1 }, // vpermq
+ { TTI::SK_Reverse, MVT::v16i32, 1 }, // vpermd
+
+ { TTI::SK_PermuteSingleSrc, MVT::v8f64, 1 }, // vpermpd
+ { TTI::SK_PermuteSingleSrc, MVT::v4f64, 1 }, // vpermpd
+ { TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // vpermpd
+ { TTI::SK_PermuteSingleSrc, MVT::v16f32, 1 }, // vpermps
+ { TTI::SK_PermuteSingleSrc, MVT::v8f32, 1 }, // vpermps
+ { TTI::SK_PermuteSingleSrc, MVT::v4f32, 1 }, // vpermps
+ { TTI::SK_PermuteSingleSrc, MVT::v8i64, 1 }, // vpermq
+ { TTI::SK_PermuteSingleSrc, MVT::v4i64, 1 }, // vpermq
+ { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // vpermq
+ { TTI::SK_PermuteSingleSrc, MVT::v16i32, 1 }, // vpermd
+ { TTI::SK_PermuteSingleSrc, MVT::v8i32, 1 }, // vpermd
+ { TTI::SK_PermuteSingleSrc, MVT::v4i32, 1 }, // vpermd
+ { TTI::SK_PermuteSingleSrc, MVT::v16i8, 1 }, // pshufb
+
+ { TTI::SK_PermuteTwoSrc, MVT::v8f64, 1 }, // vpermt2pd
+ { TTI::SK_PermuteTwoSrc, MVT::v16f32, 1 }, // vpermt2ps
+ { TTI::SK_PermuteTwoSrc, MVT::v8i64, 1 }, // vpermt2q
+ { TTI::SK_PermuteTwoSrc, MVT::v16i32, 1 }, // vpermt2d
+ { TTI::SK_PermuteTwoSrc, MVT::v4f64, 1 }, // vpermt2pd
+ { TTI::SK_PermuteTwoSrc, MVT::v8f32, 1 }, // vpermt2ps
+ { TTI::SK_PermuteTwoSrc, MVT::v4i64, 1 }, // vpermt2q
+ { TTI::SK_PermuteTwoSrc, MVT::v8i32, 1 }, // vpermt2d
+ { TTI::SK_PermuteTwoSrc, MVT::v2f64, 1 }, // vpermt2pd
+ { TTI::SK_PermuteTwoSrc, MVT::v4f32, 1 }, // vpermt2ps
+ { TTI::SK_PermuteTwoSrc, MVT::v2i64, 1 }, // vpermt2q
+ { TTI::SK_PermuteTwoSrc, MVT::v4i32, 1 } // vpermt2d
+ };
- static const CostTblEntry SSE41ShuffleTbl[] = {
- { TTI::SK_Alternate, MVT::v2i64, 1 }, // pblendw
- { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd
- { TTI::SK_Alternate, MVT::v4i32, 1 }, // pblendw
- { TTI::SK_Alternate, MVT::v4f32, 1 }, // blendps
- { TTI::SK_Alternate, MVT::v8i16, 1 }, // pblendw
- { TTI::SK_Alternate, MVT::v16i8, 1 } // pblendvb
- };
-
- if (ST->hasSSE41())
- if (const auto *Entry = CostTableLookup(SSE41ShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
+ if (ST->hasAVX512())
+ if (const auto *Entry = CostTableLookup(AVX512ShuffleTbl, Kind, LT.second))
+ return LT.first * Entry->Cost;
- static const CostTblEntry SSSE3ShuffleTbl[] = {
- { TTI::SK_Reverse, MVT::v8i16, 1 }, // pshufb
- { TTI::SK_Reverse, MVT::v16i8, 1 }, // pshufb
+ static const CostTblEntry AVX2ShuffleTbl[] = {
+ { TTI::SK_Broadcast, MVT::v4f64, 1 }, // vbroadcastpd
+ { TTI::SK_Broadcast, MVT::v8f32, 1 }, // vbroadcastps
+ { TTI::SK_Broadcast, MVT::v4i64, 1 }, // vpbroadcastq
+ { TTI::SK_Broadcast, MVT::v8i32, 1 }, // vpbroadcastd
+ { TTI::SK_Broadcast, MVT::v16i16, 1 }, // vpbroadcastw
+ { TTI::SK_Broadcast, MVT::v32i8, 1 }, // vpbroadcastb
+
+ { TTI::SK_Reverse, MVT::v4f64, 1 }, // vpermpd
+ { TTI::SK_Reverse, MVT::v8f32, 1 }, // vpermps
+ { TTI::SK_Reverse, MVT::v4i64, 1 }, // vpermq
+ { TTI::SK_Reverse, MVT::v8i32, 1 }, // vpermd
+ { TTI::SK_Reverse, MVT::v16i16, 2 }, // vperm2i128 + pshufb
+ { TTI::SK_Reverse, MVT::v32i8, 2 }, // vperm2i128 + pshufb
+
+ { TTI::SK_Alternate, MVT::v16i16, 1 }, // vpblendw
+ { TTI::SK_Alternate, MVT::v32i8, 1 } // vpblendvb
+ };
- { TTI::SK_Alternate, MVT::v8i16, 3 }, // pshufb + pshufb + por
- { TTI::SK_Alternate, MVT::v16i8, 3 } // pshufb + pshufb + por
- };
+ if (ST->hasAVX2())
+ if (const auto *Entry = CostTableLookup(AVX2ShuffleTbl, Kind, LT.second))
+ return LT.first * Entry->Cost;
- if (ST->hasSSSE3())
- if (const auto *Entry = CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
+ static const CostTblEntry AVX1ShuffleTbl[] = {
+ { TTI::SK_Broadcast, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd
+ { TTI::SK_Broadcast, MVT::v8f32, 2 }, // vperm2f128 + vpermilps
+ { TTI::SK_Broadcast, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd
+ { TTI::SK_Broadcast, MVT::v8i32, 2 }, // vperm2f128 + vpermilps
+ { TTI::SK_Broadcast, MVT::v16i16, 3 }, // vpshuflw + vpshufd + vinsertf128
+ { TTI::SK_Broadcast, MVT::v32i8, 2 }, // vpshufb + vinsertf128
+
+ { TTI::SK_Reverse, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd
+ { TTI::SK_Reverse, MVT::v8f32, 2 }, // vperm2f128 + vpermilps
+ { TTI::SK_Reverse, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd
+ { TTI::SK_Reverse, MVT::v8i32, 2 }, // vperm2f128 + vpermilps
+ { TTI::SK_Reverse, MVT::v16i16, 4 }, // vextractf128 + 2*pshufb
+ // + vinsertf128
+ { TTI::SK_Reverse, MVT::v32i8, 4 }, // vextractf128 + 2*pshufb
+ // + vinsertf128
+
+ { TTI::SK_Alternate, MVT::v4i64, 1 }, // vblendpd
+ { TTI::SK_Alternate, MVT::v4f64, 1 }, // vblendpd
+ { TTI::SK_Alternate, MVT::v8i32, 1 }, // vblendps
+ { TTI::SK_Alternate, MVT::v8f32, 1 }, // vblendps
+ { TTI::SK_Alternate, MVT::v16i16, 3 }, // vpand + vpandn + vpor
+ { TTI::SK_Alternate, MVT::v32i8, 3 } // vpand + vpandn + vpor
+ };
- static const CostTblEntry SSE2ShuffleTbl[] = {
- { TTI::SK_Reverse, MVT::v2f64, 1 }, // shufpd
- { TTI::SK_Reverse, MVT::v2i64, 1 }, // pshufd
- { TTI::SK_Reverse, MVT::v4i32, 1 }, // pshufd
- { TTI::SK_Reverse, MVT::v8i16, 3 }, // pshuflw + pshufhw + pshufd
- { TTI::SK_Reverse, MVT::v16i8, 9 }, // 2*pshuflw + 2*pshufhw
- // + 2*pshufd + 2*unpck + packus
-
- { TTI::SK_Alternate, MVT::v2i64, 1 }, // movsd
- { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd
- { TTI::SK_Alternate, MVT::v4i32, 2 }, // 2*shufps
- { TTI::SK_Alternate, MVT::v8i16, 3 }, // pand + pandn + por
- { TTI::SK_Alternate, MVT::v16i8, 3 } // pand + pandn + por
- };
-
- if (ST->hasSSE2())
- if (const auto *Entry = CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
+ if (ST->hasAVX())
+ if (const auto *Entry = CostTableLookup(AVX1ShuffleTbl, Kind, LT.second))
+ return LT.first * Entry->Cost;
- static const CostTblEntry SSE1ShuffleTbl[] = {
- { TTI::SK_Reverse, MVT::v4f32, 1 }, // shufps
- { TTI::SK_Alternate, MVT::v4f32, 2 } // 2*shufps
- };
+ static const CostTblEntry SSE41ShuffleTbl[] = {
+ { TTI::SK_Alternate, MVT::v2i64, 1 }, // pblendw
+ { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd
+ { TTI::SK_Alternate, MVT::v4i32, 1 }, // pblendw
+ { TTI::SK_Alternate, MVT::v4f32, 1 }, // blendps
+ { TTI::SK_Alternate, MVT::v8i16, 1 }, // pblendw
+ { TTI::SK_Alternate, MVT::v16i8, 1 } // pblendvb
+ };
- if (ST->hasSSE1())
- if (const auto *Entry = CostTableLookup(SSE1ShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
+ if (ST->hasSSE41())
+ if (const auto *Entry = CostTableLookup(SSE41ShuffleTbl, Kind, LT.second))
+ return LT.first * Entry->Cost;
- } else if (Kind == TTI::SK_PermuteTwoSrc) {
- // We assume that source and destination have the same vector type.
- std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
- int NumOfDests = LT.first;
- int NumOfShufflesPerDest = LT.first * 2 - 1;
- int NumOfShuffles = NumOfDests * NumOfShufflesPerDest;
-
- static const CostTblEntry AVX512VBMIShuffleTbl[] = {
- {ISD::VECTOR_SHUFFLE, MVT::v64i8, 1}, // vpermt2b
- {ISD::VECTOR_SHUFFLE, MVT::v32i8, 1}, // vpermt2b
- {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1} // vpermt2b
- };
-
- if (ST->hasVBMI())
- if (const auto *Entry = CostTableLookup(AVX512VBMIShuffleTbl,
- ISD::VECTOR_SHUFFLE, LT.second))
- return NumOfShuffles * Entry->Cost;
-
- static const CostTblEntry AVX512BWShuffleTbl[] = {
- {ISD::VECTOR_SHUFFLE, MVT::v32i16, 1}, // vpermt2w
- {ISD::VECTOR_SHUFFLE, MVT::v16i16, 1}, // vpermt2w
- {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1}, // vpermt2w
- {ISD::VECTOR_SHUFFLE, MVT::v32i8, 3}, // zext + vpermt2w + trunc
- {ISD::VECTOR_SHUFFLE, MVT::v64i8, 19}, // 6 * v32i8 + 1
- {ISD::VECTOR_SHUFFLE, MVT::v16i8, 3} // zext + vpermt2w + trunc
- };
-
- if (ST->hasBWI())
- if (const auto *Entry = CostTableLookup(AVX512BWShuffleTbl,
- ISD::VECTOR_SHUFFLE, LT.second))
- return NumOfShuffles * Entry->Cost;
-
- static const CostTblEntry AVX512ShuffleTbl[] = {
- {ISD::VECTOR_SHUFFLE, MVT::v8f64, 1}, // vpermt2pd
- {ISD::VECTOR_SHUFFLE, MVT::v16f32, 1}, // vpermt2ps
- {ISD::VECTOR_SHUFFLE, MVT::v8i64, 1}, // vpermt2q
- {ISD::VECTOR_SHUFFLE, MVT::v16i32, 1}, // vpermt2d
- {ISD::VECTOR_SHUFFLE, MVT::v4f64, 1}, // vpermt2pd
- {ISD::VECTOR_SHUFFLE, MVT::v8f32, 1}, // vpermt2ps
- {ISD::VECTOR_SHUFFLE, MVT::v4i64, 1}, // vpermt2q
- {ISD::VECTOR_SHUFFLE, MVT::v8i32, 1}, // vpermt2d
- {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // vpermt2pd
- {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1}, // vpermt2ps
- {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // vpermt2q
- {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1} // vpermt2d
- };
+ static const CostTblEntry SSSE3ShuffleTbl[] = {
+ { TTI::SK_Broadcast, MVT::v8i16, 1 }, // pshufb
+ { TTI::SK_Broadcast, MVT::v16i8, 1 }, // pshufb
- if (ST->hasAVX512())
- if (const auto *Entry =
- CostTableLookup(AVX512ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
- return NumOfShuffles * Entry->Cost;
-
- } else if (Kind == TTI::SK_PermuteSingleSrc) {
- std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
- if (LT.first == 1) {
-
- static const CostTblEntry AVX512VBMIShuffleTbl[] = {
- {ISD::VECTOR_SHUFFLE, MVT::v64i8, 1}, // vpermb
- {ISD::VECTOR_SHUFFLE, MVT::v32i8, 1} // vpermb
- };
-
- if (ST->hasVBMI())
- if (const auto *Entry = CostTableLookup(AVX512VBMIShuffleTbl,
- ISD::VECTOR_SHUFFLE, LT.second))
- return Entry->Cost;
-
- static const CostTblEntry AVX512BWShuffleTbl[] = {
- {ISD::VECTOR_SHUFFLE, MVT::v32i16, 1}, // vpermw
- {ISD::VECTOR_SHUFFLE, MVT::v16i16, 1}, // vpermw
- {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1}, // vpermw
- {ISD::VECTOR_SHUFFLE, MVT::v64i8, 8}, // extend to v32i16
- {ISD::VECTOR_SHUFFLE, MVT::v32i8, 3} // vpermw + zext/trunc
- };
-
- if (ST->hasBWI())
- if (const auto *Entry = CostTableLookup(AVX512BWShuffleTbl,
- ISD::VECTOR_SHUFFLE, LT.second))
- return Entry->Cost;
-
- static const CostTblEntry AVX512ShuffleTbl[] = {
- {ISD::VECTOR_SHUFFLE, MVT::v8f64, 1}, // vpermpd
- {ISD::VECTOR_SHUFFLE, MVT::v4f64, 1}, // vpermpd
- {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // vpermpd
- {ISD::VECTOR_SHUFFLE, MVT::v16f32, 1}, // vpermps
- {ISD::VECTOR_SHUFFLE, MVT::v8f32, 1}, // vpermps
- {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1}, // vpermps
- {ISD::VECTOR_SHUFFLE, MVT::v8i64, 1}, // vpermq
- {ISD::VECTOR_SHUFFLE, MVT::v4i64, 1}, // vpermq
- {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // vpermq
- {ISD::VECTOR_SHUFFLE, MVT::v16i32, 1}, // vpermd
- {ISD::VECTOR_SHUFFLE, MVT::v8i32, 1}, // vpermd
- {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1}, // vpermd
- {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1} // pshufb
- };
-
- if (ST->hasAVX512())
- if (const auto *Entry =
- CostTableLookup(AVX512ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
- return Entry->Cost;
-
- } else {
- // We are going to permute multiple sources and the result will be in
- // multiple destinations. Providing an accurate cost only for splits where
- // the element type remains the same.
-
- MVT LegalVT = LT.second;
- if (LegalVT.getVectorElementType().getSizeInBits() ==
- Tp->getVectorElementType()->getPrimitiveSizeInBits() &&
- LegalVT.getVectorNumElements() < Tp->getVectorNumElements()) {
-
- unsigned VecTySize = DL.getTypeStoreSize(Tp);
- unsigned LegalVTSize = LegalVT.getStoreSize();
- // Number of source vectors after legalization:
- unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize;
- // Number of destination vectors after legalization:
- unsigned NumOfDests = LT.first;
-
- Type *SingleOpTy = VectorType::get(Tp->getVectorElementType(),
- LegalVT.getVectorNumElements());
-
- unsigned NumOfShuffles = (NumOfSrcs - 1) * NumOfDests;
- return NumOfShuffles *
- getShuffleCost(TTI::SK_PermuteTwoSrc, SingleOpTy, 0, nullptr);
- }
- }
- }
+ { TTI::SK_Reverse, MVT::v8i16, 1 }, // pshufb
+ { TTI::SK_Reverse, MVT::v16i8, 1 }, // pshufb
+
+ { TTI::SK_Alternate, MVT::v8i16, 3 }, // pshufb + pshufb + por
+ { TTI::SK_Alternate, MVT::v16i8, 3 } // pshufb + pshufb + por
+ };
+
+ if (ST->hasSSSE3())
+ if (const auto *Entry = CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second))
+ return LT.first * Entry->Cost;
+
+ static const CostTblEntry SSE2ShuffleTbl[] = {
+ { TTI::SK_Broadcast, MVT::v2f64, 1 }, // shufpd
+ { TTI::SK_Broadcast, MVT::v2i64, 1 }, // pshufd
+ { TTI::SK_Broadcast, MVT::v4i32, 1 }, // pshufd
+ { TTI::SK_Broadcast, MVT::v8i16, 2 }, // pshuflw + pshufd
+ { TTI::SK_Broadcast, MVT::v16i8, 3 }, // unpck + pshuflw + pshufd
+
+ { TTI::SK_Reverse, MVT::v2f64, 1 }, // shufpd
+ { TTI::SK_Reverse, MVT::v2i64, 1 }, // pshufd
+ { TTI::SK_Reverse, MVT::v4i32, 1 }, // pshufd
+ { TTI::SK_Reverse, MVT::v8i16, 3 }, // pshuflw + pshufhw + pshufd
+ { TTI::SK_Reverse, MVT::v16i8, 9 }, // 2*pshuflw + 2*pshufhw
+ // + 2*pshufd + 2*unpck + packus
+
+ { TTI::SK_Alternate, MVT::v2i64, 1 }, // movsd
+ { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd
+ { TTI::SK_Alternate, MVT::v4i32, 2 }, // 2*shufps
+ { TTI::SK_Alternate, MVT::v8i16, 3 }, // pand + pandn + por
+ { TTI::SK_Alternate, MVT::v16i8, 3 } // pand + pandn + por
+ };
+
+ if (ST->hasSSE2())
+ if (const auto *Entry = CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
+ return LT.first * Entry->Cost;
+
+ static const CostTblEntry SSE1ShuffleTbl[] = {
+ { TTI::SK_Broadcast, MVT::v4f32, 1 }, // shufps
+ { TTI::SK_Reverse, MVT::v4f32, 1 }, // shufps
+ { TTI::SK_Alternate, MVT::v4f32, 2 } // 2*shufps
+ };
+
+ if (ST->hasSSE1())
+ if (const auto *Entry = CostTableLookup(SSE1ShuffleTbl, Kind, LT.second))
+ return LT.first * Entry->Cost;
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
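
The tables above all feed the same lookup-and-scale pattern: find the first entry matching the shuffle kind and the legalized vector type, then multiply its cost by the legalization factor (LT.first). A minimal, self-contained sketch of that pattern, using illustrative enums rather than LLVM's CostTblEntry/CostTableLookup helpers:

#include <optional>
#include <vector>

// Illustrative stand-ins for TTI::ShuffleKind and MVT::SimpleValueType.
enum class ShuffleKind { Broadcast, Reverse, Alternate };
enum class SimpleVT { v4f32, v8f32, v4f64 };

struct CostEntry {
  ShuffleKind Kind;
  SimpleVT VT;
  int Cost;
};

// First matching entry wins; the caller scales the result by the
// legalization factor (LT.first in the pass above).
std::optional<int> lookupCost(const std::vector<CostEntry> &Tbl,
                              ShuffleKind Kind, SimpleVT VT) {
  for (const CostEntry &E : Tbl)
    if (E.Kind == Kind && E.VT == VT)
      return E.Cost;
  return std::nullopt;
}

For example, an SK_Reverse of v8f32 on an AVX1 target hits the { SK_Reverse, v8f32, 2 } entry, so a type that legalizes in one piece (LT.first == 1) is costed at 2.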
@@ -1623,17 +1587,29 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
return Cost+LT.first;
}
-int X86TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
+int X86TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
+ const SCEV *Ptr) {
// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
// extra micro-ops can significantly decrease throughput.
unsigned NumVectorInstToHideOverhead = 10;
- if (Ty->isVectorTy() && IsComplex)
- return NumVectorInstToHideOverhead;
+  // The cost of computing a strided access is hidden by the X86 addressing
+  // modes, regardless of the stride value. We don't believe there is a
+  // difference between constant strided access in general and a constant
+  // stride whose value is less than or equal to 64.
+  // Even in the case of a (loop-invariant) stride whose value is not known at
+ // compile time, the address computation will not incur more than one extra
+ // ADD instruction.
+ if (Ty->isVectorTy() && SE) {
+ if (!BaseT::isStridedAccess(Ptr))
+ return NumVectorInstToHideOverhead;
+ if (!BaseT::getConstantStrideStep(SE, Ptr))
+ return 1;
+ }
- return BaseT::getAddressComputationCost(Ty, IsComplex);
+ return BaseT::getAddressComputationCost(Ty, SE, Ptr);
}
int X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy,
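
The decision made by the new getAddressComputationCost has three cases. The sketch below restates it with plain flags for illustration; in the real code the flags come from ScalarEvolution via isStridedAccess and getConstantStrideStep, and the whole special case only applies when a ScalarEvolution is provided:

// Hedged restatement of the heuristic above; the constants are the
// heuristics from the code, not measured latencies.
int addressComputationCost(bool IsVectorTy, bool IsStrided,
                           bool HasConstantStride, int BaseCost) {
  const int NumVectorInstToHideOverhead = 10;
  if (IsVectorTy) {
    if (!IsStrided)
      return NumVectorInstToHideOverhead; // non-strided vector addressing
    if (!HasConstantStride)
      return 1; // unknown but loop-invariant stride: one extra ADD
  }
  return BaseCost; // constant stride or scalar: defer to the base model
}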
diff --git a/lib/Target/X86/X86TargetTransformInfo.h b/lib/Target/X86/X86TargetTransformInfo.h
index f6bcb9f569e4..c013805f4321 100644
--- a/lib/Target/X86/X86TargetTransformInfo.h
+++ b/lib/Target/X86/X86TargetTransformInfo.h
@@ -71,7 +71,8 @@ public:
unsigned AddressSpace);
int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
bool VariableMask, unsigned Alignment);
- int getAddressComputationCost(Type *PtrTy, bool IsComplex);
+ int getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
+ const SCEV *Ptr);
int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Type *> Tys, FastMathFlags FMF);
diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp
index 6dd95f8dcd55..6b32f6c31f72 100644
--- a/lib/Transforms/IPO/FunctionImport.cpp
+++ b/lib/Transforms/IPO/FunctionImport.cpp
@@ -36,7 +36,10 @@
using namespace llvm;
-STATISTIC(NumImported, "Number of functions imported");
+STATISTIC(NumImportedFunctions, "Number of functions imported");
+STATISTIC(NumImportedModules, "Number of modules imported from");
+STATISTIC(NumDeadSymbols, "Number of dead stripped symbols in index");
+STATISTIC(NumLiveSymbols, "Number of live symbols in index");
/// Limit on instruction count of imported functions.
static cl::opt<unsigned> ImportInstrLimit(
@@ -69,6 +72,9 @@ static cl::opt<float> ImportColdMultiplier(
static cl::opt<bool> PrintImports("print-imports", cl::init(false), cl::Hidden,
cl::desc("Print imported functions"));
+static cl::opt<bool> ComputeDead("compute-dead", cl::init(true), cl::Hidden,
+ cl::desc("Compute dead symbols"));
+
// Temporary allows the function import pass to disable always linking
// referenced discardable symbols.
static cl::opt<bool>
@@ -105,78 +111,6 @@ static std::unique_ptr<Module> loadFile(const std::string &FileName,
namespace {
-// Return true if the Summary describes a GlobalValue that can be externally
-// referenced, i.e. it does not need renaming (linkage is not local) or renaming
-// is possible (does not have a section for instance).
-static bool canBeExternallyReferenced(const GlobalValueSummary &Summary) {
- if (!Summary.needsRenaming())
- return true;
-
- if (Summary.noRename())
- // Can't externally reference a global that needs renaming if has a section
- // or is referenced from inline assembly, for example.
- return false;
-
- return true;
-}
-
-// Return true if \p GUID describes a GlobalValue that can be externally
-// referenced, i.e. it does not need renaming (linkage is not local) or
-// renaming is possible (does not have a section for instance).
-static bool canBeExternallyReferenced(const ModuleSummaryIndex &Index,
- GlobalValue::GUID GUID) {
- auto Summaries = Index.findGlobalValueSummaryList(GUID);
- if (Summaries == Index.end())
- return true;
- if (Summaries->second.size() != 1)
- // If there are multiple globals with this GUID, then we know it is
- // not a local symbol, and it is necessarily externally referenced.
- return true;
-
- // We don't need to check for the module path, because if it can't be
- // externally referenced and we call it, it is necessarilly in the same
- // module
- return canBeExternallyReferenced(**Summaries->second.begin());
-}
-
-// Return true if the global described by \p Summary can be imported in another
-// module.
-static bool eligibleForImport(const ModuleSummaryIndex &Index,
- const GlobalValueSummary &Summary) {
- if (!canBeExternallyReferenced(Summary))
- // Can't import a global that needs renaming if has a section for instance.
- // FIXME: we may be able to import it by copying it without promotion.
- return false;
-
- // Don't import functions that are not viable to inline.
- if (Summary.isNotViableToInline())
- return false;
-
- // Check references (and potential calls) in the same module. If the current
- // value references a global that can't be externally referenced it is not
- // eligible for import. First check the flag set when we have possible
- // opaque references (e.g. inline asm calls), then check the call and
- // reference sets.
- if (Summary.hasInlineAsmMaybeReferencingInternal())
- return false;
- bool AllRefsCanBeExternallyReferenced =
- llvm::all_of(Summary.refs(), [&](const ValueInfo &VI) {
- return canBeExternallyReferenced(Index, VI.getGUID());
- });
- if (!AllRefsCanBeExternallyReferenced)
- return false;
-
- if (auto *FuncSummary = dyn_cast<FunctionSummary>(&Summary)) {
- bool AllCallsCanBeExternallyReferenced = llvm::all_of(
- FuncSummary->calls(), [&](const FunctionSummary::EdgeTy &Edge) {
- return canBeExternallyReferenced(Index, Edge.first.getGUID());
- });
- if (!AllCallsCanBeExternallyReferenced)
- return false;
- }
- return true;
-}
-
/// Given a list of possible callee implementation for a call site, select one
/// that fits the \p Threshold.
///
@@ -214,7 +148,7 @@ selectCallee(const ModuleSummaryIndex &Index,
if (Summary->instCount() > Threshold)
return false;
- if (!eligibleForImport(Index, *Summary))
+ if (Summary->notEligibleToImport())
return false;
return true;
@@ -346,7 +280,8 @@ static void computeImportForFunction(
static void ComputeImportForModule(
const GVSummaryMapTy &DefinedGVSummaries, const ModuleSummaryIndex &Index,
FunctionImporter::ImportMapTy &ImportList,
- StringMap<FunctionImporter::ExportSetTy> *ExportLists = nullptr) {
+ StringMap<FunctionImporter::ExportSetTy> *ExportLists = nullptr,
+ const DenseSet<GlobalValue::GUID> *DeadSymbols = nullptr) {
// Worklist contains the list of function imported in this module, for which
// we will analyse the callees and may import further down the callgraph.
SmallVector<EdgeInfo, 128> Worklist;
@@ -354,6 +289,10 @@ static void ComputeImportForModule(
// Populate the worklist with the import for the functions in the current
// module
for (auto &GVSummary : DefinedGVSummaries) {
+ if (DeadSymbols && DeadSymbols->count(GVSummary.first)) {
+ DEBUG(dbgs() << "Ignores Dead GUID: " << GVSummary.first << "\n");
+ continue;
+ }
auto *Summary = GVSummary.second;
if (auto *AS = dyn_cast<AliasSummary>(Summary))
Summary = &AS->getAliasee();
@@ -393,14 +332,15 @@ void llvm::ComputeCrossModuleImport(
const ModuleSummaryIndex &Index,
const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
StringMap<FunctionImporter::ImportMapTy> &ImportLists,
- StringMap<FunctionImporter::ExportSetTy> &ExportLists) {
+ StringMap<FunctionImporter::ExportSetTy> &ExportLists,
+ const DenseSet<GlobalValue::GUID> *DeadSymbols) {
// For each module that has function defined, compute the import/export lists.
for (auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) {
auto &ImportList = ImportLists[DefinedGVSummaries.first()];
DEBUG(dbgs() << "Computing import for Module '"
<< DefinedGVSummaries.first() << "'\n");
ComputeImportForModule(DefinedGVSummaries.second, Index, ImportList,
- &ExportLists);
+ &ExportLists, DeadSymbols);
}
// When computing imports we added all GUIDs referenced by anything
@@ -462,6 +402,86 @@ void llvm::ComputeCrossModuleImportForModule(
#endif
}
+DenseSet<GlobalValue::GUID> llvm::computeDeadSymbols(
+ const ModuleSummaryIndex &Index,
+ const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
+ if (!ComputeDead)
+ return DenseSet<GlobalValue::GUID>();
+ if (GUIDPreservedSymbols.empty())
+    // Don't do anything when nothing is live; this is friendly to tests.
+ return DenseSet<GlobalValue::GUID>();
+ DenseSet<GlobalValue::GUID> LiveSymbols = GUIDPreservedSymbols;
+ SmallVector<GlobalValue::GUID, 128> Worklist;
+ Worklist.reserve(LiveSymbols.size() * 2);
+ for (auto GUID : LiveSymbols) {
+ DEBUG(dbgs() << "Live root: " << GUID << "\n");
+ Worklist.push_back(GUID);
+ }
+ // Add values flagged in the index as live roots to the worklist.
+ for (const auto &Entry : Index) {
+ bool IsLiveRoot = llvm::any_of(
+ Entry.second,
+ [&](const std::unique_ptr<llvm::GlobalValueSummary> &Summary) {
+ return Summary->liveRoot();
+ });
+ if (!IsLiveRoot)
+ continue;
+ DEBUG(dbgs() << "Live root (summary): " << Entry.first << "\n");
+ Worklist.push_back(Entry.first);
+ }
+
+ while (!Worklist.empty()) {
+ auto GUID = Worklist.pop_back_val();
+ auto It = Index.findGlobalValueSummaryList(GUID);
+ if (It == Index.end()) {
+ DEBUG(dbgs() << "Not in index: " << GUID << "\n");
+ continue;
+ }
+
+ // FIXME: we should only make the prevailing copy live here
+ for (auto &Summary : It->second) {
+ for (auto Ref : Summary->refs()) {
+ auto RefGUID = Ref.getGUID();
+ if (LiveSymbols.insert(RefGUID).second) {
+ DEBUG(dbgs() << "Marking live (ref): " << RefGUID << "\n");
+ Worklist.push_back(RefGUID);
+ }
+ }
+ if (auto *FS = dyn_cast<FunctionSummary>(Summary.get())) {
+ for (auto Call : FS->calls()) {
+ auto CallGUID = Call.first.getGUID();
+ if (LiveSymbols.insert(CallGUID).second) {
+ DEBUG(dbgs() << "Marking live (call): " << CallGUID << "\n");
+ Worklist.push_back(CallGUID);
+ }
+ }
+ }
+ if (auto *AS = dyn_cast<AliasSummary>(Summary.get())) {
+ auto AliaseeGUID = AS->getAliasee().getOriginalName();
+ if (LiveSymbols.insert(AliaseeGUID).second) {
+ DEBUG(dbgs() << "Marking live (alias): " << AliaseeGUID << "\n");
+ Worklist.push_back(AliaseeGUID);
+ }
+ }
+ }
+ }
+ DenseSet<GlobalValue::GUID> DeadSymbols;
+ DeadSymbols.reserve(
+ std::min(Index.size(), Index.size() - LiveSymbols.size()));
+ for (auto &Entry : Index) {
+ auto GUID = Entry.first;
+ if (!LiveSymbols.count(GUID)) {
+ DEBUG(dbgs() << "Marking dead: " << GUID << "\n");
+ DeadSymbols.insert(GUID);
+ }
+ }
+ DEBUG(dbgs() << LiveSymbols.size() << " symbols Live, and "
+ << DeadSymbols.size() << " symbols Dead \n");
+ NumDeadSymbols += DeadSymbols.size();
+ NumLiveSymbols += LiveSymbols.size();
+ return DeadSymbols;
+}
+
/// Compute the set of summaries needed for a ThinLTO backend compilation of
/// \p ModulePath.
void llvm::gatherImportedSummariesForModule(
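
computeDeadSymbols is a standard worklist liveness propagation over the summary index: seed the worklist with the preserved GUIDs and any summaries flagged as live roots, transitively mark everything reachable through refs, calls, and aliasees, and report whatever remains as dead. A minimal sketch of the same idea over a plain adjacency map (GUIDs modelled as 64-bit integers; DenseSet, summary lists, and the ref/call/alias distinctions are elided):

#include <cstdint>
#include <deque>
#include <unordered_map>
#include <unordered_set>
#include <vector>

using GUID = uint64_t;

// Propagate liveness from the roots through the edges (refs, calls and
// aliasees collapsed into one successor list), then report everything in
// the index that was never reached.
std::unordered_set<GUID>
computeDead(const std::unordered_map<GUID, std::vector<GUID>> &Edges,
            const std::unordered_set<GUID> &Roots) {
  std::unordered_set<GUID> Live(Roots.begin(), Roots.end());
  std::deque<GUID> Worklist(Roots.begin(), Roots.end());
  while (!Worklist.empty()) {
    GUID G = Worklist.front();
    Worklist.pop_front();
    auto It = Edges.find(G);
    if (It == Edges.end())
      continue; // not in the index: nothing to propagate
    for (GUID Succ : It->second)
      if (Live.insert(Succ).second) // newly live: keep walking from it
        Worklist.push_back(Succ);
  }
  std::unordered_set<GUID> Dead;
  for (const auto &Entry : Edges)
    if (!Live.count(Entry.first))
      Dead.insert(Entry.first);
  return Dead;
}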
@@ -625,7 +645,6 @@ Expected<bool> FunctionImporter::importFunctions(
// now, before linking it (otherwise this will be a noop).
if (Error Err = SrcModule->materializeMetadata())
return std::move(Err);
- UpgradeDebugInfo(*SrcModule);
auto &ImportGUIDs = FunctionsToImportPerModule->second;
// Find the globals to import
@@ -698,6 +717,10 @@ Expected<bool> FunctionImporter::importFunctions(
}
}
+ // Upgrade debug info after we're done materializing all the globals and we
+ // have loaded all the required metadata!
+ UpgradeDebugInfo(*SrcModule);
+
// Link in the specified functions.
if (renameModuleForThinLTO(*SrcModule, Index, &GlobalsToImport))
return true;
@@ -717,9 +740,10 @@ Expected<bool> FunctionImporter::importFunctions(
report_fatal_error("Function Import: link error");
ImportedCount += GlobalsToImport.size();
+ NumImportedModules++;
}
- NumImported += ImportedCount;
+ NumImportedFunctions += ImportedCount;
DEBUG(dbgs() << "Imported " << ImportedCount << " functions for Module "
<< DestModule.getModuleIdentifier() << "\n");
diff --git a/lib/Transforms/IPO/LowerTypeTests.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp
index 2948878cffc4..f4742aaf748f 100644
--- a/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -27,9 +27,12 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/ModuleSummaryIndexYAML.h"
#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/TrailingObjects.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
@@ -52,6 +55,20 @@ static cl::opt<bool> AvoidReuse(
cl::desc("Try to avoid reuse of byte array addresses using aliases"),
cl::Hidden, cl::init(true));
+static cl::opt<std::string> ClSummaryAction(
+ "lowertypetests-summary-action",
+ cl::desc("What to do with the summary when running this pass"), cl::Hidden);
+
+static cl::opt<std::string> ClReadSummary(
+ "lowertypetests-read-summary",
+ cl::desc("Read summary from given YAML file before running pass"),
+ cl::Hidden);
+
+static cl::opt<std::string> ClWriteSummary(
+ "lowertypetests-write-summary",
+ cl::desc("Write summary to given YAML file after running pass"),
+ cl::Hidden);
+
bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const {
if (Offset < ByteOffset)
return false;
@@ -66,38 +83,6 @@ bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const {
return Bits.count(BitOffset);
}
-bool BitSetInfo::containsValue(
- const DataLayout &DL,
- const DenseMap<GlobalObject *, uint64_t> &GlobalLayout, Value *V,
- uint64_t COffset) const {
- if (auto GV = dyn_cast<GlobalObject>(V)) {
- auto I = GlobalLayout.find(GV);
- if (I == GlobalLayout.end())
- return false;
- return containsGlobalOffset(I->second + COffset);
- }
-
- if (auto GEP = dyn_cast<GEPOperator>(V)) {
- APInt APOffset(DL.getPointerSizeInBits(0), 0);
- bool Result = GEP->accumulateConstantOffset(DL, APOffset);
- if (!Result)
- return false;
- COffset += APOffset.getZExtValue();
- return containsValue(DL, GlobalLayout, GEP->getPointerOperand(), COffset);
- }
-
- if (auto Op = dyn_cast<Operator>(V)) {
- if (Op->getOpcode() == Instruction::BitCast)
- return containsValue(DL, GlobalLayout, Op->getOperand(0), COffset);
-
- if (Op->getOpcode() == Instruction::Select)
- return containsValue(DL, GlobalLayout, Op->getOperand(1), COffset) &&
- containsValue(DL, GlobalLayout, Op->getOperand(2), COffset);
- }
-
- return false;
-}
-
void BitSetInfo::print(raw_ostream &OS) const {
OS << "offset " << ByteOffset << " size " << BitSize << " align "
<< (1 << AlignLog2);
@@ -204,7 +189,7 @@ struct ByteArrayInfo {
std::set<uint64_t> Bits;
uint64_t BitSize;
GlobalVariable *ByteArray;
- Constant *Mask;
+ GlobalVariable *MaskGlobal;
};
/// A POD-like structure that we use to store a global reference together with
@@ -241,6 +226,9 @@ public:
class LowerTypeTestsModule {
Module &M;
+ // This is for testing purposes only.
+ std::unique_ptr<ModuleSummaryIndex> OwnedSummary;
+
bool LinkerSubsectionsViaSymbols;
Triple::ArchType Arch;
Triple::OSType OS;
@@ -248,6 +236,7 @@ class LowerTypeTestsModule {
IntegerType *Int1Ty = Type::getInt1Ty(M.getContext());
IntegerType *Int8Ty = Type::getInt8Ty(M.getContext());
+ PointerType *Int8PtrTy = Type::getInt8PtrTy(M.getContext());
IntegerType *Int32Ty = Type::getInt32Ty(M.getContext());
PointerType *Int32PtrTy = PointerType::getUnqual(Int32Ty);
IntegerType *Int64Ty = Type::getInt64Ty(M.getContext());
@@ -259,6 +248,37 @@ class LowerTypeTestsModule {
// Mapping from type identifiers to the call sites that test them.
DenseMap<Metadata *, std::vector<CallInst *>> TypeTestCallSites;
+ /// This structure describes how to lower type tests for a particular type
+ /// identifier. It is either built directly from the global analysis (during
+ /// regular LTO or the regular LTO phase of ThinLTO), or indirectly using type
+ /// identifier summaries and external symbol references (in ThinLTO backends).
+ struct TypeIdLowering {
+ TypeTestResolution::Kind TheKind;
+
+ /// All except Unsat: the start address within the combined global.
+ Constant *OffsetedGlobal;
+
+ /// ByteArray, Inline, AllOnes: log2 of the required global alignment
+ /// relative to the start address.
+ Constant *AlignLog2;
+
+ /// ByteArray, Inline, AllOnes: size of the memory region covering members
+ /// of this type identifier as a multiple of 2^AlignLog2.
+ Constant *Size;
+
+ /// ByteArray, Inline, AllOnes: range of the size expressed as a bit width.
+ unsigned SizeBitWidth;
+
+ /// ByteArray: the byte array to test the address against.
+ Constant *TheByteArray;
+
+ /// ByteArray: the bit mask to apply to bytes loaded from the byte array.
+ Constant *BitMask;
+
+ /// Inline: the bit mask to test the address against.
+ Constant *InlineBits;
+ };
+
std::vector<ByteArrayInfo> ByteArrayInfos;
Function *WeakInitializerFn = nullptr;
@@ -268,15 +288,13 @@ class LowerTypeTestsModule {
const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout);
ByteArrayInfo *createByteArray(BitSetInfo &BSI);
void allocateByteArrays();
- Value *createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, ByteArrayInfo *&BAI,
+ Value *createBitSetTest(IRBuilder<> &B, const TypeIdLowering &TIL,
Value *BitOffset);
void lowerTypeTestCalls(
ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout);
- Value *
- lowerBitSetCall(CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
- Constant *CombinedGlobal,
- const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
+ Value *lowerTypeTestCall(Metadata *TypeId, CallInst *CI,
+ const TypeIdLowering &TIL);
void buildBitSetsFromGlobalVariables(ArrayRef<Metadata *> TypeIds,
ArrayRef<GlobalTypeMember *> Globals);
unsigned getJumpTableEntrySize();
@@ -302,6 +320,7 @@ class LowerTypeTestsModule {
public:
LowerTypeTestsModule(Module &M);
+ ~LowerTypeTestsModule();
bool lower();
};
@@ -380,7 +399,7 @@ ByteArrayInfo *LowerTypeTestsModule::createByteArray(BitSetInfo &BSI) {
BAI->Bits = BSI.Bits;
BAI->BitSize = BSI.BitSize;
BAI->ByteArray = ByteArrayGlobal;
- BAI->Mask = ConstantExpr::getPtrToInt(MaskGlobal, Int8Ty);
+ BAI->MaskGlobal = MaskGlobal;
return BAI;
}
@@ -399,8 +418,9 @@ void LowerTypeTestsModule::allocateByteArrays() {
uint8_t Mask;
BAB.allocate(BAI->Bits, BAI->BitSize, ByteArrayOffsets[I], Mask);
- BAI->Mask->replaceAllUsesWith(ConstantInt::get(Int8Ty, Mask));
- cast<GlobalVariable>(BAI->Mask->getOperand(0))->eraseFromParent();
+ BAI->MaskGlobal->replaceAllUsesWith(
+ ConstantExpr::getIntToPtr(ConstantInt::get(Int8Ty, Mask), Int8PtrTy));
+ BAI->MaskGlobal->eraseFromParent();
}
Constant *ByteArrayConst = ConstantDataArray::get(M.getContext(), BAB.Bytes);
@@ -435,101 +455,121 @@ void LowerTypeTestsModule::allocateByteArrays() {
ByteArraySizeBytes = BAB.Bytes.size();
}
-/// Build a test that bit BitOffset is set in BSI, where
-/// BitSetGlobal is a global containing the bits in BSI.
-Value *LowerTypeTestsModule::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI,
- ByteArrayInfo *&BAI,
+/// Build a test that bit BitOffset is set in the type identifier that was
+/// lowered to TIL, which must be either an Inline or a ByteArray.
+Value *LowerTypeTestsModule::createBitSetTest(IRBuilder<> &B,
+ const TypeIdLowering &TIL,
Value *BitOffset) {
- if (BSI.BitSize <= 64) {
+ if (TIL.TheKind == TypeTestResolution::Inline) {
// If the bit set is sufficiently small, we can avoid a load by bit testing
// a constant.
- IntegerType *BitsTy;
- if (BSI.BitSize <= 32)
- BitsTy = Int32Ty;
- else
- BitsTy = Int64Ty;
-
- uint64_t Bits = 0;
- for (auto Bit : BSI.Bits)
- Bits |= uint64_t(1) << Bit;
- Constant *BitsConst = ConstantInt::get(BitsTy, Bits);
- return createMaskedBitTest(B, BitsConst, BitOffset);
+ return createMaskedBitTest(B, TIL.InlineBits, BitOffset);
} else {
- if (!BAI) {
- ++NumByteArraysCreated;
- BAI = createByteArray(BSI);
- }
-
- Constant *ByteArray = BAI->ByteArray;
- Type *Ty = BAI->ByteArray->getValueType();
+ Constant *ByteArray = TIL.TheByteArray;
if (!LinkerSubsectionsViaSymbols && AvoidReuse) {
// Each use of the byte array uses a different alias. This makes the
// backend less likely to reuse previously computed byte array addresses,
// improving the security of the CFI mechanism based on this pass.
- ByteArray = GlobalAlias::create(BAI->ByteArray->getValueType(), 0,
- GlobalValue::PrivateLinkage, "bits_use",
- ByteArray, &M);
+ ByteArray = GlobalAlias::create(Int8Ty, 0, GlobalValue::PrivateLinkage,
+ "bits_use", ByteArray, &M);
}
- Value *ByteAddr = B.CreateGEP(Ty, ByteArray, BitOffset);
+ Value *ByteAddr = B.CreateGEP(Int8Ty, ByteArray, BitOffset);
Value *Byte = B.CreateLoad(ByteAddr);
- Value *ByteAndMask = B.CreateAnd(Byte, BAI->Mask);
+ Value *ByteAndMask =
+ B.CreateAnd(Byte, ConstantExpr::getPtrToInt(TIL.BitMask, Int8Ty));
return B.CreateICmpNE(ByteAndMask, ConstantInt::get(Int8Ty, 0));
}
}
+static bool isKnownTypeIdMember(Metadata *TypeId, const DataLayout &DL,
+ Value *V, uint64_t COffset) {
+ if (auto GV = dyn_cast<GlobalObject>(V)) {
+ SmallVector<MDNode *, 2> Types;
+ GV->getMetadata(LLVMContext::MD_type, Types);
+ for (MDNode *Type : Types) {
+ if (Type->getOperand(1) != TypeId)
+ continue;
+ uint64_t Offset =
+ cast<ConstantInt>(
+ cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
+ ->getZExtValue();
+ if (COffset == Offset)
+ return true;
+ }
+ return false;
+ }
+
+ if (auto GEP = dyn_cast<GEPOperator>(V)) {
+ APInt APOffset(DL.getPointerSizeInBits(0), 0);
+ bool Result = GEP->accumulateConstantOffset(DL, APOffset);
+ if (!Result)
+ return false;
+ COffset += APOffset.getZExtValue();
+ return isKnownTypeIdMember(TypeId, DL, GEP->getPointerOperand(), COffset);
+ }
+
+ if (auto Op = dyn_cast<Operator>(V)) {
+ if (Op->getOpcode() == Instruction::BitCast)
+ return isKnownTypeIdMember(TypeId, DL, Op->getOperand(0), COffset);
+
+ if (Op->getOpcode() == Instruction::Select)
+ return isKnownTypeIdMember(TypeId, DL, Op->getOperand(1), COffset) &&
+ isKnownTypeIdMember(TypeId, DL, Op->getOperand(2), COffset);
+ }
+
+ return false;
+}
+
/// Lower a llvm.type.test call to its implementation. Returns the value to
/// replace the call with.
-Value *LowerTypeTestsModule::lowerBitSetCall(
- CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
- Constant *CombinedGlobalIntAddr,
- const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) {
+Value *LowerTypeTestsModule::lowerTypeTestCall(Metadata *TypeId, CallInst *CI,
+ const TypeIdLowering &TIL) {
+ if (TIL.TheKind == TypeTestResolution::Unsat)
+ return ConstantInt::getFalse(M.getContext());
+
Value *Ptr = CI->getArgOperand(0);
const DataLayout &DL = M.getDataLayout();
-
- if (BSI.containsValue(DL, GlobalLayout, Ptr))
+ if (isKnownTypeIdMember(TypeId, DL, Ptr, 0))
return ConstantInt::getTrue(M.getContext());
- Constant *OffsetedGlobalAsInt = ConstantExpr::getAdd(
- CombinedGlobalIntAddr, ConstantInt::get(IntPtrTy, BSI.ByteOffset));
-
BasicBlock *InitialBB = CI->getParent();
IRBuilder<> B(CI);
Value *PtrAsInt = B.CreatePtrToInt(Ptr, IntPtrTy);
- if (BSI.isSingleOffset())
+ Constant *OffsetedGlobalAsInt =
+ ConstantExpr::getPtrToInt(TIL.OffsetedGlobal, IntPtrTy);
+ if (TIL.TheKind == TypeTestResolution::Single)
return B.CreateICmpEQ(PtrAsInt, OffsetedGlobalAsInt);
Value *PtrOffset = B.CreateSub(PtrAsInt, OffsetedGlobalAsInt);
- Value *BitOffset;
- if (BSI.AlignLog2 == 0) {
- BitOffset = PtrOffset;
- } else {
- // We need to check that the offset both falls within our range and is
- // suitably aligned. We can check both properties at the same time by
- // performing a right rotate by log2(alignment) followed by an integer
- // comparison against the bitset size. The rotate will move the lower
- // order bits that need to be zero into the higher order bits of the
- // result, causing the comparison to fail if they are nonzero. The rotate
- // also conveniently gives us a bit offset to use during the load from
- // the bitset.
- Value *OffsetSHR =
- B.CreateLShr(PtrOffset, ConstantInt::get(IntPtrTy, BSI.AlignLog2));
- Value *OffsetSHL = B.CreateShl(
- PtrOffset,
- ConstantInt::get(IntPtrTy, DL.getPointerSizeInBits(0) - BSI.AlignLog2));
- BitOffset = B.CreateOr(OffsetSHR, OffsetSHL);
- }
-
- Constant *BitSizeConst = ConstantInt::get(IntPtrTy, BSI.BitSize);
+ // We need to check that the offset both falls within our range and is
+ // suitably aligned. We can check both properties at the same time by
+ // performing a right rotate by log2(alignment) followed by an integer
+ // comparison against the bitset size. The rotate will move the lower
+ // order bits that need to be zero into the higher order bits of the
+ // result, causing the comparison to fail if they are nonzero. The rotate
+ // also conveniently gives us a bit offset to use during the load from
+ // the bitset.
+ Value *OffsetSHR =
+ B.CreateLShr(PtrOffset, ConstantExpr::getZExt(TIL.AlignLog2, IntPtrTy));
+ Value *OffsetSHL = B.CreateShl(
+ PtrOffset, ConstantExpr::getZExt(
+ ConstantExpr::getSub(
+ ConstantInt::get(Int8Ty, DL.getPointerSizeInBits(0)),
+ TIL.AlignLog2),
+ IntPtrTy));
+ Value *BitOffset = B.CreateOr(OffsetSHR, OffsetSHL);
+
+ Constant *BitSizeConst = ConstantExpr::getZExt(TIL.Size, IntPtrTy);
Value *OffsetInRange = B.CreateICmpULT(BitOffset, BitSizeConst);
// If the bit set is all ones, testing against it is unnecessary.
- if (BSI.isAllOnes())
+ if (TIL.TheKind == TypeTestResolution::AllOnes)
return OffsetInRange;
TerminatorInst *Term = SplitBlockAndInsertIfThen(OffsetInRange, CI, false);
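
The rotate described in the comment above folds the range test and the alignment test into a single unsigned comparison. A small stand-alone sketch, with illustrative names and 64 standing in for the pointer width:

#include <cstdint>

// Returns true if PtrOffset is a multiple of 1 << AlignLog2 and
// (PtrOffset >> AlignLog2) < BitSize, using one compare after a right
// rotate. Misaligned low bits rotate into the high bits and make the
// comparison fail.
bool offsetInAlignedRange(uint64_t PtrOffset, unsigned AlignLog2,
                          uint64_t BitSize) {
  uint64_t BitOffset =
      AlignLog2 == 0
          ? PtrOffset
          : (PtrOffset >> AlignLog2) | (PtrOffset << (64 - AlignLog2));
  return BitOffset < BitSize;
}

For instance, with AlignLog2 == 3 and BitSize == 5, offset 24 maps to bit 3 and passes, while offset 25 rotates its low set bit into the top of the word and fails.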
@@ -537,7 +577,7 @@ Value *LowerTypeTestsModule::lowerBitSetCall(
// Now that we know that the offset is in range and aligned, load the
// appropriate bit from the bitset.
- Value *Bit = createBitSetTest(ThenB, BSI, BAI, BitOffset);
+ Value *Bit = createBitSetTest(ThenB, TIL, BitOffset);
// The value we want is 0 if we came directly from the initial block
// (having failed the range or alignment checks), or the loaded bit if
@@ -622,11 +662,7 @@ void LowerTypeTestsModule::buildBitSetsFromGlobalVariables(
void LowerTypeTestsModule::lowerTypeTestCalls(
ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout) {
- Constant *CombinedGlobalIntAddr =
- ConstantExpr::getPtrToInt(CombinedGlobalAddr, IntPtrTy);
- DenseMap<GlobalObject *, uint64_t> GlobalObjLayout;
- for (auto &P : GlobalLayout)
- GlobalObjLayout[P.first->getGlobal()] = P.second;
+ CombinedGlobalAddr = ConstantExpr::getBitCast(CombinedGlobalAddr, Int8PtrTy);
// For each type identifier in this disjoint set...
for (Metadata *TypeId : TypeIds) {
@@ -640,13 +676,43 @@ void LowerTypeTestsModule::lowerTypeTestCalls(
BSI.print(dbgs());
});
- ByteArrayInfo *BAI = nullptr;
+ TypeIdLowering TIL;
+ TIL.OffsetedGlobal = ConstantExpr::getGetElementPtr(
+ Int8Ty, CombinedGlobalAddr, ConstantInt::get(IntPtrTy, BSI.ByteOffset)),
+ TIL.AlignLog2 = ConstantInt::get(Int8Ty, BSI.AlignLog2);
+ if (BSI.isAllOnes()) {
+ TIL.TheKind = (BSI.BitSize == 1) ? TypeTestResolution::Single
+ : TypeTestResolution::AllOnes;
+ TIL.SizeBitWidth = (BSI.BitSize <= 256) ? 8 : 32;
+ TIL.Size = ConstantInt::get((BSI.BitSize <= 256) ? Int8Ty : Int32Ty,
+ BSI.BitSize);
+ } else if (BSI.BitSize <= 64) {
+ TIL.TheKind = TypeTestResolution::Inline;
+ TIL.SizeBitWidth = (BSI.BitSize <= 32) ? 5 : 6;
+ TIL.Size = ConstantInt::get(Int8Ty, BSI.BitSize);
+ uint64_t InlineBits = 0;
+ for (auto Bit : BSI.Bits)
+ InlineBits |= uint64_t(1) << Bit;
+ if (InlineBits == 0)
+ TIL.TheKind = TypeTestResolution::Unsat;
+ else
+ TIL.InlineBits = ConstantInt::get(
+ (BSI.BitSize <= 32) ? Int32Ty : Int64Ty, InlineBits);
+ } else {
+ TIL.TheKind = TypeTestResolution::ByteArray;
+ TIL.SizeBitWidth = (BSI.BitSize <= 256) ? 8 : 32;
+ TIL.Size = ConstantInt::get((BSI.BitSize <= 256) ? Int8Ty : Int32Ty,
+ BSI.BitSize);
+ ++NumByteArraysCreated;
+ ByteArrayInfo *BAI = createByteArray(BSI);
+ TIL.TheByteArray = BAI->ByteArray;
+ TIL.BitMask = BAI->MaskGlobal;
+ }
// Lower each call to llvm.type.test for this type identifier.
for (CallInst *CI : TypeTestCallSites[TypeId]) {
++NumTypeTestCallsLowered;
- Value *Lowered =
- lowerBitSetCall(CI, BSI, BAI, CombinedGlobalIntAddr, GlobalObjLayout);
+ Value *Lowered = lowerTypeTestCall(TypeId, CI, TIL);
CI->replaceAllUsesWith(Lowered);
CI->eraseFromParent();
}
@@ -1080,6 +1146,22 @@ void LowerTypeTestsModule::buildBitSetsFromDisjointSet(
/// Lower all type tests in this module.
LowerTypeTestsModule::LowerTypeTestsModule(Module &M) : M(M) {
+ // Handle the command-line summary arguments. This code is for testing
+ // purposes only, so we handle errors directly.
+ if (!ClSummaryAction.empty()) {
+ OwnedSummary = make_unique<ModuleSummaryIndex>();
+ if (!ClReadSummary.empty()) {
+ ExitOnError ExitOnErr("-lowertypetests-read-summary: " + ClReadSummary +
+ ": ");
+ auto ReadSummaryFile =
+ ExitOnErr(errorOrToExpected(MemoryBuffer::getFile(ClReadSummary)));
+
+ yaml::Input In(ReadSummaryFile->getBuffer());
+ In >> *OwnedSummary;
+ ExitOnErr(errorCodeToError(In.error()));
+ }
+ }
+
Triple TargetTriple(M.getTargetTriple());
LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX();
Arch = TargetTriple.getArch();
@@ -1087,6 +1169,20 @@ LowerTypeTestsModule::LowerTypeTestsModule(Module &M) : M(M) {
ObjectFormat = TargetTriple.getObjectFormat();
}
+LowerTypeTestsModule::~LowerTypeTestsModule() {
+ if (ClSummaryAction.empty() || ClWriteSummary.empty())
+ return;
+
+ ExitOnError ExitOnErr("-lowertypetests-write-summary: " + ClWriteSummary +
+ ": ");
+ std::error_code EC;
+ raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::F_Text);
+ ExitOnErr(errorCodeToError(EC));
+
+ yaml::Output Out(OS);
+ Out << *OwnedSummary;
+}
+
bool LowerTypeTestsModule::lower() {
Function *TypeTestFunc =
M.getFunction(Intrinsic::getName(Intrinsic::type_test));
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index f863d192fc2f..b29ed3c87451 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1637,6 +1637,20 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
+ case Intrinsic::cos:
+ case Intrinsic::amdgcn_cos: {
+ Value *SrcSrc;
+ Value *Src = II->getArgOperand(0);
+ if (match(Src, m_FNeg(m_Value(SrcSrc))) ||
+ match(Src, m_Intrinsic<Intrinsic::fabs>(m_Value(SrcSrc)))) {
+ // cos(-x) -> cos(x)
+ // cos(fabs(x)) -> cos(x)
+ II->setArgOperand(0, SrcSrc);
+ return II;
+ }
+
+ break;
+ }
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
// Turn PPC lvx -> load if the pointer is known aligned.
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 6a7cb0e45c63..1d5528398776 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -514,7 +514,8 @@ struct AddressSanitizer : public FunctionPass {
void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore,
Value *Addr, uint32_t TypeSize, bool IsWrite,
Value *SizeArgument, bool UseCalls, uint32_t Exp);
- void instrumentUnusualSizeOrAlignment(Instruction *I, Value *Addr,
+ void instrumentUnusualSizeOrAlignment(Instruction *I,
+ Instruction *InsertBefore, Value *Addr,
uint32_t TypeSize, bool IsWrite,
Value *SizeArgument, bool UseCalls,
uint32_t Exp);
@@ -1056,20 +1057,18 @@ Value *AddressSanitizer::isInterestingMemoryAccess(Instruction *I,
return nullptr;
*IsWrite = false;
}
- // Only instrument if the mask is constant for now.
- if (isa<ConstantVector>(CI->getOperand(2 + OpOffset))) {
- auto BasePtr = CI->getOperand(0 + OpOffset);
- auto Ty = cast<PointerType>(BasePtr->getType())->getElementType();
- *TypeSize = DL.getTypeStoreSizeInBits(Ty);
- if (auto AlignmentConstant =
- dyn_cast<ConstantInt>(CI->getOperand(1 + OpOffset)))
- *Alignment = (unsigned)AlignmentConstant->getZExtValue();
- else
- *Alignment = 1; // No alignment guarantees. We probably got Undef
- if (MaybeMask)
- *MaybeMask = CI->getOperand(2 + OpOffset);
- PtrOperand = BasePtr;
- }
+
+ auto BasePtr = CI->getOperand(0 + OpOffset);
+ auto Ty = cast<PointerType>(BasePtr->getType())->getElementType();
+ *TypeSize = DL.getTypeStoreSizeInBits(Ty);
+ if (auto AlignmentConstant =
+ dyn_cast<ConstantInt>(CI->getOperand(1 + OpOffset)))
+ *Alignment = (unsigned)AlignmentConstant->getZExtValue();
+ else
+ *Alignment = 1; // No alignment guarantees. We probably got Undef
+ if (MaybeMask)
+ *MaybeMask = CI->getOperand(2 + OpOffset);
+ PtrOperand = BasePtr;
}
}
@@ -1130,24 +1129,25 @@ void AddressSanitizer::instrumentPointerComparisonOrSubtraction(
}
static void doInstrumentAddress(AddressSanitizer *Pass, Instruction *I,
- Value *Addr, unsigned Alignment,
- unsigned Granularity, uint32_t TypeSize,
- bool IsWrite, Value *SizeArgument,
- bool UseCalls, uint32_t Exp) {
+ Instruction *InsertBefore, Value *Addr,
+ unsigned Alignment, unsigned Granularity,
+ uint32_t TypeSize, bool IsWrite,
+ Value *SizeArgument, bool UseCalls,
+ uint32_t Exp) {
// Instrument a 1-, 2-, 4-, 8-, or 16- byte access with one check
// if the data is properly aligned.
if ((TypeSize == 8 || TypeSize == 16 || TypeSize == 32 || TypeSize == 64 ||
TypeSize == 128) &&
(Alignment >= Granularity || Alignment == 0 || Alignment >= TypeSize / 8))
- return Pass->instrumentAddress(I, I, Addr, TypeSize, IsWrite, nullptr,
- UseCalls, Exp);
- Pass->instrumentUnusualSizeOrAlignment(I, Addr, TypeSize, IsWrite, nullptr,
- UseCalls, Exp);
+ return Pass->instrumentAddress(I, InsertBefore, Addr, TypeSize, IsWrite,
+ nullptr, UseCalls, Exp);
+ Pass->instrumentUnusualSizeOrAlignment(I, InsertBefore, Addr, TypeSize,
+ IsWrite, nullptr, UseCalls, Exp);
}
static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass,
const DataLayout &DL, Type *IntptrTy,
- ConstantVector *Mask, Instruction *I,
+ Value *Mask, Instruction *I,
Value *Addr, unsigned Alignment,
unsigned Granularity, uint32_t TypeSize,
bool IsWrite, Value *SizeArgument,
@@ -1157,15 +1157,30 @@ static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass,
unsigned Num = VTy->getVectorNumElements();
auto Zero = ConstantInt::get(IntptrTy, 0);
for (unsigned Idx = 0; Idx < Num; ++Idx) {
- // dyn_cast as we might get UndefValue
- auto Masked = dyn_cast<ConstantInt>(Mask->getOperand(Idx));
- if (Masked && Masked->isAllOnesValue()) {
+ Value *InstrumentedAddress = nullptr;
+ Instruction *InsertBefore = I;
+ if (auto *Vector = dyn_cast<ConstantVector>(Mask)) {
+ // dyn_cast as we might get UndefValue
+ if (auto *Masked = dyn_cast<ConstantInt>(Vector->getOperand(Idx))) {
+ if (Masked->isNullValue())
+ // Mask is constant false, so no instrumentation needed.
+ continue;
+ // If we have a true or undef value, fall through to doInstrumentAddress
+ // with InsertBefore == I
+ }
+ } else {
IRBuilder<> IRB(I);
- auto InstrumentedAddress =
- IRB.CreateGEP(Addr, {Zero, ConstantInt::get(IntptrTy, Idx)});
- doInstrumentAddress(Pass, I, InstrumentedAddress, Alignment, Granularity,
- ElemTypeSize, IsWrite, SizeArgument, UseCalls, Exp);
+ Value *MaskElem = IRB.CreateExtractElement(Mask, Idx);
+ TerminatorInst *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, I, false);
+ InsertBefore = ThenTerm;
}
+
+ IRBuilder<> IRB(InsertBefore);
+ InstrumentedAddress =
+ IRB.CreateGEP(Addr, {Zero, ConstantInt::get(IntptrTy, Idx)});
+ doInstrumentAddress(Pass, I, InsertBefore, InstrumentedAddress, Alignment,
+ Granularity, ElemTypeSize, IsWrite, SizeArgument,
+ UseCalls, Exp);
}
}
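
The change above generalizes masked load/store instrumentation from constant masks to arbitrary mask values: constant-false lanes are skipped, and every other lane gets its own guarded check, branching on the extracted mask element when the mask is dynamic. The effect on the instrumented program is roughly the per-lane expansion below, sketched in plain C++ with a hypothetical checkLane standing in for the shadow-memory probe:

#include <array>
#include <cstddef>
#include <cstdio>

// Hypothetical per-lane check standing in for ASan's shadow-memory probe.
static void checkLane(const int *Addr) {
  std::printf("checking %p\n", static_cast<const void *>(Addr));
}

// Per-lane expansion of a 4-wide masked store: lanes whose mask bit is
// false are skipped entirely; all other lanes are checked and then stored.
void maskedStore(std::array<int, 4> &Dst, const std::array<int, 4> &Src,
                 const std::array<bool, 4> &Mask) {
  for (std::size_t Idx = 0; Idx < Dst.size(); ++Idx) {
    if (!Mask[Idx])
      continue;           // lane disabled: no access, no check
    checkLane(&Dst[Idx]); // guard the access that actually happens
    Dst[Idx] = Src[Idx];
  }
}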
@@ -1220,12 +1235,11 @@ void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis,
unsigned Granularity = 1 << Mapping.Scale;
if (MaybeMask) {
- auto Mask = cast<ConstantVector>(MaybeMask);
- instrumentMaskedLoadOrStore(this, DL, IntptrTy, Mask, I, Addr, Alignment,
- Granularity, TypeSize, IsWrite, nullptr,
- UseCalls, Exp);
+ instrumentMaskedLoadOrStore(this, DL, IntptrTy, MaybeMask, I, Addr,
+ Alignment, Granularity, TypeSize, IsWrite,
+ nullptr, UseCalls, Exp);
} else {
- doInstrumentAddress(this, I, Addr, Alignment, Granularity, TypeSize,
+ doInstrumentAddress(this, I, I, Addr, Alignment, Granularity, TypeSize,
IsWrite, nullptr, UseCalls, Exp);
}
}
@@ -1342,9 +1356,9 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
// and the last bytes. We call __asan_report_*_n(addr, real_size) to be able
// to report the actual access size.
void AddressSanitizer::instrumentUnusualSizeOrAlignment(
- Instruction *I, Value *Addr, uint32_t TypeSize, bool IsWrite,
- Value *SizeArgument, bool UseCalls, uint32_t Exp) {
- IRBuilder<> IRB(I);
+ Instruction *I, Instruction *InsertBefore, Value *Addr, uint32_t TypeSize,
+ bool IsWrite, Value *SizeArgument, bool UseCalls, uint32_t Exp) {
+ IRBuilder<> IRB(InsertBefore);
Value *Size = ConstantInt::get(IntptrTy, TypeSize / 8);
Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
if (UseCalls) {
@@ -1358,8 +1372,8 @@ void AddressSanitizer::instrumentUnusualSizeOrAlignment(
Value *LastByte = IRB.CreateIntToPtr(
IRB.CreateAdd(AddrLong, ConstantInt::get(IntptrTy, TypeSize / 8 - 1)),
Addr->getType());
- instrumentAddress(I, I, Addr, 8, IsWrite, Size, false, Exp);
- instrumentAddress(I, I, LastByte, 8, IsWrite, Size, false, Exp);
+ instrumentAddress(I, InsertBefore, Addr, 8, IsWrite, Size, false, Exp);
+ instrumentAddress(I, InsertBefore, LastByte, 8, IsWrite, Size, false, Exp);
}
}
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 9485bfd7c296..0137378b828b 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -1572,6 +1572,13 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// Assign value numbers to the new instructions.
for (Instruction *I : NewInsts) {
+ // Instructions that have been inserted in predecessor(s) to materialize
+ // the load address do not retain their original debug locations. Doing
+ // so could lead to confusing (but correct) source attributions.
+ // FIXME: How do we retain source locations without causing poor debugging
+ // behavior?
+ I->setDebugLoc(DebugLoc());
+
// FIXME: We really _ought_ to insert these value numbers into their
// parent's availability map. However, in doing so, we risk getting into
// ordering issues. If a block hasn't been processed yet, we would be
@@ -1601,8 +1608,11 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
if (auto *RangeMD = LI->getMetadata(LLVMContext::MD_range))
NewLoad->setMetadata(LLVMContext::MD_range, RangeMD);
- // Transfer DebugLoc.
- NewLoad->setDebugLoc(LI->getDebugLoc());
+ // We do not propagate the old load's debug location, because the new
+ // load now lives in a different BB, and we want to avoid a jumpy line
+ // table.
+ // FIXME: How do we retain source locations without causing poor debugging
+ // behavior?
// Add the newly created load.
ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 1cc5c8f0da84..6ef9d0561322 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -408,6 +408,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
CurAST->deleteValue(&I);
I.eraseFromParent();
}
+ Changed = true;
continue;
}
@@ -766,6 +767,14 @@ static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
// Move the new node to the Preheader, before its terminator.
I.moveBefore(Preheader->getTerminator());
+ // Do not retain debug locations when we are moving instructions to different
+ // basic blocks, because we want to avoid jumpy line tables. Calls, however,
+ // need to retain their debug locs because they may be inlined.
+ // FIXME: How do we retain source locations without causing poor debugging
+ // behavior?
+ if (!isa<CallInst>(I))
+ I.setDebugLoc(DebugLoc());
+
if (isa<LoadInst>(I))
++NumMovedLoads;
else if (isa<CallInst>(I))
@@ -911,14 +920,23 @@ bool llvm::promoteLoopAccessesToScalars(
//
// If at least one store is guaranteed to execute, both properties are
// satisfied, and promotion is legal.
+ //
// This, however, is not a necessary condition. Even if no store/load is
- // guaranteed to execute, we can still establish these properties:
- // (p1) by proving that hoisting the load into the preheader is
- // safe (i.e. proving dereferenceability on all paths through the loop). We
+ // guaranteed to execute, we can still establish these properties.
+ // We can establish (p1) by proving that hoisting the load into the preheader
+ // is safe (i.e. proving dereferenceability on all paths through the loop). We
// can use any access within the alias set to prove dereferenceability,
// since they're all must alias.
- // (p2) by proving the memory is thread-local, so the memory model
+ //
+  // There are two ways to establish (p2):
+ // a) Prove the location is thread-local. In this case the memory model
// requirement does not apply, and stores are safe to insert.
+ // b) Prove a store dominates every exit block. In this case, if an exit
+  // block is reached, the original dynamic path would have taken us through
+ // the store, so inserting a store into the exit block is safe. Note that this
+ // is different from the store being guaranteed to execute. For instance,
+ // if an exception is thrown on the first iteration of the loop, the original
+ // store is never executed, but the exit blocks are not executed either.
bool DereferenceableInPH = false;
bool SafeToInsertStore = false;
@@ -1000,6 +1018,17 @@ bool llvm::promoteLoopAccessesToScalars(
}
}
+ // If a store dominates all exit blocks, it is safe to sink.
+ // As explained above, if an exit block was executed, a dominating
+  // store must have been executed at least once, so we are not
+ // introducing stores on paths that did not have them.
+ // Note that this only looks at explicit exit blocks. If we ever
+ // start sinking stores into unwind edges (see above), this will break.
+ if (!SafeToInsertStore)
+ SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
+ return DT->dominates(Store->getParent(), Exit);
+ });
+
// If the store is not guaranteed to execute, we may still get
// deref info through it.
if (!DereferenceableInPH) {
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index fd167db11789..2743574ecca6 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -997,7 +997,7 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
/// Check if the given conditional branch is based on the comparison between
/// a variable and zero, and if the variable is non-zero, the control yields to
/// the loop entry. If the branch matches the behavior, the variable involved
-/// in the comparion is returned. This function will be called to see if the
+/// in the comparison is returned. This function will be called to see if the
/// precondition and postcondition of the loop are in desirable form.
static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry) {
if (!BI || !BI->isConditional())
diff --git a/lib/Transforms/Scalar/LoopSink.cpp b/lib/Transforms/Scalar/LoopSink.cpp
index 90309d7ebba6..f64354497771 100644
--- a/lib/Transforms/Scalar/LoopSink.cpp
+++ b/lib/Transforms/Scalar/LoopSink.cpp
@@ -283,8 +283,7 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
// sinked.
for (auto II = Preheader->rbegin(), E = Preheader->rend(); II != E;) {
Instruction *I = &*II++;
- if (!L.hasLoopInvariantOperands(I) ||
- !canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr))
+ if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr))
continue;
if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI))
Changed = true;
diff --git a/lib/Transforms/Utils/FunctionImportUtils.cpp b/lib/Transforms/Utils/FunctionImportUtils.cpp
index 440e36767edf..678d02e05d42 100644
--- a/lib/Transforms/Utils/FunctionImportUtils.cpp
+++ b/lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -56,12 +56,9 @@ bool FunctionImportGlobalProcessing::shouldPromoteLocalToGlobal(
if (!isPerformingImport() && !isModuleExporting())
return false;
- // If we are exporting, we need to see whether this value is marked
- // as NoRename in the summary. If we are importing, we may not have
- // a summary in the distributed backend case (only summaries for values
- // importes as defs, not references, are included in the index passed
- // to the distributed backends).
if (isPerformingImport()) {
+ assert((!GlobalsToImport->count(SGV) || !isNonRenamableLocal(*SGV)) &&
+ "Attempting to promote non-renamable local");
// We don't know for sure yet if we are importing this value (as either
// a reference or a def), since we are simply walking all values in the
// module. But by necessity if we end up importing it and it is local,
@@ -77,13 +74,28 @@ bool FunctionImportGlobalProcessing::shouldPromoteLocalToGlobal(
assert(Summaries->second.size() == 1 && "Local has more than one summary");
auto Linkage = Summaries->second.front()->linkage();
if (!GlobalValue::isLocalLinkage(Linkage)) {
- assert(!Summaries->second.front()->noRename());
+ assert(!isNonRenamableLocal(*SGV) &&
+ "Attempting to promote non-renamable local");
return true;
}
return false;
}
+#ifndef NDEBUG
+bool FunctionImportGlobalProcessing::isNonRenamableLocal(
+ const GlobalValue &GV) const {
+ if (!GV.hasLocalLinkage())
+ return false;
+ // This needs to stay in sync with the logic in buildModuleSummaryIndex.
+ if (GV.hasSection())
+ return true;
+ if (Used.count(const_cast<GlobalValue *>(&GV)))
+ return true;
+ return false;
+}
+#endif
+
std::string FunctionImportGlobalProcessing::getName(const GlobalValue *SGV,
bool DoPromote) {
// For locals that must be promoted to global scope, ensure that
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 8cde0c4cd607..31daba2248aa 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6785,22 +6785,19 @@ LoopVectorizationCostModel::expectedCost(unsigned VF) {
return Cost;
}
-/// \brief Check whether the address computation for a non-consecutive memory
-/// access looks like an unlikely candidate for being merged into the indexing
-/// mode.
+/// \brief Gets Address Access SCEV after verifying that the access pattern
+/// is loop invariant except for the induction variable dependence.
///
-/// We look for a GEP which has one index that is an induction variable and all
-/// other indices are loop invariant. If the stride of this access is also
-/// within a small bound we decide that this address computation can likely be
-/// merged into the addressing mode.
-/// In all other cases, we identify the address computation as complex.
-static bool isLikelyComplexAddressComputation(Value *Ptr,
- LoopVectorizationLegality *Legal,
- ScalarEvolution *SE,
- const Loop *TheLoop) {
+/// This SCEV can be sent to the Target in order to estimate the address
+/// calculation cost.
+static const SCEV *getAddressAccessSCEV(
+ Value *Ptr,
+ LoopVectorizationLegality *Legal,
+ ScalarEvolution *SE,
+ const Loop *TheLoop) {
auto *Gep = dyn_cast<GetElementPtrInst>(Ptr);
if (!Gep)
- return true;
+ return nullptr;
// We are looking for a gep with all loop invariant indices except for one
// which should be an induction variable.
@@ -6809,33 +6806,11 @@ static bool isLikelyComplexAddressComputation(Value *Ptr,
Value *Opd = Gep->getOperand(i);
if (!SE->isLoopInvariant(SE->getSCEV(Opd), TheLoop) &&
!Legal->isInductionVariable(Opd))
- return true;
+ return nullptr;
}
- // Now we know we have a GEP ptr, %inv, %ind, %inv. Make sure that the step
- // can likely be merged into the address computation.
- unsigned MaxMergeDistance = 64;
-
- const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Ptr));
- if (!AddRec)
- return true;
-
- // Check the step is constant.
- const SCEV *Step = AddRec->getStepRecurrence(*SE);
- // Calculate the pointer stride and check if it is consecutive.
- const auto *C = dyn_cast<SCEVConstant>(Step);
- if (!C)
- return true;
-
- const APInt &APStepVal = C->getAPInt();
-
- // Huge step value - give up.
- if (APStepVal.getBitWidth() > 64)
- return true;
-
- int64_t StepVal = APStepVal.getSExtValue();
-
- return StepVal > MaxMergeDistance;
+  // Now we know we have a GEP ptr, %inv, %ind, %inv. Return the Ptr SCEV.
+ return SE->getSCEV(Ptr);
}
static bool isStrideMul(Instruction *I, LoopVectorizationLegality *Legal) {
@@ -7063,12 +7038,12 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
unsigned Cost = 0;
Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
- // True if the memory instruction's address computation is complex.
- bool IsComplexComputation =
- isLikelyComplexAddressComputation(Ptr, Legal, SE, TheLoop);
+ // Figure out whether the access is strided and get the stride value
+  // if it's known at compile time.
+ const SCEV *PtrSCEV = getAddressAccessSCEV(Ptr, Legal, SE, TheLoop);
// Get the cost of the scalar memory instruction and address computation.
- Cost += VF * TTI.getAddressComputationCost(PtrTy, IsComplexComputation);
+ Cost += VF * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV);
Cost += VF *
TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
Alignment, AS);
diff --git a/test/Analysis/CostModel/AArch64/bswap.ll b/test/Analysis/CostModel/AArch64/bswap.ll
new file mode 100644
index 000000000000..a97127a631d8
--- /dev/null
+++ b/test/Analysis/CostModel/AArch64/bswap.ll
@@ -0,0 +1,70 @@
+; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu < %s | FileCheck %s
+
+; Verify the cost of bswap instructions.
+
+declare i16 @llvm.bswap.i16(i16)
+declare i32 @llvm.bswap.i32(i32)
+declare i64 @llvm.bswap.i64(i64)
+
+declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>)
+declare <4 x i16> @llvm.bswap.v4i16(<4 x i16>)
+
+declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
+declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
+declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
+
+define i16 @bswap_i16(i16 %a) {
+; CHECK: 'Cost Model Analysis' for function 'bswap_i16':
+; CHECK: Found an estimated cost of 1 for instruction: %bswap
+ %bswap = tail call i16 @llvm.bswap.i16(i16 %a)
+ ret i16 %bswap
+}
+
+define i32 @bswap_i32(i32 %a) {
+; CHECK: 'Cost Model Analysis' for function 'bswap_i32':
+; CHECK: Found an estimated cost of 1 for instruction: %bswap
+ %bswap = tail call i32 @llvm.bswap.i32(i32 %a)
+ ret i32 %bswap
+}
+
+define i64 @bswap_i64(i64 %a) {
+; CHECK: 'Cost Model Analysis' for function 'bswap_i64':
+; CHECK: Found an estimated cost of 1 for instruction: %bswap
+ %bswap = tail call i64 @llvm.bswap.i64(i64 %a)
+ ret i64 %bswap
+}
+
+define <2 x i32> @bswap_v2i32(<2 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'bswap_v2i32':
+; CHECK: Found an estimated cost of 8 for instruction: %bswap
+ %bswap = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a)
+ ret <2 x i32> %bswap
+}
+
+define <4 x i16> @bswap_v4i16(<4 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'bswap_v4i16':
+; CHECK: Found an estimated cost of 22 for instruction: %bswap
+ %bswap = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %a)
+ ret <4 x i16> %bswap
+}
+
+define <2 x i64> @bswap_v2i64(<2 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'bswap_v2i64':
+; CHECK: Found an estimated cost of 8 for instruction: %bswap
+ %bswap = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a)
+ ret <2 x i64> %bswap
+}
+
+define <4 x i32> @bswap_v4i32(<4 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'bswap_v4i32':
+; CHECK: Found an estimated cost of 22 for instruction: %bswap
+ %bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a)
+ ret <4 x i32> %bswap
+}
+
+define <8 x i16> @bswap_v8i16(<8 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'bswap_v8i16':
+; CHECK: Found an estimated cost of 50 for instruction: %bswap
+ %bswap = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a)
+ ret <8 x i16> %bswap
+}
diff --git a/test/Analysis/CostModel/AArch64/falkor.ll b/test/Analysis/CostModel/AArch64/falkor.ll
new file mode 100644
index 000000000000..e9563191f077
--- /dev/null
+++ b/test/Analysis/CostModel/AArch64/falkor.ll
@@ -0,0 +1,26 @@
+; RUN: opt < %s -cost-model -analyze -mcpu=falkor | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+; CHECK-LABEL: vectorInstrCost
+define void @vectorInstrCost() {
+
+ ; Vector extracts - extracting the first element should have a zero cost;
+ ; all other elements should have a cost of two.
+ ;
+ ; CHECK: cost of 0 {{.*}} extractelement <2 x i64> undef, i32 0
+ ; CHECK: cost of 2 {{.*}} extractelement <2 x i64> undef, i32 1
+ %t1 = extractelement <2 x i64> undef, i32 0
+ %t2 = extractelement <2 x i64> undef, i32 1
+
+ ; Vector inserts - inserting the first element should have a zero cost; all
+ ; other elements should have a cost of two.
+ ;
+ ; CHECK: cost of 0 {{.*}} insertelement <2 x i64> undef, i64 undef, i32 0
+ ; CHECK: cost of 2 {{.*}} insertelement <2 x i64> undef, i64 undef, i32 1
+ %t3 = insertelement <2 x i64> undef, i64 undef, i32 0
+ %t4 = insertelement <2 x i64> undef, i64 undef, i32 1
+
+ ret void
+}
diff --git a/test/Analysis/CostModel/AArch64/gep.ll b/test/Analysis/CostModel/AArch64/gep.ll
index f3d83c133027..08bfc3d21238 100644
--- a/test/Analysis/CostModel/AArch64/gep.ll
+++ b/test/Analysis/CostModel/AArch64/gep.ll
@@ -1,9 +1,9 @@
-; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mcpu=kryo < %s | FileCheck %s
+; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu < %s | FileCheck %s
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"
-define i8 @test1(i8* %p, i32 %i) {
+define i8 @test1(i8* %p) {
; CHECK-LABEL: test1
; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
%a = getelementptr inbounds i8, i8* %p, i32 0
@@ -11,7 +11,7 @@ define i8 @test1(i8* %p, i32 %i) {
ret i8 %v
}
-define i16 @test2(i16* %p, i32 %i) {
+define i16 @test2(i16* %p) {
; CHECK-LABEL: test2
; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16*
%a = getelementptr inbounds i16, i16* %p, i32 0
@@ -19,7 +19,7 @@ define i16 @test2(i16* %p, i32 %i) {
ret i16 %v
}
-define i32 @test3(i32* %p, i32 %i) {
+define i32 @test3(i32* %p) {
; CHECK-LABEL: test3
; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32, i32*
%a = getelementptr inbounds i32, i32* %p, i32 0
@@ -27,7 +27,7 @@ define i32 @test3(i32* %p, i32 %i) {
ret i32 %v
}
-define i64 @test4(i64* %p, i32 %i) {
+define i64 @test4(i64* %p) {
; CHECK-LABEL: test4
; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i64, i64*
%a = getelementptr inbounds i64, i64* %p, i32 0
@@ -35,7 +35,7 @@ define i64 @test4(i64* %p, i32 %i) {
ret i64 %v
}
-define i8 @test5(i8* %p, i32 %i) {
+define i8 @test5(i8* %p) {
; CHECK-LABEL: test5
; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
%a = getelementptr inbounds i8, i8* %p, i32 1024
@@ -43,7 +43,7 @@ define i8 @test5(i8* %p, i32 %i) {
ret i8 %v
}
-define i16 @test6(i16* %p, i32 %i) {
+define i16 @test6(i16* %p) {
; CHECK-LABEL: test6
; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16*
%a = getelementptr inbounds i16, i16* %p, i32 1024
@@ -51,7 +51,7 @@ define i16 @test6(i16* %p, i32 %i) {
ret i16 %v
}
-define i32 @test7(i32* %p, i32 %i) {
+define i32 @test7(i32* %p) {
; CHECK-LABEL: test7
; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32, i32*
%a = getelementptr inbounds i32, i32* %p, i32 1024
@@ -59,7 +59,7 @@ define i32 @test7(i32* %p, i32 %i) {
ret i32 %v
}
-define i64 @test8(i64* %p, i32 %i) {
+define i64 @test8(i64* %p) {
; CHECK-LABEL: test8
; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i64, i64*
%a = getelementptr inbounds i64, i64* %p, i32 1024
@@ -67,7 +67,7 @@ define i64 @test8(i64* %p, i32 %i) {
ret i64 %v
}
-define i8 @test9(i8* %p, i32 %i) {
+define i8 @test9(i8* %p) {
; CHECK-LABEL: test9
; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i8, i8*
%a = getelementptr inbounds i8, i8* %p, i32 4096
@@ -75,7 +75,7 @@ define i8 @test9(i8* %p, i32 %i) {
ret i8 %v
}
-define i16 @test10(i16* %p, i32 %i) {
+define i16 @test10(i16* %p) {
; CHECK-LABEL: test10
; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i16, i16*
%a = getelementptr inbounds i16, i16* %p, i32 4096
@@ -83,7 +83,7 @@ define i16 @test10(i16* %p, i32 %i) {
ret i16 %v
}
-define i32 @test11(i32* %p, i32 %i) {
+define i32 @test11(i32* %p) {
; CHECK-LABEL: test11
; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32, i32*
%a = getelementptr inbounds i32, i32* %p, i32 4096
@@ -91,7 +91,7 @@ define i32 @test11(i32* %p, i32 %i) {
ret i32 %v
}
-define i64 @test12(i64* %p, i32 %i) {
+define i64 @test12(i64* %p) {
; CHECK-LABEL: test12
; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, i64*
%a = getelementptr inbounds i64, i64* %p, i32 4096
@@ -99,7 +99,7 @@ define i64 @test12(i64* %p, i32 %i) {
ret i64 %v
}
-define i8 @test13(i8* %p, i32 %i) {
+define i8 @test13(i8* %p) {
; CHECK-LABEL: test13
; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
%a = getelementptr inbounds i8, i8* %p, i32 -64
@@ -107,7 +107,7 @@ define i8 @test13(i8* %p, i32 %i) {
ret i8 %v
}
-define i16 @test14(i16* %p, i32 %i) {
+define i16 @test14(i16* %p) {
; CHECK-LABEL: test14
; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16*
%a = getelementptr inbounds i16, i16* %p, i32 -64
@@ -115,7 +115,7 @@ define i16 @test14(i16* %p, i32 %i) {
ret i16 %v
}
-define i32 @test15(i32* %p, i32 %i) {
+define i32 @test15(i32* %p) {
; CHECK-LABEL: test15
; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32, i32*
%a = getelementptr inbounds i32, i32* %p, i32 -64
@@ -123,7 +123,7 @@ define i32 @test15(i32* %p, i32 %i) {
ret i32 %v
}
-define i64 @test16(i64* %p, i32 %i) {
+define i64 @test16(i64* %p) {
; CHECK-LABEL: test16
; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, i64*
%a = getelementptr inbounds i64, i64* %p, i32 -64
@@ -131,7 +131,7 @@ define i64 @test16(i64* %p, i32 %i) {
ret i64 %v
}
-define i8 @test17(i8* %p, i32 %i) {
+define i8 @test17(i8* %p) {
; CHECK-LABEL: test17
; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i8, i8*
%a = getelementptr inbounds i8, i8* %p, i32 -1024
@@ -139,7 +139,7 @@ define i8 @test17(i8* %p, i32 %i) {
ret i8 %v
}
-define i16 @test18(i16* %p, i32 %i) {
+define i16 @test18(i16* %p) {
; CHECK-LABEL: test18
; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i16, i16*
%a = getelementptr inbounds i16, i16* %p, i32 -1024
@@ -147,7 +147,7 @@ define i16 @test18(i16* %p, i32 %i) {
ret i16 %v
}
-define i32 @test19(i32* %p, i32 %i) {
+define i32 @test19(i32* %p) {
; CHECK-LABEL: test19
; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32, i32*
%a = getelementptr inbounds i32, i32* %p, i32 -1024
@@ -155,7 +155,7 @@ define i32 @test19(i32* %p, i32 %i) {
ret i32 %v
}
-define i64 @test20(i64* %p, i32 %i) {
+define i64 @test20(i64* %p) {
; CHECK-LABEL: test20
; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, i64*
%a = getelementptr inbounds i64, i64* %p, i32 -1024
@@ -195,7 +195,7 @@ define i64 @test24(i64* %p, i32 %i) {
ret i64 %v
}
-define i8 @test25(i8* %p, i32 %i) {
+define i8 @test25(i8* %p) {
; CHECK-LABEL: test25
; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
%a = getelementptr inbounds i8, i8* %p, i32 -128
@@ -203,7 +203,7 @@ define i8 @test25(i8* %p, i32 %i) {
ret i8 %v
}
-define i16 @test26(i16* %p, i32 %i) {
+define i16 @test26(i16* %p) {
; CHECK-LABEL: test26
; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16*
%a = getelementptr inbounds i16, i16* %p, i32 -128
@@ -211,7 +211,7 @@ define i16 @test26(i16* %p, i32 %i) {
ret i16 %v
}
-define i32 @test27(i32* %p, i32 %i) {
+define i32 @test27(i32* %p) {
; CHECK-LABEL: test27
; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32, i32*
%a = getelementptr inbounds i32, i32* %p, i32 -128
@@ -219,7 +219,7 @@ define i32 @test27(i32* %p, i32 %i) {
ret i32 %v
}
-define i64 @test28(i64* %p, i32 %i) {
+define i64 @test28(i64* %p) {
; CHECK-LABEL: test28
; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, i64*
%a = getelementptr inbounds i64, i64* %p, i32 -128
@@ -227,7 +227,7 @@ define i64 @test28(i64* %p, i32 %i) {
ret i64 %v
}
-define i8 @test29(i8* %p, i32 %i) {
+define i8 @test29(i8* %p) {
; CHECK-LABEL: test29
; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
%a = getelementptr inbounds i8, i8* %p, i32 -256
@@ -235,7 +235,7 @@ define i8 @test29(i8* %p, i32 %i) {
ret i8 %v
}
-define i16 @test30(i16* %p, i32 %i) {
+define i16 @test30(i16* %p) {
; CHECK-LABEL: test30
; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i16, i16*
%a = getelementptr inbounds i16, i16* %p, i32 -256
@@ -243,7 +243,7 @@ define i16 @test30(i16* %p, i32 %i) {
ret i16 %v
}
-define i32 @test31(i32* %p, i32 %i) {
+define i32 @test31(i32* %p) {
; CHECK-LABEL: test31
; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32, i32*
%a = getelementptr inbounds i32, i32* %p, i32 -256
@@ -251,7 +251,7 @@ define i32 @test31(i32* %p, i32 %i) {
ret i32 %v
}
-define i64 @test32(i64* %p, i32 %i) {
+define i64 @test32(i64* %p) {
; CHECK-LABEL: test32
; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, i64*
%a = getelementptr inbounds i64, i64* %p, i32 -256
@@ -259,7 +259,7 @@ define i64 @test32(i64* %p, i32 %i) {
ret i64 %v
}
-define i8 @test33(i8* %p, i32 %i) {
+define i8 @test33(i8* %p) {
; CHECK-LABEL: test33
; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i8, i8*
%a = getelementptr inbounds i8, i8* %p, i32 -512
@@ -267,7 +267,7 @@ define i8 @test33(i8* %p, i32 %i) {
ret i8 %v
}
-define i16 @test34(i16* %p, i32 %i) {
+define i16 @test34(i16* %p) {
; CHECK-LABEL: test34
; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i16, i16*
%a = getelementptr inbounds i16, i16* %p, i32 -512
@@ -275,7 +275,7 @@ define i16 @test34(i16* %p, i32 %i) {
ret i16 %v
}
-define i32 @test35(i32* %p, i32 %i) {
+define i32 @test35(i32* %p) {
; CHECK-LABEL: test35
; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32, i32*
%a = getelementptr inbounds i32, i32* %p, i32 -512
@@ -283,7 +283,7 @@ define i32 @test35(i32* %p, i32 %i) {
ret i32 %v
}
-define i64 @test36(i64* %p, i32 %i) {
+define i64 @test36(i64* %p) {
; CHECK-LABEL: test36
; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, i64*
%a = getelementptr inbounds i64, i64* %p, i32 -512
diff --git a/test/Analysis/CostModel/X86/arith.ll b/test/Analysis/CostModel/X86/arith.ll
index 7319efb413d6..b7a615f55cde 100644
--- a/test/Analysis/CostModel/X86/arith.ll
+++ b/test/Analysis/CostModel/X86/arith.ll
@@ -436,7 +436,7 @@ define i32 @mul(i32 %arg) {
%A = mul <2 x i64> undef, undef
; SSSE3: cost of 16 {{.*}} %B = mul
; SSE42: cost of 16 {{.*}} %B = mul
- ; AVX: cost of 16 {{.*}} %B = mul
+ ; AVX: cost of 18 {{.*}} %B = mul
; AVX2: cost of 8 {{.*}} %B = mul
; AVX512F: cost of 8 {{.*}} %B = mul
; AVX512BW: cost of 8 {{.*}} %B = mul
@@ -444,7 +444,7 @@ define i32 @mul(i32 %arg) {
%B = mul <4 x i64> undef, undef
; SSSE3: cost of 32 {{.*}} %C = mul
; SSE42: cost of 32 {{.*}} %C = mul
- ; AVX: cost of 32 {{.*}} %C = mul
+ ; AVX: cost of 36 {{.*}} %C = mul
; AVX2: cost of 16 {{.*}} %C = mul
; AVX512F: cost of 8 {{.*}} %C = mul
; AVX512BW: cost of 8 {{.*}} %C = mul
diff --git a/test/Analysis/CostModel/X86/shuffle-broadcast.ll b/test/Analysis/CostModel/X86/shuffle-broadcast.ll
index a829a47f89f2..86cf7569a728 100644
--- a/test/Analysis/CostModel/X86/shuffle-broadcast.ll
+++ b/test/Analysis/CostModel/X86/shuffle-broadcast.ll
@@ -18,14 +18,150 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double>
%V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> zeroinitializer
; SSE: cost of 1 {{.*}} %V256 = shufflevector
- ; AVX: cost of 1 {{.*}} %V256 = shufflevector
+ ; AVX1: cost of 2 {{.*}} %V256 = shufflevector
+ ; AVX2: cost of 1 {{.*}} %V256 = shufflevector
; AVX512: cost of 1 {{.*}} %V256 = shufflevector
%V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer
; SSE: cost of 1 {{.*}} %V512 = shufflevector
- ; AVX: cost of 1 {{.*}} %V512 = shufflevector
+ ; AVX1: cost of 2 {{.*}} %V512 = shufflevector
+ ; AVX2: cost of 1 {{.*}} %V512 = shufflevector
; AVX512: cost of 1 {{.*}} %V512 = shufflevector
%V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer
ret void
}
+
+; CHECK-LABEL: 'test_vXi64'
+define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) {
+ ; SSE: cost of 1 {{.*}} %V128 = shufflevector
+ ; AVX: cost of 1 {{.*}} %V128 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
+ %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> zeroinitializer
+
+ ; SSE: cost of 1 {{.*}} %V256 = shufflevector
+ ; AVX1: cost of 2 {{.*}} %V256 = shufflevector
+ ; AVX2: cost of 1 {{.*}} %V256 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
+ %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer
+
+ ; SSE: cost of 1 {{.*}} %V512 = shufflevector
+ ; AVX1: cost of 2 {{.*}} %V512 = shufflevector
+ ; AVX2: cost of 1 {{.*}} %V512 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
+ %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> zeroinitializer
+
+ ret void
+}
+
+; CHECK-LABEL: 'test_vXf32'
+define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256, <16 x float> %src512) {
+ ; SSE: cost of 1 {{.*}} %V64 = shufflevector
+ ; AVX: cost of 1 {{.*}} %V64 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V64 = shufflevector
+ %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> zeroinitializer
+
+ ; SSE: cost of 1 {{.*}} %V128 = shufflevector
+ ; AVX: cost of 1 {{.*}} %V128 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
+ %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> zeroinitializer
+
+ ; SSE: cost of 1 {{.*}} %V256 = shufflevector
+ ; AVX1: cost of 2 {{.*}} %V256 = shufflevector
+ ; AVX2: cost of 1 {{.*}} %V256 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
+ %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer
+
+ ; SSE: cost of 1 {{.*}} %V512 = shufflevector
+ ; AVX1: cost of 2 {{.*}} %V512 = shufflevector
+ ; AVX2: cost of 1 {{.*}} %V512 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
+ %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> zeroinitializer
+
+ ret void
+}
+
+; CHECK-LABEL: 'test_vXi32'
+define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512) {
+ ; SSE: cost of 1 {{.*}} %V64 = shufflevector
+ ; AVX: cost of 1 {{.*}} %V64 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V64 = shufflevector
+ %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> zeroinitializer
+
+ ; SSE: cost of 1 {{.*}} %V128 = shufflevector
+ ; AVX: cost of 1 {{.*}} %V128 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
+ %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> zeroinitializer
+
+ ; SSE: cost of 1 {{.*}} %V256 = shufflevector
+ ; AVX1: cost of 2 {{.*}} %V256 = shufflevector
+ ; AVX2: cost of 1 {{.*}} %V256 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
+ %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer
+
+ ; SSE: cost of 1 {{.*}} %V512 = shufflevector
+ ; AVX1: cost of 2 {{.*}} %V512 = shufflevector
+ ; AVX2: cost of 1 {{.*}} %V512 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
+ %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> zeroinitializer
+
+ ret void
+}
+
+; CHECK-LABEL: 'test_vXi16'
+define void @test_vXi16(<8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512) {
+ ; SSE2: cost of 2 {{.*}} %V128 = shufflevector
+ ; SSSE3: cost of 1 {{.*}} %V128 = shufflevector
+ ; SSE42: cost of 1 {{.*}} %V128 = shufflevector
+ ; AVX: cost of 1 {{.*}} %V128 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
+ %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer
+
+ ; SSE2: cost of 2 {{.*}} %V256 = shufflevector
+ ; SSSE3: cost of 1 {{.*}} %V256 = shufflevector
+ ; SSE42: cost of 1 {{.*}} %V256 = shufflevector
+ ; AVX1: cost of 3 {{.*}} %V256 = shufflevector
+ ; AVX2: cost of 1 {{.*}} %V256 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
+ %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer
+
+ ; SSE2: cost of 2 {{.*}} %V512 = shufflevector
+ ; SSSE3: cost of 1 {{.*}} %V512 = shufflevector
+ ; SSE42: cost of 1 {{.*}} %V512 = shufflevector
+ ; AVX1: cost of 3 {{.*}} %V512 = shufflevector
+ ; AVX2: cost of 1 {{.*}} %V512 = shufflevector
+ ; AVX512F: cost of 1 {{.*}} %V512 = shufflevector
+ ; AVX512BW: cost of 1 {{.*}} %V512 = shufflevector
+ %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer
+
+ ret void
+}
+
+; CHECK-LABEL: 'test_vXi8'
+define void @test_vXi8(<16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) {
+ ; SSE2: cost of 3 {{.*}} %V128 = shufflevector
+ ; SSSE3: cost of 1 {{.*}} %V128 = shufflevector
+ ; SSE42: cost of 1 {{.*}} %V128 = shufflevector
+ ; AVX: cost of 1 {{.*}} %V128 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
+ %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer
+
+ ; SSE2: cost of 3 {{.*}} %V256 = shufflevector
+ ; SSSE3: cost of 1 {{.*}} %V256 = shufflevector
+ ; SSE42: cost of 1 {{.*}} %V256 = shufflevector
+ ; AVX1: cost of 2 {{.*}} %V256 = shufflevector
+ ; AVX2: cost of 1 {{.*}} %V256 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
+ %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer
+
+ ; SSE2: cost of 3 {{.*}} %V512 = shufflevector
+ ; SSSE3: cost of 1 {{.*}} %V512 = shufflevector
+ ; SSE42: cost of 1 {{.*}} %V512 = shufflevector
+ ; AVX1: cost of 2 {{.*}} %V512 = shufflevector
+ ; AVX2: cost of 1 {{.*}} %V512 = shufflevector
+ ; AVX512F: cost of 1 {{.*}} %V512 = shufflevector
+ ; AVX512BW: cost of 1 {{.*}} %V512 = shufflevector
+ %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer
+
+ ret void
+}
diff --git a/test/Analysis/CostModel/X86/vdiv-cost.ll b/test/Analysis/CostModel/X86/vdiv-cost.ll
index c8e4557cbefd..a45bb4b3d0d5 100644
--- a/test/Analysis/CostModel/X86/vdiv-cost.ll
+++ b/test/Analysis/CostModel/X86/vdiv-cost.ll
@@ -1,13 +1,20 @@
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2,-sse4.1 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=SSSE3
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE42
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512DQ
define <4 x i32> @test1(<4 x i32> %a) {
%div = udiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
ret <4 x i32> %div
; CHECK: 'Cost Model Analysis' for function 'test1':
-; SSE2: Found an estimated cost of 15 for instruction: %div
-; AVX2: Found an estimated cost of 15 for instruction: %div
+; SSE: Found an estimated cost of 15 for instruction: %div
+; AVX: Found an estimated cost of 15 for instruction: %div
+; AVX512: Found an estimated cost of 15 for instruction: %div
}
define <8 x i32> @test2(<8 x i32> %a) {
@@ -15,8 +22,10 @@ define <8 x i32> @test2(<8 x i32> %a) {
ret <8 x i32> %div
; CHECK: 'Cost Model Analysis' for function 'test2':
-; SSE2: Found an estimated cost of 30 for instruction: %div
+; SSE: Found an estimated cost of 30 for instruction: %div
+; AVX1: Found an estimated cost of 30 for instruction: %div
; AVX2: Found an estimated cost of 15 for instruction: %div
+; AVX512: Found an estimated cost of 15 for instruction: %div
}
define <8 x i16> @test3(<8 x i16> %a) {
@@ -24,8 +33,9 @@ define <8 x i16> @test3(<8 x i16> %a) {
ret <8 x i16> %div
; CHECK: 'Cost Model Analysis' for function 'test3':
-; SSE2: Found an estimated cost of 6 for instruction: %div
-; AVX2: Found an estimated cost of 6 for instruction: %div
+; SSE: Found an estimated cost of 6 for instruction: %div
+; AVX: Found an estimated cost of 6 for instruction: %div
+; AVX512: Found an estimated cost of 6 for instruction: %div
}
define <16 x i16> @test4(<16 x i16> %a) {
@@ -33,8 +43,10 @@ define <16 x i16> @test4(<16 x i16> %a) {
ret <16 x i16> %div
; CHECK: 'Cost Model Analysis' for function 'test4':
-; SSE2: Found an estimated cost of 12 for instruction: %div
+; SSE: Found an estimated cost of 12 for instruction: %div
+; AVX1: Found an estimated cost of 12 for instruction: %div
; AVX2: Found an estimated cost of 6 for instruction: %div
+; AVX512: Found an estimated cost of 6 for instruction: %div
}
define <8 x i16> @test5(<8 x i16> %a) {
@@ -42,8 +54,9 @@ define <8 x i16> @test5(<8 x i16> %a) {
ret <8 x i16> %div
; CHECK: 'Cost Model Analysis' for function 'test5':
-; SSE2: Found an estimated cost of 6 for instruction: %div
-; AVX2: Found an estimated cost of 6 for instruction: %div
+; SSE: Found an estimated cost of 6 for instruction: %div
+; AVX: Found an estimated cost of 6 for instruction: %div
+; AVX512: Found an estimated cost of 6 for instruction: %div
}
define <16 x i16> @test6(<16 x i16> %a) {
@@ -51,8 +64,10 @@ define <16 x i16> @test6(<16 x i16> %a) {
ret <16 x i16> %div
; CHECK: 'Cost Model Analysis' for function 'test6':
-; SSE2: Found an estimated cost of 12 for instruction: %div
+; SSE: Found an estimated cost of 12 for instruction: %div
+; AVX1: Found an estimated cost of 12 for instruction: %div
; AVX2: Found an estimated cost of 6 for instruction: %div
+; AVX512: Found an estimated cost of 6 for instruction: %div
}
define <16 x i8> @test7(<16 x i8> %a) {
@@ -60,8 +75,9 @@ define <16 x i8> @test7(<16 x i8> %a) {
ret <16 x i8> %div
; CHECK: 'Cost Model Analysis' for function 'test7':
-; SSE2: Found an estimated cost of 320 for instruction: %div
-; AVX2: Found an estimated cost of 320 for instruction: %div
+; SSE: Found an estimated cost of 320 for instruction: %div
+; AVX: Found an estimated cost of 320 for instruction: %div
+; AVX512: Found an estimated cost of 320 for instruction: %div
}
define <4 x i32> @test8(<4 x i32> %a) {
@@ -69,8 +85,9 @@ define <4 x i32> @test8(<4 x i32> %a) {
ret <4 x i32> %div
; CHECK: 'Cost Model Analysis' for function 'test8':
-; SSE2: Found an estimated cost of 19 for instruction: %div
-; AVX2: Found an estimated cost of 15 for instruction: %div
+; SSE: Found an estimated cost of 19 for instruction: %div
+; AVX: Found an estimated cost of 15 for instruction: %div
+; AVX512: Found an estimated cost of 15 for instruction: %div
}
define <8 x i32> @test9(<8 x i32> %a) {
@@ -78,8 +95,10 @@ define <8 x i32> @test9(<8 x i32> %a) {
ret <8 x i32> %div
; CHECK: 'Cost Model Analysis' for function 'test9':
-; SSE2: Found an estimated cost of 38 for instruction: %div
+; SSE: Found an estimated cost of 38 for instruction: %div
+; AVX1: Found an estimated cost of 38 for instruction: %div
; AVX2: Found an estimated cost of 15 for instruction: %div
+; AVX512: Found an estimated cost of 15 for instruction: %div
}
define <8 x i32> @test10(<8 x i32> %a) {
@@ -87,6 +106,17 @@ define <8 x i32> @test10(<8 x i32> %a) {
ret <8 x i32> %div
; CHECK: 'Cost Model Analysis' for function 'test10':
-; SSE2: Found an estimated cost of 160 for instruction: %div
-; AVX2: Found an estimated cost of 160 for instruction: %div
+; SSE: Found an estimated cost of 160 for instruction: %div
+; AVX: Found an estimated cost of 160 for instruction: %div
+; AVX512: Found an estimated cost of 160 for instruction: %div
+}
+
+define <16 x i32> @test11(<16 x i32> %a) {
+ %div = sdiv <16 x i32> %a, <i32 8, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+ ret <16 x i32> %div
+
+; CHECK: 'Cost Model Analysis' for function 'test11':
+; SSE: Found an estimated cost of 320 for instruction: %div
+; AVX: Found an estimated cost of 320 for instruction: %div
+; AVX512: Found an estimated cost of 320 for instruction: %div
}
diff --git a/test/Analysis/CostModel/X86/vshift-ashr-cost.ll b/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
index e53e40b57e1d..888164df75f5 100644
--- a/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
+++ b/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
@@ -1,9 +1,12 @@
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2,-sse4.1 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=SSE41
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+xop,+avx -cost-model -analyze | FileCheck %s --check-prefix=CHECK -check-prefix=XOP --check-prefix=XOPAVX
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+xop,+avx2 -cost-model -analyze | FileCheck %s --check-prefix=CHECK -check-prefix=XOP --check-prefix=XOPAVX2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512dq -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512bw -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
; Verify the cost of vector arithmetic shift right instructions.
@@ -17,6 +20,7 @@ define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE41: Found an estimated cost of 12 for instruction: %shift
; AVX: Found an estimated cost of 12 for instruction: %shift
; AVX2: Found an estimated cost of 4 for instruction: %shift
+; AVX512: Found an estimated cost of 4 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = ashr <2 x i64> %a, %b
ret <2 x i64> %shift
@@ -28,17 +32,31 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE41: Found an estimated cost of 24 for instruction: %shift
; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 4 for instruction: %shift
+; AVX512: Found an estimated cost of 4 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = ashr <4 x i64> %a, %b
ret <4 x i64> %shift
}
+define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i64':
+; SSE2: Found an estimated cost of 48 for instruction: %shift
+; SSE41: Found an estimated cost of 48 for instruction: %shift
+; AVX: Found an estimated cost of 48 for instruction: %shift
+; AVX2: Found an estimated cost of 8 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %shift = ashr <8 x i64> %a, %b
+ ret <8 x i64> %shift
+}
+
define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction: %shift
; SSE41: Found an estimated cost of 12 for instruction: %shift
; AVX: Found an estimated cost of 12 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = ashr <4 x i32> %a, %b
@@ -51,18 +69,33 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE41: Found an estimated cost of 24 for instruction: %shift
; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = ashr <8 x i32> %a, %b
ret <8 x i32> %shift
}
+define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i32':
+; SSE2: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 48 for instruction: %shift
+; AVX: Found an estimated cost of 48 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 8 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %shift = ashr <16 x i32> %a, %b
+ ret <16 x i32> %shift
+}
+
define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
; SSE41: Found an estimated cost of 14 for instruction: %shift
; AVX: Found an estimated cost of 14 for instruction: %shift
; AVX2: Found an estimated cost of 14 for instruction: %shift
+; AVX512: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = ashr <8 x i16> %a, %b
ret <8 x i16> %shift
@@ -74,17 +107,32 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE41: Found an estimated cost of 28 for instruction: %shift
; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
+; AVX512: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = ashr <16 x i16> %a, %b
ret <16 x i16> %shift
}
+define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i16':
+; SSE2: Found an estimated cost of 128 for instruction: %shift
+; SSE41: Found an estimated cost of 56 for instruction: %shift
+; AVX: Found an estimated cost of 56 for instruction: %shift
+; AVX2: Found an estimated cost of 20 for instruction: %shift
+; AVX512F: Found an estimated cost of 20 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %shift = ashr <32 x i16> %a, %b
+ ret <32 x i16> %shift
+}
+
define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8':
; SSE2: Found an estimated cost of 54 for instruction: %shift
; SSE41: Found an estimated cost of 24 for instruction: %shift
; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 24 for instruction: %shift
+; AVX512: Found an estimated cost of 24 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = ashr <16 x i8> %a, %b
ret <16 x i8> %shift
@@ -96,11 +144,26 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; SSE41: Found an estimated cost of 48 for instruction: %shift
; AVX: Found an estimated cost of 48 for instruction: %shift
; AVX2: Found an estimated cost of 24 for instruction: %shift
+; AVX512F: Found an estimated cost of 24 for instruction: %shift
+; AVX512BW: Found an estimated cost of 24 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = ashr <32 x i8> %a, %b
ret <32 x i8> %shift
}
+define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v64i8':
+; SSE2: Found an estimated cost of 216 for instruction: %shift
+; SSE41: Found an estimated cost of 96 for instruction: %shift
+; AVX: Found an estimated cost of 96 for instruction: %shift
+; AVX2: Found an estimated cost of 48 for instruction: %shift
+; AVX512F: Found an estimated cost of 48 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %shift = ashr <64 x i8> %a, %b
+ ret <64 x i8> %shift
+}
+
;
; Uniform Variable Shifts
;
@@ -111,6 +174,7 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE41: Found an estimated cost of 12 for instruction: %shift
; AVX: Found an estimated cost of 12 for instruction: %shift
; AVX2: Found an estimated cost of 4 for instruction: %shift
+; AVX512: Found an estimated cost of 4 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
%shift = ashr <2 x i64> %a, %splat
@@ -123,18 +187,33 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE41: Found an estimated cost of 24 for instruction: %shift
; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 4 for instruction: %shift
+; AVX512: Found an estimated cost of 4 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
%shift = ashr <4 x i64> %a, %splat
ret <4 x i64> %shift
}
+define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i64':
+; SSE2: Found an estimated cost of 48 for instruction: %shift
+; SSE41: Found an estimated cost of 48 for instruction: %shift
+; AVX: Found an estimated cost of 48 for instruction: %shift
+; AVX2: Found an estimated cost of 8 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
+ %shift = ashr <8 x i64> %a, %splat
+ ret <8 x i64> %shift
+}
+
define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction: %shift
; SSE41: Found an estimated cost of 12 for instruction: %shift
; AVX: Found an estimated cost of 12 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
@@ -148,6 +227,7 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE41: Found an estimated cost of 24 for instruction: %shift
; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -155,12 +235,27 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
ret <8 x i32> %shift
}
+define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i32':
+; SSE2: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 48 for instruction: %shift
+; AVX: Found an estimated cost of 48 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 8 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
+ %shift = ashr <16 x i32> %a, %splat
+ ret <16 x i32> %shift
+}
+
define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
; SSE41: Found an estimated cost of 14 for instruction: %shift
; AVX: Found an estimated cost of 14 for instruction: %shift
; AVX2: Found an estimated cost of 14 for instruction: %shift
+; AVX512: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
%shift = ashr <8 x i16> %a, %splat
@@ -173,18 +268,34 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE41: Found an estimated cost of 28 for instruction: %shift
; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
+; AVX512: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
%shift = ashr <16 x i16> %a, %splat
ret <16 x i16> %shift
}
+define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i16':
+; SSE2: Found an estimated cost of 128 for instruction: %shift
+; SSE41: Found an estimated cost of 56 for instruction: %shift
+; AVX: Found an estimated cost of 56 for instruction: %shift
+; AVX2: Found an estimated cost of 20 for instruction: %shift
+; AVX512F: Found an estimated cost of 20 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
+ %shift = ashr <32 x i16> %a, %splat
+ ret <32 x i16> %shift
+}
+
define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8':
; SSE2: Found an estimated cost of 54 for instruction: %shift
; SSE41: Found an estimated cost of 24 for instruction: %shift
; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 24 for instruction: %shift
+; AVX512: Found an estimated cost of 24 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
%shift = ashr <16 x i8> %a, %splat
@@ -197,12 +308,27 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; SSE41: Found an estimated cost of 48 for instruction: %shift
; AVX: Found an estimated cost of 48 for instruction: %shift
; AVX2: Found an estimated cost of 24 for instruction: %shift
+; AVX512: Found an estimated cost of 24 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
%shift = ashr <32 x i8> %a, %splat
ret <32 x i8> %shift
}
+define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v64i8':
+; SSE2: Found an estimated cost of 216 for instruction: %shift
+; SSE41: Found an estimated cost of 96 for instruction: %shift
+; AVX: Found an estimated cost of 96 for instruction: %shift
+; AVX2: Found an estimated cost of 48 for instruction: %shift
+; AVX512F: Found an estimated cost of 48 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
+ %shift = ashr <64 x i8> %a, %splat
+ ret <64 x i8> %shift
+}
+
;
; Constant Shifts
;
@@ -213,6 +339,7 @@ define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) {
; SSE41: Found an estimated cost of 12 for instruction: %shift
; AVX: Found an estimated cost of 12 for instruction: %shift
; AVX2: Found an estimated cost of 4 for instruction: %shift
+; AVX512: Found an estimated cost of 4 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = ashr <2 x i64> %a, <i64 1, i64 7>
ret <2 x i64> %shift
@@ -224,17 +351,31 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
; SSE41: Found an estimated cost of 24 for instruction: %shift
; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 4 for instruction: %shift
+; AVX512: Found an estimated cost of 4 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = ashr <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
ret <4 x i64> %shift
}
+define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i64':
+; SSE2: Found an estimated cost of 48 for instruction: %shift
+; SSE41: Found an estimated cost of 48 for instruction: %shift
+; AVX: Found an estimated cost of 48 for instruction: %shift
+; AVX2: Found an estimated cost of 8 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %shift = ashr <8 x i64> %a, <i64 1, i64 7, i64 15, i64 31, i64 1, i64 7, i64 15, i64 31>
+ ret <8 x i64> %shift
+}
+
define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction: %shift
; SSE41: Found an estimated cost of 12 for instruction: %shift
; AVX: Found an estimated cost of 12 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = ashr <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
@@ -247,18 +388,33 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
; SSE41: Found an estimated cost of 24 for instruction: %shift
; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = ashr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
ret <8 x i32> %shift
}
+define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i32':
+; SSE2: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 48 for instruction: %shift
+; AVX: Found an estimated cost of 48 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 8 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %shift = ashr <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+ ret <16 x i32> %shift
+}
+
define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
; SSE41: Found an estimated cost of 14 for instruction: %shift
; AVX: Found an estimated cost of 14 for instruction: %shift
; AVX2: Found an estimated cost of 14 for instruction: %shift
+; AVX512: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = ashr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
ret <8 x i16> %shift
@@ -270,17 +426,32 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
; SSE41: Found an estimated cost of 28 for instruction: %shift
; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
+; AVX512: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = ashr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
ret <16 x i16> %shift
}
+define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i16':
+; SSE2: Found an estimated cost of 128 for instruction: %shift
+; SSE41: Found an estimated cost of 56 for instruction: %shift
+; AVX: Found an estimated cost of 56 for instruction: %shift
+; AVX2: Found an estimated cost of 20 for instruction: %shift
+; AVX512F: Found an estimated cost of 20 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %shift = ashr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+ ret <32 x i16> %shift
+}
+
define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8':
; SSE2: Found an estimated cost of 54 for instruction: %shift
; SSE41: Found an estimated cost of 24 for instruction: %shift
; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 24 for instruction: %shift
+; AVX512: Found an estimated cost of 24 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = ashr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
ret <16 x i8> %shift
@@ -292,11 +463,25 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
; SSE41: Found an estimated cost of 48 for instruction: %shift
; AVX: Found an estimated cost of 48 for instruction: %shift
; AVX2: Found an estimated cost of 24 for instruction: %shift
+; AVX512: Found an estimated cost of 24 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = ashr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
ret <32 x i8> %shift
}
+define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v64i8':
+; SSE2: Found an estimated cost of 216 for instruction: %shift
+; SSE41: Found an estimated cost of 96 for instruction: %shift
+; AVX: Found an estimated cost of 96 for instruction: %shift
+; AVX2: Found an estimated cost of 48 for instruction: %shift
+; AVX512F: Found an estimated cost of 48 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %shift = ashr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+ ret <64 x i8> %shift
+}
+
;
; Uniform Constant Shifts
;
@@ -307,6 +492,7 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
; SSE41: Found an estimated cost of 4 for instruction: %shift
; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 4 for instruction: %shift
+; AVX512: Found an estimated cost of 4 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = ashr <2 x i64> %a, <i64 7, i64 7>
ret <2 x i64> %shift
@@ -318,17 +504,31 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
; SSE41: Found an estimated cost of 8 for instruction: %shift
; AVX: Found an estimated cost of 8 for instruction: %shift
; AVX2: Found an estimated cost of 4 for instruction: %shift
+; AVX512: Found an estimated cost of 4 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
ret <4 x i64> %shift
}
+define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i64':
+; SSE2: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 16 for instruction: %shift
+; AVX: Found an estimated cost of 16 for instruction: %shift
+; AVX2: Found an estimated cost of 8 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+ ret <8 x i64> %shift
+}
+
define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i32':
; SSE2: Found an estimated cost of 1 for instruction: %shift
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
@@ -341,18 +541,33 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 2 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
ret <8 x i32> %shift
}
+define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i32':
+; SSE2: Found an estimated cost of 4 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 8 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+ ret <16 x i32> %shift
+}
+
define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i16':
; SSE2: Found an estimated cost of 1 for instruction: %shift
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
ret <8 x i16> %shift
@@ -364,17 +579,32 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 2 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
+; AVX512: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
ret <16 x i16> %shift
}
+define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i16':
+; SSE2: Found an estimated cost of 4 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 20 for instruction: %shift
+; AVX512F: Found an estimated cost of 20 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+ ret <32 x i16> %shift
+}
+
define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i8':
; SSE2: Found an estimated cost of 4 for instruction: %shift
; SSE41: Found an estimated cost of 4 for instruction: %shift
; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 4 for instruction: %shift
+; AVX512: Found an estimated cost of 4 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <16 x i8> %shift
@@ -386,7 +616,21 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
; SSE41: Found an estimated cost of 8 for instruction: %shift
; AVX: Found an estimated cost of 8 for instruction: %shift
; AVX2: Found an estimated cost of 24 for instruction: %shift
+; AVX512: Found an estimated cost of 24 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <32 x i8> %shift
}
+
+define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v64i8':
+; SSE2: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 16 for instruction: %shift
+; AVX: Found an estimated cost of 16 for instruction: %shift
+; AVX2: Found an estimated cost of 48 for instruction: %shift
+; AVX512F: Found an estimated cost of 48 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+ ret <64 x i8> %shift
+}
diff --git a/test/Analysis/CostModel/X86/vshift-lshr-cost.ll b/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
index 6d028268ea55..b3382253739f 100644
--- a/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
+++ b/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
@@ -1,9 +1,12 @@
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2,-sse4.1 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=SSE41
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+xop,+avx -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=XOP --check-prefix=XOPAVX
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+xop,+avx2 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=XOP --check-prefix=XOPAVX2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512dq -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512bw -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
; Verify the cost of vector logical shift right instructions.
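; A minimal sketch of the pattern used throughout this file: one target
; configuration plus one shift function with a per-prefix cost check. The
; function name @lshr_sample is illustrative only; the RUN line and the
; expected cost of 1 mirror the AVX512 var_shift_v2i64 entry below.
;
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512bw -cost-model -analyze | FileCheck %s --check-prefix=AVX512
define <2 x i64> @lshr_sample(<2 x i64> %a, <2 x i64> %b) {
; AVX512: Found an estimated cost of 1 for instruction: %shift
  %shift = lshr <2 x i64> %a, %b
  ret <2 x i64> %shift
}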
@@ -17,6 +20,7 @@ define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE41: Found an estimated cost of 4 for instruction: %shift
; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = lshr <2 x i64> %a, %b
@@ -29,18 +33,33 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE41: Found an estimated cost of 8 for instruction: %shift
; AVX: Found an estimated cost of 8 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = lshr <4 x i64> %a, %b
ret <4 x i64> %shift
}
+define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i64':
+; SSE2: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 16 for instruction: %shift
+; AVX: Found an estimated cost of 16 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 8 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %shift = lshr <8 x i64> %a, %b
+ ret <8 x i64> %shift
+}
+
define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction: %shift
; SSE41: Found an estimated cost of 11 for instruction: %shift
; AVX: Found an estimated cost of 11 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = lshr <4 x i32> %a, %b
@@ -53,18 +72,33 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE41: Found an estimated cost of 22 for instruction: %shift
; AVX: Found an estimated cost of 22 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = lshr <8 x i32> %a, %b
ret <8 x i32> %shift
}
+define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i32':
+; SSE2: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 44 for instruction: %shift
+; AVX: Found an estimated cost of 44 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 8 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %shift = lshr <16 x i32> %a, %b
+ ret <16 x i32> %shift
+}
+
define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
; SSE41: Found an estimated cost of 14 for instruction: %shift
; AVX: Found an estimated cost of 14 for instruction: %shift
; AVX2: Found an estimated cost of 14 for instruction: %shift
+; AVX512: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = lshr <8 x i16> %a, %b
ret <8 x i16> %shift
@@ -76,17 +110,32 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE41: Found an estimated cost of 28 for instruction: %shift
; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
+; AVX512: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = lshr <16 x i16> %a, %b
ret <16 x i16> %shift
}
+define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i16':
+; SSE2: Found an estimated cost of 128 for instruction: %shift
+; SSE41: Found an estimated cost of 56 for instruction: %shift
+; AVX: Found an estimated cost of 56 for instruction: %shift
+; AVX2: Found an estimated cost of 20 for instruction: %shift
+; AVX512F: Found an estimated cost of 20 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %shift = lshr <32 x i16> %a, %b
+ ret <32 x i16> %shift
+}
+
define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
; SSE41: Found an estimated cost of 12 for instruction: %shift
; AVX: Found an estimated cost of 12 for instruction: %shift
; AVX2: Found an estimated cost of 12 for instruction: %shift
+; AVX512: Found an estimated cost of 12 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = lshr <16 x i8> %a, %b
ret <16 x i8> %shift
@@ -98,11 +147,25 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; SSE41: Found an estimated cost of 24 for instruction: %shift
; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
+; AVX512: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = lshr <32 x i8> %a, %b
ret <32 x i8> %shift
}
+define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v64i8':
+; SSE2: Found an estimated cost of 104 for instruction: %shift
+; SSE41: Found an estimated cost of 48 for instruction: %shift
+; AVX: Found an estimated cost of 48 for instruction: %shift
+; AVX2: Found an estimated cost of 22 for instruction: %shift
+; AVX512F: Found an estimated cost of 22 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %shift = lshr <64 x i8> %a, %b
+ ret <64 x i8> %shift
+}
+
;
; Uniform Variable Shifts
;
@@ -113,6 +176,7 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE41: Found an estimated cost of 4 for instruction: %shift
; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -126,6 +190,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE41: Found an estimated cost of 8 for instruction: %shift
; AVX: Found an estimated cost of 8 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
@@ -133,12 +198,27 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
ret <4 x i64> %shift
}
+define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i64':
+; SSE2: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 16 for instruction: %shift
+; AVX: Found an estimated cost of 16 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 8 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
+ %shift = lshr <8 x i64> %a, %splat
+ ret <8 x i64> %shift
+}
+
define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction: %shift
; SSE41: Found an estimated cost of 11 for instruction: %shift
; AVX: Found an estimated cost of 11 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
@@ -152,6 +232,7 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE41: Found an estimated cost of 22 for instruction: %shift
; AVX: Found an estimated cost of 22 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -159,12 +240,27 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
ret <8 x i32> %shift
}
+define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i32':
+; SSE2: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 44 for instruction: %shift
+; AVX: Found an estimated cost of 44 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 8 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
+ %shift = lshr <16 x i32> %a, %splat
+ ret <16 x i32> %shift
+}
+
define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
; SSE41: Found an estimated cost of 14 for instruction: %shift
; AVX: Found an estimated cost of 14 for instruction: %shift
; AVX2: Found an estimated cost of 14 for instruction: %shift
+; AVX512: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
%shift = lshr <8 x i16> %a, %splat
@@ -177,18 +273,34 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE41: Found an estimated cost of 28 for instruction: %shift
; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
+; AVX512: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
%shift = lshr <16 x i16> %a, %splat
ret <16 x i16> %shift
}
+define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i16':
+; SSE2: Found an estimated cost of 128 for instruction: %shift
+; SSE41: Found an estimated cost of 56 for instruction: %shift
+; AVX: Found an estimated cost of 56 for instruction: %shift
+; AVX2: Found an estimated cost of 20 for instruction: %shift
+; AVX512F: Found an estimated cost of 20 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
+ %shift = lshr <32 x i16> %a, %splat
+ ret <32 x i16> %shift
+}
+
define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
; SSE41: Found an estimated cost of 12 for instruction: %shift
; AVX: Found an estimated cost of 12 for instruction: %shift
; AVX2: Found an estimated cost of 12 for instruction: %shift
+; AVX512: Found an estimated cost of 12 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
%shift = lshr <16 x i8> %a, %splat
@@ -201,12 +313,27 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; SSE41: Found an estimated cost of 24 for instruction: %shift
; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
+; AVX512: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
%shift = lshr <32 x i8> %a, %splat
ret <32 x i8> %shift
}
+define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v64i8':
+; SSE2: Found an estimated cost of 104 for instruction: %shift
+; SSE41: Found an estimated cost of 48 for instruction: %shift
+; AVX: Found an estimated cost of 48 for instruction: %shift
+; AVX2: Found an estimated cost of 22 for instruction: %shift
+; AVX512F: Found an estimated cost of 22 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
+ %shift = lshr <64 x i8> %a, %splat
+ ret <64 x i8> %shift
+}
+
;
; Constant Shifts
;
@@ -217,6 +344,7 @@ define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) {
; SSE41: Found an estimated cost of 4 for instruction: %shift
; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = lshr <2 x i64> %a, <i64 1, i64 7>
@@ -229,18 +357,33 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
; SSE41: Found an estimated cost of 8 for instruction: %shift
; AVX: Found an estimated cost of 8 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = lshr <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
ret <4 x i64> %shift
}
+define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i64':
+; SSE2: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 16 for instruction: %shift
+; AVX: Found an estimated cost of 16 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 8 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %shift = lshr <8 x i64> %a, <i64 1, i64 7, i64 15, i64 31, i64 1, i64 7, i64 15, i64 31>
+ ret <8 x i64> %shift
+}
+
define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction: %shift
; SSE41: Found an estimated cost of 11 for instruction: %shift
; AVX: Found an estimated cost of 11 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = lshr <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
@@ -253,18 +396,33 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
; SSE41: Found an estimated cost of 22 for instruction: %shift
; AVX: Found an estimated cost of 22 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = lshr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
ret <8 x i32> %shift
}
+define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i32':
+; SSE2: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 44 for instruction: %shift
+; AVX: Found an estimated cost of 44 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 8 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %shift = lshr <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+ ret <16 x i32> %shift
+}
+
define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
; SSE41: Found an estimated cost of 14 for instruction: %shift
; AVX: Found an estimated cost of 14 for instruction: %shift
; AVX2: Found an estimated cost of 14 for instruction: %shift
+; AVX512: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
ret <8 x i16> %shift
@@ -276,17 +434,32 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
; SSE41: Found an estimated cost of 28 for instruction: %shift
; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
+; AVX512: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = lshr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
ret <16 x i16> %shift
}
+define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i16':
+; SSE2: Found an estimated cost of 128 for instruction: %shift
+; SSE41: Found an estimated cost of 56 for instruction: %shift
+; AVX: Found an estimated cost of 56 for instruction: %shift
+; AVX2: Found an estimated cost of 20 for instruction: %shift
+; AVX512F: Found an estimated cost of 20 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %shift = lshr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+ ret <32 x i16> %shift
+}
+
define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
; SSE41: Found an estimated cost of 12 for instruction: %shift
; AVX: Found an estimated cost of 12 for instruction: %shift
; AVX2: Found an estimated cost of 12 for instruction: %shift
+; AVX512: Found an estimated cost of 12 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
ret <16 x i8> %shift
@@ -298,11 +471,25 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
; SSE41: Found an estimated cost of 24 for instruction: %shift
; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
+; AVX512: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = lshr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
ret <32 x i8> %shift
}
+define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v64i8':
+; SSE2: Found an estimated cost of 104 for instruction: %shift
+; SSE41: Found an estimated cost of 48 for instruction: %shift
+; AVX: Found an estimated cost of 48 for instruction: %shift
+; AVX2: Found an estimated cost of 22 for instruction: %shift
+; AVX512F: Found an estimated cost of 22 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %shift = lshr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+ ret <64 x i8> %shift
+}
+
;
; Uniform Constant Shifts
;
@@ -313,6 +500,7 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = lshr <2 x i64> %a, <i64 7, i64 7>
@@ -325,18 +513,33 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 2 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
ret <4 x i64> %shift
}
+define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i64':
+; SSE2: Found an estimated cost of 4 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 8 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+ ret <8 x i64> %shift
+}
+
define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i32':
; SSE2: Found an estimated cost of 1 for instruction: %shift
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
@@ -349,18 +552,33 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 2 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
ret <8 x i32> %shift
}
+define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i32':
+; SSE2: Found an estimated cost of 4 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 8 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+ ret <16 x i32> %shift
+}
+
define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i16':
; SSE2: Found an estimated cost of 1 for instruction: %shift
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
ret <8 x i16> %shift
@@ -372,17 +590,32 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 2 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
+; AVX512: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
ret <16 x i16> %shift
}
+define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i16':
+; SSE2: Found an estimated cost of 4 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 20 for instruction: %shift
+; AVX512F: Found an estimated cost of 20 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+ ret <32 x i16> %shift
+}
+
define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i8':
; SSE2: Found an estimated cost of 1 for instruction: %shift
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <16 x i8> %shift
@@ -394,7 +627,21 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 2 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
+; AVX512: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <32 x i8> %shift
}
+
+define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v64i8':
+; SSE2: Found an estimated cost of 4 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 22 for instruction: %shift
+; AVX512F: Found an estimated cost of 22 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+ ret <64 x i8> %shift
+}
diff --git a/test/Analysis/CostModel/X86/vshift-shl-cost.ll b/test/Analysis/CostModel/X86/vshift-shl-cost.ll
index 60ba3adea42a..804c5a76c319 100644
--- a/test/Analysis/CostModel/X86/vshift-shl-cost.ll
+++ b/test/Analysis/CostModel/X86/vshift-shl-cost.ll
@@ -1,9 +1,12 @@
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2,-sse4.1 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=SSE41
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+xop,+avx -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=XOP --check-prefix=XOPAVX
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+xop,+avx2 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=XOP --check-prefix=XOPAVX2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512dq -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512bw -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
; Verify the cost of vector shift left instructions.
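; For targets without the matching 512-bit support, the new 512-bit cases
; below are costed as if legalized into narrower ops, so the expected value
; roughly scales with the split count; for example, the SSE2 cost of 24 for
; constant_shift_v16i32 is four times the v4i32 cost of 6. A reduced sketch
; of that case, with an illustrative function name and the same shift amounts:
;
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 -cost-model -analyze | FileCheck %s --check-prefix=SSE2
define <16 x i32> @shl_sample(<16 x i32> %a) {
; SSE2: Found an estimated cost of 24 for instruction: %shift
  %shift = shl <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i32> %shift
}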
@@ -18,6 +21,7 @@ define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE41: Found an estimated cost of 4 for instruction: %shift
; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 1 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = shl <2 x i64> %a, %b
@@ -30,18 +34,33 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE41: Found an estimated cost of 8 for instruction: %shift
; AVX: Found an estimated cost of 8 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = shl <4 x i64> %a, %b
ret <4 x i64> %shift
}
+define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i64':
+; SSE2: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 16 for instruction: %shift
+; AVX: Found an estimated cost of 16 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %shift = shl <8 x i64> %a, %b
+ ret <8 x i64> %shift
+}
+
define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i32':
; SSE2: Found an estimated cost of 10 for instruction: %shift
; SSE41: Found an estimated cost of 10 for instruction: %shift
; AVX: Found an estimated cost of 10 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 1 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = shl <4 x i32> %a, %b
@@ -54,18 +73,33 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE41: Found an estimated cost of 20 for instruction: %shift
; AVX: Found an estimated cost of 20 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = shl <8 x i32> %a, %b
ret <8 x i32> %shift
}
+define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i32':
+; SSE2: Found an estimated cost of 40 for instruction: %shift
+; SSE41: Found an estimated cost of 40 for instruction: %shift
+; AVX: Found an estimated cost of 40 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %shift = shl <16 x i32> %a, %b
+ ret <16 x i32> %shift
+}
+
define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
; SSE41: Found an estimated cost of 14 for instruction: %shift
; AVX: Found an estimated cost of 14 for instruction: %shift
; AVX2: Found an estimated cost of 14 for instruction: %shift
+; AVX512: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
%shift = shl <8 x i16> %a, %b
ret <8 x i16> %shift
@@ -77,17 +111,32 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE41: Found an estimated cost of 28 for instruction: %shift
; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
+; AVX512: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = shl <16 x i16> %a, %b
ret <16 x i16> %shift
}
+define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i16':
+; SSE2: Found an estimated cost of 128 for instruction: %shift
+; SSE41: Found an estimated cost of 56 for instruction: %shift
+; AVX: Found an estimated cost of 56 for instruction: %shift
+; AVX2: Found an estimated cost of 20 for instruction: %shift
+; AVX512F: Found an estimated cost of 20 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+ %shift = shl <32 x i16> %a, %b
+ ret <32 x i16> %shift
+}
+
define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
; SSE41: Found an estimated cost of 11 for instruction: %shift
; AVX: Found an estimated cost of 11 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
+; AVX512: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
%shift = shl <16 x i8> %a, %b
ret <16 x i8> %shift
@@ -99,11 +148,25 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; SSE41: Found an estimated cost of 22 for instruction: %shift
; AVX: Found an estimated cost of 22 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
+; AVX512: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = shl <32 x i8> %a, %b
ret <32 x i8> %shift
}
+define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v64i8':
+; SSE2: Found an estimated cost of 104 for instruction: %shift
+; SSE41: Found an estimated cost of 44 for instruction: %shift
+; AVX: Found an estimated cost of 44 for instruction: %shift
+; AVX2: Found an estimated cost of 22 for instruction: %shift
+; AVX512F: Found an estimated cost of 22 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+ %shift = shl <64 x i8> %a, %b
+ ret <64 x i8> %shift
+}
+
;
; Uniform Variable Shifts
;
@@ -114,6 +177,7 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE41: Found an estimated cost of 4 for instruction: %shift
; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 1 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -127,6 +191,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE41: Found an estimated cost of 8 for instruction: %shift
; AVX: Found an estimated cost of 8 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
@@ -134,12 +199,27 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
ret <4 x i64> %shift
}
+define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i64':
+; SSE2: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 16 for instruction: %shift
+; AVX: Found an estimated cost of 16 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
+ %shift = shl <8 x i64> %a, %splat
+ ret <8 x i64> %shift
+}
+
define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i32':
; SSE2: Found an estimated cost of 10 for instruction: %shift
; SSE41: Found an estimated cost of 10 for instruction: %shift
; AVX: Found an estimated cost of 10 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 1 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
@@ -153,6 +233,7 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE41: Found an estimated cost of 20 for instruction: %shift
; AVX: Found an estimated cost of 20 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -160,12 +241,27 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
ret <8 x i32> %shift
}
+define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i32':
+; SSE2: Found an estimated cost of 40 for instruction: %shift
+; SSE41: Found an estimated cost of 40 for instruction: %shift
+; AVX: Found an estimated cost of 40 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
+ %shift = shl <16 x i32> %a, %splat
+ ret <16 x i32> %shift
+}
+
define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
; SSE41: Found an estimated cost of 14 for instruction: %shift
; AVX: Found an estimated cost of 14 for instruction: %shift
; AVX2: Found an estimated cost of 14 for instruction: %shift
+; AVX512: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
%splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
%shift = shl <8 x i16> %a, %splat
@@ -178,18 +274,34 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE41: Found an estimated cost of 28 for instruction: %shift
; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
+; AVX512: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
%shift = shl <16 x i16> %a, %splat
ret <16 x i16> %shift
}
+define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i16':
+; SSE2: Found an estimated cost of 128 for instruction: %shift
+; SSE41: Found an estimated cost of 56 for instruction: %shift
+; AVX: Found an estimated cost of 56 for instruction: %shift
+; AVX2: Found an estimated cost of 20 for instruction: %shift
+; AVX512F: Found an estimated cost of 20 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+ %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
+ %shift = shl <32 x i16> %a, %splat
+ ret <32 x i16> %shift
+}
+
define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
; SSE41: Found an estimated cost of 11 for instruction: %shift
; AVX: Found an estimated cost of 11 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
+; AVX512: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
%splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
%shift = shl <16 x i8> %a, %splat
@@ -202,12 +314,27 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; SSE41: Found an estimated cost of 22 for instruction: %shift
; AVX: Found an estimated cost of 22 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
+; AVX512: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
%shift = shl <32 x i8> %a, %splat
ret <32 x i8> %shift
}
+define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v64i8':
+; SSE2: Found an estimated cost of 104 for instruction: %shift
+; SSE41: Found an estimated cost of 44 for instruction: %shift
+; AVX: Found an estimated cost of 44 for instruction: %shift
+; AVX2: Found an estimated cost of 22 for instruction: %shift
+; AVX512F: Found an estimated cost of 22 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+ %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
+ %shift = shl <64 x i8> %a, %splat
+ ret <64 x i8> %shift
+}
+
;
; Constant Shifts
;
@@ -218,6 +345,7 @@ define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) {
; SSE41: Found an estimated cost of 4 for instruction: %shift
; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 1 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = shl <2 x i64> %a, <i64 1, i64 7>
@@ -230,18 +358,33 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
; SSE41: Found an estimated cost of 8 for instruction: %shift
; AVX: Found an estimated cost of 8 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = shl <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
ret <4 x i64> %shift
}
+define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i64':
+; SSE2: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 16 for instruction: %shift
+; AVX: Found an estimated cost of 16 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %shift = shl <8 x i64> %a, <i64 1, i64 7, i64 15, i64 31, i64 1, i64 7, i64 15, i64 31>
+ ret <8 x i64> %shift
+}
+
define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i32':
; SSE2: Found an estimated cost of 6 for instruction: %shift
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 1 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = shl <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
@@ -254,18 +397,33 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = shl <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
ret <8 x i32> %shift
}
+define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i32':
+; SSE2: Found an estimated cost of 24 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 8 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %shift = shl <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+ ret <16 x i32> %shift
+}
+
define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i16':
; SSE2: Found an estimated cost of 1 for instruction: %shift
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
%shift = shl <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
ret <8 x i16> %shift
@@ -277,18 +435,34 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = shl <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
ret <16 x i16> %shift
}
+define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i16':
+; SSE2: Found an estimated cost of 4 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 8 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512F: Found an estimated cost of 2 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %shift = shl <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+ ret <32 x i16> %shift
+}
+
define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
; SSE41: Found an estimated cost of 11 for instruction: %shift
; AVX: Found an estimated cost of 11 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
+; AVX512: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
%shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
ret <16 x i8> %shift
@@ -300,11 +474,25 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
; SSE41: Found an estimated cost of 22 for instruction: %shift
; AVX: Found an estimated cost of 22 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
+; AVX512: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
ret <32 x i8> %shift
}
+define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v64i8':
+; SSE2: Found an estimated cost of 104 for instruction: %shift
+; SSE41: Found an estimated cost of 44 for instruction: %shift
+; AVX: Found an estimated cost of 44 for instruction: %shift
+; AVX2: Found an estimated cost of 22 for instruction: %shift
+; AVX512F: Found an estimated cost of 22 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+ %shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+ ret <64 x i8> %shift
+}
+
;
; Uniform Constant Shifts
;
@@ -315,6 +503,7 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 1 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = shl <2 x i64> %a, <i64 7, i64 7>
@@ -327,18 +516,33 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 2 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
ret <4 x i64> %shift
}
+define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i64':
+; SSE2: Found an estimated cost of 4 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+ ret <8 x i64> %shift
+}
+
define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i32':
; SSE2: Found an estimated cost of 1 for instruction: %shift
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 1 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
@@ -351,18 +555,33 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 2 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
ret <8 x i32> %shift
}
+define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i32':
+; SSE2: Found an estimated cost of 4 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+ ret <16 x i32> %shift
+}
+
define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i16':
; SSE2: Found an estimated cost of 1 for instruction: %shift
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
%shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
ret <8 x i16> %shift
@@ -374,18 +593,34 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 2 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
ret <16 x i16> %shift
}
+define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i16':
+; SSE2: Found an estimated cost of 4 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512F: Found an estimated cost of 2 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+ ret <32 x i16> %shift
+}
+
define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i8':
; SSE2: Found an estimated cost of 1 for instruction: %shift
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
%shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <16 x i8> %shift
@@ -397,11 +632,25 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 2 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
+; AVX512: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <32 x i8> %shift
}
+define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v64i8':
+; SSE2: Found an estimated cost of 4 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 22 for instruction: %shift
+; AVX512F: Found an estimated cost of 22 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+ %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+ ret <64 x i8> %shift
+}
+
;
; Special Cases
;
diff --git a/test/Bitcode/summary_version.ll b/test/Bitcode/summary_version.ll
index dfb9e9b15e7b..81025a221bb1 100644
--- a/test/Bitcode/summary_version.ll
+++ b/test/Bitcode/summary_version.ll
@@ -2,7 +2,7 @@
; RUN: opt -module-summary %s -o - | llvm-bcanalyzer -dump | FileCheck %s
; CHECK: <GLOBALVAL_SUMMARY_BLOCK
-; CHECK: <VERSION op0=2/>
+; CHECK: <VERSION op0=3/>
diff --git a/test/Bitcode/thinlto-function-summary.ll b/test/Bitcode/thinlto-function-summary.ll
index e42c55c1c2eb..594aaab566d1 100644
--- a/test/Bitcode/thinlto-function-summary.ll
+++ b/test/Bitcode/thinlto-function-summary.ll
@@ -10,7 +10,7 @@
; BC-NEXT: <PERMODULE {{.*}} op0=1 op1=0
; BC-NEXT: <PERMODULE {{.*}} op0=2 op1=0
; BC-NEXT: <PERMODULE {{.*}} op0=3 op1=7
-; BC-NEXT: <PERMODULE {{.*}} op0=4 op1=32
+; BC-NEXT: <PERMODULE {{.*}} op0=4 op1=16
; BC-NEXT: <ALIAS {{.*}} op0=5 op1=0 op2=3
; BC-NEXT: </GLOBALVAL_SUMMARY_BLOCK
; BC-NEXT: <VALUE_SYMTAB
diff --git a/test/Bitcode/thinlto-summary-section.ll b/test/Bitcode/thinlto-summary-section.ll
index d120622db819..3d67279617ec 100644
--- a/test/Bitcode/thinlto-summary-section.ll
+++ b/test/Bitcode/thinlto-summary-section.ll
@@ -4,8 +4,10 @@
; RUN: llvm-lto -thinlto -o %t2 %t.o
; RUN: llvm-bcanalyzer -dump %t2.thinlto.bc | FileCheck %s --check-prefix=COMBINED
-; CHECK: <PERMODULE {{.*}} op1=16
-; COMBINED-DAG: <COMBINED {{.*}} op2=16
-define void @functionWithSection() section "some_section" {
+; Flags should be 0x17 (23) for local linkage (0x7) and not being importable
+; (0x10) due to local linkage plus having a section.
+; CHECK: <PERMODULE {{.*}} op1=23
+; COMBINED-DAG: <COMBINED {{.*}} op2=23
+define internal void @functionWithSection() section "some_section" {
ret void
}
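
The op1=23 value checked above is consistent with a packing where the raw linkage code occupies the low four bits and the not-eligible-to-import flag is bit 4 (the 0x10 named in the test comment). A minimal, self-contained C++ sketch under that assumption; the pack() helper and exact bit layout are illustrative, not the actual bitcode writer API:

    // Sketch: how a per-module summary flag word of 23 (0x17) can arise.
    // Assumes the linkage code sits in the low 4 bits and "not eligible to
    // import" is bit 4; pack() is a hypothetical helper, not an LLVM function.
    #include <cassert>
    #include <cstdint>

    static uint64_t pack(uint64_t RawLinkage, bool NotEligibleToImport) {
      return (uint64_t(NotEligibleToImport) << 4) | (RawLinkage & 0xF);
    }

    int main() {
      // Internal (local) linkage has raw value 7; having a section makes the
      // function non-importable, so bit 0x10 is set: 0x10 | 0x7 == 0x17 == 23.
      assert(pack(/*RawLinkage=*/7, /*NotEligibleToImport=*/true) == 23);
      return 0;
    }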
diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-instructionselect.mir b/test/CodeGen/AArch64/GlobalISel/arm64-instructionselect.mir
index 22210e49bd77..ece5a858b49c 100644
--- a/test/CodeGen/AArch64/GlobalISel/arm64-instructionselect.mir
+++ b/test/CodeGen/AArch64/GlobalISel/arm64-instructionselect.mir
@@ -2836,13 +2836,13 @@ registers:
# CHECK: body:
# CHECK: %wzr = SUBSWrr %0, %0, implicit-def %nzcv
-# CHECK: %1 = CSINCWr %wzr, %wzr, 0, implicit %nzcv
+# CHECK: %1 = CSINCWr %wzr, %wzr, 1, implicit %nzcv
# CHECK: %xzr = SUBSXrr %2, %2, implicit-def %nzcv
-# CHECK: %3 = CSINCWr %wzr, %wzr, 2, implicit %nzcv
+# CHECK: %3 = CSINCWr %wzr, %wzr, 3, implicit %nzcv
# CHECK: %xzr = SUBSXrr %4, %4, implicit-def %nzcv
-# CHECK: %5 = CSINCWr %wzr, %wzr, 1, implicit %nzcv
+# CHECK: %5 = CSINCWr %wzr, %wzr, 0, implicit %nzcv
body: |
bb.0:
diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
index e023e32bb7b1..15b4012f383d 100644
--- a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
+++ b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
@@ -52,10 +52,10 @@ define void @allocai64() {
; CHECK: body:
;
; ABI/constant lowering and IR-level entry basic block.
-; CHECK: {{bb.[0-9]+}}:
+; CHECK: {{bb.[0-9]+}} (%ir-block.{{[0-9]+}}):
;
; Make sure we have one successor and only one.
-; CHECK-NEXT: successors: %[[END:bb.[0-9]+]](0x80000000)
+; CHECK-NEXT: successors: %[[END:bb.[0-9]+.end]](0x80000000)
;
; Check that we emit the correct branch.
; CHECK: G_BR %[[END]]
@@ -74,10 +74,10 @@ end:
; CHECK: body:
;
; ABI/constant lowering and IR-level entry basic block.
-; CHECK: {{bb.[0-9]+}}:
+; CHECK: {{bb.[0-9]+}} (%ir-block.{{[0-9]+}}):
; Make sure we have two successors
-; CHECK-NEXT: successors: %[[TRUE:bb.[0-9]+]](0x40000000),
-; CHECK: %[[FALSE:bb.[0-9]+]](0x40000000)
+; CHECK-NEXT: successors: %[[TRUE:bb.[0-9]+.true]](0x40000000),
+; CHECK: %[[FALSE:bb.[0-9]+.false]](0x40000000)
;
; CHECK: [[ADDR:%.*]](p0) = COPY %x0
;
@@ -100,6 +100,74 @@ false:
ret void
}
+; Tests for switch.
+; This gets lowered to a very straightforward sequence of comparisons for now.
+; CHECK-LABEL: name: switch
+; CHECK: body:
+;
+; CHECK: {{bb.[0-9]+.entry}}:
+; CHECK-NEXT: successors: %[[BB_CASE100:bb.[0-9]+.case100]](0x40000000), %[[BB_NOTCASE100_CHECKNEXT:bb.[0-9]+.entry]](0x40000000)
+; CHECK: %0(s32) = COPY %w0
+; CHECK: %[[reg100:[0-9]+]](s32) = G_CONSTANT i32 100
+; CHECK: %[[reg200:[0-9]+]](s32) = G_CONSTANT i32 200
+; CHECK: %[[reg0:[0-9]+]](s32) = G_CONSTANT i32 0
+; CHECK: %[[reg1:[0-9]+]](s32) = G_CONSTANT i32 1
+; CHECK: %[[reg2:[0-9]+]](s32) = G_CONSTANT i32 2
+; CHECK: %[[regicmp100:[0-9]+]](s1) = G_ICMP intpred(eq), %[[reg100]](s32), %0
+; CHECK: G_BRCOND %[[regicmp100]](s1), %[[BB_CASE100]]
+; CHECK: G_BR %[[BB_NOTCASE100_CHECKNEXT]]
+;
+; CHECK: [[BB_CASE100]]:
+; CHECK-NEXT: successors: %[[BB_RET:bb.[0-9]+.return]](0x80000000)
+; CHECK: %[[regretc100:[0-9]+]](s32) = G_ADD %0, %[[reg1]]
+; CHECK: G_BR %[[BB_RET]]
+; CHECK: [[BB_NOTCASE100_CHECKNEXT]]:
+; CHECK-NEXT: successors: %[[BB_CASE200:bb.[0-9]+.case200]](0x40000000), %[[BB_NOTCASE200_CHECKNEXT:bb.[0-9]+.entry]](0x40000000)
+; CHECK: %[[regicmp200:[0-9]+]](s1) = G_ICMP intpred(eq), %[[reg200]](s32), %0
+; CHECK: G_BRCOND %[[regicmp200]](s1), %[[BB_CASE200]]
+; CHECK: G_BR %[[BB_NOTCASE200_CHECKNEXT]]
+;
+; CHECK: [[BB_CASE200]]:
+; CHECK-NEXT: successors: %[[BB_RET:bb.[0-9]+.return]](0x80000000)
+; CHECK: %[[regretc200:[0-9]+]](s32) = G_ADD %0, %[[reg2]]
+; CHECK: G_BR %[[BB_RET]]
+; CHECK: [[BB_NOTCASE200_CHECKNEXT]]:
+; CHECK-NEXT: successors: %[[BB_DEFAULT:bb.[0-9]+.default]](0x80000000)
+; CHECK: G_BR %[[BB_DEFAULT]]
+;
+; CHECK: [[BB_DEFAULT]]:
+; CHECK-NEXT: successors: %[[BB_RET]](0x80000000)
+; CHECK: %[[regretdefault:[0-9]+]](s32) = G_ADD %0, %[[reg0]]
+; CHECK: G_BR %[[BB_RET]]
+;
+; CHECK: [[BB_RET]]:
+; CHECK-NEXT: %[[regret:[0-9]+]](s32) = PHI %[[regretdefault]](s32), %[[BB_DEFAULT]], %[[regretc100]](s32), %[[BB_CASE100]]
+; CHECK: %w0 = COPY %[[regret]](s32)
+; CHECK: RET_ReallyLR implicit %w0
+define i32 @switch(i32 %argc) {
+entry:
+ switch i32 %argc, label %default [
+ i32 100, label %case100
+ i32 200, label %case200
+ ]
+
+default:
+ %tmp0 = add i32 %argc, 0
+ br label %return
+
+case100:
+ %tmp1 = add i32 %argc, 1
+ br label %return
+
+case200:
+ %tmp2 = add i32 %argc, 2
+ br label %return
+
+return:
+ %res = phi i32 [ %tmp0, %default ], [ %tmp1, %case100 ], [ %tmp2, %case200 ]
+ ret i32 %res
+}
+
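+; For readers decoding the CHECK lines above: the lowering being verified is
+; equivalent to a chain of equality compares ending in a join block. This is
+; an illustrative C-level sketch only, not part of the test and not the
+; translator's code:
+;   int lowered(int argc) {
+;     int res;
+;     if (argc == 100)      res = argc + 1;  // case100
+;     else if (argc == 200) res = argc + 2;  // case200
+;     else                  res = argc + 0;  // default
+;     return res;                            // return block: PHI of the three values
+;   }
+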
; Tests for or.
; CHECK-LABEL: name: ori64
; CHECK: [[ARG1:%[0-9]+]](s64) = COPY %x0
@@ -223,11 +291,11 @@ define i64* @trivial_bitcast(i8* %a) {
; CHECK-LABEL: name: trivial_bitcast_with_copy
; CHECK: [[A:%[0-9]+]](p0) = COPY %x0
-; CHECK: G_BR %[[CAST:bb\.[0-9]+]]
+; CHECK: G_BR %[[CAST:bb\.[0-9]+.cast]]
; CHECK: [[CAST]]:
; CHECK: {{%[0-9]+}}(p0) = COPY [[A]]
-; CHECK: G_BR %[[END:bb\.[0-9]+]]
+; CHECK: G_BR %[[END:bb\.[0-9]+.end]]
; CHECK: [[END]]:
define i64* @trivial_bitcast_with_copy(i8* %a) {
@@ -324,8 +392,8 @@ define void @intrinsics(i32 %cur, i32 %bits) {
}
; CHECK-LABEL: name: test_phi
-; CHECK: G_BRCOND {{%.*}}, %[[TRUE:bb\.[0-9]+]]
-; CHECK: G_BR %[[FALSE:bb\.[0-9]+]]
+; CHECK: G_BRCOND {{%.*}}, %[[TRUE:bb\.[0-9]+.true]]
+; CHECK: G_BR %[[FALSE:bb\.[0-9]+.false]]
; CHECK: [[TRUE]]:
; CHECK: [[RES1:%[0-9]+]](s32) = G_LOAD
@@ -933,7 +1001,7 @@ define void @test_large_const(i128* %addr) {
; correct.
define i8* @test_const_placement() {
; CHECK-LABEL: name: test_const_placement
-; CHECK: bb.{{[0-9]+}}:
+; CHECK: bb.{{[0-9]+}} (%ir-block.{{[0-9]+}}):
; CHECK: [[VAL_INT:%[0-9]+]](s32) = G_CONSTANT i32 42
; CHECK: [[VAL:%[0-9]+]](p0) = G_INTTOPTR [[VAL_INT]](s32)
; CHECK: G_BR
diff --git a/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll b/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll
index 9051b2388fce..718364af2aca 100644
--- a/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll
+++ b/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll
@@ -8,8 +8,8 @@ declare i32 @llvm.eh.typeid.for(i8*)
; CHECK: name: bar
; CHECK: body:
-; CHECK-NEXT: bb.1:
-; CHECK: successors: %[[GOOD:bb.[0-9]+]]{{.*}}%[[BAD:bb.[0-9]+]]
+; CHECK-NEXT: bb.1 (%ir-block.0):
+; CHECK: successors: %[[GOOD:bb.[0-9]+.continue]]{{.*}}%[[BAD:bb.[0-9]+.broken]]
; CHECK: EH_LABEL
; CHECK: %w0 = COPY
; CHECK: BL @foo, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %w0, implicit-def %w0
diff --git a/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll b/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll
index 4a3696501fd8..727c189721fa 100644
--- a/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll
+++ b/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm64-apple-ios -O3 -aarch64-enable-collect-loh -aarch64-collect-loh-bb-only=true -aarch64-collect-loh-pre-collect-register=false < %s -o - | FileCheck %s
+; RUN: llc -o - %s -mtriple=arm64-apple-ios -O3 -aarch64-enable-collect-loh | FileCheck %s
; Check that the LOH analysis does not crash when the analysed chain
; contains instructions that are filtered out.
;
diff --git a/test/CodeGen/AArch64/arm64-collect-loh-str.ll b/test/CodeGen/AArch64/arm64-collect-loh-str.ll
index e3df4182ddca..773286ef1d72 100644
--- a/test/CodeGen/AArch64/arm64-collect-loh-str.ll
+++ b/test/CodeGen/AArch64/arm64-collect-loh-str.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm64-apple-ios -O2 -aarch64-enable-collect-loh -aarch64-collect-loh-bb-only=false < %s -o - | FileCheck %s
+; RUN: llc -o - %s -mtriple=arm64-apple-ios -O2 | FileCheck %s
; Test case for <rdar://problem/15942912>.
; AdrpAddStr cannot be used when the store uses same
; register as address and value. Indeed, the related
diff --git a/test/CodeGen/AArch64/arm64-collect-loh.ll b/test/CodeGen/AArch64/arm64-collect-loh.ll
index b697b6eced3d..c7ba989d933e 100644
--- a/test/CodeGen/AArch64/arm64-collect-loh.ll
+++ b/test/CodeGen/AArch64/arm64-collect-loh.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=arm64-apple-ios -O2 -aarch64-enable-collect-loh -aarch64-collect-loh-bb-only=false < %s -o - | FileCheck %s
-; RUN: llc -mtriple=arm64-linux-gnu -O2 -aarch64-enable-collect-loh -aarch64-collect-loh-bb-only=false < %s -o - | FileCheck %s --check-prefix=CHECK-ELF
+; RUN: llc -o - %s -mtriple=arm64-apple-ios -O2 | FileCheck %s
+; RUN: llc -o - %s -mtriple=arm64-linux-gnu -O2 | FileCheck %s --check-prefix=CHECK-ELF
; CHECK-ELF-NOT: .loh
; CHECK-ELF-NOT: AdrpAdrp
@@ -633,11 +633,14 @@ define void @setL(<1 x i8> %t) {
; a tuple register to appear in the lowering. Thus, the target
; cpu is required to have the problem reproduced.
; CHECK-LABEL: _uninterestingSub
+; CHECK: [[LOH_LABEL0:Lloh[0-9]+]]:
; CHECK: adrp [[ADRP_REG:x[0-9]+]], [[CONSTPOOL:lCPI[0-9]+_[0-9]+]]@PAGE
-; CHECK-NEXT: ldr q[[IDX:[0-9]+]], {{\[}}[[ADRP_REG]], [[CONSTPOOL]]@PAGEOFF]
+; CHECK: [[LOH_LABEL1:Lloh[0-9]+]]:
+; CHECK: ldr q[[IDX:[0-9]+]], {{\[}}[[ADRP_REG]], [[CONSTPOOL]]@PAGEOFF]
; The tuple comes from the next instruction.
; CHECK-NEXT: tbl.16b v{{[0-9]+}}, { v{{[0-9]+}}, v{{[0-9]+}} }, v[[IDX]]
; CHECK: ret
+; CHECK: .loh AdrpLdr [[LOH_LABEL0]], [[LOH_LABEL1]]
define void @uninterestingSub(i8* nocapture %row) #0 {
%tmp = bitcast i8* %row to <16 x i8>*
%tmp1 = load <16 x i8>, <16 x i8>* %tmp, align 16
@@ -664,10 +667,10 @@ entry:
if.then.i:
ret void
if.end.i:
-; CHECK: .loh AdrpAdrp Lloh91, Lloh93
-; CHECK: .loh AdrpLdr Lloh91, Lloh92
-; CHECK: .loh AdrpLdrGot Lloh93, Lloh95
-; CHECK: .loh AdrpLdrGot Lloh94, Lloh96
+; CHECK: .loh AdrpLdrGot
+; CHECK: .loh AdrpLdrGot
+; CHECK: .loh AdrpAdrp
+; CHECK: .loh AdrpLdr
%mul.i.i.i = fmul double undef, 1.000000e-06
%add.i.i.i = fadd double undef, %mul.i.i.i
%sub.i.i = fsub double %add.i.i.i, undef
diff --git a/test/CodeGen/AArch64/loh.mir b/test/CodeGen/AArch64/loh.mir
new file mode 100644
index 000000000000..1d08ebdc5790
--- /dev/null
+++ b/test/CodeGen/AArch64/loh.mir
@@ -0,0 +1,193 @@
+# RUN: llc -o /dev/null %s -mtriple=aarch64-apple-ios -run-pass=aarch64-collect-loh -debug-only=aarch64-collect-loh 2>&1 | FileCheck %s
+# REQUIRES: asserts
+--- |
+ define void @func0() { ret void }
+
+ declare void @extfunc()
+
+ @g0 = external global i32
+ @g1 = external global i32
+ @g2 = external global i32
+ @g3 = external global i32
+ @g4 = external global i32
+ @g5 = external global i32
+...
+---
+# Check various LOH variants. Remember that the algorithm walks the basic
+# blocks backwards.
+# CHECK-LABEL: ********** AArch64 Collect LOH **********
+# CHECK-LABEL: Looking in function func0
+name: func0
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK: Adding MCLOH_AdrpAdrp:
+ ; CHECK-NEXT: %X1<def> = ADRP <ga:@g3>
+ ; CHECK-NEXT: %X1<def> = ADRP <ga:@g4>
+ ; CHECK-NEXT: Adding MCLOH_AdrpAdrp:
+ ; CHECK-NEXT: %X1<def> = ADRP <ga:@g2>
+ ; CHECK-NEXT: %X1<def> = ADRP <ga:@g3>
+ ; CHECK-NEXT: Adding MCLOH_AdrpAdrp:
+ ; CHECK-NEXT: %X0<def> = ADRP <ga:@g0>
+ ; CHECK-NEXT: %X0<def> = ADRP <ga:@g1>
+ %x0 = ADRP target-flags(aarch64-page) @g0
+ %x0 = ADRP target-flags(aarch64-page) @g1
+ %x1 = ADRP target-flags(aarch64-page) @g2
+ %x1 = ADRP target-flags(aarch64-page) @g3
+ %x1 = ADRP target-flags(aarch64-page) @g4
+
+ bb.1:
+ ; CHECK-NEXT: Adding MCLOH_AdrpAdd:
+ ; CHECK-NEXT: %X20<def> = ADRP <ga:@g0>
+ ; CHECK-NEXT: %X3<def> = ADDXri %X20, <ga:@g0>
+ ; CHECK-NEXT: Adding MCLOH_AdrpAdd:
+ ; CHECK-NEXT: %X1<def> = ADRP <ga:@g0>
+ ; CHECK-NEXT: %X1<def> = ADDXri %X1, <ga:@g0>
+ %x1 = ADRP target-flags(aarch64-page) @g0
+ %x9 = SUBXri undef %x11, 5, 0 ; should not affect MCLOH formation
+ %x1 = ADDXri %x1, target-flags(aarch64-pageoff) @g0, 0
+ %x20 = ADRP target-flags(aarch64-page) @g0
+ BL @extfunc, csr_aarch64_aapcs ; should not clobber X20
+ %x3 = ADDXri %x20, target-flags(aarch64-pageoff) @g0, 0
+
+ bb.2:
+ ; CHECK-NOT: MCLOH_AdrpAdd
+ %x9 = ADRP target-flags(aarch64-page) @g0
+ BL @extfunc, csr_aarch64_aapcs ; clobbers x9
+ ; Verification requires the use of 'undef' in front of the clobbered %x9
+ %x9 = ADDXri undef %x9, target-flags(aarch64-pageoff) @g0, 0
+
+ bb.3:
+ ; CHECK-NOT: MCLOH_AdrpAdd
+ %x10 = ADRP target-flags(aarch64-page) @g0
+ HINT 0, implicit def %x10 ; clobbers x10
+ %x10 = ADDXri %x10, target-flags(aarch64-pageoff) @g0, 0
+
+ bb.4:
+ ; Cannot produce a LOH for multiple users
+ ; CHECK-NOT: MCLOH_AdrpAdd
+ %x10 = ADRP target-flags(aarch64-page) @g0
+ HINT 0, implicit def %x10 ; clobbers x10
+ %x11 = ADDXri %x10, target-flags(aarch64-pageoff) @g0, 0
+ %x12 = ADDXri %x10, target-flags(aarch64-pageoff) @g0, 0
+
+ bb.5:
+ ; CHECK-NEXT: Adding MCLOH_AdrpLdr:
+ ; CHECK-NEXT: %X5<def> = ADRP <ga:@g2>
+ ; CHECK-NEXT: %S6<def> = LDRSui %X5, <ga:@g2>
+ ; CHECK-NEXT: Adding MCLOH_AdrpLdr:
+ ; CHECK-NEXT: %X4<def> = ADRP <ga:@g2>
+ ; CHECK-NEXT: %X4<def> = LDRXui %X4, <ga:@g2>
+ %x4 = ADRP target-flags(aarch64-page) @g2
+ %x4 = LDRXui %x4, target-flags(aarch64-pageoff) @g2
+ %x5 = ADRP target-flags(aarch64-page) @g2
+ %s6 = LDRSui %x5, target-flags(aarch64-pageoff) @g2
+
+ bb.6:
+ ; CHECK-NEXT: Adding MCLOH_AdrpLdrGot:
+ ; CHECK-NEXT: %X5<def> = ADRP <ga:@g2>
+ ; CHECK-NEXT: %X6<def> = LDRXui %X5, <ga:@g2>
+ ; CHECK-NEXT: Adding MCLOH_AdrpLdrGot:
+ ; CHECK-NEXT: %X4<def> = ADRP <ga:@g2>
+ ; CHECK-NEXT: %X4<def> = LDRXui %X4, <ga:@g2>
+ %x4 = ADRP target-flags(aarch64-page, aarch64-got) @g2
+ %x4 = LDRXui %x4, target-flags(aarch64-pageoff, aarch64-got) @g2
+ %x5 = ADRP target-flags(aarch64-page, aarch64-got) @g2
+ %x6 = LDRXui %x5, target-flags(aarch64-pageoff, aarch64-got) @g2
+
+ bb.7:
+ ; CHECK-NOT: Adding MCLOH_AdrpLdrGot:
+ ; Loading a float value from a GOT table makes no sense so this should not
+ ; produce an LOH.
+ %x11 = ADRP target-flags(aarch64-page, aarch64-got) @g5
+ %s11 = LDRSui %x11, target-flags(aarch64-pageoff, aarch64-got) @g5
+
+ bb.8:
+ ; CHECK-NEXT: Adding MCLOH_AdrpAddLdr:
+ ; CHECK-NEXT: %X7<def> = ADRP <ga:@g3>[TF=1]
+ ; CHECK-NEXT: %X8<def> = ADDXri %X7, <ga:@g3>
+ ; CHECK-NEXT: %D1<def> = LDRDui %X8, 8
+ %x7 = ADRP target-flags(aarch64-page) @g3
+ %x8 = ADDXri %x7, target-flags(aarch64-pageoff) @g3, 0
+ %d1 = LDRDui %x8, 8
+
+ bb.9:
+ ; CHECK-NEXT: Adding MCLOH_AdrpAdd:
+ ; CHECK-NEXT: %X3<def> = ADRP <ga:@g3>
+ ; CHECK-NEXT: %X3<def> = ADDXri %X3, <ga:@g3>
+ ; CHECK-NEXT: Adding MCLOH_AdrpAdd:
+ ; CHECK-NEXT: %X5<def> = ADRP <ga:@g3>
+ ; CHECK-NEXT: %X2<def> = ADDXri %X5, <ga:@g3>
+ ; CHECK-NEXT: Adding MCLOH_AdrpAddStr:
+ ; CHECK-NEXT: %X1<def> = ADRP <ga:@g3>
+ ; CHECK-NEXT: %X1<def> = ADDXri %X1, <ga:@g3>
+ ; CHECK-NEXT: STRXui %XZR, %X1, 16
+ %x1 = ADRP target-flags(aarch64-page) @g3
+ %x1 = ADDXri %x1, target-flags(aarch64-pageoff) @g3, 0
+ STRXui %xzr, %x1, 16
+
+ ; This sequence should just produce an AdrpAdd (not AdrpAddStr)
+ %x5 = ADRP target-flags(aarch64-page) @g3
+ %x2 = ADDXri %x5, target-flags(aarch64-pageoff) @g3, 0
+ STRXui %x2, undef %x11, 16
+
+ ; This sequence should just produce an AdrpAdd (not AdrpAddStr)
+ %x3 = ADRP target-flags(aarch64-page) @g3
+ %x3 = ADDXri %x3, target-flags(aarch64-pageoff) @g3, 0
+ STRXui %x3, %x3, 16
+
+ bb.10:
+ ; CHECK-NEXT: Adding MCLOH_AdrpLdr:
+ ; CHECK-NEXT: %X2<def> = ADRP <ga:@g3>
+ ; CHECK-NEXT: %X2<def> = LDRXui %X2, <ga:@g3>
+ ; CHECK-NEXT: Adding MCLOH_AdrpLdrGotLdr:
+ ; CHECK-NEXT: %X1<def> = ADRP <ga:@g4>
+ ; CHECK-NEXT: %X1<def> = LDRXui %X1, <ga:@g4>
+ ; CHECK-NEXT: %X1<def> = LDRXui %X1, 24
+ %x1 = ADRP target-flags(aarch64-page, aarch64-got) @g4
+ %x1 = LDRXui %x1, target-flags(aarch64-pageoff, aarch64-got) @g4
+ %x1 = LDRXui %x1, 24
+ ; Should just produce a MCLOH_AdrpLdr (not MCLOH_AdrpLdrGotLdr)
+ %x2 = ADRP target-flags(aarch64-page) @g3
+ %x2 = LDRXui %x2, target-flags(aarch64-pageoff) @g3
+ %x2 = LDRXui %x2, 24
+
+ bb.11:
+ ; CHECK-NEXT: Adding MCLOH_AdrpLdr
+ ; CHECK-NEXT: %X5<def> = ADRP <ga:@g1>
+ ; CHECK-NEXT: %X5<def> = LDRXui %X5, <ga:@g1>
+ ; CHECK-NEXT: Adding MCLOH_AdrpLdrGotStr:
+ ; CHECK-NEXT: %X1<def> = ADRP <ga:@g4>
+ ; CHECK-NEXT: %X1<def> = LDRXui %X1, <ga:@g4>
+ ; CHECK-NEXT: STRXui %XZR, %X1, 32
+ %x1 = ADRP target-flags(aarch64-page, aarch64-got) @g4
+ %x1 = LDRXui %x1, target-flags(aarch64-pageoff, aarch64-got) @g4
+ STRXui %xzr, %x1, 32
+ ; Should just produce a MCLOH_AdrpLdr (not MCLOH_AdrpLdrGotStr)
+ %x5 = ADRP target-flags(aarch64-page) @g1
+ %x5 = LDRXui %x5, target-flags(aarch64-pageoff) @g1
+ STRXui undef %x11, %x5, 32
+
+ bb.12:
+ ; CHECK-NOT: MCLOH_AdrpAdrp
+ ; CHECK: Adding MCLOH_AdrpAddLdr
+ ; %X9<def> = ADRP <ga:@g4>
+ ; %X9<def> = ADDXri %X9, <ga:@g4>
+ ; %X5<def> = LDRXui %X9, 0
+ %x9 = ADRP target-flags(aarch64-page, aarch64-got) @g4
+ %x9 = ADDXri %x9, target-flags(aarch64-pageoff, aarch64-got) @g4, 0
+ %x5 = LDRXui %x9, 0
+ %x9 = ADRP target-flags(aarch64-page, aarch64-got) @g5
+
+ bb.13:
+ successors: %bb.14
+ ; Cannot produce a LOH for multiple users
+ ; CHECK-NOT: MCLOH_AdrpAdd
+ %x10 = ADRP target-flags(aarch64-page) @g0
+ %x11 = ADDXri %x10, target-flags(aarch64-pageoff) @g0, 0
+ B %bb.14
+
+ bb.14:
+ liveins: %x10
+ %x12 = ADDXri %x10, target-flags(aarch64-pageoff) @g0, 0
+...
diff --git a/test/CodeGen/AArch64/machine-scheduler.mir b/test/CodeGen/AArch64/machine-scheduler.mir
index e7e0dda53c57..933afdb6da9b 100644
--- a/test/CodeGen/AArch64/machine-scheduler.mir
+++ b/test/CodeGen/AArch64/machine-scheduler.mir
@@ -21,8 +21,9 @@
# CHECK: LDRWui %x0, 0
# CHECK: LDRWui %x0, 1
# CHECK: STRWui %w1, %x0, 2
-name: load_imp-def
-body: |
+name: load_imp-def
+tracksRegLiveness: true
+body: |
bb.0.entry:
liveins: %w1, %x0
%w8 = LDRWui %x0, 1, implicit-def %x8 :: (load 4 from %ir.0)
diff --git a/test/CodeGen/AMDGPU/hsa-func.ll b/test/CodeGen/AMDGPU/hsa-func.ll
index 28c8b5d73b02..d9662b69b126 100644
--- a/test/CodeGen/AMDGPU/hsa-func.ll
+++ b/test/CodeGen/AMDGPU/hsa-func.ll
@@ -30,12 +30,11 @@
; ELF: Type: Function (0x2)
; ELF: }
+; HSA: .text
; HSA: .hsa_code_object_version 2,1
; HSA-CI: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
; HSA-VI: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
-; HSA: .text
-
; HSA-NOT: .amdgpu_hsa_kernel simple
; HSA: {{^}}simple:
; HSA: .amd_kernel_code_t
diff --git a/test/CodeGen/AMDGPU/hsa.ll b/test/CodeGen/AMDGPU/hsa.ll
index 78a5cdb576f5..12c15441c0f5 100644
--- a/test/CodeGen/AMDGPU/hsa.ll
+++ b/test/CodeGen/AMDGPU/hsa.ll
@@ -34,12 +34,12 @@
; ELF: Type: AMDGPU_HSA_KERNEL (0xA)
; ELF: }
+; HSA-NOT: .AMDGPU.config
+; HSA: .text
; HSA: .hsa_code_object_version 2,1
; HSA-CI: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
; HSA-VI: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
-; HSA: .text
-
; HSA: .amdgpu_hsa_kernel simple
; HSA: {{^}}simple:
; HSA: .amd_kernel_code_t
diff --git a/test/CodeGen/Generic/cfi-sections.ll b/test/CodeGen/Generic/cfi-sections.ll
new file mode 100644
index 000000000000..6e721d6df706
--- /dev/null
+++ b/test/CodeGen/Generic/cfi-sections.ll
@@ -0,0 +1,39 @@
+; When using Itanium ABI, do not emit .debug_frame.
+; RUN: llc -mtriple=i386--linux -o - < %s | FileCheck %s -check-prefix=WITHOUT
+; RUN: llc -mtriple=armv7-netbsd-eabi -o - < %s | FileCheck %s -check-prefix=WITHOUT
+
+; When using EHABI, do emit .debug_frame.
+; RUN: llc -mtriple=arm-linux -mcpu=cortex-a7 -mattr=v7 -o - < %s | FileCheck %s -check-prefix=WITH
+
+; REQUIRES: x86-registered-target
+; REQUIRES: arm-registered-target
+
+; WITH: .cfi_sections .debug_frame
+; WITHOUT-NOT: .cfi_sections
+
+define i32 @foo() #0 !dbg !7 {
+ %1 = call i32 @bar()
+ %2 = call i32 @bar()
+ %3 = add nsw i32 %1, %2
+ ret i32 %3
+}
+
+declare i32 @bar() #1
+
+attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="arm7tdmi" "target-features"="+soft-float,+strict-align,-crypto,-neon" "unsafe-fp-math"="false" "use-soft-float"="true" }
+attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="arm7tdmi" "target-features"="+soft-float,+strict-align,-crypto,-neon" "unsafe-fp-math"="false" "use-soft-float"="true" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5, !6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "cfi-sections.cc", directory: ".")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 4}
+!6 = !{i32 1, !"min_enum_size", i32 4}
+!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 2, type: !8, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
+!8 = !DISubroutineType(types: !9)
+!9 = !{!10}
+!10 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
diff --git a/test/CodeGen/MIR/AArch64/spill-fold.mir b/test/CodeGen/MIR/AArch64/spill-fold.mir
new file mode 100644
index 000000000000..05e7f7521ed5
--- /dev/null
+++ b/test/CodeGen/MIR/AArch64/spill-fold.mir
@@ -0,0 +1,82 @@
+# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass greedy -verify-machineinstrs -o - %s | FileCheck %s
+--- |
+ define i64 @test_subreg_spill_fold() { ret i64 0 }
+ define i64 @test_subreg_spill_fold2() { ret i64 0 }
+ define i64 @test_subreg_spill_fold3() { ret i64 0 }
+ define i64 @test_subreg_fill_fold() { ret i64 0 }
+ define double @test_subreg_fill_fold2() { ret double 0.0 }
+...
+---
+# CHECK-LABEL: name: test_subreg_spill_fold
+# Ensure that the spilled subreg COPY is eliminated and folded into the spill store.
+name: test_subreg_spill_fold
+registers:
+ - { id: 0, class: gpr64 }
+body: |
+ bb.0:
+ ; CHECK: STRXui %xzr, %stack.0, 0 :: (store 8 into %stack.0)
+ undef %0.sub_32 = COPY %wzr
+ INLINEASM $nop, 1, 12, implicit-def dead %x0, 12, implicit-def dead %x1, 12, implicit-def dead %x2, 12, implicit-def dead %x3, 12, implicit-def dead %x4, 12, implicit-def dead %x5, 12, implicit-def dead %x6, 12, implicit-def dead %x7, 12, implicit-def dead %x8, 12, implicit-def dead %x9, 12, implicit-def dead %x10, 12, implicit-def dead %x11, 12, implicit-def dead %x12, 12, implicit-def dead %x13, 12, implicit-def dead %x14, 12, implicit-def dead %x15, 12, implicit-def dead %x16, 12, implicit-def dead %x17, 12, implicit-def dead %x18, 12, implicit-def dead %x19, 12, implicit-def dead %x20, 12, implicit-def dead %x21, 12, implicit-def dead %x22, 12, implicit-def dead %x23, 12, implicit-def dead %x24, 12, implicit-def dead %x25, 12, implicit-def dead %x26, 12, implicit-def dead %x27, 12, implicit-def dead %x28, 12, implicit-def dead %fp, 12, implicit-def dead %lr, 12, implicit-def %sp
+ %x0 = COPY %0
+ RET_ReallyLR implicit %x0
+...
+---
+# CHECK-LABEL: name: test_subreg_spill_fold2
+# Similar to test_subreg_spill_fold, but with a vreg0 register class not containing %WZR.
+name: test_subreg_spill_fold2
+registers:
+ - { id: 0, class: gpr64sp }
+body: |
+ bb.0:
+ ; CHECK: STRXui %xzr, %stack.0, 0 :: (store 8 into %stack.0)
+ undef %0.sub_32 = COPY %wzr
+ INLINEASM $nop, 1, 12, implicit-def dead %x0, 12, implicit-def dead %x1, 12, implicit-def dead %x2, 12, implicit-def dead %x3, 12, implicit-def dead %x4, 12, implicit-def dead %x5, 12, implicit-def dead %x6, 12, implicit-def dead %x7, 12, implicit-def dead %x8, 12, implicit-def dead %x9, 12, implicit-def dead %x10, 12, implicit-def dead %x11, 12, implicit-def dead %x12, 12, implicit-def dead %x13, 12, implicit-def dead %x14, 12, implicit-def dead %x15, 12, implicit-def dead %x16, 12, implicit-def dead %x17, 12, implicit-def dead %x18, 12, implicit-def dead %x19, 12, implicit-def dead %x20, 12, implicit-def dead %x21, 12, implicit-def dead %x22, 12, implicit-def dead %x23, 12, implicit-def dead %x24, 12, implicit-def dead %x25, 12, implicit-def dead %x26, 12, implicit-def dead %x27, 12, implicit-def dead %x28, 12, implicit-def dead %fp, 12, implicit-def dead %lr, 12, implicit-def %sp
+ %x0 = ADDXri %0, 1, 0
+ RET_ReallyLR implicit %x0
+...
+---
+# CHECK-LABEL: name: test_subreg_spill_fold3
+# Similar to test_subreg_spill_fold, but with a cross register class copy.
+name: test_subreg_spill_fold3
+registers:
+ - { id: 0, class: fpr64 }
+body: |
+ bb.0:
+ ; CHECK: STRXui %xzr, %stack.0, 0 :: (store 8 into %stack.0)
+ undef %0.ssub = COPY %wzr
+ INLINEASM $nop, 1, 12, implicit-def dead %d0, 12, implicit-def dead %d1, 12, implicit-def dead %d2, 12, implicit-def dead %d3, 12, implicit-def dead %d4, 12, implicit-def dead %d5, 12, implicit-def dead %d6, 12, implicit-def dead %d7, 12, implicit-def dead %d8, 12, implicit-def dead %d9, 12, implicit-def dead %d10, 12, implicit-def dead %d11, 12, implicit-def dead %d12, 12, implicit-def dead %d13, 12, implicit-def dead %d14, 12, implicit-def dead %d15, 12, implicit-def dead %d16, 12, implicit-def dead %d17, 12, implicit-def dead %d18, 12, implicit-def dead %d19, 12, implicit-def dead %d20, 12, implicit-def dead %d21, 12, implicit-def dead %d22, 12, implicit-def dead %d23, 12, implicit-def dead %d24, 12, implicit-def dead %d25, 12, implicit-def dead %d26, 12, implicit-def dead %d27, 12, implicit-def dead %d28, 12, implicit-def dead %d29, 12, implicit-def dead %d30, 12, implicit-def %d31
+ %x0 = COPY %0
+ RET_ReallyLR implicit %x0
+...
+---
+# CHECK-LABEL: name: test_subreg_fill_fold
+# Ensure that the filled COPY is eliminated and folded into the fill load.
+name: test_subreg_fill_fold
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr64 }
+body: |
+ bb.0:
+ %0 = COPY %wzr
+ INLINEASM $nop, 1, 12, implicit-def dead %x0, 12, implicit-def dead %x1, 12, implicit-def dead %x2, 12, implicit-def dead %x3, 12, implicit-def dead %x4, 12, implicit-def dead %x5, 12, implicit-def dead %x6, 12, implicit-def dead %x7, 12, implicit-def dead %x8, 12, implicit-def dead %x9, 12, implicit-def dead %x10, 12, implicit-def dead %x11, 12, implicit-def dead %x12, 12, implicit-def dead %x13, 12, implicit-def dead %x14, 12, implicit-def dead %x15, 12, implicit-def dead %x16, 12, implicit-def dead %x17, 12, implicit-def dead %x18, 12, implicit-def dead %x19, 12, implicit-def dead %x20, 12, implicit-def dead %x21, 12, implicit-def dead %x22, 12, implicit-def dead %x23, 12, implicit-def dead %x24, 12, implicit-def dead %x25, 12, implicit-def dead %x26, 12, implicit-def dead %x27, 12, implicit-def dead %x28, 12, implicit-def dead %fp, 12, implicit-def dead %lr, 12, implicit-def %sp
+ ; CHECK: undef %1.sub_32 = LDRWui %stack.0, 0 :: (load 4 from %stack.0)
+ undef %1.sub_32 = COPY %0
+ %x0 = COPY %1
+ RET_ReallyLR implicit %x0
+...
+---
+# CHECK-LABEL: name: test_subreg_fill_fold2
+# Similar to test_subreg_fill_fold, but with a cross-class copy.
+name: test_subreg_fill_fold2
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: fpr64 }
+body: |
+ bb.0:
+ %0 = COPY %wzr
+ INLINEASM $nop, 1, 12, implicit-def dead %x0, 12, implicit-def dead %x1, 12, implicit-def dead %x2, 12, implicit-def dead %x3, 12, implicit-def dead %x4, 12, implicit-def dead %x5, 12, implicit-def dead %x6, 12, implicit-def dead %x7, 12, implicit-def dead %x8, 12, implicit-def dead %x9, 12, implicit-def dead %x10, 12, implicit-def dead %x11, 12, implicit-def dead %x12, 12, implicit-def dead %x13, 12, implicit-def dead %x14, 12, implicit-def dead %x15, 12, implicit-def dead %x16, 12, implicit-def dead %x17, 12, implicit-def dead %x18, 12, implicit-def dead %x19, 12, implicit-def dead %x20, 12, implicit-def dead %x21, 12, implicit-def dead %x22, 12, implicit-def dead %x23, 12, implicit-def dead %x24, 12, implicit-def dead %x25, 12, implicit-def dead %x26, 12, implicit-def dead %x27, 12, implicit-def dead %x28, 12, implicit-def dead %fp, 12, implicit-def dead %lr, 12, implicit-def %sp
+ ; CHECK: undef %1.ssub = LDRSui %stack.0, 0 :: (load 4 from %stack.0)
+ undef %1.ssub = COPY %0
+ %d0 = COPY %1
+ RET_ReallyLR implicit %d0
+...
diff --git a/test/CodeGen/MIR/X86/basic-block-liveins.mir b/test/CodeGen/MIR/X86/basic-block-liveins.mir
index 35f5512936ba..b347368a94b1 100644
--- a/test/CodeGen/MIR/X86/basic-block-liveins.mir
+++ b/test/CodeGen/MIR/X86/basic-block-liveins.mir
@@ -22,7 +22,8 @@
...
---
-name: test
+name: test
+tracksRegLiveness: true
body: |
; CHECK-LABEL: bb.0.body:
; CHECK-NEXT: liveins: %edi, %esi
@@ -33,7 +34,8 @@ body: |
RETQ %eax
...
---
-name: test2
+name: test2
+tracksRegLiveness: true
body: |
; CHECK-LABEL: name: test2
; Verify that we can have multiple lists of liveins that will be merged into
@@ -48,7 +50,8 @@ body: |
RETQ %eax
...
---
-name: test3
+name: test3
+tracksRegLiveness: true
body: |
; Verify that we can have an empty list of liveins.
; CHECK-LABEL: name: test3
diff --git a/test/CodeGen/MIR/X86/machine-verifier.mir b/test/CodeGen/MIR/X86/machine-verifier.mir
index c56bab8c998c..7421146c22ed 100644
--- a/test/CodeGen/MIR/X86/machine-verifier.mir
+++ b/test/CodeGen/MIR/X86/machine-verifier.mir
@@ -10,7 +10,8 @@
...
---
-name: inc
+name: inc
+tracksRegLiveness: true
body: |
bb.0.entry:
liveins: %edi
diff --git a/test/CodeGen/NVPTX/tid-range.ll b/test/CodeGen/NVPTX/tid-range.ll
new file mode 100644
index 000000000000..3dc4008810a1
--- /dev/null
+++ b/test/CodeGen/NVPTX/tid-range.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=nvptx64 | FileCheck %s
+declare i32 @get_register()
+
+define i1 @test1() {
+entry:
+ %call = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !0
+ %cmp = icmp eq i32 %call, 1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: test1(
+; CHECK: setp.eq.s32 %p1, %r1, 1;
+; CHECK: selp.u32 %[[R:.+]], 1, 0, %p1;
+; CHECK: st.param.b32 [func_retval0+0], %[[R]];
+
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+
+!0 = !{ i32 0, i32 3 }
diff --git a/test/CodeGen/X86/GlobalISel/irtranslator-call.ll b/test/CodeGen/X86/GlobalISel/irtranslator-call.ll
index 6cda38aa94fe..425d2609380e 100644
--- a/test/CodeGen/X86/GlobalISel/irtranslator-call.ll
+++ b/test/CodeGen/X86/GlobalISel/irtranslator-call.ll
@@ -24,7 +24,7 @@ define void @test_void_return() {
; CHECK-NEXT: hasVAStart: false
; CHECK-NEXT: hasMustTailInVarArgFunc: false
; CHECK-NEXT: body:
-; CHECK-NEXT: bb.1:
+; CHECK-NEXT: bb.1.entry:
; CHECK-NEXT: RET 0
entry:
ret void
diff --git a/tes